60 files changed, 1030 insertions, 519 deletions
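Editor's note: the bulk of this change mechanically migrates ART's hand-rolled GrowableArray to ArenaVector, whose interface follows std::vector. Below is a rough sketch of the API mapping seen throughout the hunks, written against std::vector so it compiles outside the ART tree (ArenaVector, Adapter() and the kArenaAlloc* tags come straight from the diff; the MoveOperands struct here is a simplified stand-in, not ART's type).

// Sketch: the GrowableArray -> ArenaVector idioms adopted in this change,
// demonstrated with std::vector (ArenaVector is an arena-backed std::vector equivalent).
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

struct MoveOperands { int source; int destination; };  // simplified stand-in

int main() {
  // Old: GrowableArray<MoveOperands> moves_(allocator, 32);
  // New: ArenaVector<MoveOperands> moves_(allocator->Adapter(...)); moves_.reserve(32);
  std::vector<MoveOperands> moves;
  moves.reserve(32);                        // replaces the GrowableArray capacity argument

  moves.push_back({1, 2});                  // Add()     -> push_back()/emplace_back()
  std::size_t n = moves.size();             // Size()    -> size()
  assert(n == 1);
  assert(!moves.empty());                   // IsEmpty() -> empty()

  std::size_t index = 0;
  assert(index < moves.size());             // mirrors the DCHECK_LT() added before indexed access
  MoveOperands& m = moves[index];           // Get(i)    -> operator[]
  std::cout << m.source << " -> " << m.destination << "\n";

  for (const MoveOperands& move : moves) {  // indexed Get(i) loops -> range-based for
    (void)move;
  }

  moves.clear();                            // Reset()   -> clear()
  return 0;
}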
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 7082bedc5e..d5ac34186b 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1126,7 +1126,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { for (size_t i = 0 ; i < core_vmap_table_.size(); ++i) { // Copy, stripping out the phys register sort key. vmap_encoder.PushBackUnsigned( - ~(-1 << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment)); + ~(~0u << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment)); } // Push a marker to take place of lr. vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); @@ -1141,7 +1141,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { for (size_t i = 0 ; i < fp_vmap_table_.size(); ++i) { // Copy, stripping out the phys register sort key. vmap_encoder.PushBackUnsigned( - ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); + ~(~0u << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); } } } else { diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 3c6a41df34..be05691741 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -203,13 +203,13 @@ class DisassemblyScope { void CodeGenerator::GenerateSlowPaths() { size_t code_start = 0; - for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) { + for (SlowPathCode* slow_path : slow_paths_) { if (disasm_info_ != nullptr) { code_start = GetAssembler()->CodeSize(); } - slow_paths_.Get(i)->EmitNativeCode(this); + slow_path->EmitNativeCode(this); if (disasm_info_ != nullptr) { - disasm_info_->AddSlowPathInterval(slow_paths_.Get(i), code_start, GetAssembler()->CodeSize()); + disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize()); } } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index a1c6db0a2c..b58a3ff7f2 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -261,7 +261,7 @@ class CodeGenerator { bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const; void AddSlowPath(SlowPathCode* slow_path) { - slow_paths_.Add(slow_path); + slow_paths_.push_back(slow_path); } void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; } @@ -441,10 +441,12 @@ class CodeGenerator { graph_(graph), compiler_options_(compiler_options), src_map_(nullptr), - slow_paths_(graph->GetArena(), 8), + slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), current_block_index_(0), is_leaf_(true), - requires_current_method_(false) {} + requires_current_method_(false) { + slow_paths_.reserve(8); + } // Register allocation logic. void AllocateRegistersLocally(HInstruction* instruction) const; @@ -485,8 +487,20 @@ class CodeGenerator { return instruction_set == kX86 || instruction_set == kX86_64; } - // Arm64 has its own type for a label, so we need to templatize this method + // Arm64 has its own type for a label, so we need to templatize these methods // to share the logic. 
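Editor's note: the two codegen_util.cc hunks above replace ~(-1 << VREG_NUM_WIDTH) with ~(~0u << VREG_NUM_WIDTH). Left-shifting a negative signed value is undefined behavior in C++, while the unsigned form produces the same low-bit mask with defined semantics. A minimal standalone illustration follows (the real VREG_NUM_WIDTH is defined elsewhere in ART; 16 here is only a placeholder).

// Sketch of the mask idiom fixed above: build a mask of the low N bits.
#include <cstdint>
#include <cstdio>

constexpr unsigned kVRegNumWidth = 16;  // placeholder for VREG_NUM_WIDTH

int main() {
  // Old form: ~(-1 << kVRegNumWidth). Shifting the negative value -1 to the left
  // is undefined behavior in C++ and only "works" by accident on common compilers.
  // New form: ~(~0u << kVRegNumWidth) builds the same low-bit mask from an
  // unsigned operand, which is well defined.
  uint32_t mask = ~(~0u << kVRegNumWidth);
  std::printf("mask = 0x%08x\n", mask);                   // 0x0000ffff for a 16-bit width

  uint32_t entry = 0x12345678u;
  std::printf("entry & mask = 0x%08x\n", entry & mask);   // keeps only the low 16 bits
  return 0;
}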
+ + template <typename LabelType> + LabelType* CommonInitializeLabels() { + size_t size = GetGraph()->GetBlocks().size(); + LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size, + kArenaAllocCodeGenerator); + for (size_t i = 0; i != size; ++i) { + new(labels + i) LabelType(); + } + return labels; + } + template <typename LabelType> LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const { block = FirstNonEmptyBlock(block); @@ -539,7 +553,7 @@ class CodeGenerator { // Native to dex_pc map used for native debugging/profiling tools. DefaultSrcMap* src_map_; - GrowableArray<SlowPathCode*> slow_paths_; + ArenaVector<SlowPathCode*> slow_paths_; // The current block index in `block_order_` of the block // we are generating code for. diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index d7b1d24887..da7a6755e9 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -432,7 +432,7 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -459,8 +459,8 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { for (HBasicBlock* block : *block_order_) { // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid // FirstNonEmptyBlock() which could lead to adjusting a label more than once. - DCHECK_LT(static_cast<size_t>(block->GetBlockId()), block_labels_.Size()); - Label* block_label = &block_labels_.GetRawStorage()[block->GetBlockId()]; + DCHECK_LT(block->GetBlockId(), GetGraph()->GetBlocks().size()); + Label* block_label = &block_labels_[block->GetBlockId()]; DCHECK_EQ(block_label->IsBound(), !block->IsSingleJump()); if (block_label->IsBound()) { __ AdjustLabelPosition(block_label); @@ -4034,7 +4034,8 @@ ArmAssembler* ParallelMoveResolverARM::GetAssembler() const { } void ParallelMoveResolverARM::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4166,7 +4167,8 @@ void ParallelMoveResolverARM::Exchange(int mem1, int mem2) { } void ParallelMoveResolverARM::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 1d98789213..111112e9b2 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -299,11 +299,11 @@ class CodeGeneratorARM : public CodeGenerator { void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -335,7 +335,7 @@ class CodeGeneratorARM : public CodeGenerator { Literal* 
DeduplicateMethodCodeLiteral(MethodReference target_method); // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. Label frame_entry_label_; LocationsBuilderARM location_builder_; InstructionCodeGeneratorARM instruction_visitor_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index d175532f4c..31900d536a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -610,7 +610,8 @@ void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { } void ParallelMoveResolverARM64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; codegen_->MoveLocation(move->GetDestination(), move->GetSource()); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 89671088c7..7178081bf8 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -326,12 +326,7 @@ class CodeGeneratorARM64 : public CodeGenerator { } void Initialize() OVERRIDE { - HGraph* graph = GetGraph(); - int length = graph->GetBlocks().size(); - block_labels_ = graph->GetArena()->AllocArray<vixl::Label>(length); - for (int i = 0; i < length; ++i) { - new(block_labels_ + i) vixl::Label(); - } + block_labels_ = CommonInitializeLabels<vixl::Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -400,7 +395,7 @@ class CodeGeneratorARM64 : public CodeGenerator { }; // Labels for each block that will be compiled. - vixl::Label* block_labels_; + vixl::Label* block_labels_; // Indexed by block id. vixl::Label frame_entry_label_; LocationsBuilderARM64 location_builder_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 25ef3880bd..c9f849318c 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -431,7 +431,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -453,12 +453,14 @@ Mips64Assembler* ParallelMoveResolverMIPS64::GetAssembler() const { } void ParallelMoveResolverMIPS64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType()); } void ParallelMoveResolverMIPS64::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType()); } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index f66ecb3711..16461d6c04 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -270,11 +270,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator { } Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - 
block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -315,7 +315,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { private: // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. Label frame_entry_label_; LocationsBuilderMIPS64 location_builder_; InstructionCodeGeneratorMIPS64 instruction_visitor_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index ab3d1d1924..277f6b48c8 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -470,7 +470,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, 0, compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -4630,7 +4630,8 @@ void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) { } void ParallelMoveResolverX86::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4782,7 +4783,8 @@ void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { } void ParallelMoveResolverX86::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f38e1ea09c..2c2fc65444 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -316,11 +316,11 @@ class CodeGeneratorX86 : public CodeGenerator { bool value_can_be_null); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { @@ -356,7 +356,7 @@ class CodeGeneratorX86 : public CodeGenerator { private: // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. 
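Editor's note: the block label storage above moves from GrowableArray<Label> to a raw arena array created by the new CommonInitializeLabels<LabelType>() helper, which default-constructs one label per basic block with placement new and lets GetLabelOf() index by block id. A self-contained sketch of that pattern, with plain operator new standing in for the arena allocator and a trivial Label stand-in:

// Sketch of the CommonInitializeLabels() pattern: allocate raw storage for one
// label per basic block, placement-new each slot, then index by block id.
#include <cstddef>
#include <new>

struct Label {            // stand-in for the assembler's Label type
  bool bound = false;
};

int main() {
  const std::size_t num_blocks = 8;  // GetGraph()->GetBlocks().size() in the real code

  // ART allocates this from the graph's arena via AllocArray<LabelType>(size, ...);
  // plain operator new stands in for the arena here.
  Label* block_labels = static_cast<Label*>(::operator new(num_blocks * sizeof(Label)));
  for (std::size_t i = 0; i != num_blocks; ++i) {
    new (block_labels + i) Label();  // placement new: default-construct each slot
  }

  // GetLabelOf(block) now reduces to indexing by block id.
  block_labels[3].bound = true;

  ::operator delete(block_labels);   // the arena-backed array is released with the arena instead
  return 0;
}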
Label frame_entry_label_; LocationsBuilderX86 location_builder_; InstructionCodeGeneratorX86 instruction_visitor_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index cfce7a0faa..453c6fd6e1 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -620,7 +620,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -4373,7 +4373,8 @@ X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const { } void ParallelMoveResolverX86_64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4531,7 +4532,8 @@ void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { } void ParallelMoveResolverX86_64::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 1ec3580040..197ce63847 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -286,11 +286,11 @@ class CodeGeneratorX86_64 : public CodeGenerator { void Move(Location destination, Location source); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { @@ -334,7 +334,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { }; // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. 
Label frame_entry_label_; LocationsBuilderX86_64 location_builder_; InstructionCodeGeneratorX86_64 instruction_visitor_; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index d05c514912..2c6c3b726a 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -374,6 +374,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << instance_of->MustDoNullCheck() << std::noboolalpha; } + void VisitArraySet(HArraySet* array_set) OVERRIDE { + StartAttributeStream("value_can_be_null") << std::boolalpha + << array_set->GetValueCanBeNull() << std::noboolalpha; + } + void VisitInvoke(HInvoke* invoke) OVERRIDE { StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex(); StartAttributeStream("method_name") << PrettyMethod( diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 0ac26de674..22bca2f111 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -71,7 +71,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitXor(HXor* instruction) OVERRIDE; void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; void VisitFakeString(HFakeString* fake_string) OVERRIDE; - bool IsDominatedByInputNullCheck(HInstruction* instr); + + bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; @@ -187,14 +188,18 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { } } -bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* instr) { - HInstruction* input = instr->InputAt(0); +bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInstruction* at) const { + if (!input->CanBeNull()) { + return true; + } + for (HUseIterator<HInstruction*> it(input->GetUses()); !it.Done(); it.Advance()) { HInstruction* use = it.Current()->GetUser(); - if (use->IsNullCheck() && use->StrictlyDominates(instr)) { + if (use->IsNullCheck() && use->StrictlyDominates(at)) { return true; } } + return false; } @@ -231,7 +236,7 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HInstruction* object = check_cast->InputAt(0); - if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { + if (CanEnsureNotNullAt(object, check_cast)) { check_cast->ClearMustDoNullCheck(); } @@ -267,7 +272,7 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HInstruction* object = instruction->InputAt(0); bool can_be_null = true; - if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + if (CanEnsureNotNullAt(object, instruction)) { can_be_null = false; instruction->ClearMustDoNullCheck(); } @@ -305,14 +310,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { if ((instruction->GetValue()->GetType() == Primitive::kPrimNot) - && !instruction->GetValue()->CanBeNull()) { + && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { instruction->ClearValueCanBeNull(); } } void InstructionSimplifierVisitor::VisitStaticFieldSet(HStaticFieldSet* instruction) { if ((instruction->GetValue()->GetType() == 
Primitive::kPrimNot) - && !instruction->GetValue()->CanBeNull()) { + && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { instruction->ClearValueCanBeNull(); } } @@ -437,7 +442,7 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { instruction->ClearNeedsTypeCheck(); } - if (!value->CanBeNull()) { + if (CanEnsureNotNullAt(value, instruction)) { instruction->ClearValueCanBeNull(); } } @@ -502,14 +507,45 @@ void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); - if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) { - // Replace code looking like - // AND dst, src, 0xFFF...FF - // with - // src - instruction->ReplaceWith(input_other); - instruction->GetBlock()->RemoveInstruction(instruction); - return; + if (input_cst != nullptr) { + int64_t value = Int64FromConstant(input_cst); + if (value == -1) { + // Replace code looking like + // AND dst, src, 0xFFF...FF + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + // Eliminate And from UShr+And if the And-mask contains all the bits that + // can be non-zero after UShr. Transform Shr+And to UShr if the And-mask + // precisely clears the shifted-in sign bits. + if ((input_other->IsUShr() || input_other->IsShr()) && input_other->InputAt(1)->IsConstant()) { + size_t reg_bits = (instruction->GetResultType() == Primitive::kPrimLong) ? 64 : 32; + size_t shift = Int64FromConstant(input_other->InputAt(1)->AsConstant()) & (reg_bits - 1); + size_t num_tail_bits_set = CTZ(value + 1); + if ((num_tail_bits_set >= reg_bits - shift) && input_other->IsUShr()) { + // This AND clears only bits known to be clear, for example "(x >>> 24) & 0xff". + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } else if ((num_tail_bits_set == reg_bits - shift) && IsPowerOfTwo(value + 1) && + input_other->HasOnlyOneNonEnvironmentUse()) { + DCHECK(input_other->IsShr()); // For UShr, we would have taken the branch above. + // Replace SHR+AND with USHR, for example "(x >> 24) & 0xff" -> "x >>> 24". + HUShr* ushr = new (GetGraph()->GetArena()) HUShr(instruction->GetType(), + input_other->InputAt(0), + input_other->InputAt(1), + input_other->GetDexPc()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, ushr); + input_other->GetBlock()->RemoveInstruction(input_other); + RecordSimplification(); + return; + } + } } // We assume that GVN has run before, so we only perform a pointer comparison. 
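Editor's note: the new VisitAnd() logic above folds "(x >>> shift) & mask" to "x >>> shift" when the mask covers every bit that can still be non-zero after the unsigned shift, and rewrites "(x >> shift) & mask" to "x >>> shift" when the mask clears exactly the shifted-in sign bits. A small numeric check of that reasoning, using plain 32-bit integers in place of the HShr/HUShr/HAnd nodes (arithmetic right shift of negative values is assumed, as on ART's targets):

// Sketch of the arithmetic behind the And-simplifications added above.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t x = static_cast<int32_t>(0xCAFEBABE);  // negative, so Shr shifts in sign bits
  const uint32_t u = static_cast<uint32_t>(x);

  // (1) UShr + And with a covering mask: the And is redundant, e.g. "(x >>> 24) & 0xff".
  assert(((u >> 24) & 0xffu) == (u >> 24));

  // (2) Shr + And with a mask of exactly (32 - shift) low bits: same as UShr,
  //     e.g. "(x >> 24) & 0xff" -> "x >>> 24".
  assert((static_cast<uint32_t>(x >> 24) & 0xffu) == (u >> 24));

  // The rewrite does not apply if the mask keeps any sign-extended bit.
  assert((static_cast<uint32_t>(x >> 24) & 0x1ffu) != (u >> 24));
  return 0;
}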
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 2eeba18a4d..76bd595fc1 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -22,7 +22,6 @@ #include "base/bit_field.h" #include "base/bit_vector.h" #include "base/value_object.h" -#include "utils/growable_array.h" namespace art { diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index ef89932e3b..989970fb49 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -22,7 +22,6 @@ #include "base/bit_utils.h" #include "base/stl_util.h" #include "mirror/class-inl.h" -#include "utils/growable_array.h" #include "scoped_thread_state_change.h" namespace art { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 26df2419a7..486968cf9e 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -35,7 +35,6 @@ #include "offsets.h" #include "primitive.h" #include "utils/arena_bit_vector.h" -#include "utils/growable_array.h" namespace art { @@ -5056,7 +5055,10 @@ static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove : public HTemplateInstruction<0> { public: explicit HParallelMove(ArenaAllocator* arena, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc), moves_(arena, kDefaultNumberOfMoves) {} + : HTemplateInstruction(SideEffects::None(), dex_pc), + moves_(arena->Adapter(kArenaAllocMoveOperands)) { + moves_.reserve(kDefaultNumberOfMoves); + } void AddMove(Location source, Location destination, @@ -5066,15 +5068,15 @@ class HParallelMove : public HTemplateInstruction<0> { DCHECK(destination.IsValid()); if (kIsDebugBuild) { if (instruction != nullptr) { - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - if (moves_.Get(i).GetInstruction() == instruction) { + for (const MoveOperands& move : moves_) { + if (move.GetInstruction() == instruction) { // Special case the situation where the move is for the spill slot // of the instruction. 
if ((GetPrevious() == instruction) || ((GetPrevious() == nullptr) && instruction->IsPhi() && instruction->GetBlock() == GetBlock())) { - DCHECK_NE(destination.GetKind(), moves_.Get(i).GetDestination().GetKind()) + DCHECK_NE(destination.GetKind(), move.GetDestination().GetKind()) << "Doing parallel moves for the same instruction."; } else { DCHECK(false) << "Doing parallel moves for the same instruction."; @@ -5082,26 +5084,27 @@ class HParallelMove : public HTemplateInstruction<0> { } } } - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK(!destination.OverlapsWith(moves_.Get(i).GetDestination())) + for (const MoveOperands& move : moves_) { + DCHECK(!destination.OverlapsWith(move.GetDestination())) << "Overlapped destination for two moves in a parallel move: " - << moves_.Get(i).GetSource() << " ==> " << moves_.Get(i).GetDestination() << " and " + << move.GetSource() << " ==> " << move.GetDestination() << " and " << source << " ==> " << destination; } } - moves_.Add(MoveOperands(source, destination, type, instruction)); + moves_.emplace_back(source, destination, type, instruction); } - MoveOperands* MoveOperandsAt(size_t index) const { - return moves_.GetRawStorage() + index; + MoveOperands* MoveOperandsAt(size_t index) { + DCHECK_LT(index, moves_.size()); + return &moves_[index]; } - size_t NumMoves() const { return moves_.Size(); } + size_t NumMoves() const { return moves_.size(); } DECLARE_INSTRUCTION(ParallelMove); private: - GrowableArray<MoveOperands> moves_; + ArenaVector<MoveOperands> moves_; DISALLOW_COPY_AND_ASSIGN(HParallelMove); }; diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index f9d812f6a6..fce776920d 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -16,6 +16,8 @@ #include <iostream> #include "parallel_move_resolver.h" + +#include "base/stl_util.h" #include "nodes.h" namespace art { @@ -28,19 +30,19 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { MoveOperands* move = parallel_move->MoveOperandsAt(i); if (!move->IsRedundant()) { - moves_.Add(move); + moves_.push_back(move); } } } void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) { - DCHECK(moves_.IsEmpty()); + DCHECK(moves_.empty()); // Build up a worklist of moves. BuildInitialMoveList(parallel_move); // Move stack/stack slot to take advantage of a free register on constrained machines. - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Ignore constants and moves already eliminated. if (move.IsEliminated() || move.GetSource().IsConstant()) { continue; @@ -52,8 +54,8 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } } - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Skip constants to perform them last. They don't block other moves // and skipping such moves with register destinations keeps those // registers free for the whole algorithm. @@ -63,8 +65,8 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } // Perform the moves with constant sources. 
- for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; if (!move->IsEliminated()) { DCHECK(move->GetSource().IsConstant()); EmitMove(i); @@ -73,7 +75,7 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } } - moves_.Reset(); + moves_.clear(); } Location LowOf(Location location) { @@ -123,7 +125,8 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // which means that a call to PerformMove could change any source operand // in the move graph. - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; DCHECK(!move->IsPending()); if (move->IsRedundant()) { // Because we swap register pairs first, following, un-pending @@ -143,8 +146,8 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // as this one's destination blocks this one so recursively perform all // such moves. MoveOperands* required_swap = nullptr; - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& other_move = *moves_[i]; if (other_move.Blocks(destination) && !other_move.IsPending()) { // Though PerformMove can change any source operand in the move graph, // calling `PerformMove` cannot create a blocking move via a swap @@ -163,7 +166,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // at the next moves. Swapping is not blocked by anything, it just // updates other moves's source. break; - } else if (required_swap == moves_.Get(i)) { + } else if (required_swap == moves_[i]) { // If `other_move` was swapped, we iterate again to find a new // potential cycle. required_swap = nullptr; @@ -171,7 +174,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } else if (required_swap != nullptr) { // A move is required to swap. We walk back the cycle to find the // move by just returning from this `PerforrmMove`. - moves_.Get(index)->ClearPending(destination); + moves_[index]->ClearPending(destination); return required_swap; } } @@ -197,14 +200,13 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { DCHECK_EQ(required_swap, move); do_swap = true; } else { - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); - if (other_move.Blocks(destination)) { - DCHECK(other_move.IsPending()); - if (!move->Is64BitMove() && other_move.Is64BitMove()) { + for (MoveOperands* other_move : moves_) { + if (other_move->Blocks(destination)) { + DCHECK(other_move->IsPending()); + if (!move->Is64BitMove() && other_move->Is64BitMove()) { // We swap 64bits moves before swapping 32bits moves. Go back from the // cycle by returning the move that must be swapped. 
- return moves_.Get(i); + return other_move; } do_swap = true; break; @@ -220,12 +222,11 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { Location source = move->GetSource(); Location swap_destination = move->GetDestination(); move->Eliminate(); - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); - if (other_move.Blocks(source)) { - UpdateSourceOf(moves_.Get(i), source, swap_destination); - } else if (other_move.Blocks(swap_destination)) { - UpdateSourceOf(moves_.Get(i), swap_destination, source); + for (MoveOperands* other_move : moves_) { + if (other_move->Blocks(source)) { + UpdateSourceOf(other_move, source, swap_destination); + } else if (other_move->Blocks(swap_destination)) { + UpdateSourceOf(other_move, swap_destination, source); } } // If the swap was required because of a 64bits move in the middle of a cycle, @@ -242,14 +243,14 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) { - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : moves_) { + if (move->Blocks(loc)) { return false; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->GetDestination().Equals(loc)) { + for (MoveOperands* move : moves_) { + if (move->GetDestination().Equals(loc)) { return true; } } @@ -302,8 +303,8 @@ ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() { void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { DCHECK_EQ(GetNumberOfPendingMoves(), 0u); - DCHECK(moves_.IsEmpty()); - DCHECK(scratches_.IsEmpty()); + DCHECK(moves_.empty()); + DCHECK(scratches_.empty()); // Backend dependent initialization. PrepareForEmitNativeCode(); @@ -311,8 +312,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Build up a worklist of moves. BuildInitialMoveList(parallel_move); - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Skip constants to perform them last. They don't block other moves and // skipping such moves with register destinations keeps those registers // free for the whole algorithm. @@ -324,8 +325,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Perform the moves with constant sources and register destinations with UpdateMoveSource() // to reduce the number of literal loads. Stack destinations are skipped since we won't be benefit // from changing the constant sources to stack locations. - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; Location destination = move->GetDestination(); if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) { Location source = move->GetSource(); @@ -344,8 +345,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { } // Perform the rest of the moves. - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; if (!move->IsEliminated()) { EmitMove(i); move->Eliminate(); @@ -358,19 +359,18 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Backend dependent cleanup. 
FinishEmitNativeCode(); - moves_.Reset(); - scratches_.Reset(); + moves_.clear(); + scratches_.clear(); } Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) { - for (size_t i = 0; i < scratches_.Size(); ++i) { - Location loc = scratches_.Get(i); + for (Location loc : scratches_) { if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { return loc; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - Location loc = moves_.Get(i)->GetDestination(); + for (MoveOperands* move : moves_) { + Location loc = move->GetDestination(); if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { return loc; } @@ -380,18 +380,18 @@ Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) { void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) { if (kIsDebugBuild) { - for (size_t i = 0; i < scratches_.Size(); ++i) { - DCHECK(!loc.Equals(scratches_.Get(i))); + for (Location scratch : scratches_) { + CHECK(!loc.Equals(scratch)); } } - scratches_.Add(loc); + scratches_.push_back(loc); } void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) { DCHECK(!IsBlockedByMoves(loc)); - for (size_t i = 0; i < scratches_.Size(); ++i) { - if (loc.Equals(scratches_.Get(i))) { - scratches_.DeleteAt(i); + for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) { + if (loc.Equals(*it)) { + scratches_.erase(it); break; } } @@ -406,7 +406,8 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { // we will update source operand in the move graph to reduce dependencies in // the graph. - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; DCHECK(!move->IsPending()); DCHECK(!move->IsEliminated()); if (move->IsRedundant()) { @@ -433,8 +434,8 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { // dependencies. Any unperformed, unpending move with a source the same // as this one's destination blocks this one so recursively perform all // such moves. - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& other_move = *moves_[i]; if (other_move.Blocks(destination) && !other_move.IsPending()) { PerformMove(i); } @@ -490,8 +491,11 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { move->Eliminate(); UpdateMoveSource(pending_source, pending_destination); // Free any unblocked locations in the scratch location list. - for (size_t i = 0; i < scratches_.Size(); ++i) { - Location scratch = scratches_.Get(i); + // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop. + // FIXME: If FreeScratchLocation() removes the location from scratches_, + // we skip the next location. This happens for arm64. + for (size_t i = 0; i < scratches_.size(); ++i) { + Location scratch = scratches_[i]; // Only scratch overlapping with performed move source can be unblocked. if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) { FreeScratchLocation(pending_source); @@ -512,8 +516,7 @@ void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { // This is not something we must do, but we can use fewer scratch locations with // this trick. For example, we can avoid using additional scratch locations for // moves (0 -> 1), (1 -> 2), (1 -> 0). 
- for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (MoveOperands* move : moves_) { if (move->GetSource().Equals(from)) { move->SetSource(to); } @@ -522,16 +525,15 @@ void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { void ParallelMoveResolverNoSwap::AddPendingMove(Location source, Location destination, Primitive::Type type) { - pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr)); + pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr)); } void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) { - pending_moves_.Delete(move); + RemoveElement(pending_moves_, move); } MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) { - for (size_t i = 0; i < pending_moves_.Size(); ++i) { - MoveOperands* move = pending_moves_.Get(i); + for (MoveOperands* move : pending_moves_) { Location destination = move->GetDestination(); // Only moves with destination overlapping with input loc can be unblocked. if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) { @@ -542,13 +544,13 @@ MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) } bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) { - for (size_t i = 0; i < pending_moves_.Size(); ++i) { - if (pending_moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : pending_moves_) { + if (move->Blocks(loc)) { return true; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : moves_) { + if (move->Blocks(loc)) { return true; } } @@ -558,7 +560,7 @@ bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) { // So far it is only used for debugging purposes to make sure all pending moves // have been performed. size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() { - return pending_moves_.Size(); + return pending_moves_.size(); } } // namespace art diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 9ede91013e..4278861690 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -17,8 +17,8 @@ #ifndef ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ #define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ +#include "base/arena_containers.h" #include "base/value_object.h" -#include "utils/growable_array.h" #include "locations.h" #include "primitive.h" @@ -31,7 +31,10 @@ class MoveOperands; // have their own subclass that implements corresponding virtual functions. class ParallelMoveResolver : public ValueObject { public: - explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {} + explicit ParallelMoveResolver(ArenaAllocator* allocator) + : moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)) { + moves_.reserve(32); + } virtual ~ParallelMoveResolver() {} // Resolve a set of parallel moves, emitting assembler instructions. @@ -41,7 +44,7 @@ class ParallelMoveResolver : public ValueObject { // Build the initial list of moves. 
void BuildInitialMoveList(HParallelMove* parallel_move); - GrowableArray<MoveOperands*> moves_; + ArenaVector<MoveOperands*> moves_; private: DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); @@ -120,8 +123,13 @@ class ParallelMoveResolverWithSwap : public ParallelMoveResolver { class ParallelMoveResolverNoSwap : public ParallelMoveResolver { public: explicit ParallelMoveResolverNoSwap(ArenaAllocator* allocator) - : ParallelMoveResolver(allocator), scratches_(allocator, 32), - pending_moves_(allocator, 8), allocator_(allocator) {} + : ParallelMoveResolver(allocator), + scratches_(allocator->Adapter(kArenaAllocParallelMoveResolver)), + pending_moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)), + allocator_(allocator) { + scratches_.reserve(32); + pending_moves_.reserve(8); + } virtual ~ParallelMoveResolverNoSwap() {} // Resolve a set of parallel moves, emitting assembler instructions. @@ -160,7 +168,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver { void RemoveScratchLocation(Location loc); // List of scratch locations. - GrowableArray<Location> scratches_; + ArenaVector<Location> scratches_; private: // Perform the move at the given index in `moves_` (possibly requiring other moves to satisfy @@ -183,7 +191,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver { size_t GetNumberOfPendingMoves(); // Additional pending moves which might be added to resolve dependency cycle. - GrowableArray<MoveOperands*> pending_moves_; + ArenaVector<MoveOperands*> pending_moves_; // Used to allocate pending MoveOperands. ArenaAllocator* const allocator_; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index f8f70105cf..da91cb811d 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -56,7 +56,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { : ParallelMoveResolverWithSwap(allocator) {} void EmitMove(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } @@ -68,7 +69,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { } void EmitSwap(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } @@ -127,7 +129,8 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {} void EmitMove(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index fb11d76320..40c75af6ef 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -56,6 +56,24 @@ class DeadPhiHandling : public ValueObject { DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling); }; +static bool HasConflictingEquivalent(HPhi* phi) { + if (phi->GetNext() == nullptr) { + return false; + } + HPhi* next = phi->GetNext()->AsPhi(); + if (next->GetRegNumber() == phi->GetRegNumber()) { + if (next->GetType() == Primitive::kPrimVoid) { + // We only get a void type for an equivalent phi we processed and found out + // it was conflicting. 
+ return true; + } else { + // Go to the next phi, in case it is also an equivalent. + return HasConflictingEquivalent(next); + } + } + return false; +} + bool DeadPhiHandling::UpdateType(HPhi* phi) { if (phi->IsDead()) { // Phi was rendered dead while waiting in the worklist because it was replaced @@ -87,21 +105,26 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) { if (new_type == Primitive::kPrimVoid) { new_type = input_type; } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) { + if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) { + // If we already asked for an equivalent of the input phi, but that equivalent + // ended up conflicting, make this phi conflicting too. + conflict = true; + break; + } HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input); if (equivalent == nullptr) { conflict = true; break; - } else { - phi->ReplaceInput(equivalent, i); - if (equivalent->IsPhi()) { - DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot); - // We created a new phi, but that phi has the same inputs as the old phi. We - // add it to the worklist to ensure its inputs can also be converted to reference. - // If not, it will remain dead, and the algorithm will make the current phi dead - // as well. - equivalent->AsPhi()->SetLive(); - AddToWorklist(equivalent->AsPhi()); - } + } + phi->ReplaceInput(equivalent, i); + if (equivalent->IsPhi()) { + DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot); + // We created a new phi, but that phi has the same inputs as the old phi. We + // add it to the worklist to ensure its inputs can also be converted to reference. + // If not, it will remain dead, and the algorithm will make the current phi dead + // as well. + equivalent->AsPhi()->SetLive(); + AddToWorklist(equivalent->AsPhi()); } } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) { new_type = Primitive::kPrimNot; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 1f0bac59e0..f27cecc8fa 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -30,8 +30,8 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, current_entry_.sp_mask = sp_mask; current_entry_.num_dex_registers = num_dex_registers; current_entry_.inlining_depth = inlining_depth; - current_entry_.dex_register_locations_start_index = dex_register_locations_.Size(); - current_entry_.inline_infos_start_index = inline_infos_.Size(); + current_entry_.dex_register_locations_start_index = dex_register_locations_.size(); + current_entry_.inline_infos_start_index = inline_infos_.size(); current_entry_.dex_register_map_hash = 0; current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound; if (num_dex_registers != 0) { @@ -55,7 +55,7 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, void StackMapStream::EndStackMapEntry() { current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap(); - stack_maps_.Add(current_entry_); + stack_maps_.push_back(current_entry_); current_entry_ = StackMapEntry(); } @@ -73,12 +73,12 @@ void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t auto it = location_catalog_entries_indices_.Find(location); if (it != location_catalog_entries_indices_.end()) { // Retrieve the index from the hash map. - dex_register_locations_.Add(it->second); + dex_register_locations_.push_back(it->second); } else { // Create a new entry in the location catalog and the hash map. 
- size_t index = location_catalog_entries_.Size(); - location_catalog_entries_.Add(location); - dex_register_locations_.Add(index); + size_t index = location_catalog_entries_.size(); + location_catalog_entries_.push_back(location); + dex_register_locations_.push_back(index); location_catalog_entries_indices_.Insert(std::make_pair(location, index)); } @@ -108,7 +108,7 @@ void StackMapStream::BeginInlineInfoEntry(uint32_t method_index, current_inline_info_.dex_pc = dex_pc; current_inline_info_.invoke_type = invoke_type; current_inline_info_.num_dex_registers = num_dex_registers; - current_inline_info_.dex_register_locations_start_index = dex_register_locations_.Size(); + current_inline_info_.dex_register_locations_start_index = dex_register_locations_.size(); if (num_dex_registers != 0) { current_inline_info_.live_dex_registers_mask = new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true); @@ -123,14 +123,14 @@ void StackMapStream::EndInlineInfoEntry() { DCHECK_EQ(current_dex_register_, current_inline_info_.num_dex_registers) << "Inline information contains less registers than expected"; in_inline_frame_ = false; - inline_infos_.Add(current_inline_info_); + inline_infos_.push_back(current_inline_info_); current_inline_info_ = InlineInfoEntry(); } uint32_t StackMapStream::ComputeMaxNativePcOffset() const { uint32_t max_native_pc_offset = 0u; - for (size_t i = 0, size = stack_maps_.Size(); i != size; ++i) { - max_native_pc_offset = std::max(max_native_pc_offset, stack_maps_.Get(i).native_pc_offset); + for (const StackMapEntry& entry : stack_maps_) { + max_native_pc_offset = std::max(max_native_pc_offset, entry.native_pc_offset); } return max_native_pc_offset; } @@ -147,7 +147,7 @@ size_t StackMapStream::PrepareForFillIn() { dex_pc_max_, max_native_pc_offset, register_mask_max_); - stack_maps_size_ = stack_maps_.Size() * stack_map_encoding_.ComputeStackMapSize(); + stack_maps_size_ = stack_maps_.size() * stack_map_encoding_.ComputeStackMapSize(); dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize(); // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned. @@ -170,33 +170,28 @@ size_t StackMapStream::PrepareForFillIn() { size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const { size_t size = DexRegisterLocationCatalog::kFixedSize; - for (size_t location_catalog_entry_index = 0; - location_catalog_entry_index < location_catalog_entries_.Size(); - ++location_catalog_entry_index) { - DexRegisterLocation dex_register_location = - location_catalog_entries_.Get(location_catalog_entry_index); + for (const DexRegisterLocation& dex_register_location : location_catalog_entries_) { size += DexRegisterLocationCatalog::EntrySize(dex_register_location); } return size; } size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask) const { + const BitVector* live_dex_registers_mask) const { + // For num_dex_registers == 0u live_dex_registers_mask may be null. + if (num_dex_registers == 0u) { + return 0u; // No register map will be emitted. + } + DCHECK(live_dex_registers_mask != nullptr); + // Size of the map in bytes. size_t size = DexRegisterMap::kFixedSize; // Add the live bit mask for the Dex register liveness. size += DexRegisterMap::GetLiveBitMaskSize(num_dex_registers); // Compute the size of the set of live Dex register entries. 
- size_t number_of_live_dex_registers = 0; - for (size_t dex_register_number = 0; - dex_register_number < num_dex_registers; - ++dex_register_number) { - if (live_dex_registers_mask.IsBitSet(dex_register_number)) { - ++number_of_live_dex_registers; - } - } + size_t number_of_live_dex_registers = live_dex_registers_mask->NumSetBits(); size_t map_entries_size_in_bits = - DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) + DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.size()) * number_of_live_dex_registers; size_t map_entries_size_in_bytes = RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; @@ -207,24 +202,24 @@ size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers, size_t StackMapStream::ComputeDexRegisterMapsSize() const { size_t size = 0; size_t inline_info_index = 0; - for (size_t i = 0; i < stack_maps_.Size(); ++i) { - StackMapEntry entry = stack_maps_.Get(i); + for (const StackMapEntry& entry : stack_maps_) { if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) { - size += ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask); + size += ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask); } else { // Entries with the same dex map will have the same offset. } for (size_t j = 0; j < entry.inlining_depth; ++j) { - InlineInfoEntry inline_entry = inline_infos_.Get(inline_info_index++); + DCHECK_LT(inline_info_index, inline_infos_.size()); + InlineInfoEntry inline_entry = inline_infos_[inline_info_index++]; size += ComputeDexRegisterMapSize(inline_entry.num_dex_registers, - *inline_entry.live_dex_registers_mask); + inline_entry.live_dex_registers_mask); } } return size; } size_t StackMapStream::ComputeInlineInfoSize() const { - return inline_infos_.Size() * InlineInfo::SingleEntrySize() + return inline_infos_.size() * InlineInfo::SingleEntrySize() // For encoding the depth. + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); } @@ -244,19 +239,18 @@ void StackMapStream::FillIn(MemoryRegion region) { inline_infos_start_, inline_info_size_); code_info.SetEncoding(stack_map_encoding_); - code_info.SetNumberOfStackMaps(stack_maps_.Size()); + code_info.SetNumberOfStackMaps(stack_maps_.size()); DCHECK_EQ(code_info.GetStackMapsSize(code_info.ExtractEncoding()), stack_maps_size_); // Set the Dex register location catalog. - code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.Size()); + code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.size()); MemoryRegion dex_register_location_catalog_region = region.Subregion( dex_register_location_catalog_start_, dex_register_location_catalog_size_); DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); // Offset in `dex_register_location_catalog` where to store the next // register location. 
size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; - for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { - DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); + for (DexRegisterLocation dex_register_location : location_catalog_entries_) { dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); } @@ -265,9 +259,9 @@ void StackMapStream::FillIn(MemoryRegion region) { uintptr_t next_dex_register_map_offset = 0; uintptr_t next_inline_info_offset = 0; - for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) { + for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) { StackMap stack_map = code_info.GetStackMapAt(i, stack_map_encoding_); - StackMapEntry entry = stack_maps_.Get(i); + StackMapEntry entry = stack_maps_[i]; stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc); stack_map.SetNativePcOffset(stack_map_encoding_, entry.native_pc_offset); @@ -291,7 +285,7 @@ void StackMapStream::FillIn(MemoryRegion region) { // New dex registers maps should be added to the stack map. MemoryRegion register_region = dex_register_locations_region.Subregion( next_dex_register_map_offset, - ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask)); + ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); stack_map.SetDexRegisterMapOffset( @@ -318,8 +312,9 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map_encoding_, inline_region.start() - dex_register_locations_region.start()); inline_info.SetDepth(entry.inlining_depth); + DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size()); for (size_t depth = 0; depth < entry.inlining_depth; ++depth) { - InlineInfoEntry inline_entry = inline_infos_.Get(depth + entry.inline_infos_start_index); + InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index]; inline_info.SetMethodIndexAtDepth(depth, inline_entry.method_index); inline_info.SetDexPcAtDepth(depth, inline_entry.dex_pc); inline_info.SetInvokeTypeAtDepth(depth, inline_entry.invoke_type); @@ -331,7 +326,7 @@ void StackMapStream::FillIn(MemoryRegion region) { MemoryRegion register_region = dex_register_locations_region.Subregion( next_dex_register_map_offset, ComputeDexRegisterMapSize(inline_entry.num_dex_registers, - *inline_entry.live_dex_registers_mask)); + inline_entry.live_dex_registers_mask)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); inline_info.SetDexRegisterMapOffsetAtDepth( @@ -357,42 +352,43 @@ void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map, uint32_t start_index_in_dex_register_locations) const { dex_register_map.SetLiveBitMask(num_dex_registers, live_dex_registers_mask); // Set the dex register location mapping data. 
- for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; - dex_register_number < num_dex_registers; - ++dex_register_number) { - if (live_dex_registers_mask.IsBitSet(dex_register_number)) { - size_t location_catalog_entry_index = dex_register_locations_.Get( - start_index_in_dex_register_locations + index_in_dex_register_locations); - dex_register_map.SetLocationCatalogEntryIndex( - index_in_dex_register_locations, - location_catalog_entry_index, - num_dex_registers, - location_catalog_entries_.Size()); - ++index_in_dex_register_locations; - } + size_t number_of_live_dex_registers = live_dex_registers_mask.NumSetBits(); + DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); + DCHECK_LE(start_index_in_dex_register_locations, + dex_register_locations_.size() - number_of_live_dex_registers); + for (size_t index_in_dex_register_locations = 0; + index_in_dex_register_locations != number_of_live_dex_registers; + ++index_in_dex_register_locations) { + size_t location_catalog_entry_index = dex_register_locations_[ + start_index_in_dex_register_locations + index_in_dex_register_locations]; + dex_register_map.SetLocationCatalogEntryIndex( + index_in_dex_register_locations, + location_catalog_entry_index, + num_dex_registers, + location_catalog_entries_.size()); } } size_t StackMapStream::FindEntryWithTheSameDexMap() { - size_t current_entry_index = stack_maps_.Size(); + size_t current_entry_index = stack_maps_.size(); auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash); if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { // We don't have a perfect hash functions so we need a list to collect all stack maps // which might have the same dex register map. - GrowableArray<uint32_t> stack_map_indices(allocator_, 1); - stack_map_indices.Add(current_entry_index); - dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices); + ArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream)); + stack_map_indices.push_back(current_entry_index); + dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, + std::move(stack_map_indices)); return kNoSameDexMapFound; } // We might have collisions, so we need to check whether or not we really have a match. 
- for (size_t i = 0; i < entries_it->second.Size(); i++) { - size_t test_entry_index = entries_it->second.Get(i); - if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) { + for (uint32_t test_entry_index : entries_it->second) { + if (HaveTheSameDexMaps(GetStackMap(test_entry_index), current_entry_)) { return test_entry_index; } } - entries_it->second.Add(current_entry_index); + entries_it->second.push_back(current_entry_index); return kNoSameDexMapFound; } @@ -406,21 +402,22 @@ bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEn if (a.num_dex_registers != b.num_dex_registers) { return false; } - - int index_in_dex_register_locations = 0; - for (uint32_t i = 0; i < a.num_dex_registers; i++) { - if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) { + if (a.num_dex_registers != 0u) { + DCHECK(a.live_dex_registers_mask != nullptr); + DCHECK(b.live_dex_registers_mask != nullptr); + if (!a.live_dex_registers_mask->Equal(b.live_dex_registers_mask)) { return false; } - if (a.live_dex_registers_mask->IsBitSet(i)) { - size_t a_loc = dex_register_locations_.Get( - a.dex_register_locations_start_index + index_in_dex_register_locations); - size_t b_loc = dex_register_locations_.Get( - b.dex_register_locations_start_index + index_in_dex_register_locations); - if (a_loc != b_loc) { - return false; - } - ++index_in_dex_register_locations; + size_t number_of_live_dex_registers = a.live_dex_registers_mask->NumSetBits(); + DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); + DCHECK_LE(a.dex_register_locations_start_index, + dex_register_locations_.size() - number_of_live_dex_registers); + DCHECK_LE(b.dex_register_locations_start_index, + dex_register_locations_.size() - number_of_live_dex_registers); + auto a_begin = dex_register_locations_.begin() + a.dex_register_locations_start_index; + auto b_begin = dex_register_locations_.begin() + b.dex_register_locations_start_index; + if (!std::equal(a_begin, a_begin + number_of_live_dex_registers, b_begin)) { + return false; } } return true; diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 703b6f7e13..4783e283b3 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -24,7 +24,6 @@ #include "memory_region.h" #include "nodes.h" #include "stack_map.h" -#include "utils/growable_array.h" namespace art { @@ -62,15 +61,16 @@ class StackMapStream : public ValueObject { public: explicit StackMapStream(ArenaAllocator* allocator) : allocator_(allocator), - stack_maps_(allocator, 10), - location_catalog_entries_(allocator, 4), - dex_register_locations_(allocator, 10 * 4), - inline_infos_(allocator, 2), + stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)), + location_catalog_entries_(allocator->Adapter(kArenaAllocStackMapStream)), + dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)), + inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)), stack_mask_max_(-1), dex_pc_max_(0), register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), - dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()), + dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), + allocator->Adapter(kArenaAllocStackMapStream)), current_entry_(), current_inline_info_(), stack_mask_size_(0), @@ -84,7 +84,12 @@ class StackMapStream : public ValueObject { inline_infos_start_(0), needed_size_(0), current_dex_register_(0), - in_inline_frame_(false) 
{} + in_inline_frame_(false) { + stack_maps_.reserve(10); + location_catalog_entries_.reserve(4); + dex_register_locations_.reserve(10 * 4); + inline_infos_.reserve(2); + } // See runtime/stack_map.h to know what these fields contain. struct StackMapEntry { @@ -127,17 +132,17 @@ class StackMapStream : public ValueObject { void EndInlineInfoEntry(); size_t GetNumberOfStackMaps() const { - return stack_maps_.Size(); + return stack_maps_.size(); } const StackMapEntry& GetStackMap(size_t i) const { - DCHECK_LT(i, stack_maps_.Size()); - return stack_maps_.GetRawStorage()[i]; + DCHECK_LT(i, stack_maps_.size()); + return stack_maps_[i]; } void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { - DCHECK_LT(i, stack_maps_.Size()); - stack_maps_.GetRawStorage()[i].native_pc_offset = native_pc_offset; + DCHECK_LT(i, stack_maps_.size()); + stack_maps_[i].native_pc_offset = native_pc_offset; } uint32_t ComputeMaxNativePcOffset() const; @@ -150,7 +155,7 @@ class StackMapStream : public ValueObject { private: size_t ComputeDexRegisterLocationCatalogSize() const; size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask) const; + const BitVector* live_dex_registers_mask) const; size_t ComputeDexRegisterMapsSize() const; size_t ComputeInlineInfoSize() const; @@ -164,10 +169,10 @@ class StackMapStream : public ValueObject { uint32_t start_index_in_dex_register_locations) const; ArenaAllocator* allocator_; - GrowableArray<StackMapEntry> stack_maps_; + ArenaVector<StackMapEntry> stack_maps_; // A catalog of unique [location_kind, register_value] pairs (per method). - GrowableArray<DexRegisterLocation> location_catalog_entries_; + ArenaVector<DexRegisterLocation> location_catalog_entries_; // Map from Dex register location catalog entries to their indices in the // location catalog. typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn, @@ -175,14 +180,14 @@ class StackMapStream : public ValueObject { LocationCatalogEntriesIndices location_catalog_entries_indices_; // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`. - GrowableArray<size_t> dex_register_locations_; - GrowableArray<InlineInfoEntry> inline_infos_; + ArenaVector<size_t> dex_register_locations_; + ArenaVector<InlineInfoEntry> inline_infos_; int stack_mask_max_; uint32_t dex_pc_max_; uint32_t register_mask_max_; size_t number_of_stack_maps_with_inline_info_; - ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_; + ArenaSafeMap<uint32_t, ArenaVector<uint32_t>> dex_map_hash_to_stack_map_indices_; StackMapEntry current_entry_; InlineInfoEntry current_inline_info_; diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h deleted file mode 100644 index f85e026f16..0000000000 --- a/compiler/utils/growable_array.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_UTILS_GROWABLE_ARRAY_H_ -#define ART_COMPILER_UTILS_GROWABLE_ARRAY_H_ - -#include <stdint.h> -#include <stddef.h> - -#include "base/arena_object.h" - -namespace art { - -// Deprecated -// TODO: Replace all uses with ArenaVector<T>. -template<typename T> -class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> { - public: - GrowableArray(ArenaAllocator* arena, size_t init_length) - : arena_(arena), - num_allocated_(init_length), - num_used_(0) { - elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); - } - - GrowableArray(ArenaAllocator* arena, size_t init_length, T initial_data) - : arena_(arena), - num_allocated_(init_length), - num_used_(init_length) { - elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); - for (size_t i = 0; i < init_length; ++i) { - elem_list_[i] = initial_data; - } - } - - bool Contains(T value, size_t start_from = 0) const { - for (size_t i = start_from; i < num_used_; ++i) { - if (elem_list_[i] == value) { - return true; - } - } - return false; - } - - // Expand the list size to at least new length. - void Resize(size_t new_length) { - if (new_length <= num_allocated_) return; - // If it's a small list double the size, else grow 1.5x. - size_t target_length = - (num_allocated_ < 128) ? num_allocated_ << 1 : num_allocated_ + (num_allocated_ >> 1); - if (new_length > target_length) { - target_length = new_length; - } - T* new_array = arena_->AllocArray<T>(target_length, kArenaAllocGrowableArray); - memcpy(new_array, elem_list_, sizeof(T) * num_allocated_); - num_allocated_ = target_length; - elem_list_ = new_array; - } - - // NOTE: does not return storage, just resets use count. - void Reset() { - num_used_ = 0; - } - - // Insert an element to the end of a list, resizing if necessary. - void Insert(T elem) { - if (num_used_ == num_allocated_) { - Resize(num_used_ + 1); - } - elem_list_[num_used_++] = elem; - } - - void InsertAt(size_t index, T elem) { - DCHECK(index <= Size()); - Insert(elem); - for (size_t i = Size() - 1; i > index; --i) { - elem_list_[i] = elem_list_[i - 1]; - } - elem_list_[index] = elem; - } - - void Add(T elem) { - Insert(elem); - } - - T Get(size_t index) const { - DCHECK_LT(index, num_used_); - return elem_list_[index]; - } - - // Overwrite existing element at position index. List must be large enough. - void Put(size_t index, T elem) { - DCHECK_LT(index, num_used_); - elem_list_[index] = elem; - } - - void Increment(size_t index) { - DCHECK_LT(index, num_used_); - elem_list_[index]++; - } - - /* - * Remove an existing element from list. If there are more than one copy - * of the element, only the first one encountered will be deleted. - */ - // TODO: consider renaming this. - void Delete(T element) { - bool found = false; - for (size_t i = 0; i < num_used_ - 1; i++) { - if (!found && elem_list_[i] == element) { - found = true; - } - if (found) { - elem_list_[i] = elem_list_[i+1]; - } - } - // We should either have found the element, or it was the last (unscanned) element. 
- DCHECK(found || (element == elem_list_[num_used_ - 1])); - num_used_--; - } - - void DeleteAt(size_t index) { - for (size_t i = index; i < num_used_ - 1; i++) { - elem_list_[i] = elem_list_[i + 1]; - } - num_used_--; - } - - size_t GetNumAllocated() const { return num_allocated_; } - - size_t Size() const { return num_used_; } - - bool IsEmpty() const { return num_used_ == 0; } - - T Pop() { - DCHECK_GE(num_used_, (size_t)0); - return elem_list_[--num_used_]; - } - - T Peek() const { - DCHECK_GE(num_used_, (size_t)0); - return elem_list_[num_used_ - 1]; - } - - void SetSize(size_t new_size) { - Resize(new_size); - num_used_ = new_size; - } - - T* GetRawStorage() const { return elem_list_; } - - private: - ArenaAllocator* const arena_; - size_t num_allocated_; - size_t num_used_; - T* elem_list_; -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_GROWABLE_ARRAY_H_ diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc index 403d348752..8f6b1ff0a5 100644 --- a/runtime/arch/arm/context_arm.cc +++ b/runtime/arch/arm/context_arm.cc @@ -30,9 +30,11 @@ void ArmContext::Reset() { std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; gprs_[PC] = &pc_; + gprs_[R0] = &arg0_; // Initialize registers with easy to spot debug values. sp_ = ArmContext::kBadGprBase + SP; pc_ = ArmContext::kBadGprBase + PC; + arg0_ = 0; } void ArmContext::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h index 77bb5c8399..ea31055e9d 100644 --- a/runtime/arch/arm/context_arm.h +++ b/runtime/arch/arm/context_arm.h @@ -45,6 +45,10 @@ class ArmContext : public Context { SetGPR(PC, new_pc); } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(R0, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); return gprs_[reg] != nullptr; @@ -84,7 +88,7 @@ class ArmContext : public Context { uintptr_t* gprs_[kNumberOfCoreRegisters]; uint32_t* fprs_[kNumberOfSRegisters]; // Hold values for sp and pc if they are not located within a stack frame. - uintptr_t sp_, pc_; + uintptr_t sp_, pc_, arg0_; }; } // namespace arm diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index e45d828584..dc1cf8ab51 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -437,8 +437,8 @@ ARM_ENTRY art_quick_do_long_jump ldr r14, [r0, #56] @ (LR from gprs_ 56=4*14) add r0, r0, #12 @ increment r0 to skip gprs_[0..2] 12=4*3 ldm r0, {r3-r13} @ load remaining gprs from argument gprs_ - mov r0, #0 @ clear result registers r0 and r1 - mov r1, #0 + ldr r0, [r0, #-12] @ load r0 value + mov r1, #0 @ clear result register r1 bx r2 @ do long jump END art_quick_do_long_jump @@ -1142,7 +1142,7 @@ END art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. 
*/ .extern artDeoptimizeFromCompiledCode ENTRY art_quick_deoptimize_from_compiled_code diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc index 60becc6aea..4477631c67 100644 --- a/runtime/arch/arm64/context_arm64.cc +++ b/runtime/arch/arm64/context_arm64.cc @@ -31,10 +31,12 @@ void Arm64Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; - gprs_[LR] = &pc_; + gprs_[kPC] = &pc_; + gprs_[X0] = &arg0_; // Initialize registers with easy to spot debug values. sp_ = Arm64Context::kBadGprBase + SP; - pc_ = Arm64Context::kBadGprBase + LR; + pc_ = Arm64Context::kBadGprBase + kPC; + arg0_ = 0; } void Arm64Context::FillCalleeSaves(const StackVisitor& fr) { @@ -58,8 +60,8 @@ void Arm64Context::FillCalleeSaves(const StackVisitor& fr) { } void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); - DCHECK_NE(reg, static_cast<uint32_t>(XZR)); + DCHECK_LT(reg, arraysize(gprs_)); + // Note: we use kPC == XZR, so do not ensure that reg != XZR. DCHECK(IsAccessibleGPR(reg)); DCHECK_NE(gprs_[reg], &gZero); // Can't overwrite this static value since they are never reset. *gprs_[reg] = value; @@ -124,13 +126,13 @@ void Arm64Context::SmashCallerSaves() { extern "C" NO_RETURN void art_quick_do_long_jump(uint64_t*, uint64_t*); void Arm64Context::DoLongJump() { - uint64_t gprs[kNumberOfXRegisters]; + uint64_t gprs[arraysize(gprs_)]; uint64_t fprs[kNumberOfDRegisters]; // The long jump routine called below expects to find the value for SP at index 31. DCHECK_EQ(SP, 31); - for (size_t i = 0; i < kNumberOfXRegisters; ++i) { + for (size_t i = 0; i < arraysize(gprs_); ++i) { gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i; } for (size_t i = 0; i < kNumberOfDRegisters; ++i) { diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h index 1c99f3c42d..11314e08ed 100644 --- a/runtime/arch/arm64/context_arm64.h +++ b/runtime/arch/arm64/context_arm64.h @@ -42,20 +42,25 @@ class Arm64Context : public Context { } void SetPC(uintptr_t new_lr) OVERRIDE { - SetGPR(LR, new_lr); + SetGPR(kPC, new_lr); + } + + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(X0, new_arg0_value); } bool IsAccessibleGPR(uint32_t reg) OVERRIDE { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); + DCHECK_LT(reg, arraysize(gprs_)); return gprs_[reg] != nullptr; } uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); + DCHECK_LT(reg, arraysize(gprs_)); return gprs_[reg]; } uintptr_t GetGPR(uint32_t reg) OVERRIDE { + // Note: PC isn't an available GPR (outside of internals), so don't allow retrieving the value. DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); DCHECK(IsAccessibleGPR(reg)); return *gprs_[reg]; @@ -79,12 +84,15 @@ class Arm64Context : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + static constexpr size_t kPC = kNumberOfXRegisters; + private: - // Pointers to register locations, initialized to null or the specific registers below. - uintptr_t* gprs_[kNumberOfXRegisters]; + // Pointers to register locations, initialized to null or the specific registers below. We need + // an additional one for the PC. + uintptr_t* gprs_[kNumberOfXRegisters + 1]; uint64_t * fprs_[kNumberOfDRegisters]; - // Hold values for sp and pc if they are not located within a stack frame. 
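On arm64 the context now reserves one extra pointer slot past the real X registers and addresses the PC through it (kPC = kNumberOfXRegisters), so SetPC() can reuse the generic SetGPR() path while GetGPR() keeps rejecting anything at or above kNumberOfXRegisters. A simplified, self-contained sketch of that shape — the register count and names here are stand-ins, not ART's enums:

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kNumRegs = 33;        // stand-in for kNumberOfXRegisters
    constexpr size_t kPC = kNumRegs;       // extra, internal-only slot

    struct SketchArm64Context {
      uintptr_t* gprs[kNumRegs + 1] = {};  // +1 slot so the PC can live here too
      uintptr_t pc = 0;

      void Reset() { gprs[kPC] = &pc; }
      void SetGPR(size_t reg, uintptr_t value) {       // accepts kPC
        if (reg <= kNumRegs && gprs[reg] != nullptr) {
          *gprs[reg] = value;
        }
      }
      bool GetGPR(size_t reg, uintptr_t* out) const {  // PC is not a readable GPR
        if (reg >= kNumRegs || gprs[reg] == nullptr) {
          return false;
        }
        *out = *gprs[reg];
        return true;
      }
    };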
- uintptr_t sp_, pc_; + // Hold values for sp, pc and arg0 if they are not located within a stack frame. + uintptr_t sp_, pc_, arg0_; }; } // namespace arm64 diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 169bc384a8..68121781ca 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -941,7 +941,7 @@ ENTRY art_quick_do_long_jump // Load GPRs // TODO: lots of those are smashed, could optimize. add x0, x0, #30*8 - ldp x30, x1, [x0], #-16 + ldp x30, x1, [x0], #-16 // LR & SP ldp x28, x29, [x0], #-16 ldp x26, x27, [x0], #-16 ldp x24, x25, [x0], #-16 @@ -958,10 +958,12 @@ ENTRY art_quick_do_long_jump ldp x2, x3, [x0], #-16 mov sp, x1 - // TODO: Is it really OK to use LR for the target PC? - mov x0, #0 - mov x1, #0 - br xLR + // Need to load PC, it's at the end (after the space for the unused XZR). Use x1. + ldr x1, [x0, #33*8] + // And the value of x0. + ldr x0, [x0] + + br x1 END art_quick_do_long_jump /* diff --git a/runtime/arch/context.h b/runtime/arch/context.h index 9ef761e981..9af7c04f5c 100644 --- a/runtime/arch/context.h +++ b/runtime/arch/context.h @@ -50,6 +50,9 @@ class Context { // Sets the program counter value. virtual void SetPC(uintptr_t new_pc) = 0; + // Sets the first argument register. + virtual void SetArg0(uintptr_t new_arg0_value) = 0; + // Returns whether the given GPR is accessible (read or write). virtual bool IsAccessibleGPR(uint32_t reg) = 0; diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc index bc2bf68993..08ab356855 100644 --- a/runtime/arch/mips/context_mips.cc +++ b/runtime/arch/mips/context_mips.cc @@ -30,9 +30,11 @@ void MipsContext::Reset() { std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; gprs_[RA] = &ra_; + gprs_[A0] = &arg0_; // Initialize registers with easy to spot debug values. sp_ = MipsContext::kBadGprBase + SP; ra_ = MipsContext::kBadGprBase + RA; + arg0_ = 0; } void MipsContext::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h index 38cf29a6aa..0affe5397a 100644 --- a/runtime/arch/mips/context_mips.h +++ b/runtime/arch/mips/context_mips.h @@ -78,12 +78,17 @@ class MipsContext : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(A0, new_arg0_value); + } + private: // Pointers to registers in the stack, initialized to null except for the special cases below. uintptr_t* gprs_[kNumberOfCoreRegisters]; uint32_t* fprs_[kNumberOfFRegisters]; - // Hold values for sp and ra (return address) if they are not located within a stack frame. - uintptr_t sp_, ra_; + // Hold values for sp and ra (return address) if they are not located within a stack frame, as + // well as the first argument. + uintptr_t sp_, ra_, arg0_; }; } // namespace mips } // namespace art diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc index cc6dc7e17c..2c17f1c118 100644 --- a/runtime/arch/mips64/context_mips64.cc +++ b/runtime/arch/mips64/context_mips64.cc @@ -30,9 +30,11 @@ void Mips64Context::Reset() { std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; gprs_[T9] = &t9_; + gprs_[A0] = &arg0_; // Initialize registers with easy to spot debug values. 
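Every architecture's Context gains an arg0_ slot wired to its first argument register (R0, X0, A0, EAX, RDI) plus a SetArg0() override, so single-frame deoptimization can hand an ArtMethod* to the code it long-jumps to. A simplified sketch of the wiring — the class, register indices and the kBadGprBase value below are illustrative stand-ins, not ART's definitions:

    #include <cstdint>
    #include <cstring>

    class SketchContext {
     public:
      static constexpr uint32_t kSP = 13;
      static constexpr uint32_t kArg0 = 0;                 // R0 / X0 / A0 / EAX / RDI
      static constexpr uintptr_t kBadGprBase = 0xebad0000; // placeholder debug pattern

      void Reset() {
        std::memset(gprs_, 0, sizeof(gprs_));
        gprs_[kSP] = &sp_;
        gprs_[kArg0] = &arg0_;                             // new: arg0 is addressable
        sp_ = kBadGprBase + kSP;
        arg0_ = 0;
      }
      void SetGPR(uint32_t reg, uintptr_t value) {
        if (gprs_[reg] != nullptr) {                       // other slots stay null here
          *gprs_[reg] = value;
        }
      }
      void SetArg0(uintptr_t value) { SetGPR(kArg0, value); }  // hook used by deopt

     private:
      uintptr_t* gprs_[16];
      uintptr_t sp_, arg0_;
    };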
sp_ = Mips64Context::kBadGprBase + SP; t9_ = Mips64Context::kBadGprBase + T9; + arg0_ = 0; } void Mips64Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h index 26fbcfe9d4..84b1c9bad4 100644 --- a/runtime/arch/mips64/context_mips64.h +++ b/runtime/arch/mips64/context_mips64.h @@ -78,14 +78,20 @@ class Mips64Context : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(A0, new_arg0_value); + } + private: // Pointers to registers in the stack, initialized to null except for the special cases below. uintptr_t* gprs_[kNumberOfGpuRegisters]; uint64_t* fprs_[kNumberOfFpuRegisters]; // Hold values for sp and t9 if they are not located within a stack frame. We use t9 for the - // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). - uintptr_t sp_, t9_; + // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). We + // also need the first argument for single-frame deopt. + uintptr_t sp_, t9_, arg0_; }; + } // namespace mips64 } // namespace art diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc index 7096c82aad..987ad60fd8 100644 --- a/runtime/arch/x86/context_x86.cc +++ b/runtime/arch/x86/context_x86.cc @@ -29,9 +29,11 @@ void X86Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[ESP] = &esp_; + gprs_[EAX] = &arg0_; // Initialize registers with easy to spot debug values. esp_ = X86Context::kBadGprBase + ESP; eip_ = X86Context::kBadGprBase + kNumberOfCpuRegisters; + arg0_ = 0; } void X86Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h index c4a11d8a88..59beb12ffa 100644 --- a/runtime/arch/x86/context_x86.h +++ b/runtime/arch/x86/context_x86.h @@ -44,6 +44,10 @@ class X86Context : public Context { eip_ = new_pc; } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(EAX, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); return gprs_[reg] != nullptr; @@ -95,10 +99,10 @@ class X86Context : public Context { // Pointers to register locations. Values are initialized to null or the special registers below. uintptr_t* gprs_[kNumberOfCpuRegisters]; uint32_t* fprs_[kNumberOfFloatRegisters]; - // Hold values for esp and eip if they are not located within a stack frame. EIP is somewhat + // Hold values for esp, eip and arg0 if they are not located within a stack frame. EIP is somewhat // special in that it cannot be encoded normally as a register operand to an instruction (except // in 64bit addressing modes). - uintptr_t esp_, eip_; + uintptr_t esp_, eip_, arg0_; }; } // namespace x86 } // namespace art diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 029a296e5a..f3b15c9ab2 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1695,7 +1695,7 @@ END_FUNCTION art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. 
*/ DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc index 1fe2ef8fd8..3dc7d71df4 100644 --- a/runtime/arch/x86_64/context_x86_64.cc +++ b/runtime/arch/x86_64/context_x86_64.cc @@ -29,9 +29,11 @@ void X86_64Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[RSP] = &rsp_; + gprs_[RDI] = &arg0_; // Initialize registers with easy to spot debug values. rsp_ = X86_64Context::kBadGprBase + RSP; rip_ = X86_64Context::kBadGprBase + kNumberOfCpuRegisters; + arg0_ = 0; } void X86_64Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h index 30bb9ec362..f05b7f093f 100644 --- a/runtime/arch/x86_64/context_x86_64.h +++ b/runtime/arch/x86_64/context_x86_64.h @@ -44,6 +44,10 @@ class X86_64Context : public Context { rip_ = new_pc; } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(RDI, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); return gprs_[reg] != nullptr; @@ -82,10 +86,10 @@ class X86_64Context : public Context { // Pointers to register locations. Values are initialized to null or the special registers below. uintptr_t* gprs_[kNumberOfCpuRegisters]; uint64_t* fprs_[kNumberOfFloatRegisters]; - // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat + // Hold values for rsp, rip and arg0 if they are not located within a stack frame. RIP is somewhat // special in that it cannot be encoded normally as a register operand to an instruction (except // in 64bit addressing modes). - uintptr_t rsp_, rip_; + uintptr_t rsp_, rip_, arg0_; }; } // namespace x86_64 } // namespace art diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 861f8025a5..2f438a3c8f 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1724,18 +1724,18 @@ END_FUNCTION art_quick_instrumentation_exit * will long jump to the upcall with a special exception of -1. */ DEFINE_FUNCTION art_quick_deoptimize - pushq %rsi // Entry point for a jump. Fake that we were called. - // Use hidden arg. + pushq %rsi // Entry point for a jump. Fake that we were called. + // Use hidden arg. SETUP_SAVE_ALL_CALLEE_SAVE_FRAME - // Stack should be aligned now. - movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. - call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) + // Stack should be aligned now. + movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. + call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. 
*/ DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_ALL_CALLEE_SAVE_FRAME diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc index c1a108839b..691b57ff2d 100644 --- a/runtime/base/arena_allocator.cc +++ b/runtime/base/arena_allocator.cc @@ -89,6 +89,9 @@ const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { "PrimTypeProp ", "SideEffects ", "RegAllocator ", + "StackMapStm ", + "CodeGen ", + "ParallelMove ", }; template <bool kCount> diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h index be9686287a..17045c62d3 100644 --- a/runtime/base/arena_allocator.h +++ b/runtime/base/arena_allocator.h @@ -99,6 +99,9 @@ enum ArenaAllocKind { kArenaAllocPrimitiveTypePropagation, kArenaAllocSideEffectsAnalysis, kArenaAllocRegisterAllocator, + kArenaAllocStackMapStream, + kArenaAllocCodeGenerator, + kArenaAllocParallelMoveResolver, kNumArenaAllocKinds }; diff --git a/runtime/base/arena_object.h b/runtime/base/arena_object.h index ab97d0cb66..56e35d8751 100644 --- a/runtime/base/arena_object.h +++ b/runtime/base/arena_object.h @@ -40,6 +40,10 @@ class ArenaObject { LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); } + + // NOTE: Providing placement new (and matching delete) for constructing container elements. + ALWAYS_INLINE void* operator new(size_t, void* ptr) noexcept { return ptr; } + ALWAYS_INLINE void operator delete(void*, void*) noexcept { } }; diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc index d749664d12..dfd9fcddb8 100644 --- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc @@ -22,13 +22,16 @@ #include "mirror/class-inl.h" #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" +#include "quick_exception_handler.h" #include "stack.h" #include "thread.h" #include "verifier/method_verifier.h" namespace art { -NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { +extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { + ScopedQuickEntrypointChecks sqec(self); + if (VLOG_IS_ON(deopt)) { LOG(INFO) << "Deopting:"; self->Dump(LOG(INFO)); @@ -39,19 +42,26 @@ NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mut self->QuickDeliverException(); } -extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { - ScopedQuickEntrypointChecks sqec(self); - artDeoptimizeImpl(self); -} - extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); + + // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the + // specialized visitor that will show whether a method is Quick or Shadow. + // Before deoptimizing to interpreter, we must push the deoptimization context. JValue return_value; return_value.SetJ(0); // we never deoptimize from compiled code with an invoke result. 
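The arena_object.h hunk above adds a placement operator new (with its matching placement delete) so that container elements can be constructed directly in storage the arena already owns. The same pattern in isolation, using a plain stack buffer instead of an arena — the class here is a generic illustration, not ART's ArenaObject:

    #include <cstddef>

    class PlacedOnly {
     public:
      void* operator new(std::size_t) = delete;                            // no plain heap new
      void* operator new(std::size_t, void* ptr) noexcept { return ptr; }  // placement new
      void operator delete(void*, void*) noexcept {}                       // matching delete

      explicit PlacedOnly(int v) : value_(v) {}
      int value() const { return value_; }

     private:
      int value_;
    };

    int DemoPlacement() {
      alignas(PlacedOnly) unsigned char storage[sizeof(PlacedOnly)];
      PlacedOnly* obj = new (storage) PlacedOnly(7);  // construct in pre-allocated storage
      int v = obj->value();
      obj->~PlacedOnly();                             // arena-style: destroy, never delete
      return v;
    }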
self->PushDeoptimizationContext(return_value, false, self->GetException()); - artDeoptimizeImpl(self); + + QuickExceptionHandler exception_handler(self, true); + exception_handler.DeoptimizeSingleFrame(); + exception_handler.UpdateInstrumentationStack(); + exception_handler.DeoptimizeSingleFrameArchDependentFixup(); + // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would + // be caller-saved. This has the downside that we cannot track incorrect register usage down the + // line. + exception_handler.DoLongJump(false); } } // namespace art diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index b567303b55..1e9e4fb3c6 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -29,6 +29,7 @@ #include "mirror/method.h" #include "mirror/object-inl.h" #include "mirror/object_array-inl.h" +#include "quick_exception_handler.h" #include "runtime.h" #include "scoped_thread_state_change.h" #include "stack.h" @@ -647,28 +648,86 @@ extern "C" uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, if (method->IsAbstract()) { ThrowAbstractMethodError(method); return 0; + } + + JValue tmp_value; + ShadowFrame* deopt_frame = self->PopStackedShadowFrame( + StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, false); + const DexFile::CodeItem* code_item = method->GetCodeItem(); + DCHECK(code_item != nullptr) << PrettyMethod(method); + ManagedStack fragment; + + DCHECK(!method->IsNative()) << PrettyMethod(method); + uint32_t shorty_len = 0; + auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*)); + const char* shorty = non_proxy_method->GetShorty(&shorty_len); + + JValue result; + + if (deopt_frame != nullptr) { + // Coming from single-frame deopt. + + if (kIsDebugBuild) { + // Sanity-check: are the methods as expected? We check that the last shadow frame (the bottom + // of the call-stack) corresponds to the called method. + ShadowFrame* linked = deopt_frame; + while (linked->GetLink() != nullptr) { + linked = linked->GetLink(); + } + CHECK_EQ(method, linked->GetMethod()) << PrettyMethod(method) << " " + << PrettyMethod(linked->GetMethod()); + } + + if (VLOG_IS_ON(deopt)) { + // Print out the stack to verify that it was a single-frame deopt. + LOG(INFO) << "Continue-ing from deopt. Stack is:"; + QuickExceptionHandler::DumpFramesWithType(self, true); + } + + mirror::Throwable* pending_exception = nullptr; + self->PopDeoptimizationContext(&result, &pending_exception); + + // Push a transition back into managed code onto the linked list in thread. + self->PushManagedStackFragment(&fragment); + + // Ensure that the stack is still in order. + if (kIsDebugBuild) { + class DummyStackVisitor : public StackVisitor { + public: + explicit DummyStackVisitor(Thread* self_in) SHARED_REQUIRES(Locks::mutator_lock_) + : StackVisitor(self_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {} + + bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + // Nothing to do here. In a debug build, SanityCheckFrame will do the work in the walking + // logic. Just always say we want to continue. + return true; + } + }; + DummyStackVisitor dsv(self); + dsv.WalkStack(); + } + + // Restore the exception that was pending before deoptimization then interpret the + // deoptimized frames. 
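The rewritten artDeoptimizeFromCompiledCode drives the new single-frame path: deoptimize just the top compiled frame, fix up the instrumentation stack and architecture details, then long-jump with the saved PC pointing at the quick-to-interpreter bridge and arg0 holding the deoptimized ArtMethod*, without smashing caller-saves. The toy model below shows only the control-transfer idea — a context reduced to a function pointer plus arg0; nothing here is ART's actual signature:

    #include <cstdint>
    #include <cstdio>

    using Bridge = void (*)(uintptr_t method);

    struct MiniContext {
      Bridge pc = nullptr;   // where the long jump lands
      uintptr_t arg0 = 0;    // value placed in the first argument register
      void DoLongJump() { pc(arg0); }
    };

    void FakeQuickToInterpreterBridge(uintptr_t method) {
      std::printf("interpreting method %#zx from here on\n", static_cast<size_t>(method));
    }

    int main() {
      MiniContext context;
      context.pc = &FakeQuickToInterpreterBridge;  // handler_quick_frame_pc_ -> bridge
      context.arg0 = 0x1234;                       // handler_quick_arg0_ -> ArtMethod*
      context.DoLongJump();                        // DoLongJump(false): keep caller-saves
      return 0;
    }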
+ if (pending_exception != nullptr) { + self->SetException(pending_exception); + } + interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result); } else { - DCHECK(!method->IsNative()) << PrettyMethod(method); const char* old_cause = self->StartAssertNoThreadSuspension( "Building interpreter shadow frame"); - const DexFile::CodeItem* code_item = method->GetCodeItem(); - DCHECK(code_item != nullptr) << PrettyMethod(method); uint16_t num_regs = code_item->registers_size_; // No last shadow coming from quick. ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr = CREATE_SHADOW_FRAME(num_regs, nullptr, method, 0); ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get(); size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_; - uint32_t shorty_len = 0; - auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*)); - const char* shorty = non_proxy_method->GetShorty(&shorty_len); BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len, shadow_frame, first_arg_reg); shadow_frame_builder.VisitArguments(); const bool needs_initialization = method->IsStatic() && !method->GetDeclaringClass()->IsInitialized(); // Push a transition back into managed code onto the linked list in thread. - ManagedStack fragment; self->PushManagedStackFragment(&fragment); self->PushShadowFrame(shadow_frame); self->EndAssertNoThreadSuspension(old_cause); @@ -683,24 +742,26 @@ extern "C" uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, return 0; } } - JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame); - // Pop transition. - self->PopManagedStackFragment(fragment); - - // Request a stack deoptimization if needed - ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp); - if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) { - // Push the context of the deoptimization stack so we can restore the return value and the - // exception before executing the deoptimized frames. - self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException()); - - // Set special exception to cause deoptimization. - self->SetException(Thread::GetDeoptimizationException()); - } - // No need to restore the args since the method has already been run by the interpreter. - return result.GetJ(); + result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame); } + + // Pop transition. + self->PopManagedStackFragment(fragment); + + // Request a stack deoptimization if needed + ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp); + if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) { + // Push the context of the deoptimization stack so we can restore the return value and the + // exception before executing the deoptimized frames. + self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException()); + + // Set special exception to cause deoptimization. + self->SetException(Thread::GetDeoptimizationException()); + } + + // No need to restore the args since the method has already been run by the interpreter. 
+ return result.GetJ(); } // Visits arguments on the stack placing them into the args vector, Object* arguments are converted diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 5c13e13f90..63f43cf3b2 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -20,6 +20,7 @@ #include "art_method-inl.h" #include "dex_instruction.h" #include "entrypoints/entrypoint_utils.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "entrypoints/runtime_asm_entrypoints.h" #include "handle_scope-inl.h" #include "mirror/class-inl.h" @@ -36,8 +37,9 @@ QuickExceptionHandler::QuickExceptionHandler(Thread* self, bool is_deoptimizatio : self_(self), context_(self->GetLongJumpContext()), is_deoptimization_(is_deoptimization), method_tracing_active_(is_deoptimization || Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()), - handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_method_(nullptr), - handler_dex_pc_(0), clear_exception_(false), handler_frame_depth_(kInvalidFrameDepth) { + handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_quick_arg0_(0), + handler_method_(nullptr), handler_dex_pc_(0), clear_exception_(false), + handler_frame_depth_(kInvalidFrameDepth) { } // Finds catch handler. @@ -260,19 +262,25 @@ void QuickExceptionHandler::SetCatchEnvironmentForOptimizedHandler(StackVisitor* // Prepares deoptimization. class DeoptimizeStackVisitor FINAL : public StackVisitor { public: - DeoptimizeStackVisitor(Thread* self, Context* context, QuickExceptionHandler* exception_handler) + DeoptimizeStackVisitor(Thread* self, + Context* context, + QuickExceptionHandler* exception_handler, + bool single_frame) SHARED_REQUIRES(Locks::mutator_lock_) : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), exception_handler_(exception_handler), prev_shadow_frame_(nullptr), - stacked_shadow_frame_pushed_(false) { + stacked_shadow_frame_pushed_(false), + single_frame_deopt_(single_frame), + single_frame_done_(false) { } bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { exception_handler_->SetHandlerFrameDepth(GetFrameDepth()); ArtMethod* method = GetMethod(); - if (method == nullptr) { - // This is the upcall, we remember the frame and last pc so that we may long jump to them. + if (method == nullptr || single_frame_done_) { + // This is the upcall (or the next full frame in single-frame deopt), we remember the frame + // and last pc so that we may long jump to them. exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc()); exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame()); if (!stacked_shadow_frame_pushed_) { @@ -295,7 +303,13 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { CHECK_EQ(GetFrameDepth(), 1U); return true; } else { - return HandleDeoptimization(method); + HandleDeoptimization(method); + if (single_frame_deopt_ && !IsInInlinedFrame()) { + // Single-frame deopt ends at the first non-inlined frame and needs to store that method. 
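DeoptimizeStackVisitor now takes a single_frame flag: it builds the shadow frame for the first non-inlined compiled frame, records that frame's method for arg0, and treats the next frame it visits as the long-jump target. A self-contained sketch of just that stop condition, with plain structs standing in for StackVisitor, ArtMethod and the handler:

    #include <vector>

    struct FrameInfo {
      const char* method;  // nullptr models the upcall
      bool inlined;
    };

    struct SingleFrameDeoptVisitor {
      const char* handler_arg0 = nullptr;  // plays the role of handler_quick_arg0_
      bool done = false;

      // Returns true to keep walking, mirroring StackVisitor::VisitFrame().
      bool VisitFrame(const FrameInfo& frame) {
        if (frame.method == nullptr || done) {
          // Upcall, or the next full frame after the single-frame deopt finished:
          // this is where the long jump will land.
          return false;
        }
        // The real visitor builds the shadow frame here (HandleDeoptimization).
        if (!frame.inlined) {
          handler_arg0 = frame.method;  // SetHandlerQuickArg0(method) in the diff
          done = true;
        }
        return true;
      }
    };

    bool WalkStack(SingleFrameDeoptVisitor* visitor, const std::vector<FrameInfo>& stack) {
      for (const FrameInfo& frame : stack) {
        if (!visitor->VisitFrame(frame)) {
          break;
        }
      }
      return visitor->done;
    }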
+ exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method)); + single_frame_done_ = true; + } + return true; } } @@ -304,7 +318,7 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { return static_cast<VRegKind>(kinds.at(reg * 2)); } - bool HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) { + void HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) { const DexFile::CodeItem* code_item = m->GetCodeItem(); CHECK(code_item != nullptr) << "No code item for " << PrettyMethod(m); uint16_t num_regs = code_item->registers_size_; @@ -448,16 +462,20 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { // Will be popped after the long jump after DeoptimizeStack(), // right before interpreter::EnterInterpreterFromDeoptimize(). stacked_shadow_frame_pushed_ = true; - GetThread()->PushStackedShadowFrame(new_frame, - StackedShadowFrameType::kDeoptimizationShadowFrame); + GetThread()->PushStackedShadowFrame( + new_frame, + single_frame_deopt_ + ? StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame + : StackedShadowFrameType::kDeoptimizationShadowFrame); } prev_shadow_frame_ = new_frame; - return true; } QuickExceptionHandler* const exception_handler_; ShadowFrame* prev_shadow_frame_; bool stacked_shadow_frame_pushed_; + const bool single_frame_deopt_; + bool single_frame_done_; DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor); }; @@ -468,13 +486,46 @@ void QuickExceptionHandler::DeoptimizeStack() { self_->DumpStack(LOG(INFO) << "Deoptimizing: "); } - DeoptimizeStackVisitor visitor(self_, context_, this); + DeoptimizeStackVisitor visitor(self_, context_, this, false); visitor.WalkStack(true); // Restore deoptimization exception self_->SetException(Thread::GetDeoptimizationException()); } +void QuickExceptionHandler::DeoptimizeSingleFrame() { + DCHECK(is_deoptimization_); + + if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) { + LOG(INFO) << "Single-frame deopting:"; + DumpFramesWithType(self_, true); + } + + DeoptimizeStackVisitor visitor(self_, context_, this, true); + visitor.WalkStack(true); + + // PC needs to be of the quick-to-interpreter bridge. + int32_t offset; + #ifdef __LP64__ + offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value(); + #else + offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value(); + #endif + handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>( + reinterpret_cast<uint8_t*>(self_) + offset); +} + +void QuickExceptionHandler::DeoptimizeSingleFrameArchDependentFixup() { + // Architecture-dependent work. This is to get the LR right for x86 and x86-64. + + if (kRuntimeISA == InstructionSet::kX86 || kRuntimeISA == InstructionSet::kX86_64) { + // On x86, the return address is on the stack, so just reuse it. Otherwise we would have to + // change how longjump works. + handler_quick_frame_ = reinterpret_cast<ArtMethod**>( + reinterpret_cast<uintptr_t>(handler_quick_frame_) - sizeof(void*)); + } +} + // Unwinds all instrumentation stack frame prior to catch handler or upcall. class InstrumentationStackVisitor : public StackVisitor { public: @@ -529,15 +580,67 @@ void QuickExceptionHandler::UpdateInstrumentationStack() { } } -void QuickExceptionHandler::DoLongJump() { +void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) { // Place context back on thread so it will be available when we continue. 
self_->ReleaseLongJumpContext(context_); context_->SetSP(reinterpret_cast<uintptr_t>(handler_quick_frame_)); CHECK_NE(handler_quick_frame_pc_, 0u); context_->SetPC(handler_quick_frame_pc_); - context_->SmashCallerSaves(); + context_->SetArg0(handler_quick_arg0_); + if (smash_caller_saves) { + context_->SmashCallerSaves(); + } context_->DoLongJump(); UNREACHABLE(); } +// Prints out methods with their type of frame. +class DumpFramesWithTypeStackVisitor FINAL : public StackVisitor { + public: + DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false) + SHARED_REQUIRES(Locks::mutator_lock_) + : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + show_details_(show_details) {} + + bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + ArtMethod* method = GetMethod(); + if (show_details_) { + LOG(INFO) << "|> pc = " << std::hex << GetCurrentQuickFramePc(); + LOG(INFO) << "|> addr = " << std::hex << reinterpret_cast<uintptr_t>(GetCurrentQuickFrame()); + if (GetCurrentQuickFrame() != nullptr && method != nullptr) { + LOG(INFO) << "|> ret = " << std::hex << GetReturnPc(); + } + } + if (method == nullptr) { + // Transition, do go on, we want to unwind over bridges, all the way. + if (show_details_) { + LOG(INFO) << "N <transition>"; + } + return true; + } else if (method->IsRuntimeMethod()) { + if (show_details_) { + LOG(INFO) << "R " << PrettyMethod(method, true); + } + return true; + } else { + bool is_shadow = GetCurrentShadowFrame() != nullptr; + LOG(INFO) << (is_shadow ? "S" : "Q") + << ((!is_shadow && IsInInlinedFrame()) ? "i" : " ") + << " " + << PrettyMethod(method, true); + return true; // Go on. + } + } + + private: + bool show_details_; + + DISALLOW_COPY_AND_ASSIGN(DumpFramesWithTypeStackVisitor); +}; + +void QuickExceptionHandler::DumpFramesWithType(Thread* self, bool details) { + DumpFramesWithTypeStackVisitor visitor(self, details); + visitor.WalkStack(true); +} + } // namespace art diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h index 2e05c7e1e5..89d6a25128 100644 --- a/runtime/quick_exception_handler.h +++ b/runtime/quick_exception_handler.h @@ -49,6 +49,9 @@ class QuickExceptionHandler { // Deoptimize the stack to the upcall. For every compiled frame, we create a "copy" // shadow frame that will be executed with the interpreter. void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_); + void DeoptimizeSingleFrame() SHARED_REQUIRES(Locks::mutator_lock_); + void DeoptimizeSingleFrameArchDependentFixup() SHARED_REQUIRES(Locks::mutator_lock_); + // Update the instrumentation stack by removing all methods that will be unwound // by the exception being thrown. void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_); @@ -58,7 +61,7 @@ class QuickExceptionHandler { SHARED_REQUIRES(Locks::mutator_lock_); // Long jump either to a catch handler or to the upcall. 
- NO_RETURN void DoLongJump() SHARED_REQUIRES(Locks::mutator_lock_); + NO_RETURN void DoLongJump(bool smash_caller_saves = true) SHARED_REQUIRES(Locks::mutator_lock_); void SetHandlerQuickFrame(ArtMethod** handler_quick_frame) { handler_quick_frame_ = handler_quick_frame; @@ -68,6 +71,10 @@ class QuickExceptionHandler { handler_quick_frame_pc_ = handler_quick_frame_pc; } + void SetHandlerQuickArg0(uintptr_t handler_quick_arg0) { + handler_quick_arg0_ = handler_quick_arg0; + } + ArtMethod* GetHandlerMethod() const { return handler_method_; } @@ -92,6 +99,11 @@ class QuickExceptionHandler { handler_frame_depth_ = frame_depth; } + // Walk the stack frames of the given thread, printing out non-runtime methods with their types + // of frames. Helps to verify that single-frame deopt really only deopted one frame. + static void DumpFramesWithType(Thread* self, bool details = false) + SHARED_REQUIRES(Locks::mutator_lock_); + private: Thread* const self_; Context* const context_; @@ -103,6 +115,8 @@ class QuickExceptionHandler { ArtMethod** handler_quick_frame_; // PC to branch to for the handler. uintptr_t handler_quick_frame_pc_; + // The value for argument 0. + uintptr_t handler_quick_arg0_; // The handler method to report to the debugger. ArtMethod* handler_method_; // The handler's dex PC, zero implies an uncaught exception. diff --git a/runtime/thread.cc b/runtime/thread.cc index 5bf895ef80..82e6fb0f00 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -250,10 +250,16 @@ void Thread::PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type tlsPtr_.stacked_shadow_frame_record = record; } -ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type) { +ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present) { StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record; - DCHECK(record != nullptr); - DCHECK_EQ(record->GetType(), type); + if (must_be_present) { + DCHECK(record != nullptr); + DCHECK_EQ(record->GetType(), type); + } else { + if (record == nullptr || record->GetType() != type) { + return nullptr; + } + } tlsPtr_.stacked_shadow_frame_record = record->GetLink(); ShadowFrame* shadow_frame = record->GetShadowFrame(); delete record; diff --git a/runtime/thread.h b/runtime/thread.h index 11f2e285a1..d21644d179 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -108,7 +108,8 @@ enum ThreadFlag { enum class StackedShadowFrameType { kShadowFrameUnderConstruction, - kDeoptimizationShadowFrame + kDeoptimizationShadowFrame, + kSingleFrameDeoptimizationShadowFrame }; static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34; @@ -843,7 +844,7 @@ class Thread { void AssertHasDeoptimizationContext() SHARED_REQUIRES(Locks::mutator_lock_); void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type); - ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type); + ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present = true); // For debugger, find the shadow frame that corresponds to a frame id. // Or return null if there is none. 
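Thread::PopStackedShadowFrame gains a must_be_present flag so the interpreter bridge can probe for a single-frame-deopt record without asserting: if the top record is missing or of a different type, the probe returns null and leaves the stack alone. The same idea on a minimal linked stack (the record type and layout are stand-ins):

    #include <cassert>

    enum class RecordType { kDeopt, kSingleFrameDeopt, kUnderConstruction };

    struct Record {
      RecordType type;
      Record* link;
    };

    Record* PopStacked(Record** top, RecordType type, bool must_be_present = true) {
      Record* record = *top;
      if (must_be_present) {
        assert(record != nullptr && record->type == type);
      } else if (record == nullptr || record->type != type) {
        return nullptr;  // probe failed: leave the stack untouched
      }
      *top = record->link;
      return record;
    }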
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java index a746664160..f06c250dc7 100644 --- a/test/449-checker-bce/src/Main.java +++ b/test/449-checker-bce/src/Main.java @@ -249,6 +249,25 @@ public class Main { array[Integer.MAX_VALUE - 998] = 1; } + /// CHECK-START: void Main.constantIndexing6(int[]) BCE (before) + /// CHECK: BoundsCheck + /// CHECK: ArraySet + /// CHECK: BoundsCheck + /// CHECK: ArraySet + + /// CHECK-START: void Main.constantIndexing6(int[]) BCE (after) + /// CHECK: Deoptimize + + static void constantIndexing6(int[] array) { + array[3] = 1; + array[4] = 1; + } + + // A helper into which the actual throwing function should be inlined. + static void constantIndexingForward6(int[] array) { + constantIndexing6(array); + } + /// CHECK-START: void Main.loopPattern1(int[]) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet @@ -602,7 +621,12 @@ public class Main { // This will cause AIOOBE. constantIndexing2(new int[3]); } catch (ArrayIndexOutOfBoundsException e) { - return 99; + try { + // This will cause AIOOBE. + constantIndexingForward6(new int[3]); + } catch (ArrayIndexOutOfBoundsException e2) { + return 99; + } } return 0; } diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index a14200e7ce..c32d34aa6f 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -84,6 +84,172 @@ public class Main { return arg & -1; } + /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<Const15:i\d+>> IntConstant 15 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: <<And:i\d+>> And [<<UShr>>,<<Const15>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: Return [<<UShr>>] + + /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after) + /// CHECK-NOT: And + + public static int UShr28And15(int arg) { + return (arg >>> 28) & 15; + } + + /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<Const15:j\d+>> LongConstant 15 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: <<And:j\d+>> And [<<UShr>>,<<Const15>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: Return [<<UShr>>] + + /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after) + /// CHECK-NOT: And + + public static long UShr60And15(long arg) { + return (arg >>> 60) & 15; + } + + /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: <<And:i\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + 
/// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: <<And:i\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + public static int UShr28And7(int arg) { + return (arg >>> 28) & 7; + } + + /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<Const7:j\d+>> LongConstant 7 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: <<And:j\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<Const7:j\d+>> LongConstant 7 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: <<And:j\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + public static long UShr60And7(long arg) { + return (arg >>> 60) & 7; + } + + /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<Const255:i\d+>> IntConstant 255 + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: <<And:i\d+>> And [<<Shr>>,<<Const255>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: Return [<<UShr>>] + + /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after) + /// CHECK-NOT: Shr + /// CHECK-NOT: And + + public static int Shr24And255(int arg) { + return (arg >> 24) & 255; + } + + /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<Const255:j\d+>> LongConstant 255 + /// CHECK-DAG: <<Shr:j\d+>> Shr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: <<And:j\d+>> And [<<Shr>>,<<Const255>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: Return [<<UShr>>] + + /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after) + /// CHECK-NOT: Shr + /// CHECK-NOT: And + + public static long Shr56And255(long arg) { + return (arg >> 56) & 255; + } + + /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<Const127:i\d+>> IntConstant 127 + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: <<And:i\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<Const127:i\d+>> IntConstant 127 + /// CHECK-DAG: <<Shr:i\d+>> 
Shr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: <<And:i\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + public static int Shr24And127(int arg) { + return (arg >> 24) & 127; + } + + /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<Const127:j\d+>> LongConstant 127 + /// CHECK-DAG: <<Shr:j\d+>> Shr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: <<And:j\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<Const127:j\d+>> LongConstant 127 + /// CHECK-DAG: <<Shr:j\d+>> Shr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: <<And:j\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + public static long Shr56And127(long arg) { + return (arg >> 56) & 127; + } + /// CHECK-START: long Main.Div1(long) instruction_simplifier (before) /// CHECK-DAG: <<Arg:j\d+>> ParameterValue /// CHECK-DAG: <<Const1:j\d+>> LongConstant 1 @@ -1109,5 +1275,13 @@ public class Main { assertFloatEquals(DivMP25(100.0f), -400.0f); assertDoubleEquals(DivMP25(150.0), -600.0); assertLongEquals(Shl1(100), 200); + assertIntEquals(UShr28And15(0xc1234567), 0xc); + assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL); + assertIntEquals(UShr28And7(0xc1234567), 0x4); + assertLongEquals(UShr60And7(0xc123456787654321L), 0x4L); + assertIntEquals(Shr24And255(0xc1234567), 0xc1); + assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L); + assertIntEquals(Shr24And127(0xc1234567), 0x41); + assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L); } } diff --git a/test/532-checker-nonnull-arrayset/expected.txt b/test/532-checker-nonnull-arrayset/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/532-checker-nonnull-arrayset/expected.txt diff --git a/test/532-checker-nonnull-arrayset/info.txt b/test/532-checker-nonnull-arrayset/info.txt new file mode 100644 index 0000000000..e1578c8f14 --- /dev/null +++ b/test/532-checker-nonnull-arrayset/info.txt @@ -0,0 +1 @@ +Test that we optimize ArraySet when the value is not null. diff --git a/test/532-checker-nonnull-arrayset/src/Main.java b/test/532-checker-nonnull-arrayset/src/Main.java new file mode 100644 index 0000000000..7d8fff46ba --- /dev/null +++ b/test/532-checker-nonnull-arrayset/src/Main.java @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + // Check that we don't put a null check in the card marking code. 
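For the 532-checker-nonnull-arrayset test that starts here: proving the stored value non-null lets the ArraySet code drop the null test that normally guards GC card marking (a null store never creates a cross-generation reference). The difference, sketched with stand-in types — the card shift and dirty value below are placeholders, not ART's constants:

    #include <cstdint>

    using CardTable = uint8_t*;

    inline void MarkCard(CardTable cards, uintptr_t object_addr) {
      cards[object_addr >> 10] = 0x70;  // placeholder shift/value
    }

    void StoreMaybeNull(CardTable cards, uintptr_t array, const void* value) {
      if (value != nullptr) {           // emitted when ArraySet value_can_be_null:true
        MarkCard(cards, array);
      }
    }

    void StoreKnownNonNull(CardTable cards, uintptr_t array, const void* value) {
      (void)value;
      MarkCard(cards, array);           // null test elided when value_can_be_null:false
    }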
+
+  /// CHECK-START: void Main.test() instruction_simplifier (before)
+  /// CHECK: ArraySet value_can_be_null:true
+
+  /// CHECK-START: void Main.test() instruction_simplifier (after)
+  /// CHECK: ArraySet value_can_be_null:false
+
+  /// CHECK-START-X86: void Main.test() disassembly (after)
+  /// CHECK: ArraySet value_can_be_null:false
+  /// CHECK-NOT: test
+  /// CHECK: ReturnVoid
+  public static void test() {
+    Object[] array = new Object[1];
+    Object nonNull = array[0];
+    nonNull.getClass(); // Ensure nonNull has an implicit null check.
+    array[0] = nonNull;
+  }
+
+  public static void main(String[] args) {}
+}
diff --git a/test/533-regression-debugphi/expected.txt b/test/533-regression-debugphi/expected.txt
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/test/533-regression-debugphi/expected.txt
diff --git a/test/533-regression-debugphi/info.txt b/test/533-regression-debugphi/info.txt
new file mode 100644
index 0000000000..a4d4857035
--- /dev/null
+++ b/test/533-regression-debugphi/info.txt
@@ -0,0 +1,2 @@
+Test a regression where DeadPhiHandling would infinitely loop over
+complicated phi dependencies.
diff --git a/test/533-regression-debugphi/smali/TestCase.smali b/test/533-regression-debugphi/smali/TestCase.smali
new file mode 100644
index 0000000000..1908e72c57
--- /dev/null
+++ b/test/533-regression-debugphi/smali/TestCase.smali
@@ -0,0 +1,72 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# This is a reduced test case that used to trigger an infinite loop
+# in the DeadPhiHandling phase of the optimizing compiler (only used
+# with debuggable flag).
+.method public static testCase(IILjava/lang/Object;)V
+  .registers 5
+  const/4 v0, 0x0
+
+  :B4
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  goto :B7
+
+  :B7
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  if-nez p2, :Btmp
+  goto :B111
+
+  :Btmp
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  if-nez p2, :B9
+  goto :B110
+
+  :B13
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  add-int v0, p0, p1
+  goto :B7
+
+  :B110
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  add-int v0, p0, p1
+  goto :B111
+
+  :B111
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  goto :B4
+
+  :B9
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  if-nez p2, :B10
+
+  :B11
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  move v1, v0
+  goto :B12
+
+  :B10
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  move-object v1, p2
+  goto :B12
+
+  :B12
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  goto :B13
+
+  return-void
+.end method
diff --git a/test/533-regression-debugphi/src/Main.java b/test/533-regression-debugphi/src/Main.java
new file mode 100644
index 0000000000..858770f508
--- /dev/null
+++ b/test/533-regression-debugphi/src/Main.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+}
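
Note: the following is not part of the patch. It is a minimal standalone Java sketch (the class name ShiftAndMaskDemo is hypothetical) illustrating the shift-and-mask identities the new instruction_simplifier checker tests above rely on: a mask that keeps every bit the shift can leave set is redundant and can be dropped, and an arithmetic shift whose sign-extended bits are all masked away behaves like a single logical shift.

public class ShiftAndMaskDemo {
  public static void main(String[] args) {
    int x = 0xc1234567;
    long y = 0xc123456787654321L;

    // x >>> 28 leaves only 4 significant bits, so "& 15" keeps them all: the And is redundant.
    System.out.println(((x >>> 28) & 15) == (x >>> 28));   // true
    // "& 7" still clears bit 3 of the shifted value, so that And must stay.
    System.out.println((x >>> 28) & 7);                    // 4 (0xc & 7)
    // In "(x >> 24) & 255" the mask discards every sign-extended bit,
    // so the arithmetic shift plus mask equals one logical shift.
    System.out.println(((x >> 24) & 255) == (x >>> 24));   // true, both are 0xc1
    // "& 127" clears bit 7, which the arithmetic shift can leave set,
    // which is why Shr24And127 expects no simplification.
    System.out.println((x >> 24) & 127);                   // 65 (0x41)
    // The same identities hold for long with shift distances 60 and 56.
    System.out.println(((y >>> 60) & 15L) == (y >>> 60));  // true
    System.out.println(((y >> 56) & 255L) == (y >>> 56));  // true, both are 0xc1
  }
}

Run with any JDK: each comparison prints true, and the two masked values print 4 and 65, matching the 0x4 and 0x41 expectations asserted in the test's main() above.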