Diffstat (limited to 'compiler/optimizing'): 46 files changed, 2113 insertions, 1666 deletions
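Note on the first file in the diff (bounds_check_elimination_test.cc): GVN no longer runs as a standalone pass; it now consumes the result of a separate SideEffectsAnalysis. Every direct GlobalValueNumberer(&allocator, graph).Run() call in the tests is therefore funneled through a small helper, quoted here from the diff for readability:

    static void RunGvn(HGraph* graph) {
      SideEffectsAnalysis side_effects(graph);
      side_effects.Run();
      GVNOptimization(graph, side_effects).Run();
    }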
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index f650ff21df..b6aaab5ef5 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -19,12 +19,19 @@ #include "gvn.h" #include "nodes.h" #include "optimizing_unit_test.h" +#include "side_effects_analysis.h" #include "utils/arena_allocator.h" #include "gtest/gtest.h" namespace art { +static void RunGvn(HGraph* graph) { + SideEffectsAnalysis side_effects(graph); + side_effects.Run(); + GVNOptimization(graph, side_effects).Run(); +} + // if (i < 0) { array[i] = 1; // Can't eliminate. } // else if (i >= array.length) { array[i] = 1; // Can't eliminate. } // else { array[i] = 1; // Can eliminate. } @@ -120,7 +127,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { block3->AddSuccessor(block4); // False successor graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check2)); @@ -195,7 +202,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { block3->AddSuccessor(exit); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -270,7 +277,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { block3->AddSuccessor(exit); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -344,7 +351,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { exit->AddInstruction(new (&allocator) HExit()); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check5)); @@ -443,7 +450,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // HArrayLength which uses the null check as its input. graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -451,7 +458,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -459,7 +466,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. 
} graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -467,7 +474,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -476,7 +483,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // array[i] = 10; // Can't eliminate due to overflow concern. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2(graph); bounds_check_elimination_with_increment_2.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -484,7 +491,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph); bounds_check_elimination_with_increment_2_from_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -584,7 +591,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // HArrayLength which uses the null check as its input. graph = BuildSSAGraph2(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -592,7 +599,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -600,7 +607,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, -1); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -608,7 +615,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. 
} graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_less_than(graph); bounds_check_elimination_with_less_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -616,7 +623,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph); bounds_check_elimination_increment_minus_2.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -703,7 +710,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -712,7 +719,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -721,7 +728,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -730,7 +737,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_8(graph); bounds_check_elimination_increment_8.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -831,7 +838,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // HArrayLength which uses the null check as its input. graph = BuildSSAGraph4(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -839,7 +846,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. 
} graph = BuildSSAGraph4(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -847,7 +854,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -1023,7 +1030,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_body_add->AddSuccessor(outer_header); graph->BuildDominatorTree(); - GlobalValueNumberer(&allocator, graph).Run(); + RunGvn(graph); // gvn should remove the same bounds check. ASSERT_FALSE(IsRemoved(bounds_check1)); ASSERT_FALSE(IsRemoved(bounds_check2)); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 85724fa7c8..0a405c4bbe 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -140,6 +140,7 @@ void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots, size_t maximum_number_of_live_core_registers, size_t maximum_number_of_live_fp_registers, size_t number_of_out_slots) { + ComputeSpillMask(); first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; SetFrameSize(RoundUp( @@ -236,7 +237,8 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { } } - SetupBlockedRegisters(); + static constexpr bool kBaseline = true; + SetupBlockedRegisters(kBaseline); // Allocate all unallocated input locations. 
for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { @@ -330,23 +332,25 @@ bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) con CodeGenerator* CodeGenerator::Create(HGraph* graph, InstructionSet instruction_set, - const InstructionSetFeatures& isa_features) { + const InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) { switch (instruction_set) { case kArm: case kThumb2: { return new arm::CodeGeneratorARM(graph, - isa_features.AsArmInstructionSetFeatures()); + *isa_features.AsArmInstructionSetFeatures(), + compiler_options); } case kArm64: { - return new arm64::CodeGeneratorARM64(graph); + return new arm64::CodeGeneratorARM64(graph, compiler_options); } case kMips: return nullptr; case kX86: { - return new x86::CodeGeneratorX86(graph); + return new x86::CodeGeneratorX86(graph, compiler_options); } case kX86_64: { - return new x86_64::CodeGeneratorX86_64(graph); + return new x86_64::CodeGeneratorX86_64(graph, compiler_options); } default: return nullptr; @@ -373,7 +377,7 @@ void CodeGenerator::BuildNativeGCMap( uint32_t native_offset = pc_info.native_pc; uint32_t dex_pc = pc_info.dex_pc; const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false); - CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc; + CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc; builder.AddEntry(native_offset, references); } } @@ -545,8 +549,18 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { size_t environment_size = instruction->EnvironmentSize(); - size_t register_mask = 0; size_t inlining_depth = 0; + uint32_t register_mask = locations->GetRegisterMask(); + if (locations->OnlyCallsOnSlowPath()) { + // In case of slow path, we currently set the location of caller-save registers + // to register (instead of their stack location when pushed before the slow-path + // call). Therefore register_mask contains both callee-save and caller-save + // registers that hold objects. We must remove the caller-save from the mask, since + // they will be overwritten by the callee. + register_mask &= core_callee_save_mask_; + } + // The register mask must be a subset of callee-save registers. + DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); stack_map_stream_.AddStackMapEntry( dex_pc, pc_info.native_pc, register_mask, locations->GetStackMask(), environment_size, inlining_depth); @@ -630,30 +644,76 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { break; } + case Location::kRegisterPair : { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.low()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.high()); + ++i; + DCHECK_LT(i, environment_size); + break; + } + default: LOG(FATAL) << "Unexpected kind " << location.GetKind(); } } } +bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) { + HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves(); + return (first_next_not_move != nullptr) && first_next_not_move->CanDoImplicitNullCheck(); +} + +void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { + // If we are from a static path don't record the pc as we can't throw NPE. + // NB: having the checks here makes the code much less verbose in the arch + // specific code generators. 
+ if (instr->IsStaticFieldSet() || instr->IsStaticFieldGet()) { + return; + } + + if (!compiler_options_.GetImplicitNullChecks()) { + return; + } + + if (!instr->CanDoImplicitNullCheck()) { + return; + } + + // Find the first previous instruction which is not a move. + HInstruction* first_prev_not_move = instr->GetPreviousDisregardingMoves(); + + // If the instruction is a null check it means that `instr` is the first user + // and needs to record the pc. + if (first_prev_not_move != nullptr && first_prev_not_move->IsNullCheck()) { + HNullCheck* null_check = first_prev_not_move->AsNullCheck(); + // TODO: The parallel moves modify the environment. Their changes need to be reverted + // otherwise the stack maps at the throw point will not be correct. + RecordPcInfo(null_check, null_check->GetDexPc()); + } +} + void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) { RegisterSet* register_set = locations->GetLiveRegisters(); size_t stack_offset = first_register_slot_in_slow_path_; for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { - if (register_set->ContainsCoreRegister(i)) { - // If the register holds an object, update the stack mask. - if (locations->RegisterContainsObject(i)) { - locations->SetStackBit(stack_offset / kVRegSize); + if (!IsCoreCalleeSaveRegister(i)) { + if (register_set->ContainsCoreRegister(i)) { + // If the register holds an object, update the stack mask. + if (locations->RegisterContainsObject(i)) { + locations->SetStackBit(stack_offset / kVRegSize); + } + DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); + stack_offset += SaveCoreRegister(stack_offset, i); } - DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); - stack_offset += SaveCoreRegister(stack_offset, i); } } for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { - if (register_set->ContainsFloatingPointRegister(i)) { - DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); - stack_offset += SaveFloatingPointRegister(stack_offset, i); + if (!IsFloatingPointCalleeSaveRegister(i)) { + if (register_set->ContainsFloatingPointRegister(i)) { + DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); + stack_offset += SaveFloatingPointRegister(stack_offset, i); + } } } } @@ -662,16 +722,20 @@ void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) { RegisterSet* register_set = locations->GetLiveRegisters(); size_t stack_offset = first_register_slot_in_slow_path_; for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { - if (register_set->ContainsCoreRegister(i)) { - DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); - stack_offset += RestoreCoreRegister(stack_offset, i); + if (!IsCoreCalleeSaveRegister(i)) { + if (register_set->ContainsCoreRegister(i)) { + DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); + stack_offset += RestoreCoreRegister(stack_offset, i); + } } } for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { - if (register_set->ContainsFloatingPointRegister(i)) { - DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); - stack_offset += RestoreFloatingPointRegister(stack_offset, i); + if (!IsFloatingPointCalleeSaveRegister(i)) { + if (register_set->ContainsFloatingPointRegister(i)) { + DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); + stack_offset += RestoreFloatingPointRegister(stack_offset, i); + } } } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 88e50b6c88..45f02e53dc 
100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -20,6 +20,7 @@ #include "arch/instruction_set.h" #include "arch/instruction_set_features.h" #include "base/bit_field.h" +#include "driver/compiler_options.h" #include "globals.h" #include "locations.h" #include "memory_region.h" @@ -85,7 +86,8 @@ class CodeGenerator { void CompileOptimized(CodeAllocator* allocator); static CodeGenerator* Create(HGraph* graph, InstructionSet instruction_set, - const InstructionSetFeatures& isa_features); + const InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGenerator() {} HGraph* GetGraph() const { return graph_; } @@ -115,21 +117,38 @@ class CodeGenerator { size_t maximum_number_of_live_core_registers, size_t maximum_number_of_live_fp_registers, size_t number_of_out_slots); - virtual size_t FrameEntrySpillSize() const = 0; int32_t GetStackSlot(HLocal* local) const; Location GetTemporaryLocation(HTemporary* temp) const; uint32_t GetFrameSize() const { return frame_size_; } void SetFrameSize(uint32_t size) { frame_size_ = size; } uint32_t GetCoreSpillMask() const { return core_spill_mask_; } + uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; } size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; } size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; } - virtual void SetupBlockedRegisters() const = 0; + virtual void SetupBlockedRegisters(bool is_baseline) const = 0; + + virtual void ComputeSpillMask() { + core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; + DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; + fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; + } + + static uint32_t ComputeRegisterMask(const int* registers, size_t length) { + uint32_t mask = 0; + for (size_t i = 0, e = length; i < e; ++i) { + mask |= (1 << registers[i]); + } + return mask; + } virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; virtual InstructionSet GetInstructionSet() const = 0; + + const CompilerOptions& GetCompilerOptions() const { return compiler_options_; } + // Saves the register in the stack. Returns the size taken on stack. virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; // Restores the register from the stack. Returns the size taken on stack. 
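For reference, the spill-mask machinery added to CodeGenerator above boils down to two bitwise steps: build a mask from the backend's callee-save register list (ComputeRegisterMask), then intersect it with the registers the allocator actually handed out (ComputeSpillMask). A minimal standalone sketch, with made-up register numbers standing in for a real backend's list:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Hypothetical callee-save register numbers, for illustration only.
    static constexpr int kCalleeSaves[] = { 5, 6, 7, 8, 10, 11, 15 };

    static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
      uint32_t mask = 0;
      for (size_t i = 0; i < length; ++i) {
        mask |= (1u << registers[i]);
      }
      return mask;
    }

    int main() {
      uint32_t callee_saves =
          ComputeRegisterMask(kCalleeSaves, sizeof(kCalleeSaves) / sizeof(kCalleeSaves[0]));
      // Pretend linear scan allocated registers 0, 5 and 6.
      uint32_t allocated = (1u << 0) | (1u << 5) | (1u << 6);
      // Only allocated callee-saves must be spilled in the frame entry.
      uint32_t core_spill_mask = allocated & callee_saves;  // bits 5 and 6
      std::printf("core spill mask = 0x%x\n", core_spill_mask);
      return 0;
    }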
@@ -146,7 +165,17 @@ class CodeGenerator { } virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0; + bool IsCoreCalleeSaveRegister(int reg) const { + return (core_callee_save_mask_ & (1 << reg)) != 0; + } + + bool IsFloatingPointCalleeSaveRegister(int reg) const { + return (fpu_callee_save_mask_ & (1 << reg)) != 0; + } + void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc); + bool CanMoveNullCheckToUser(HNullCheck* null_check); + void MaybeRecordImplicitNullCheck(HInstruction* instruction); void AddSlowPath(SlowPathCode* slow_path) { slow_paths_.Add(slow_path); @@ -196,13 +225,21 @@ class CodeGenerator { return type == Primitive::kPrimNot && !value->IsIntConstant(); } + void AddAllocatedRegister(Location location) { + allocated_registers_.Add(location); + } + protected: CodeGenerator(HGraph* graph, size_t number_of_core_registers, size_t number_of_fpu_registers, - size_t number_of_register_pairs) + size_t number_of_register_pairs, + uint32_t core_callee_save_mask, + uint32_t fpu_callee_save_mask, + const CompilerOptions& compiler_options) : frame_size_(kUninitializedFrameSize), core_spill_mask_(0), + fpu_spill_mask_(0), first_register_slot_in_slow_path_(0), blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)), blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)), @@ -210,7 +247,10 @@ class CodeGenerator { number_of_core_registers_(number_of_core_registers), number_of_fpu_registers_(number_of_fpu_registers), number_of_register_pairs_(number_of_register_pairs), + core_callee_save_mask_(core_callee_save_mask), + fpu_callee_save_mask_(fpu_callee_save_mask), graph_(graph), + compiler_options_(compiler_options), pc_infos_(graph->GetArena(), 32), slow_paths_(graph->GetArena(), 8), is_leaf_(true), @@ -229,11 +269,34 @@ class CodeGenerator { virtual ParallelMoveResolver* GetMoveResolver() = 0; + // Returns the location of the first spilled entry for floating point registers, + // relative to the stack pointer. + uint32_t GetFpuSpillStart() const { + DCHECK_NE(frame_size_, kUninitializedFrameSize); + return GetFrameSize() - FrameEntrySpillSize(); + } + + uint32_t GetFpuSpillSize() const { + return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize(); + } + + uint32_t GetCoreSpillSize() const { + return POPCOUNT(core_spill_mask_) * GetWordSize(); + } + + uint32_t FrameEntrySpillSize() const { + return GetFpuSpillSize() + GetCoreSpillSize(); + } + // Frame size required for this method. uint32_t frame_size_; uint32_t core_spill_mask_; + uint32_t fpu_spill_mask_; uint32_t first_register_slot_in_slow_path_; + // Registers that were allocated during linear scan. + RegisterSet allocated_registers_; + // Arrays used when doing register allocation to know which // registers we can allocate. `SetupBlockedRegisters` updates the // arrays. 
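The FrameEntrySpillSize() helpers that close the hunk above replace the per-backend kNumberOfPushedRegistersAtEntry constants: each set bit in a spill mask costs exactly one slot. A sketch of that arithmetic, using the compiler builtin in place of ART's POPCOUNT macro and example slot sizes for 32-bit ARM (both substitutions are assumptions for illustration):

    #include <cstdint>

    static constexpr uint32_t kWordSize = 4;     // core spill slot, example value
    static constexpr uint32_t kFpuSlotSize = 4;  // one SRegister slot, example value

    uint32_t FrameEntrySpillSize(uint32_t core_spill_mask, uint32_t fpu_spill_mask) {
      // One slot per saved register: population count of each mask.
      return __builtin_popcount(core_spill_mask) * kWordSize +
             __builtin_popcount(fpu_spill_mask) * kFpuSlotSize;
    }

For example, three core bits and two FPU bits yield 3 * 4 + 2 * 4 = 20 bytes of frame-entry spill area.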
@@ -243,12 +306,15 @@ class CodeGenerator { size_t number_of_core_registers_; size_t number_of_fpu_registers_; size_t number_of_register_pairs_; + const uint32_t core_callee_save_mask_; + const uint32_t fpu_callee_save_mask_; private: void InitLocations(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); HGraph* const graph_; + const CompilerOptions& compiler_options_; GrowableArray<PcInfo> pc_infos_; GrowableArray<SlowPathCode*> slow_paths_; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index d40c2d170d..0fe28e8352 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -37,9 +37,11 @@ static DRegister FromLowSToD(SRegister reg) { return static_cast<DRegister>(reg / 2); } -static constexpr bool kExplicitStackOverflowCheck = false; +static bool ExpectedPairLayout(Location location) { + // We expected this for both core and fpu register pairs. + return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); +} -static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2; // LR, R6, R7 static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 }; @@ -48,6 +50,13 @@ static constexpr size_t kRuntimeParameterCoreRegistersLength = static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1, S2, S3 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +// We unconditionally allocate R5 to ensure we can do long operations +// with baseline. +static constexpr Register kCoreSavedRegisterForBaseline = R5; +static constexpr Register kCoreCalleeSaves[] = + { R5, R6, R7, R8, R10, R11, PC }; +static constexpr SRegister kFpuCalleeSaves[] = + { S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31 }; class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> { public: @@ -110,20 +119,6 @@ class DivZeroCheckSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM); }; -class StackOverflowCheckSlowPathARM : public SlowPathCodeARM { - public: - StackOverflowCheckSlowPathARM() {} - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - __ Bind(GetEntryLabel()); - __ LoadFromOffset(kLoadWord, PC, TR, - QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowStackOverflow).Int32Value()); - } - - private: - DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM); -}; - class SuspendCheckSlowPathARM : public SlowPathCodeARM { public: SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor) @@ -384,17 +379,29 @@ size_t CodeGeneratorARM::RestoreFloatingPointRegister(size_t stack_index, uint32 } CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, - const ArmInstructionSetFeatures* isa_features) - : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs), + const ArmInstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) + : CodeGenerator(graph, + kNumberOfCoreRegisters, + kNumberOfSRegisters, + kNumberOfRegisterPairs, + ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), + arraysize(kCoreCalleeSaves)), + ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), + arraysize(kFpuCalleeSaves)), + compiler_options), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), 
assembler_(true), - isa_features_(isa_features) {} - -size_t CodeGeneratorARM::FrameEntrySpillSize() const { - return kNumberOfPushedRegistersAtEntry * kArmWordSize; + isa_features_(isa_features) { + // Save one extra register for baseline. Note that on thumb2, there is no easy + // instruction to restore just the PC, so this actually helps both baseline + // and non-baseline to save and restore at least two registers at entry and exit. + AddAllocatedRegister(Location::RegisterLocation(kCoreSavedRegisterForBaseline)); + // Save the PC register to mimic Quick. + AddAllocatedRegister(Location::RegisterLocation(PC)); } Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { @@ -448,7 +455,7 @@ Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { return Location(); } -void CodeGeneratorARM::SetupBlockedRegisters() const { +void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const { // Don't allocate the dalvik style register pair passing. blocked_register_pairs_[R1_R2] = true; @@ -463,31 +470,17 @@ void CodeGeneratorARM::SetupBlockedRegisters() const { // Reserve temp register. blocked_core_registers_[IP] = true; - // TODO: We currently don't use Quick's callee saved registers. - // We always save and restore R6 and R7 to make sure we can use three - // register pairs for long operations. - blocked_core_registers_[R4] = true; - blocked_core_registers_[R5] = true; - blocked_core_registers_[R8] = true; - blocked_core_registers_[R10] = true; - blocked_core_registers_[R11] = true; - - blocked_fpu_registers_[S16] = true; - blocked_fpu_registers_[S17] = true; - blocked_fpu_registers_[S18] = true; - blocked_fpu_registers_[S19] = true; - blocked_fpu_registers_[S20] = true; - blocked_fpu_registers_[S21] = true; - blocked_fpu_registers_[S22] = true; - blocked_fpu_registers_[S23] = true; - blocked_fpu_registers_[S24] = true; - blocked_fpu_registers_[S25] = true; - blocked_fpu_registers_[S26] = true; - blocked_fpu_registers_[S27] = true; - blocked_fpu_registers_[S28] = true; - blocked_fpu_registers_[S29] = true; - blocked_fpu_registers_[S30] = true; - blocked_fpu_registers_[S31] = true; + if (is_baseline) { + for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { + blocked_core_registers_[kCoreCalleeSaves[i]] = true; + } + + blocked_core_registers_[kCoreSavedRegisterForBaseline] = false; + + for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { + blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; + } + } UpdateBlockedPairRegisters(); } @@ -508,35 +501,56 @@ InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGene assembler_(codegen->GetAssembler()), codegen_(codegen) {} +static uint32_t LeastSignificantBit(uint32_t mask) { + // ffs starts at 1. + return ffs(mask) - 1; +} + +void CodeGeneratorARM::ComputeSpillMask() { + core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; + DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; + fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; + // We use vpush and vpop for saving and restoring floating point registers, which take + // a SRegister and the number of registers to save/restore after that SRegister. We + // therefore update the `fpu_spill_mask_` to also contain those registers not allocated, + // but in the range. 
+ if (fpu_spill_mask_ != 0) { + uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_); + uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_); + for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) { + fpu_spill_mask_ |= (1 << i); + } + } +} + void CodeGeneratorARM::GenerateFrameEntry() { bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); if (!skip_overflow_check) { - if (kExplicitStackOverflowCheck) { - SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM(); - AddSlowPath(slow_path); - - __ LoadFromOffset(kLoadWord, IP, TR, Thread::StackEndOffset<kArmWordSize>().Int32Value()); - __ cmp(SP, ShifterOperand(IP)); - __ b(slow_path->GetEntryLabel(), CC); - } else { - __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); - __ LoadFromOffset(kLoadWord, IP, IP, 0); - RecordPcInfo(nullptr, 0); - } + __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); + __ LoadFromOffset(kLoadWord, IP, IP, 0); + RecordPcInfo(nullptr, 0); } - core_spill_mask_ |= (1 << LR | 1 << R6 | 1 << R7); - __ PushList(1 << LR | 1 << R6 | 1 << R7); - - // The return PC has already been pushed on the stack. - __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize)); + // PC is in the list of callee-save to mimic Quick, but we need to push + // LR at entry instead. + __ PushList((core_spill_mask_ & (~(1 << PC))) | 1 << LR); + if (fpu_spill_mask_ != 0) { + SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); + __ vpushs(start_register, POPCOUNT(fpu_spill_mask_)); + } + __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize())); __ StoreToOffset(kStoreWord, R0, SP, 0); } void CodeGeneratorARM::GenerateFrameExit() { - __ AddConstant(SP, GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize); - __ PopList(1 << PC | 1 << R6 | 1 << R7); + __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize()); + if (fpu_spill_mask_ != 0) { + SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); + __ vpops(start_register, POPCOUNT(fpu_spill_mask_)); + } + __ PopList(core_spill_mask_); } void CodeGeneratorARM::Bind(HBasicBlock* block) { @@ -625,12 +639,11 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) { uint32_t index = double_index_; double_index_ += 2; - DCHECK_EQ(calling_convention.GetFpuRegisterAt(index) + 1, - calling_convention.GetFpuRegisterAt(index + 1)); - DCHECK_EQ(calling_convention.GetFpuRegisterAt(index) & 1, 0); - return Location::FpuRegisterPairLocation( + Location result = Location::FpuRegisterPairLocation( calling_convention.GetFpuRegisterAt(index), calling_convention.GetFpuRegisterAt(index + 1)); + DCHECK(ExpectedPairLayout(result)); + return result; } else { return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index)); } @@ -721,16 +734,10 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { } else if (source.IsFpuRegister()) { UNIMPLEMENTED(FATAL); } else { - // No conflict possible, so just do the moves. 
DCHECK(source.IsDoubleStackSlot()); - if (destination.AsRegisterPairLow<Register>() == R1) { - DCHECK_EQ(destination.AsRegisterPairHigh<Register>(), R2); - __ LoadFromOffset(kLoadWord, R1, SP, source.GetStackIndex()); - __ LoadFromOffset(kLoadWord, R2, SP, source.GetHighStackIndex(kArmWordSize)); - } else { - __ LoadFromOffset(kLoadWordPair, destination.AsRegisterPairLow<Register>(), - SP, source.GetStackIndex()); - } + DCHECK(ExpectedPairLayout(destination)); + __ LoadFromOffset(kLoadWordPair, destination.AsRegisterPairLow<Register>(), + SP, source.GetStackIndex()); } } else if (destination.IsFpuRegisterPair()) { if (source.IsDoubleStackSlot()) { @@ -937,6 +944,7 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { // Condition has not been materialized, use its inputs as the // comparison and its condition as the branch condition. LocationSummary* locations = cond->GetLocations(); + DCHECK(locations->InAt(0).IsRegister()) << locations->InAt(0); Register left = locations->InAt(0).AsRegister<Register>(); if (locations->InAt(1).IsRegister()) { __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>())); @@ -1226,6 +1234,7 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { } else { __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( kArmWordSize).Int32Value(); @@ -1264,6 +1273,7 @@ void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) } else { __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetImtEntryAt(method_offset); uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( kArmWordSize).Int32Value(); @@ -1282,7 +1292,9 @@ void LocationsBuilderARM::VisitNeg(HNeg* neg) { switch (neg->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { - bool output_overlaps = (neg->GetResultType() == Primitive::kPrimLong); + Location::OutputOverlap output_overlaps = (neg->GetResultType() == Primitive::kPrimLong) + ? 
Location::kOutputOverlap + : Location::kNoOutputOverlap; locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), output_overlaps); break; @@ -1809,12 +1821,17 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); switch (add->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { - bool output_overlaps = (add->GetResultType() == Primitive::kPrimLong); + case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), output_overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } @@ -1849,7 +1866,8 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { } break; - case Primitive::kPrimLong: + case Primitive::kPrimLong: { + DCHECK(second.IsRegisterPair()); __ adds(out.AsRegisterPairLow<Register>(), first.AsRegisterPairLow<Register>(), ShifterOperand(second.AsRegisterPairLow<Register>())); @@ -1857,6 +1875,7 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { first.AsRegisterPairHigh<Register>(), ShifterOperand(second.AsRegisterPairHigh<Register>())); break; + } case Primitive::kPrimFloat: __ vadds(out.AsFpuRegister<SRegister>(), @@ -1879,12 +1898,17 @@ void LocationsBuilderARM::VisitSub(HSub* sub) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall); switch (sub->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { - bool output_overlaps = (sub->GetResultType() == Primitive::kPrimLong); + case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), output_overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } case Primitive::kPrimFloat: @@ -1919,6 +1943,7 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) { } case Primitive::kPrimLong: { + DCHECK(second.IsRegisterPair()); __ subs(out.AsRegisterPairLow<Register>(), first.AsRegisterPairLow<Register>(), ShifterOperand(second.AsRegisterPairLow<Register>())); @@ -2054,8 +2079,7 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) { calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); locations->SetInAt(1, Location::RegisterPairLocation( calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - // The runtime helper puts the output in R0,R2. 
- locations->SetOut(Location::RegisterPairLocation(R0, R2)); + locations->SetOut(Location::RegisterPairLocation(R0, R1)); break; } case Primitive::kPrimFloat: @@ -2092,7 +2116,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>()); DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>()); DCHECK_EQ(R0, out.AsRegisterPairLow<Register>()); - DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>()); + DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc()); break; @@ -2275,8 +2299,8 @@ void LocationsBuilderARM::HandleShift(HBinaryOperation* op) { locations->SetInAt(0, Location::RegisterPairLocation( calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - // The runtime helper puts the output in R0,R2. - locations->SetOut(Location::RegisterPairLocation(R0, R2)); + // The runtime helper puts the output in R0,R1. + locations->SetOut(Location::RegisterPairLocation(R0, R1)); break; } default: @@ -2330,7 +2354,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>()); DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegister<Register>()); DCHECK_EQ(R0, out.AsRegisterPairLow<Register>()); - DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>()); + DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); int32_t entry_point_offset; if (op->IsShl()) { @@ -2437,10 +2461,6 @@ void InstructionCodeGeneratorARM::VisitNot(HNot* not_) { Location out = locations->Out(); Location in = locations->InAt(0); switch (not_->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - __ eor(out.AsRegister<Register>(), in.AsRegister<Register>(), ShifterOperand(1)); - break; - case Primitive::kPrimInt: __ mvn(out.AsRegister<Register>(), ShifterOperand(in.AsRegister<Register>())); break; @@ -2579,7 +2599,8 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr, Register value_lo, Register value_hi, Register temp1, - Register temp2) { + Register temp2, + HInstruction* instruction) { Label fail; if (offset != 0) { __ LoadImmediate(temp1, offset); @@ -2590,6 +2611,7 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr, // We need a load followed by store. (The address used in a STREX instruction must // be the same as the address in the most recently executed LDREX instruction.) __ ldrexd(temp1, temp2, addr); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ strexd(temp1, value_lo, value_hi, addr); __ cmp(temp1, ShifterOperand(0)); __ b(&fail, NE); @@ -2608,7 +2630,7 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble; bool generate_volatile = field_info.IsVolatile() && is_wide - && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); + && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); // Temporary registers for the write barrier. // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. 
if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { @@ -2641,7 +2663,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, Location value = locations->InAt(1); bool is_volatile = field_info.IsVolatile(); - bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); Primitive::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -2664,13 +2686,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value_reg = value.AsRegister<Register>(); - __ StoreToOffset(kStoreWord, value_reg, base, offset); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, base, value_reg); - } + __ StoreToOffset(kStoreWord, value.AsRegister<Register>(), base, offset); break; } @@ -2680,9 +2696,11 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, value.AsRegisterPairLow<Register>(), value.AsRegisterPairHigh<Register>(), locations->GetTemp(0).AsRegister<Register>(), - locations->GetTemp(1).AsRegister<Register>()); + locations->GetTemp(1).AsRegister<Register>(), + instruction); } else { __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); } break; } @@ -2704,9 +2722,11 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, value_reg_lo, value_reg_hi, locations->GetTemp(2).AsRegister<Register>(), - locations->GetTemp(3).AsRegister<Register>()); + locations->GetTemp(3).AsRegister<Register>(), + instruction); } else { __ StoreDToOffset(value_reg, base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); } break; } @@ -2716,6 +2736,17 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } + // Longs and doubles are handled in the switch. 
+ if (field_type != Primitive::kPrimLong && field_type != Primitive::kPrimDouble) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>()); + } + if (is_volatile) { GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } @@ -2730,7 +2761,7 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI bool generate_volatile = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble) - && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); + && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); if (generate_volatile) { // Arm encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive @@ -2751,7 +2782,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, Register base = locations->InAt(0).AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); - bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); Primitive::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -2804,9 +2835,11 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, Register lo = locations->GetTemp(0).AsRegister<Register>(); Register hi = locations->GetTemp(1).AsRegister<Register>(); GenerateWideAtomicLoad(base, offset, lo, hi); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ vmovdrr(out_reg, lo, hi); } else { __ LoadDFromOffset(out_reg, base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); } break; } @@ -2816,6 +2849,11 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } + // Doubles are handled in the switch. 
+ if (field_type != Primitive::kPrimDouble) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + if (is_volatile) { GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -2862,20 +2900,32 @@ void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) { } } -void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { +void InstructionCodeGeneratorARM::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (codegen_->CanMoveNullCheckToUser(instruction)) { + return; + } + Location obj = instruction->GetLocations()->InAt(0); + + __ LoadFromOffset(kLoadWord, IP, obj.AsRegister<Register>(), 0); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void InstructionCodeGeneratorARM::GenerateExplicitNullCheck(HNullCheck* instruction) { SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction); codegen_->AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); Location obj = locations->InAt(0); - if (obj.IsRegister()) { - __ cmp(obj.AsRegister<Register>(), ShifterOperand(0)); - __ b(slow_path->GetEntryLabel(), EQ); + __ cmp(obj.AsRegister<Register>(), ShifterOperand(0)); + __ b(slow_path->GetEntryLabel(), EQ); +} + +void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { + if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) { + GenerateImplicitNullCheck(instruction); } else { - DCHECK(obj.IsConstant()) << obj; - DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0); - __ b(slow_path->GetEntryLabel()); + GenerateExplicitNullCheck(instruction); } } @@ -3011,6 +3061,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { @@ -3094,6 +3145,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, value, IP, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); if (needs_write_barrier) { DCHECK_EQ(value_type, Primitive::kPrimNot); Register temp = locations->GetTemp(0).AsRegister<Register>(); @@ -3148,6 +3200,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), IP, data_offset); } + break; } @@ -3155,6 +3208,11 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { LOG(FATAL) << "Unreachable type " << value_type; UNREACHABLE(); } + + // Ints and objects are handled in the switch. 
+ if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } void LocationsBuilderARM::VisitArrayLength(HArrayLength* instruction) { @@ -3170,6 +3228,7 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { Register obj = locations->InAt(0).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadWord, out, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -3295,16 +3354,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex()); } } else if (source.IsDoubleStackSlot()) { - if (destination.IsFpuRegisterPair()) { - __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), - SP, source.GetStackIndex()); - } else { - DCHECK(destination.IsDoubleStackSlot()) << destination; - __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); - __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize)); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); - } + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); } else { DCHECK(source.IsConstant()) << source; HInstruction* constant = source.GetConstant(); @@ -3317,8 +3371,47 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { __ LoadImmediate(IP, value); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); } + } else if (constant->IsLongConstant()) { + int64_t value = constant->AsLongConstant()->GetValue(); + if (destination.IsRegister()) { + // In the presence of long or double constants, the parallel move resolver will + // split the move into two, but keeps the same constant for both moves. Here, + // we use the low or high part depending on which register this move goes to. + if (destination.reg() % 2 == 0) { + __ LoadImmediate(destination.AsRegister<Register>(), Low32Bits(value)); + } else { + __ LoadImmediate(destination.AsRegister<Register>(), High32Bits(value)); + } + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ LoadImmediate(IP, Low32Bits(value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ LoadImmediate(IP, High32Bits(value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + } + } else if (constant->IsDoubleConstant()) { + double value = constant->AsDoubleConstant()->GetValue(); + uint64_t int_value = bit_cast<uint64_t, double>(value); + if (destination.IsFpuRegister()) { + // In the presence of long or double constants, the parallel move resolver will + // split the move into two, but keeps the same constant for both moves. Here, + // we use the low or high part depending on which register this move goes to. 
+ if (destination.reg() % 2 == 0) { + __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), + bit_cast<float, uint32_t>(Low32Bits(int_value))); + } else { + __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), + bit_cast<float, uint32_t>(High32Bits(int_value))); + } + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ LoadImmediate(IP, Low32Bits(int_value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ LoadImmediate(IP, High32Bits(int_value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + } } else { - DCHECK(constant->IsFloatConstant()); + DCHECK(constant->IsFloatConstant()) << constant->DebugName(); float value = constant->AsFloatConstant()->GetValue(); if (destination.IsFpuRegister()) { __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), value); @@ -3609,7 +3702,9 @@ void LocationsBuilderARM::HandleBitwiseOperation(HBinaryOperation* instruction) || instruction->GetResultType() == Primitive::kPrimLong); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - bool output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong); + Location::OutputOverlap output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong) + ? Location::kOutputOverlap + : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), output_overlaps); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 267d9a2cef..dd69e4dd9c 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -19,6 +19,7 @@ #include "code_generator.h" #include "dex/compiler_enums.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm/assembler_thumb2.h" @@ -138,12 +139,14 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateWideAtomicStore(Register addr, uint32_t offset, Register value_lo, Register value_hi, - Register temp1, Register temp2); + Register temp1, Register temp2, + HInstruction* instruction); void GenerateWideAtomicLoad(Register addr, uint32_t offset, Register out_lo, Register out_hi); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); - + void GenerateImplicitNullCheck(HNullCheck* instruction); + void GenerateExplicitNullCheck(HNullCheck* instruction); ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; @@ -153,7 +156,9 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { class CodeGeneratorARM : public CodeGenerator { public: - CodeGeneratorARM(HGraph* graph, const ArmInstructionSetFeatures* isa_features); + CodeGeneratorARM(HGraph* graph, + const ArmInstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGeneratorARM() {} void GenerateFrameEntry() OVERRIDE; @@ -174,8 +179,6 @@ class CodeGeneratorARM : public CodeGenerator { return kArmWordSize; } - size_t FrameEntrySpillSize() const OVERRIDE; - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } @@ -192,7 +195,7 @@ class CodeGeneratorARM : public CodeGenerator { return GetLabelOf(block)->Position(); } - void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; @@ -234,7 +237,7 
@@ class CodeGeneratorARM : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } - const ArmInstructionSetFeatures* GetInstructionSetFeatures() const { + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } @@ -242,6 +245,8 @@ class CodeGeneratorARM : public CodeGenerator { return type == Primitive::kPrimDouble || type == Primitive::kPrimLong; } + void ComputeSpillMask() OVERRIDE; + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -249,7 +254,7 @@ class CodeGeneratorARM : public CodeGenerator { InstructionCodeGeneratorARM instruction_visitor_; ParallelMoveResolverARM move_resolver_; Thumb2Assembler assembler_; - const ArmInstructionSetFeatures* isa_features_; + const ArmInstructionSetFeatures& isa_features_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); }; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 6d2c3de5d5..1f561b725a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -22,6 +22,7 @@ #include "mirror/array-inl.h" #include "mirror/art_method.h" #include "mirror/class.h" +#include "offsets.h" #include "thread.h" #include "utils/arm64/assembler_arm64.h" #include "utils/assembler.h" @@ -42,7 +43,6 @@ namespace arm64 { // TODO: Tune the use of Load-Acquire, Store-Release vs Data Memory Barriers. // For now we prefer the use of load-acquire, store-release over explicit memory barriers. static constexpr bool kUseAcquireRelease = true; -static constexpr bool kExplicitStackOverflowCheck = false; static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>); static constexpr int kCurrentMethodStackOffset = 0; @@ -434,21 +434,6 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64); }; -class StackOverflowCheckSlowPathARM64 : public SlowPathCodeARM64 { - public: - StackOverflowCheckSlowPathARM64() {} - - virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - __ Bind(GetEntryLabel()); - arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowStackOverflow), nullptr, 0); - CheckEntrypointTypes<kQuickThrowStackOverflow, void, void>(); - } - - private: - DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM64); -}; - class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit SuspendCheckSlowPathARM64(HSuspendCheck* instruction, @@ -562,15 +547,21 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type return next_location; } -CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph) +CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options) : CodeGenerator(graph, kNumberOfAllocatableRegisters, kNumberOfAllocatableFPRegisters, - kNumberOfAllocatableRegisterPairs), + kNumberOfAllocatableRegisterPairs, + (1 << LR), + 0, + compiler_options), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) {} + move_resolver_(graph->GetArena(), this) { + // Save the link register (containing the return address) to mimic Quick. 
+ AddAllocatedRegister(Location::RegisterLocation(LR)); +} #undef __ #define __ GetVIXLAssembler()-> @@ -604,26 +595,15 @@ void CodeGeneratorARM64::GenerateFrameEntry() { if (do_overflow_check) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireX(); - if (kExplicitStackOverflowCheck) { - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM64(); - AddSlowPath(slow_path); - - __ Ldr(temp, MemOperand(tr, Thread::StackEndOffset<kArm64WordSize>().Int32Value())); - __ Cmp(sp, temp); - __ B(lo, slow_path->GetEntryLabel()); - } else { - __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); - __ Ldr(wzr, MemOperand(temp, 0)); - RecordPcInfo(nullptr, 0); - } + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); + __ Ldr(wzr, MemOperand(temp, 0)); + RecordPcInfo(nullptr, 0); } - CPURegList preserved_regs = GetFramePreservedRegisters(); int frame_size = GetFrameSize(); - core_spill_mask_ |= preserved_regs.list(); - __ Str(w0, MemOperand(sp, -frame_size, PreIndex)); - __ PokeCPURegList(preserved_regs, frame_size - preserved_regs.TotalSizeInBytes()); + __ PokeCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize()); // Stack layout: // sp[frame_size - 8] : lr. @@ -635,8 +615,7 @@ void CodeGeneratorARM64::GenerateFrameEntry() { void CodeGeneratorARM64::GenerateFrameExit() { int frame_size = GetFrameSize(); - CPURegList preserved_regs = GetFramePreservedRegisters(); - __ PeekCPURegList(preserved_regs, frame_size - preserved_regs.TotalSizeInBytes()); + __ PeekCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize()); __ Drop(frame_size); } @@ -687,10 +666,6 @@ void CodeGeneratorARM64::Move(HInstruction* instruction, } } -size_t CodeGeneratorARM64::FrameEntrySpillSize() const { - return GetFramePreservedRegistersSize(); -} - Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const { Primitive::Type type = load->GetType(); @@ -728,7 +703,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value) { __ Bind(&done); } -void CodeGeneratorARM64::SetupBlockedRegisters() const { +void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const { // Block reserved registers: // ip0 (VIXL temporary) // ip1 (VIXL temporary) @@ -996,11 +971,12 @@ void CodeGeneratorARM64::Load(Primitive::Type type, } } -void CodeGeneratorARM64::LoadAcquire(Primitive::Type type, +void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, CPURegister dst, const MemOperand& src) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp_base = temps.AcquireX(); + Primitive::Type type = instruction->GetType(); DCHECK(!src.IsRegisterOffset()); DCHECK(!src.IsPreIndex()); @@ -1012,16 +988,20 @@ void CodeGeneratorARM64::LoadAcquire(Primitive::Type type, switch (type) { case Primitive::kPrimBoolean: __ Ldarb(Register(dst), base); + MaybeRecordImplicitNullCheck(instruction); break; case Primitive::kPrimByte: __ Ldarb(Register(dst), base); + MaybeRecordImplicitNullCheck(instruction); __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); break; case Primitive::kPrimChar: __ Ldarh(Register(dst), base); + MaybeRecordImplicitNullCheck(instruction); break; case Primitive::kPrimShort: __ Ldarh(Register(dst), base); + MaybeRecordImplicitNullCheck(instruction); __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * 
kBitsPerByte); break; case Primitive::kPrimInt: @@ -1029,6 +1009,7 @@ void CodeGeneratorARM64::LoadAcquire(Primitive::Type type, case Primitive::kPrimLong: DCHECK_EQ(dst.Is64Bits(), Is64BitType(type)); __ Ldar(Register(dst), base); + MaybeRecordImplicitNullCheck(instruction); break; case Primitive::kPrimFloat: case Primitive::kPrimDouble: { @@ -1037,6 +1018,7 @@ void CodeGeneratorARM64::LoadAcquire(Primitive::Type type, Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); __ Ldar(temp, base); + MaybeRecordImplicitNullCheck(instruction); __ Fmov(FPRegister(dst), temp); break; } @@ -1398,6 +1380,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } codegen_->Load(type, OutputCPURegister(instruction), source); + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { @@ -1409,6 +1392,7 @@ void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) { __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset())); + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { @@ -1453,6 +1437,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } codegen_->Store(value_type, value, destination); + codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -1815,14 +1800,17 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* ins if (instruction->IsVolatile()) { if (kUseAcquireRelease) { - codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field); + // NB: LoadAcquire will record the pc info if needed. + codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field); } else { codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + codegen_->MaybeRecordImplicitNullCheck(instruction); // For IRIW sequential consistency kLoadAny is not sufficient. GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } else { codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -1842,13 +1830,16 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins if (instruction->IsVolatile()) { if (kUseAcquireRelease) { codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); } else { GenerateMemoryBarrier(MemBarrierKind::kAnyStore); codegen_->Store(field_type, value, HeapOperand(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } else { codegen_->Store(field_type, value, HeapOperand(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); } if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { @@ -1862,7 +1853,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), true); // The output does overlap inputs. + // The output does overlap inputs. 
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { @@ -1952,6 +1944,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok } else { __ Ldr(temp, HeapOperandFrom(receiver, class_offset)); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetImtEntryAt(method_offset); __ Ldr(temp, HeapOperand(temp, method_offset)); // lr = temp->GetEntryPoint(); @@ -2017,6 +2010,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { DCHECK(receiver.IsRegister()); __ Ldr(temp, HeapOperandFrom(receiver, class_offset)); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ Ldr(temp, HeapOperand(temp, method_offset)); // lr = temp->GetEntryPoint(); @@ -2268,10 +2262,6 @@ void LocationsBuilderARM64::VisitNot(HNot* instruction) { void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) { switch (instruction->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), Operand(1)); - break; - case Primitive::kPrimInt: case Primitive::kPrimLong: __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0)); @@ -2291,18 +2281,31 @@ void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) { } } -void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) { +void InstructionCodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (codegen_->CanMoveNullCheckToUser(instruction)) { + return; + } + Location obj = instruction->GetLocations()->InAt(0); + + __ Ldr(wzr, HeapOperandFrom(obj, Offset(0))); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void InstructionCodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) { SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); Location obj = locations->InAt(0); - if (obj.IsRegister()) { - __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel()); + + __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel()); +} + +void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) { + if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) { + GenerateImplicitNullCheck(instruction); } else { - DCHECK(obj.IsConstant()) << obj; - DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0); - __ B(slow_path->GetEntryLabel()); + GenerateExplicitNullCheck(instruction); } } @@ -2496,7 +2499,8 @@ void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruc if (instruction->IsVolatile()) { if (kUseAcquireRelease) { - codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field); + // NB: LoadAcquire will record the pc info if needed. + codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field); } else { codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); // For IRIW sequential consistency kLoadAny is not sufficient. 
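The arm64 hunks above implement implicit null checking: an HNullCheck either folds into its user's first heap access (CanMoveNullCheckToUser) or degenerates into a faulting load of [obj + 0], and in both cases the native pc of the potentially faulting instruction is recorded against the check's dex pc so the fault handler can turn the SIGSEGV into a NullPointerException. A minimal, self-contained model of that bookkeeping (hypothetical names, not ART code):

#include <cstdint>
#include <map>

// Stands in for the stack-map side table that RecordPcInfo() fills in.
struct StackMapTable {
  std::map<uintptr_t, uint32_t> native_pc_to_dex_pc;
};

// Called right after emitting a heap access whose base register may be
// null. 'prev_is_null_check' models "the previous non-move instruction
// is the HNullCheck guarding this access", which is what lets the
// access itself double as the check.
void MaybeRecordImplicitNullCheck(StackMapTable& maps,
                                  uintptr_t access_pc,
                                  bool prev_is_null_check,
                                  uint32_t null_check_dex_pc) {
  if (prev_is_null_check) {
    maps.native_pc_to_dex_pc[access_pc] = null_check_dex_pc;
  }
}

This is why the diff calls MaybeRecordImplicitNullCheck immediately after the first load or store of every field, array, and invoke sequence: only the first access can stand in for the check, and the recorded pc must be exactly the instruction that can fault.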
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 590bc1d778..96013e55c6 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -19,6 +19,7 @@ #include "code_generator.h" #include "dex/compiler_enums.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm64/assembler_arm64.h" @@ -113,6 +114,8 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); void HandleShift(HBinaryOperation* instr); + void GenerateImplicitNullCheck(HNullCheck* instruction); + void GenerateExplicitNullCheck(HNullCheck* instruction); Arm64Assembler* const assembler_; CodeGeneratorARM64* const codegen_; @@ -164,7 +167,7 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver { class CodeGeneratorARM64 : public CodeGenerator { public: - explicit CodeGeneratorARM64(HGraph* graph); + CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options); virtual ~CodeGeneratorARM64() {} void GenerateFrameEntry() OVERRIDE; @@ -175,9 +178,6 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, vixl::lr.Bit()); return frame_preserved_regs; } - static int GetFramePreservedRegistersSize() { - return GetFramePreservedRegisters().TotalSizeInBytes(); - } void Bind(HBasicBlock* block) OVERRIDE; @@ -202,8 +202,6 @@ class CodeGeneratorARM64 : public CodeGenerator { return block_entry_label->location(); } - size_t FrameEntrySpillSize() const OVERRIDE; - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; } @@ -214,7 +212,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // Register allocation. - void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; // AllocateFreeRegister() is only used when allocating registers locally // during CompileBaseline(). Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; @@ -264,7 +262,7 @@ class CodeGeneratorARM64 : public CodeGenerator { void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); void LoadCurrentMethod(vixl::Register current_method); - void LoadAcquire(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); + void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src); void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); // Generate code to invoke a runtime entry point. 
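LoadAcquire now takes the HInstruction instead of just the type so that pc info can be recorded directly after the ldar/ldarb/ldarh itself; the follow-up Sbfx that sign-extends a byte or short load cannot fault, so recording after the pair would map the wrong pc. The kUseAcquireRelease preference these paths implement is the standard trade-off below, shown in plain C++ (not ART code):

#include <atomic>

int load_acquire(const std::atomic<int>& field) {
  // One instruction on arm64 (ldar): the single load is also the
  // single faulting pc, which pairs cleanly with implicit null checks.
  return field.load(std::memory_order_acquire);
}

int load_then_barrier(const std::atomic<int>& field) {
  // Plain ldr followed by an explicit barrier (roughly dmb ishld);
  // this is the shape taken when kUseAcquireRelease is false.
  int value = field.load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_acquire);
  return value;
}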
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index bdd0979cae..c0fdcaa8aa 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -31,9 +31,6 @@ namespace art { namespace x86 { -static constexpr bool kExplicitStackOverflowCheck = false; - -static constexpr int kNumberOfPushedRegistersAtEntry = 1; static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX }; @@ -42,8 +39,11 @@ static constexpr size_t kRuntimeParameterCoreRegistersLength = static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { }; static constexpr size_t kRuntimeParameterFpuRegistersLength = 0; +static constexpr int kC2ConditionMask = 0x400; + // Marker for places that can be updated once we don't follow the quick ABI. static constexpr bool kFollowsQuickABI = true; +static constexpr int kFakeReturnRegister = Register(8); class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> { public: @@ -123,21 +123,6 @@ class DivRemMinusOneSlowPathX86 : public SlowPathCodeX86 { DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86); }; -class StackOverflowCheckSlowPathX86 : public SlowPathCodeX86 { - public: - StackOverflowCheckSlowPathX86() {} - - virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - __ Bind(GetEntryLabel()); - __ addl(ESP, - Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize)); - __ fs()->jmp(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowStackOverflow))); - } - - private: - DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86); -}; - class BoundsCheckSlowPathX86 : public SlowPathCodeX86 { public: BoundsCheckSlowPathX86(HBoundsCheck* instruction, @@ -373,15 +358,15 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id return kX86WordSize; } -CodeGeneratorX86::CodeGeneratorX86(HGraph* graph) - : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, kNumberOfRegisterPairs), +CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options) + : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, + kNumberOfRegisterPairs, (1 << kFakeReturnRegister), 0, compiler_options), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) {} - -size_t CodeGeneratorX86::FrameEntrySpillSize() const { - return kNumberOfPushedRegistersAtEntry * kX86WordSize; + move_resolver_(graph->GetArena(), this) { + // Use a fake return address register to mimic Quick. + AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const { @@ -430,7 +415,7 @@ Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const { return Location(); } -void CodeGeneratorX86::SetupBlockedRegisters() const { +void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const { // Don't allocate the dalvik style register pair passing. blocked_register_pairs_[ECX_EDX] = true; @@ -463,33 +448,21 @@ InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGene codegen_(codegen) {} void CodeGeneratorX86::GenerateFrameEntry() { - // Create a fake register to mimic Quick. 
- static const int kFakeReturnRegister = 8; - core_spill_mask_ |= (1 << kFakeReturnRegister); - bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); - if (!skip_overflow_check && !kExplicitStackOverflowCheck) { + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + + if (!skip_overflow_check) { __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86)))); RecordPcInfo(nullptr, 0); } - // The return PC has already been pushed on the stack. - __ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize)); - - if (!skip_overflow_check && kExplicitStackOverflowCheck) { - SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86(); - AddSlowPath(slow_path); - - __ fs()->cmpl(ESP, Address::Absolute(Thread::StackEndOffset<kX86WordSize>())); - __ j(kLess, slow_path->GetEntryLabel()); - } - + __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); } void CodeGeneratorX86::GenerateFrameExit() { - __ addl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize)); + __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); } void CodeGeneratorX86::Bind(HBasicBlock* block) { @@ -1199,6 +1172,7 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { } else { __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -1235,6 +1209,7 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) } else { __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetImtEntryAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -2075,6 +2050,81 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } } +void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float) { + if (source.IsStackSlot()) { + DCHECK(is_float); + __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else if (source.IsDoubleStackSlot()) { + DCHECK(!is_float); + __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else { + // Write the value to the temporary location on the stack and load to FP stack. + if (is_float) { + Location stack_temp = Location::StackSlot(temp_offset); + codegen_->Move32(stack_temp, source); + __ flds(Address(ESP, temp_offset)); + } else { + Location stack_temp = Location::DoubleStackSlot(temp_offset); + codegen_->Move64(stack_temp, source); + __ fldl(Address(ESP, temp_offset)); + } + } +} + +void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { + Primitive::Type type = rem->GetResultType(); + bool is_float = type == Primitive::kPrimFloat; + size_t elem_size = Primitive::ComponentSize(type); + LocationSummary* locations = rem->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location out = locations->Out(); + + // Create stack space for 2 elements. + // TODO: enhance register allocator to ask for stack temporaries. + __ subl(ESP, Immediate(2 * elem_size)); + + // Load the values to the FP stack in reverse order, using temporaries if needed. 
+ PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); + PushOntoFPStack(first, 0, 2 * elem_size, is_float); + + // Loop doing FPREM until we stabilize. + Label retry; + __ Bind(&retry); + __ fprem(); + + // Move FP status to AX. + __ fstsw(); + + // And see if the argument reduction is complete. This is signaled by the + // C2 FPU flag bit set to 0. + __ andl(EAX, Immediate(kC2ConditionMask)); + __ j(kNotEqual, &retry); + + // We have settled on the final value. Retrieve it into an XMM register. + // Store FP top of stack to real stack. + if (is_float) { + __ fsts(Address(ESP, 0)); + } else { + __ fstl(Address(ESP, 0)); + } + + // Pop the 2 items from the FP stack. + __ fucompp(); + + // Load the value from the stack into an XMM register. + DCHECK(out.IsFpuRegister()) << out; + if (is_float) { + __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); + } else { + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); + } + + // And remove the temporary stack space we allocated. + __ addl(ESP, Immediate(2 * elem_size)); +} + void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); @@ -2208,10 +2258,8 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { void LocationsBuilderX86::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); - LocationSummary::CallKind call_kind = type == Primitive::kPrimInt - ? LocationSummary::kNoCall - : LocationSummary::kCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); switch (type) { case Primitive::kPrimInt: { @@ -2230,24 +2278,12 @@ void LocationsBuilderX86::VisitRem(HRem* rem) { locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); break; } + case Primitive::kPrimDouble: case Primitive::kPrimFloat: { - InvokeRuntimeCallingConvention calling_convention; - // x86 floating-point parameters are passed through core registers (EAX, ECX). - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - // The runtime helper puts the result in XMM0. - locations->SetOut(Location::FpuRegisterLocation(XMM0)); - break; - } - case Primitive::kPrimDouble: { - InvokeRuntimeCallingConvention calling_convention; - // x86 floating-point parameters are passed through core registers (EAX_ECX, EDX_EBX). - locations->SetInAt(0, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); - locations->SetInAt(1, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - // The runtime helper puts the result in XMM0. 
- locations->SetOut(Location::FpuRegisterLocation(XMM0)); + locations->SetInAt(0, Location::Any()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RegisterLocation(EAX)); break; } @@ -2264,14 +2300,9 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { GenerateDivRemIntegral(rem); break; } - case Primitive::kPrimFloat: { - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmodf))); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); - break; - } + case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmod))); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); + GenerateRemFP(rem); break; } default: @@ -2549,10 +2580,6 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* not_) { Location out = locations->Out(); DCHECK(in.Equals(out)); switch (not_->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - __ xorl(out.AsRegister<Register>(), Immediate(1)); - break; - case Primitive::kPrimInt: __ notl(out.AsRegister<Register>()); break; @@ -2749,11 +2776,13 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, if (is_volatile) { XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ movsd(temp, Address(base, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movd(out.AsRegisterPairLow<Register>(), temp); __ psrlq(temp, Immediate(32)); __ movd(out.AsRegisterPairHigh<Register>(), temp); } else { __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset)); } break; @@ -2774,6 +2803,11 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } + // Longs are handled in the switch. + if (field_type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + if (is_volatile) { GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -2845,12 +2879,6 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimInt: case Primitive::kPrimNot: { __ movl(Address(base, offset), value.AsRegister<Register>()); - - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>()); - } break; } @@ -2862,8 +2890,10 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, __ movd(temp2, value.AsRegisterPairHigh<Register>()); __ punpckldq(temp1, temp2); __ movsd(Address(base, offset), temp1); + codegen_->MaybeRecordImplicitNullCheck(instruction); } else { __ movl(Address(base, offset), value.AsRegisterPairLow<Register>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); } break; @@ -2884,6 +2914,17 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } + // Longs are handled in the switch. 
+ if (field_type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>()); + } + if (is_volatile) { GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } @@ -2924,13 +2965,27 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instr void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::Any()); + Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks() + ? Location::RequiresRegister() + : Location::Any(); + locations->SetInAt(0, loc); if (instruction->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } } -void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { +void InstructionCodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (codegen_->CanMoveNullCheckToUser(instruction)) { + return; + } + LocationSummary* locations = instruction->GetLocations(); + Location obj = locations->InAt(0); + + __ testl(EAX, Address(obj.AsRegister<Register>(), 0)); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void InstructionCodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) { SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); @@ -2950,6 +3005,14 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { __ j(kEqual, slow_path->GetEntryLabel()); } +void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { + if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) { + GenerateImplicitNullCheck(instruction); + } else { + GenerateExplicitNullCheck(instruction); + } +} + void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -2963,7 +3026,8 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); - switch (instruction->GetType()) { + Primitive::Type type = instruction->GetType(); + switch (type) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); @@ -3031,10 +3095,12 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; __ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize)); } else { __ movl(out.AsRegisterPairLow<Register>(), Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize)); } @@ -3043,12 +3109,16 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) 
{ case Primitive::kPrimFloat: case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); + LOG(FATAL) << "Unimplemented register type " << type; UNREACHABLE(); case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } + + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { @@ -3125,6 +3195,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -3148,6 +3219,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -3176,6 +3248,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } + codegen_->MaybeRecordImplicitNullCheck(instruction); if (needs_write_barrier) { Register temp = locations->GetTemp(0).AsRegister<Register>(); @@ -3197,17 +3270,20 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; if (value.IsRegisterPair()) { __ movl(Address(obj, offset), value.AsRegisterPairLow<Register>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(obj, offset + kX86WordSize), value.AsRegisterPairHigh<Register>()); } else { DCHECK(value.IsConstant()); int64_t val = value.GetConstant()->AsLongConstant()->GetValue(); __ movl(Address(obj, offset), Immediate(Low32Bits(val))); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(obj, offset + kX86WordSize), Immediate(High32Bits(val))); } } else { if (value.IsRegisterPair()) { __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset), value.AsRegisterPairLow<Register>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), value.AsRegisterPairHigh<Register>()); } else { @@ -3215,6 +3291,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { int64_t val = value.GetConstant()->AsLongConstant()->GetValue(); __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset), Immediate(Low32Bits(val))); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), Immediate(High32Bits(val))); } @@ -3245,6 +3322,7 @@ void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) { Register obj = locations->InAt(0).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); __ movl(out, Address(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 2d8adb2cf1..73b647c1c4 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -19,6 +19,7 @@ #include "code_generator.h" #include "dex/compiler_enums.h" +#include "driver/compiler_options.h" #include "nodes.h" #include 
"parallel_move_resolver.h" #include "utils/x86/assembler_x86.h" @@ -136,6 +137,7 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateRemFP(HRem *rem); void HandleShift(HBinaryOperation* instruction); void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); @@ -143,6 +145,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float); + + void GenerateImplicitNullCheck(HNullCheck* instruction); + void GenerateExplicitNullCheck(HNullCheck* instruction); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -152,7 +159,7 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { class CodeGeneratorX86 : public CodeGenerator { public: - explicit CodeGeneratorX86(HGraph* graph); + CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86() {} void GenerateFrameEntry() OVERRIDE; @@ -171,8 +178,6 @@ class CodeGeneratorX86 : public CodeGenerator { return 2 * kX86WordSize; } - size_t FrameEntrySpillSize() const OVERRIDE; - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } @@ -189,7 +194,7 @@ class CodeGeneratorX86 : public CodeGenerator { return GetLabelOf(block)->Position(); } - void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 3d7f122d36..6bc28ff247 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -34,12 +34,9 @@ namespace art { namespace x86_64 { -static constexpr bool kExplicitStackOverflowCheck = false; - // Some x86_64 instructions require a register to be available as temp. 
static constexpr Register TMP = R11; -static constexpr int kNumberOfPushedRegistersAtEntry = 1; static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX }; @@ -48,6 +45,10 @@ static constexpr size_t kRuntimeParameterCoreRegistersLength = static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 }; +static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 }; + +static constexpr int kC2ConditionMask = 0x400; class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { public: @@ -127,22 +128,6 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 { DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64); }; -class StackOverflowCheckSlowPathX86_64 : public SlowPathCodeX86_64 { - public: - StackOverflowCheckSlowPathX86_64() {} - - virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - __ Bind(GetEntryLabel()); - __ addq(CpuRegister(RSP), - Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize)); - __ gs()->jmp( - Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowStackOverflow), true)); - } - - private: - DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86_64); -}; - class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 { public: explicit SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor) @@ -416,15 +401,25 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin return kX86_64WordSize; } -CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph) - : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, 0), +static constexpr int kNumberOfCpuRegisterPairs = 0; +// Use a fake return address register to mimic Quick. +static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); +CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options) + : CodeGenerator(graph, + kNumberOfCpuRegisters, + kNumberOfFloatRegisters, + kNumberOfCpuRegisterPairs, + ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), + arraysize(kCoreCalleeSaves)) + | (1 << kFakeReturnRegister), + ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), + arraysize(kFpuCalleeSaves)), + compiler_options), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) {} - -size_t CodeGeneratorX86_64::FrameEntrySpillSize() const { - return kNumberOfPushedRegistersAtEntry * kX86_64WordSize; + move_resolver_(graph->GetArena(), this) { + AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, @@ -459,60 +454,73 @@ Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const { return Location(); } -void CodeGeneratorX86_64::SetupBlockedRegisters() const { +void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const { // Stack register is always reserved. blocked_core_registers_[RSP] = true; // Block the register used as TMP. blocked_core_registers_[TMP] = true; - // TODO: We currently don't use Quick's callee saved registers. 
- blocked_core_registers_[RBX] = true; - blocked_core_registers_[RBP] = true; - blocked_core_registers_[R12] = true; - blocked_core_registers_[R13] = true; - blocked_core_registers_[R14] = true; - blocked_core_registers_[R15] = true; - - blocked_fpu_registers_[XMM12] = true; - blocked_fpu_registers_[XMM13] = true; - blocked_fpu_registers_[XMM14] = true; - blocked_fpu_registers_[XMM15] = true; + if (is_baseline) { + for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { + blocked_core_registers_[kCoreCalleeSaves[i]] = true; + } + for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { + blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; + } + } } void CodeGeneratorX86_64::GenerateFrameEntry() { - // Create a fake register to mimic Quick. - static const int kFakeReturnRegister = 16; - core_spill_mask_ |= (1 << kFakeReturnRegister); - bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - if (!skip_overflow_check && !kExplicitStackOverflowCheck) { + if (!skip_overflow_check) { __ testq(CpuRegister(RAX), Address( CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64)))); RecordPcInfo(nullptr, 0); } - // The return PC has already been pushed on the stack. - __ subq(CpuRegister(RSP), - Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize)); + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { + Register reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + __ pushq(CpuRegister(reg)); + } + } - if (!skip_overflow_check && kExplicitStackOverflowCheck) { - SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64(); - AddSlowPath(slow_path); + __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize())); + uint32_t xmm_spill_location = GetFpuSpillStart(); + size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); - __ gs()->cmpq(CpuRegister(RSP), - Address::Absolute(Thread::StackEndOffset<kX86_64WordSize>(), true)); - __ j(kLess, slow_path->GetEntryLabel()); + for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { + if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { + __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)), + XmmRegister(kFpuCalleeSaves[i])); + } } __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI)); } void CodeGeneratorX86_64::GenerateFrameExit() { - __ addq(CpuRegister(RSP), - Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize)); + uint32_t xmm_spill_location = GetFpuSpillStart(); + size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); + for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { + if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { + __ movsd(XmmRegister(kFpuCalleeSaves[i]), + Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i))); + } + } + + __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize())); + + for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { + Register reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + __ popq(CpuRegister(reg)); + } + } } void CodeGeneratorX86_64::Bind(HBasicBlock* block) { @@ -584,8 +592,18 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { } else if (source.IsFpuRegister()) { __ movss(Address(CpuRegister(RSP), 
destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + int32_t value; + if (constant->IsFloatConstant()) { + value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); + } else { + DCHECK(constant->IsIntConstant()); + value = constant->AsIntConstant()->GetValue(); + } + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); + } else { - DCHECK(source.IsStackSlot()); + DCHECK(source.IsStackSlot()) << source; __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } @@ -597,6 +615,17 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { } else if (source.IsFpuRegister()) { __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + int64_t value; + if (constant->IsDoubleConstant()) { + value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()); + } else { + DCHECK(constant->IsLongConstant()); + value = constant->AsLongConstant()->GetValue(); + } + __ movq(CpuRegister(TMP), Immediate(value)); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } else { DCHECK(source.IsDoubleStackSlot()); __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); @@ -1222,6 +1251,7 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) } else { __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -1258,6 +1288,7 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo } else { __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetImtEntryAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -2001,6 +2032,81 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { } } +void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float) { + if (source.IsStackSlot()) { + DCHECK(is_float); + __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); + } else if (source.IsDoubleStackSlot()) { + DCHECK(!is_float); + __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); + } else { + // Write the value to the temporary location on the stack and load to FP stack.
+ if (is_float) { + Location stack_temp = Location::StackSlot(temp_offset); + codegen_->Move(stack_temp, source); + __ flds(Address(CpuRegister(RSP), temp_offset)); + } else { + Location stack_temp = Location::DoubleStackSlot(temp_offset); + codegen_->Move(stack_temp, source); + __ fldl(Address(CpuRegister(RSP), temp_offset)); + } + } +} + +void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { + Primitive::Type type = rem->GetResultType(); + bool is_float = type == Primitive::kPrimFloat; + size_t elem_size = Primitive::ComponentSize(type); + LocationSummary* locations = rem->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location out = locations->Out(); + + // Create stack space for 2 elements. + // TODO: enhance register allocator to ask for stack temporaries. + __ subq(CpuRegister(RSP), Immediate(2 * elem_size)); + + // Load the values to the FP stack in reverse order, using temporaries if needed. + PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); + PushOntoFPStack(first, 0, 2 * elem_size, is_float); + + // Loop doing FPREM until we stabilize. + Label retry; + __ Bind(&retry); + __ fprem(); + + // Move FP status to AX. + __ fstsw(); + + // And see if the argument reduction is complete. This is signaled by the + // C2 FPU flag bit set to 0. + __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask)); + __ j(kNotEqual, &retry); + + // We have settled on the final value. Retrieve it into an XMM register. + // Store FP top of stack to real stack. + if (is_float) { + __ fsts(Address(CpuRegister(RSP), 0)); + } else { + __ fstl(Address(CpuRegister(RSP), 0)); + } + + // Pop the 2 items from the FP stack. + __ fucompp(); + + // Load the value from the stack into an XMM register. + DCHECK(out.IsFpuRegister()) << out; + if (is_float) { + __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0)); + } else { + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0)); + } + + // And remove the temporary stack space we allocated. + __ addq(CpuRegister(RSP), Immediate(2 * elem_size)); +} + void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); @@ -2100,11 +2206,8 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { void LocationsBuilderX86_64::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); - LocationSummary::CallKind call_kind = - (type == Primitive::kPrimInt) || (type == Primitive::kPrimLong) - ? LocationSummary::kNoCall - : LocationSummary::kCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); switch (type) { case Primitive::kPrimInt: @@ -2118,11 +2221,10 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - // The runtime helper puts the result in XMM0. 
- locations->SetOut(Location::FpuRegisterLocation(XMM0)); + locations->SetInAt(0, Location::Any()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RegisterLocation(RAX)); break; } @@ -2139,14 +2241,9 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { GenerateDivRemIntegral(rem); break; } - case Primitive::kPrimFloat: { - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmodf), true)); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); - break; - } + case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmod), true)); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); + GenerateRemFP(rem); break; } default: @@ -2381,10 +2478,6 @@ void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) { locations->Out().AsRegister<CpuRegister>().AsRegister()); Location out = locations->Out(); switch (not_->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - __ xorq(out.AsRegister<CpuRegister>(), Immediate(1)); - break; - case Primitive::kPrimInt: __ notl(out.AsRegister<CpuRegister>()); break; @@ -2501,6 +2594,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } + codegen_->MaybeRecordImplicitNullCheck(instruction); + if (is_volatile) { GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -2555,11 +2650,6 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimInt: case Primitive::kPrimNot: { __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>()); - } break; } @@ -2583,6 +2673,14 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } + codegen_->MaybeRecordImplicitNullCheck(instruction); + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>()); + } + if (is_volatile) { GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } @@ -2623,13 +2721,27 @@ void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instru void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::Any()); + Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks() + ? 
Location::RequiresRegister() + : Location::Any(); + locations->SetInAt(0, loc); if (instruction->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } } -void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { +void InstructionCodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (codegen_->CanMoveNullCheckToUser(instruction)) { + return; + } + LocationSummary* locations = instruction->GetLocations(); + Location obj = locations->InAt(0); + + __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0)); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void InstructionCodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) { SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction); codegen_->AddSlowPath(slow_path); @@ -2649,6 +2761,14 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { __ j(kEqual, slow_path->GetEntryLabel()); } +void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { + if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) { + GenerateImplicitNullCheck(instruction); + } else { + GenerateExplicitNullCheck(instruction); + } +} + void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -2766,6 +2886,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { @@ -2834,6 +2955,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -2860,6 +2982,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -2888,7 +3011,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } - + codegen_->MaybeRecordImplicitNullCheck(instruction); if (needs_write_barrier) { DCHECK_EQ(value_type, Primitive::kPrimNot); CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); @@ -2916,6 +3039,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), value.AsRegister<CpuRegister>()); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -2930,6 +3054,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movss(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), value.AsFpuRegister<XmmRegister>()); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -2944,6 +3069,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movsd(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), value.AsFpuRegister<XmmRegister>()); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -2966,6 +3092,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) CpuRegister obj = 
locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movl(out, Address(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index c501568a89..1ac2ab76a4 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -19,6 +19,7 @@ #include "code_generator.h" #include "dex/compiler_enums.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86_64/assembler_x86_64.h" @@ -154,11 +155,16 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); + void GenerateRemFP(HRem *rem); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleShift(HBinaryOperation* operation); void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateImplicitNullCheck(HNullCheck* instruction); + void GenerateExplicitNullCheck(HNullCheck* instruction); + void PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float); X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; @@ -168,7 +174,7 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { class CodeGeneratorX86_64 : public CodeGenerator { public: - explicit CodeGeneratorX86_64(HGraph* graph); + CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86_64() {} void GenerateFrameEntry() OVERRIDE; @@ -188,8 +194,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { return kX86_64WordSize; } - size_t FrameEntrySpillSize() const OVERRIDE; - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } @@ -212,7 +216,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location GetStackLocation(HLoadLocal* load) const OVERRIDE; - void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 18722f732b..dfa4748811 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -27,6 +27,7 @@ #include "common_compiler_test.h" #include "dex_file.h" #include "dex_instruction.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" @@ -38,6 +39,31 @@ namespace art { +// Provide our own codegen, that ensures the C calling conventions +// are preserved. Currently, ART and C do not match as R4 is caller-save +// in ART, and callee-save in C. Alternatively, we could use or write +// the stub that saves and restores all registers, but it is easier +// to just overwrite the code generator. 
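The null-check hunks above implement a two-mode scheme, and the locations builder pins the input to a register in implicit mode because the address must be formed from it. A sketch of the contrast (the implicit lines are taken from GenerateImplicitNullCheck() above; the explicit encoding is an assumed paraphrase of the pre-existing slow-path form):

    // Explicit: compare and branch to NullCheckSlowPathX86_64.
    //   testl obj, obj
    //   je    null_check_slow_path
    //
    // Implicit: let a memory access do the checking. A null `obj` makes this
    // load fault with SIGSEGV, and the stack map written by RecordPcInfo()
    // lets the runtime's fault handler turn the fault into a
    // NullPointerException.
    __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());

CanMoveNullCheckToUser() goes one step further and emits nothing at all when a dominated user already dereferences the object, which is why MaybeRecordImplicitNullCheck() now follows the first load or store in the field, array, and ArrayLength visitors above. Separately, the test harness below overrides the ARM code generator to deal with the R4 calling-convention mismatch just described: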
+class TestCodeGeneratorARM : public arm::CodeGeneratorARM { + public: + TestCodeGeneratorARM(HGraph* graph, + const ArmInstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) + : arm::CodeGeneratorARM(graph, isa_features, compiler_options) { + AddAllocatedRegister(Location::RegisterLocation(6)); + AddAllocatedRegister(Location::RegisterLocation(7)); + } + + void SetupBlockedRegisters(bool is_baseline) const OVERRIDE { + arm::CodeGeneratorARM::SetupBlockedRegisters(is_baseline); + blocked_core_registers_[4] = true; + blocked_core_registers_[6] = false; + blocked_core_registers_[7] = false; + // Makes pair R6-R7 available. + blocked_register_pairs_[6 >> 1] = false; + } +}; + class InternalCodeAllocator : public CodeAllocator { public: InternalCodeAllocator() : size_(0) { } @@ -80,7 +106,8 @@ template <typename Expected> static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { InternalCodeAllocator allocator; - x86::CodeGeneratorX86 codegenX86(graph); + CompilerOptions compiler_options; + x86::CodeGeneratorX86 codegenX86(graph, compiler_options); // We avoid doing a stack overflow check that requires the runtime being setup, // by making sure the compiler knows the methods we are running are leaf methods. codegenX86.CompileBaseline(&allocator, true); @@ -90,19 +117,19 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { std::unique_ptr<const ArmInstructionSetFeatures> features( ArmInstructionSetFeatures::FromCppDefines()); - arm::CodeGeneratorARM codegenARM(graph, features.get()); + TestCodeGeneratorARM codegenARM(graph, *features.get(), compiler_options); codegenARM.CompileBaseline(&allocator, true); if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { Run(allocator, codegenARM, has_result, expected); } - x86_64::CodeGeneratorX86_64 codegenX86_64(graph); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); codegenX86_64.CompileBaseline(&allocator, true); if (kRuntimeISA == kX86_64) { Run(allocator, codegenX86_64, has_result, expected); } - arm64::CodeGeneratorARM64 codegenARM64(graph); + arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options); codegenARM64.CompileBaseline(&allocator, true); if (kRuntimeISA == kArm64) { Run(allocator, codegenARM64, has_result, expected); @@ -132,17 +159,20 @@ static void RunCodeOptimized(HGraph* graph, std::function<void(HGraph*)> hook_before_codegen, bool has_result, Expected expected) { + CompilerOptions compiler_options; if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { - arm::CodeGeneratorARM codegenARM(graph, ArmInstructionSetFeatures::FromCppDefines()); + TestCodeGeneratorARM codegenARM(graph, + *ArmInstructionSetFeatures::FromCppDefines(), + compiler_options); RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kArm64) { - arm64::CodeGeneratorARM64 codegenARM64(graph); + arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options); RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86) { - x86::CodeGeneratorX86 codegenX86(graph); + x86::CodeGeneratorX86 codegenX86(graph, compiler_options); RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86_64) { - x86_64::CodeGeneratorX86_64 codegenX86_64(graph); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); } } diff --git 
a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index ed7e57be7c..6ceccfbf0e 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -19,6 +19,7 @@ #include "code_generator_x86.h" #include "constant_folding.h" #include "dead_code_elimination.h" +#include "driver/compiler_options.h" #include "graph_checker.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" @@ -45,7 +46,7 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(expected_before, actual_before); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); HConstantFolding(graph).Run(); SSAChecker ssa_checker_cf(&allocator, graph); ssa_checker_cf.Run(); diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index 3dbd04e250..a644719622 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -16,6 +16,7 @@ #include "code_generator_x86.h" #include "dead_code_elimination.h" +#include "driver/compiler_options.h" #include "graph_checker.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" @@ -39,7 +40,7 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(actual_before, expected_before); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); HDeadCodeElimination(graph).Run(); SSAChecker ssa_checker(&allocator, graph); ssa_checker.Run(); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index b20d5892eb..35c52690de 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -17,10 +17,10 @@ #include "graph_checker.h" #include <map> -#include <sstream> #include <string> #include "base/bit_vector-inl.h" +#include "base/stringprintf.h" namespace art { @@ -45,15 +45,11 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } } if (p_count_in_block_predecessors != block_count_in_p_successors) { - std::stringstream error; - error << "Block " << block->GetBlockId() - << " lists " << p_count_in_block_predecessors - << " occurrences of block " << p->GetBlockId() - << " in its predecessors, whereas block " << p->GetBlockId() - << " lists " << block_count_in_p_successors - << " occurrences of block " << block->GetBlockId() - << " in its successors."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Block %d lists %zu occurrences of block %d in its predecessors, whereas " + "block %d lists %zu occurrences of block %d in its successors.", + block->GetBlockId(), p_count_in_block_predecessors, p->GetBlockId(), + p->GetBlockId(), block_count_in_p_successors, block->GetBlockId())); } } @@ -75,35 +71,27 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } } if (s_count_in_block_successors != block_count_in_s_predecessors) { - std::stringstream error; - error << "Block " << block->GetBlockId() - << " lists " << s_count_in_block_successors - << " occurrences of block " << s->GetBlockId() - << " in its successors, whereas block " << s->GetBlockId() - << " lists " << block_count_in_s_predecessors - << " occurrences of block " << block->GetBlockId() - << " in its predecessors."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Block %d lists %zu occurrences of block %d in its successors, whereas " + "block %d 
lists %zu occurrences of block %d in its predecessors.", + block->GetBlockId(), s_count_in_block_successors, s->GetBlockId(), + s->GetBlockId(), block_count_in_s_predecessors, block->GetBlockId())); } } // Ensure `block` ends with a branch instruction. HInstruction* last_inst = block->GetLastInstruction(); if (last_inst == nullptr || !last_inst->IsControlFlow()) { - std::stringstream error; - error << "Block " << block->GetBlockId() - << " does not end with a branch instruction."; - errors_.push_back(error.str()); + AddError(StringPrintf("Block %d does not end with a branch instruction.", + block->GetBlockId())); } // Visit this block's list of phis. for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { // Ensure this block's list of phis contains only phis. if (!it.Current()->IsPhi()) { - std::stringstream error; - error << "Block " << current_block_->GetBlockId() - << " has a non-phi in its phi list."; - errors_.push_back(error.str()); + AddError(StringPrintf("Block %d has a non-phi in its phi list.", + current_block_->GetBlockId())); } it.Current()->Accept(this); } @@ -113,10 +101,8 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { it.Advance()) { // Ensure this block's list of instructions does not contains phis. if (it.Current()->IsPhi()) { - std::stringstream error; - error << "Block " << current_block_->GetBlockId() - << " has a phi in its non-phi list."; - errors_.push_back(error.str()); + AddError(StringPrintf("Block %d has a phi in its non-phi list.", + current_block_->GetBlockId())); } it.Current()->Accept(this); } @@ -124,30 +110,24 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { void GraphChecker::VisitInstruction(HInstruction* instruction) { if (seen_ids_.IsBitSet(instruction->GetId())) { - std::stringstream error; - error << "Duplicate id in graph " << instruction->GetId() << "."; - errors_.push_back(error.str()); + AddError(StringPrintf("Instruction id %d is duplicate in graph.", + instruction->GetId())); } else { seen_ids_.SetBit(instruction->GetId()); } // Ensure `instruction` is associated with `current_block_`. - if (instruction->GetBlock() != current_block_) { - std::stringstream error; - if (instruction->IsPhi()) { - error << "Phi "; - } else { - error << "Instruction "; - } - error << instruction->GetId() << " in block " - << current_block_->GetBlockId(); - if (instruction->GetBlock() != nullptr) { - error << " associated with block " - << instruction->GetBlock()->GetBlockId() << "."; - } else { - error << " not associated with any block."; - } - errors_.push_back(error.str()); + if (instruction->GetBlock() == nullptr) { + AddError(StringPrintf("%s %d in block %d not associated with any block.", + instruction->IsPhi() ? "Phi" : "Instruction", + instruction->GetId(), + current_block_->GetBlockId())); + } else if (instruction->GetBlock() != current_block_) { + AddError(StringPrintf("%s %d in block %d associated with block %d.", + instruction->IsPhi() ? "Phi" : "Instruction", + instruction->GetId(), + current_block_->GetBlockId(), + instruction->GetBlock()->GetBlockId())); } // Ensure the inputs of `instruction` are defined in a block of the graph. @@ -158,27 +138,25 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { ? 
input->GetBlock()->GetPhis() : input->GetBlock()->GetInstructions(); if (!list.Contains(input)) { - std::stringstream error; - error << "Input " << input->GetId() - << " of instruction " << instruction->GetId() - << " is not defined in a basic block of the control-flow graph."; - errors_.push_back(error.str()); + AddError(StringPrintf("Input %d of instruction %d is not defined " + "in a basic block of the control-flow graph.", + input->GetId(), + instruction->GetId())); } } // Ensure the uses of `instruction` are defined in a block of the graph. - for (HUseIterator<HInstruction> use_it(instruction->GetUses()); + for (HUseIterator<HInstruction*> use_it(instruction->GetUses()); !use_it.Done(); use_it.Advance()) { HInstruction* use = use_it.Current()->GetUser(); const HInstructionList& list = use->IsPhi() ? use->GetBlock()->GetPhis() : use->GetBlock()->GetInstructions(); if (!list.Contains(use)) { - std::stringstream error; - error << "User " << use->GetId() - << " of instruction " << instruction->GetId() - << " is not defined in a basic block of the control-flow graph."; - errors_.push_back(error.str()); + AddError(StringPrintf("User %d of instruction %d is not defined " + "in a basic block of the control-flow graph.", + use->GetId(), + instruction->GetId())); } } } @@ -193,10 +171,9 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) { for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { HBasicBlock* successor = block->GetSuccessors().Get(j); if (successor->GetPredecessors().Size() > 1) { - std::stringstream error; - error << "Critical edge between blocks " << block->GetBlockId() - << " and " << successor->GetBlockId() << "."; - errors_.push_back(error.str()); + AddError(StringPrintf("Critical edge between blocks %d and %d.", + block->GetBlockId(), + successor->GetBlockId())); } } } @@ -212,47 +189,52 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { // Ensure the pre-header block is first in the list of // predecessors of a loop header. if (!loop_header->IsLoopPreHeaderFirstPredecessor()) { - std::stringstream error; - error << "Loop pre-header is not the first predecessor of the loop header " - << id << "."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Loop pre-header is not the first predecessor of the loop header %d.", + id)); } // Ensure the loop header has only two predecessors and that only the // second one is a back edge. 
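These predecessor checks encode the canonical loop shape the SSA builder establishes; roughly:

    //        pre-header
    //            |            (predecessor 0)
    //            v
    //       loop_header  <-------------+
    //            |                     |  single back edge (predecessor 1)
    //           body ------------------+
    //
    // Exactly two predecessors: the pre-header first, the one back edge second.

The rewritten hunks below keep those rules intact and only switch the reporting over to AddError()/StringPrintf, splitting the back-edge count into separate "no back edge" and "several back edges" messages: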
- if (loop_header->GetPredecessors().Size() < 2) { - std::stringstream error; - error << "Loop header " << id << " has less than two predecessors."; - errors_.push_back(error.str()); - } else if (loop_header->GetPredecessors().Size() > 2) { - std::stringstream error; - error << "Loop header " << id << " has more than two predecessors."; - errors_.push_back(error.str()); + size_t num_preds = loop_header->GetPredecessors().Size(); + if (num_preds < 2) { + AddError(StringPrintf( + "Loop header %d has less than two predecessors: %zu.", + id, + num_preds)); + } else if (num_preds > 2) { + AddError(StringPrintf( + "Loop header %d has more than two predecessors: %zu.", + id, + num_preds)); } else { HLoopInformation* loop_information = loop_header->GetLoopInformation(); HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0); if (loop_information->IsBackEdge(first_predecessor)) { - std::stringstream error; - error << "First predecessor of loop header " << id << " is a back edge."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "First predecessor of loop header %d is a back edge.", + id)); } HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1); if (!loop_information->IsBackEdge(second_predecessor)) { - std::stringstream error; - error << "Second predecessor of loop header " << id - << " is not a back edge."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Second predecessor of loop header %d is not a back edge.", + id)); } } // Ensure there is only one back edge per loop. size_t num_back_edges = loop_header->GetLoopInformation()->GetBackEdges().Size(); - if (num_back_edges != 1) { - std::stringstream error; - error << "Loop defined by header " << id << " has " - << num_back_edges << " back edge(s)."; - errors_.push_back(error.str()); + if (num_back_edges == 0) { + AddError(StringPrintf( + "Loop defined by header %d has no back edge.", + id)); + } else if (num_back_edges > 1) { + AddError(StringPrintf( + "Loop defined by header %d has several back edges: %zu.", + id, + num_back_edges)); } // Ensure all blocks in the loop are dominated by the loop header. @@ -261,10 +243,9 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { for (uint32_t i : loop_blocks.Indexes()) { HBasicBlock* loop_block = GetGraph()->GetBlocks().Get(i); if (!loop_header->Dominates(loop_block)) { - std::stringstream error; - error << "Loop block " << loop_block->GetBlockId() - << " not dominated by loop header " << id; - errors_.push_back(error.str()); + AddError(StringPrintf("Loop block %d not dominated by loop header %d.", + loop_block->GetBlockId(), + id)); } } } @@ -273,16 +254,14 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { super_type::VisitInstruction(instruction); // Ensure an instruction dominates all its uses. 
- for (HUseIterator<HInstruction> use_it(instruction->GetUses()); + for (HUseIterator<HInstruction*> use_it(instruction->GetUses()); !use_it.Done(); use_it.Advance()) { HInstruction* use = use_it.Current()->GetUser(); if (!use->IsPhi() && !instruction->StrictlyDominates(use)) { - std::stringstream error; - error << "Instruction " << instruction->GetId() - << " in block " << current_block_->GetBlockId() - << " does not dominate use " << use->GetId() - << " in block " << use->GetBlock()->GetBlockId() << "."; - errors_.push_back(error.str()); + AddError(StringPrintf("Instruction %d in block %d does not dominate " + "use %d in block %d.", + instruction->GetId(), current_block_->GetBlockId(), + use->GetId(), use->GetBlock()->GetBlockId())); } } @@ -294,13 +273,12 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { HInstruction* env_instruction = environment->GetInstructionAt(i); if (env_instruction != nullptr && !env_instruction->StrictlyDominates(instruction)) { - std::stringstream error; - error << "Instruction " << env_instruction->GetId() - << " in environment of instruction " << instruction->GetId() - << " from block " << current_block_->GetBlockId() - << " does not dominate instruction " << instruction->GetId() - << "."; - errors_.push_back(error.str()); + AddError(StringPrintf("Instruction %d in environment of instruction %d " + "from block %d does not dominate instruction %d.", + env_instruction->GetId(), + instruction->GetId(), + current_block_->GetBlockId(), + instruction->GetId())); } } } @@ -311,25 +289,21 @@ void SSAChecker::VisitPhi(HPhi* phi) { // Ensure the first input of a phi is not itself. if (phi->InputAt(0) == phi) { - std::stringstream error; - error << "Loop phi " << phi->GetId() - << " in block " << phi->GetBlock()->GetBlockId() - << " is its own first input."; - errors_.push_back(error.str()); + AddError(StringPrintf("Loop phi %d in block %d is its own first input.", + phi->GetId(), + phi->GetBlock()->GetBlockId())); } - // Ensure the number of phi inputs is the same as the number of + // Ensure the number of inputs of a phi is the same as the number of // its predecessors. const GrowableArray<HBasicBlock*>& predecessors = phi->GetBlock()->GetPredecessors(); if (phi->InputCount() != predecessors.Size()) { - std::stringstream error; - error << "Phi " << phi->GetId() - << " in block " << phi->GetBlock()->GetBlockId() - << " has " << phi->InputCount() << " inputs, but block " - << phi->GetBlock()->GetBlockId() << " has " - << predecessors.Size() << " predecessors."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Phi %d in block %d has %zu inputs, " + "but block %d has %zu predecessors.", + phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(), + phi->GetBlock()->GetBlockId(), predecessors.Size())); } else { // Ensure phi input at index I either comes from the Ith // predecessor or from a block that dominates this predecessor. 
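Concretely, for a diamond

    //      B0
    //     /  \
    //    B1    B2
    //     \   /
    //      B3:  p = Phi(x, y)   // predecessors: [B1, B2]

input 0 (`x`) must be defined in B1 or in a block dominating B1 (such as B0), and input 1 (`y`) in B2 or a dominator of B2; a value defined only in B1 can never legally appear as input 1. The hunk below reworks only the error message for that check: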
@@ -338,13 +312,11 @@ void SSAChecker::VisitPhi(HPhi* phi) { HBasicBlock* predecessor = predecessors.Get(i); if (!(input->GetBlock() == predecessor || input->GetBlock()->Dominates(predecessor))) { - std::stringstream error; - error << "Input " << input->GetId() << " at index " << i - << " of phi " << phi->GetId() - << " from block " << phi->GetBlock()->GetBlockId() - << " is not defined in predecessor number " << i - << " nor in a block dominating it."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Input %d at index %zu of phi %d from block %d is not defined in " + "predecessor number %zu nor in a block dominating it.", + input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(), + i)); } } } @@ -369,57 +341,61 @@ void SSAChecker::VisitIf(HIf* instruction) { if (input->IsIntConstant()) { int value = input->AsIntConstant()->GetValue(); if (value != 0 && value != 1) { - std::stringstream error; - error << "If instruction " << instruction->GetId() - << " has a non-boolean constant input whose value is: " - << value << "."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "If instruction %d has a non-Boolean constant input " + "whose value is: %d.", + instruction->GetId(), + value)); } } else if (instruction->InputAt(0)->GetType() != Primitive::kPrimBoolean) { - std::stringstream error; - error << "If instruction " << instruction->GetId() - << " has a non-boolean input type: " - << instruction->InputAt(0)->GetType() << "."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "If instruction %d has a non-Boolean input type: %s.", + instruction->GetId(), + Primitive::PrettyDescriptor(instruction->InputAt(0)->GetType()))); } } void SSAChecker::VisitCondition(HCondition* op) { VisitInstruction(op); if (op->GetType() != Primitive::kPrimBoolean) { - std::stringstream error; - error << "Condition " << op->DebugName() << " " << op->GetId() - << " has a non-boolean result type: " - << op->GetType() << "."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Condition %s %d has a non-Boolean result type: %s.", + op->DebugName(), op->GetId(), + Primitive::PrettyDescriptor(op->GetType()))); } HInstruction* lhs = op->InputAt(0); HInstruction* rhs = op->InputAt(1); - if (lhs->GetType() == Primitive::kPrimNot && rhs->IsIntConstant()) { - if (rhs->AsIntConstant()->GetValue() != 0) { - std::stringstream error; - error << "Condition " << op->DebugName() << " " << op->GetId() - << " compares an object with a non-0 integer: " - << rhs->AsIntConstant()->GetValue() - << "."; - errors_.push_back(error.str()); + if (lhs->GetType() == Primitive::kPrimNot) { + if (!op->IsEqual() && !op->IsNotEqual()) { + AddError(StringPrintf( + "Condition %s %d uses an object as left-hand side input.", + op->DebugName(), op->GetId())); + } + if (rhs->IsIntConstant() && rhs->AsIntConstant()->GetValue() != 0) { + AddError(StringPrintf( + "Condition %s %d compares an object with a non-zero integer: %d.", + op->DebugName(), op->GetId(), + rhs->AsIntConstant()->GetValue())); + } + } else if (rhs->GetType() == Primitive::kPrimNot) { + if (!op->IsEqual() && !op->IsNotEqual()) { + AddError(StringPrintf( + "Condition %s %d uses an object as right-hand side input.", + op->DebugName(), op->GetId())); } - } else if (rhs->GetType() == Primitive::kPrimNot && lhs->IsIntConstant()) { - if (lhs->AsIntConstant()->GetValue() != 0) { - std::stringstream error; - error << "Condition " << op->DebugName() << " " << op->GetId() - << " compares a non-0 integer with an object: " - << 
lhs->AsIntConstant()->GetValue() - << "."; - errors_.push_back(error.str()); + if (lhs->IsIntConstant() && lhs->AsIntConstant()->GetValue() != 0) { + AddError(StringPrintf( + "Condition %s %d compares a non-zero integer with an object: %d.", + op->DebugName(), op->GetId(), + lhs->AsIntConstant()->GetValue())); } } else if (PrimitiveKind(lhs->GetType()) != PrimitiveKind(rhs->GetType())) { - std::stringstream error; - error << "Condition " << op->DebugName() << " " << op->GetId() - << " has inputs of different type: " - << lhs->GetType() << ", and " << rhs->GetType() - << "."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Condition %s %d has inputs of different types: " + "%s, and %s.", + op->DebugName(), op->GetId(), + Primitive::PrettyDescriptor(lhs->GetType()), + Primitive::PrettyDescriptor(rhs->GetType()))); } } @@ -427,41 +403,40 @@ void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) { VisitInstruction(op); if (op->IsUShr() || op->IsShr() || op->IsShl()) { if (PrimitiveKind(op->InputAt(1)->GetType()) != Primitive::kPrimInt) { - std::stringstream error; - error << "Shift operation " << op->DebugName() << " " << op->GetId() - << " has a non-int kind second input: " - << op->InputAt(1)->DebugName() << " of type " << op->InputAt(1)->GetType() - << "."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Shift operation %s %d has a non-int kind second input: " + "%s of type %s.", + op->DebugName(), op->GetId(), + op->InputAt(1)->DebugName(), + Primitive::PrettyDescriptor(op->InputAt(1)->GetType()))); } } else { if (PrimitiveKind(op->InputAt(1)->GetType()) != PrimitiveKind(op->InputAt(0)->GetType())) { - std::stringstream error; - error << "Binary operation " << op->DebugName() << " " << op->GetId() - << " has inputs of different type: " - << op->InputAt(0)->GetType() << ", and " << op->InputAt(1)->GetType() - << "."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Binary operation %s %d has inputs of different types: " + "%s, and %s.", + op->DebugName(), op->GetId(), + Primitive::PrettyDescriptor(op->InputAt(0)->GetType()), + Primitive::PrettyDescriptor(op->InputAt(1)->GetType()))); } } if (op->IsCompare()) { if (op->GetType() != Primitive::kPrimInt) { - std::stringstream error; - error << "Compare operation " << op->GetId() - << " has a non-int result type: " - << op->GetType() << "."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Compare operation %d has a non-int result type: %s.", + op->GetId(), + Primitive::PrettyDescriptor(op->GetType()))); } } else { // Use the first input, so that we can also make this check for shift operations. 
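The asymmetry exists because a shift's distance operand is always int-kind even when the shifted value is a long, so only the first input can be compared against the result type; e.g. a long shift is (long, int) -> long, exactly the case the check below must still accept: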
if (PrimitiveKind(op->GetType()) != PrimitiveKind(op->InputAt(0)->GetType())) { - std::stringstream error; - error << "Binary operation " << op->DebugName() << " " << op->GetId() - << " has a result type different than its input type: " - << op->GetType() << ", and " << op->InputAt(1)->GetType() - << "."; - errors_.push_back(error.str()); + AddError(StringPrintf( + "Binary operation %s %d has a result type different " + "from its input type: %s vs %s.", + op->DebugName(), op->GetId(), + Primitive::PrettyDescriptor(op->GetType()), + Primitive::PrettyDescriptor(op->InputAt(1)->GetType()))); } } } diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index ae1557b57c..5ec3003ac8 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -60,6 +60,11 @@ class GraphChecker : public HGraphDelegateVisitor { } protected: + // Report a new error. + void AddError(const std::string& error) { + errors_.push_back(error); + } + ArenaAllocator* const allocator_; // The block currently visited. HBasicBlock* current_block_ = nullptr; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 9e0a5b89e9..ef461d9ac5 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -68,7 +68,7 @@ class HGraphVisualizerPrinter : public HGraphVisitor { void PrintTime(const char* name) { AddIndent(); - output_ << name << " " << time(NULL) << std::endl; + output_ << name << " " << time(nullptr) << std::endl; } void PrintInt(const char* name, int value) { @@ -142,6 +142,10 @@ class HGraphVisualizerPrinter : public HGraphVisitor { codegen_.DumpFloatingPointRegister(output_, location.low()); output_ << " and "; codegen_.DumpFloatingPointRegister(output_, location.high()); + } else if (location.IsRegisterPair()) { + codegen_.DumpCoreRegister(output_, location.low()); + output_ << " and "; + codegen_.DumpCoreRegister(output_, location.high()); } else { DCHECK(location.IsDoubleStackSlot()); output_ << "2x" << location.GetStackIndex() << "(sp)"; @@ -219,10 +223,16 @@ class HGraphVisualizerPrinter : public HGraphVisitor { const char* kEndInstructionMarker = "<|@"; for (HInstructionIterator it(list); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); - AddIndent(); int bci = 0; - output_ << bci << " " << instruction->NumberOfUses() - << " " << GetTypeId(instruction->GetType()) << instruction->GetId() << " "; + size_t num_uses = 0; + for (HUseIterator<HInstruction*> use_it(instruction->GetUses()); + !use_it.Done(); + use_it.Advance()) { + ++num_uses; + } + AddIndent(); + output_ << bci << " " << num_uses << " " + << GetTypeId(instruction->GetType()) << instruction->GetId() << " "; PrintInstruction(instruction); output_ << kEndInstructionMarker << std::endl; } diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 6e5f1bd203..89bba2d9f6 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -15,80 +15,249 @@ */ #include "gvn.h" +#include "side_effects_analysis.h" namespace art { -void GlobalValueNumberer::Run() { - ComputeSideEffects(); +/** + * A node in the collision list of a ValueSet. Encodes the instruction, + * the hash code, and the next node in the collision list. 
+ */ +class ValueSetNode : public ArenaObject<kArenaAllocMisc> { + public: + ValueSetNode(HInstruction* instruction, size_t hash_code, ValueSetNode* next) + : instruction_(instruction), hash_code_(hash_code), next_(next) {} - sets_.Put(graph_->GetEntryBlock()->GetBlockId(), new (allocator_) ValueSet(allocator_)); + size_t GetHashCode() const { return hash_code_; } + HInstruction* GetInstruction() const { return instruction_; } + ValueSetNode* GetNext() const { return next_; } + void SetNext(ValueSetNode* node) { next_ = node; } - // Do reverse post order to ensure the non back-edge predecessors of a block are - // visited before the block itself. - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); + private: + HInstruction* const instruction_; + const size_t hash_code_; + ValueSetNode* next_; + + DISALLOW_COPY_AND_ASSIGN(ValueSetNode); +}; + +/** + * A ValueSet holds instructions that can replace other instructions. It is updated + * through the `Add` method, and the `Kill` method. The `Kill` method removes + * instructions that are affected by the given side effect. + * + * The `Lookup` method returns an equivalent instruction to the given instruction + * if there is one in the set. In GVN, we would say those instructions have the + * same "number". + */ +class ValueSet : public ArenaObject<kArenaAllocMisc> { + public: + explicit ValueSet(ArenaAllocator* allocator) + : allocator_(allocator), number_of_entries_(0), collisions_(nullptr) { + for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { + table_[i] = nullptr; + } } -} -void GlobalValueNumberer::UpdateLoopEffects(HLoopInformation* info, SideEffects effects) { - int id = info->GetHeader()->GetBlockId(); - loop_effects_.Put(id, loop_effects_.Get(id).Union(effects)); -} + // Adds an instruction in the set. + void Add(HInstruction* instruction) { + DCHECK(Lookup(instruction) == nullptr); + size_t hash_code = instruction->ComputeHashCode(); + size_t index = hash_code % kDefaultNumberOfEntries; + if (table_[index] == nullptr) { + table_[index] = instruction; + } else { + collisions_ = new (allocator_) ValueSetNode(instruction, hash_code, collisions_); + } + ++number_of_entries_; + } -void GlobalValueNumberer::ComputeSideEffects() { - if (kIsDebugBuild) { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - SideEffects effects = GetBlockEffects(block); - DCHECK(!effects.HasSideEffects() && !effects.HasDependencies()); - if (block->IsLoopHeader()) { - effects = GetLoopEffects(block); - DCHECK(!effects.HasSideEffects() && !effects.HasDependencies()); + // If in the set, returns an equivalent instruction to the given instruction. Returns + // null otherwise. + HInstruction* Lookup(HInstruction* instruction) const { + size_t hash_code = instruction->ComputeHashCode(); + size_t index = hash_code % kDefaultNumberOfEntries; + HInstruction* existing = table_[index]; + if (existing != nullptr && existing->Equals(instruction)) { + return existing; + } + + for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { + if (node->GetHashCode() == hash_code) { + existing = node->GetInstruction(); + if (existing->Equals(instruction)) { + return existing; + } } } + return nullptr; } - // Do a post order visit to ensure we visit a loop header after its loop body. 
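The red block that follows is a move rather than a deletion: the block- and loop-effect bookkeeping (the post-order walk, per-block unions of instruction effects, and propagation into outer loops through the pre-header) now lives in the separate SideEffectsAnalysis pass included at the top of this file, and GVN consumes its results through GetLoopEffects() below.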
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - - SideEffects effects = SideEffects::None(); - // Update `effects` with the side effects of all instructions in this block. - for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); - inst_it.Advance()) { - HInstruction* instruction = inst_it.Current(); - effects = effects.Union(instruction->GetSideEffects()); - if (effects.HasAllSideEffects()) { - break; + // Returns whether `instruction` is in the set. + HInstruction* IdentityLookup(HInstruction* instruction) const { + size_t hash_code = instruction->ComputeHashCode(); + size_t index = hash_code % kDefaultNumberOfEntries; + HInstruction* existing = table_[index]; + if (existing != nullptr && existing == instruction) { + return existing; + } + + for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { + if (node->GetHashCode() == hash_code) { + existing = node->GetInstruction(); + if (existing == instruction) { + return existing; + } } } + return nullptr; + } - block_effects_.Put(block->GetBlockId(), effects); - - if (block->IsLoopHeader()) { - // The side effects of the loop header are part of the loop. - UpdateLoopEffects(block->GetLoopInformation(), effects); - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - if (pre_header->IsInLoop()) { - // Update the side effects of the outer loop with the side effects of the inner loop. - // Note that this works because we know all the blocks of the inner loop are visited - // before the loop header of the outer loop. - UpdateLoopEffects(pre_header->GetLoopInformation(), GetLoopEffects(block)); + // Removes all instructions in the set that are affected by the given side effects. + void Kill(SideEffects side_effects) { + for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { + HInstruction* instruction = table_[i]; + if (instruction != nullptr && instruction->GetSideEffects().DependsOn(side_effects)) { + table_[i] = nullptr; + --number_of_entries_; + } + } + + for (ValueSetNode* current = collisions_, *previous = nullptr; + current != nullptr; + current = current->GetNext()) { + HInstruction* instruction = current->GetInstruction(); + if (instruction->GetSideEffects().DependsOn(side_effects)) { + if (previous == nullptr) { + collisions_ = current->GetNext(); + } else { + previous->SetNext(current->GetNext()); + } + --number_of_entries_; + } else { + previous = current; } - } else if (block->IsInLoop()) { - // Update the side effects of the loop with the side effects of this block. - UpdateLoopEffects(block->GetLoopInformation(), effects); } } -} -SideEffects GlobalValueNumberer::GetLoopEffects(HBasicBlock* block) const { - DCHECK(block->IsLoopHeader()); - return loop_effects_.Get(block->GetBlockId()); -} + // Returns a copy of this set. + ValueSet* Copy() const { + ValueSet* copy = new (allocator_) ValueSet(allocator_); + + for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { + copy->table_[i] = table_[i]; + } + + // Note that the order will be inverted in the copy. This is fine, as the order is not + // relevant for a ValueSet. 
+ for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { + copy->collisions_ = new (allocator_) ValueSetNode( + node->GetInstruction(), node->GetHashCode(), copy->collisions_); + } -SideEffects GlobalValueNumberer::GetBlockEffects(HBasicBlock* block) const { - return block_effects_.Get(block->GetBlockId()); + copy->number_of_entries_ = number_of_entries_; + return copy; + } + + void Clear() { + number_of_entries_ = 0; + collisions_ = nullptr; + for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { + table_[i] = nullptr; + } + } + + // Update this `ValueSet` by intersecting with instructions in `other`. + void IntersectionWith(ValueSet* other) { + if (IsEmpty()) { + return; + } else if (other->IsEmpty()) { + Clear(); + } else { + for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { + if (table_[i] != nullptr && other->IdentityLookup(table_[i]) == nullptr) { + --number_of_entries_; + table_[i] = nullptr; + } + } + for (ValueSetNode* current = collisions_, *previous = nullptr; + current != nullptr; + current = current->GetNext()) { + if (other->IdentityLookup(current->GetInstruction()) == nullptr) { + if (previous == nullptr) { + collisions_ = current->GetNext(); + } else { + previous->SetNext(current->GetNext()); + } + --number_of_entries_; + } else { + previous = current; + } + } + } + } + + bool IsEmpty() const { return number_of_entries_ == 0; } + size_t GetNumberOfEntries() const { return number_of_entries_; } + + private: + static constexpr size_t kDefaultNumberOfEntries = 8; + + ArenaAllocator* const allocator_; + + // The number of entries in the set. + size_t number_of_entries_; + + // The internal implementation of the set. It uses a combination of a hash code based + // fixed-size list, and a linked list to handle hash code collisions. + // TODO: Tune the fixed size list original size, and support growing it. + ValueSetNode* collisions_; + HInstruction* table_[kDefaultNumberOfEntries]; + + DISALLOW_COPY_AND_ASSIGN(ValueSet); +}; + +/** + * Optimization phase that removes redundant instruction. + */ +class GlobalValueNumberer : public ValueObject { + public: + GlobalValueNumberer(ArenaAllocator* allocator, + HGraph* graph, + const SideEffectsAnalysis& side_effects) + : graph_(graph), + allocator_(allocator), + side_effects_(side_effects), + sets_(allocator, graph->GetBlocks().Size(), nullptr) {} + + void Run(); + + private: + // Per-block GVN. Will also update the ValueSet of the dominated and + // successor blocks. + void VisitBasicBlock(HBasicBlock* block); + + HGraph* graph_; + ArenaAllocator* const allocator_; + const SideEffectsAnalysis& side_effects_; + + // ValueSet for blocks. Initially null, but for an individual block they + // are allocated and populated by the dominator, and updated by all blocks + // in the path from the dominator to the block. + GrowableArray<ValueSet*> sets_; + + DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer); +}; + +void GlobalValueNumberer::Run() { + DCHECK(side_effects_.HasRun()); + sets_.Put(graph_->GetEntryBlock()->GetBlockId(), new (allocator_) ValueSet(allocator_)); + + // Use the reverse post order to ensure the non back-edge predecessors of a block are + // visited before the block itself. 
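Reverse post order guarantees a block's dominator has already been processed, so the ValueSet seeded from it is available when the block is visited. In outline, each VisitBasicBlock() (its body is only partially visible in this diff) then does:

    ValueSet* set = sets_.Get(dominator->GetBlockId())->Copy();
    if (block->IsLoopHeader()) {
      // Anything written anywhere in the loop can flow around the back
      // edge, so kill values the loop's side effects may invalidate.
      set->Kill(side_effects_.GetLoopEffects(block));
    } else if (block->GetPredecessors().Size() > 1) {
      // At a merge, only values available on every incoming path survive.
      for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) {
        set->IntersectionWith(sets_.Get(block->GetPredecessors().Get(i)->GetBlockId()));
      }
    }
    // Per instruction: Lookup() an equivalent value; on a hit, replace the
    // instruction and remove it, on a miss Add() it, and Kill() whatever its
    // own side effects invalidate.

That is a paraphrase of the code further down, not new logic; the loop below just drives it in reverse post order: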
+ for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + VisitBasicBlock(it.Current()); + } } void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { @@ -110,7 +279,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { if (!set->IsEmpty()) { if (block->IsLoopHeader()) { DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader()); - set->Kill(GetLoopEffects(block)); + set->Kill(side_effects_.GetLoopEffects(block)); } else if (predecessors.Size() > 1) { for (size_t i = 0, e = predecessors.Size(); i < e; ++i) { set->IntersectionWith(sets_.Get(predecessors.Get(i)->GetBlockId())); @@ -142,4 +311,9 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { } } +void GVNOptimization::Run() { + GlobalValueNumberer gvn(graph_->GetArena(), graph_, side_effects_); + gvn.Run(); +} + } // namespace art diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h index 81f2c3fa87..57e0487fca 100644 --- a/compiler/optimizing/gvn.h +++ b/compiler/optimizing/gvn.h @@ -22,272 +22,18 @@ namespace art { -/** - * A node in the collision list of a ValueSet. Encodes the instruction, - * the hash code, and the next node in the collision list. - */ -class ValueSetNode : public ArenaObject<kArenaAllocMisc> { - public: - ValueSetNode(HInstruction* instruction, size_t hash_code, ValueSetNode* next) - : instruction_(instruction), hash_code_(hash_code), next_(next) {} - - size_t GetHashCode() const { return hash_code_; } - HInstruction* GetInstruction() const { return instruction_; } - ValueSetNode* GetNext() const { return next_; } - void SetNext(ValueSetNode* node) { next_ = node; } - - private: - HInstruction* const instruction_; - const size_t hash_code_; - ValueSetNode* next_; - - DISALLOW_COPY_AND_ASSIGN(ValueSetNode); -}; - -/** - * A ValueSet holds instructions that can replace other instructions. It is updated - * through the `Add` method, and the `Kill` method. The `Kill` method removes - * instructions that are affected by the given side effect. - * - * The `Lookup` method returns an equivalent instruction to the given instruction - * if there is one in the set. In GVN, we would say those instructions have the - * same "number". - */ -class ValueSet : public ArenaObject<kArenaAllocMisc> { - public: - explicit ValueSet(ArenaAllocator* allocator) - : allocator_(allocator), number_of_entries_(0), collisions_(nullptr) { - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - table_[i] = nullptr; - } - } - - // Adds an instruction in the set. - void Add(HInstruction* instruction) { - DCHECK(Lookup(instruction) == nullptr); - size_t hash_code = instruction->ComputeHashCode(); - size_t index = hash_code % kDefaultNumberOfEntries; - if (table_[index] == nullptr) { - table_[index] = instruction; - } else { - collisions_ = new (allocator_) ValueSetNode(instruction, hash_code, collisions_); - } - ++number_of_entries_; - } - - // If in the set, returns an equivalent instruction to the given instruction. Returns - // null otherwise. 
- HInstruction* Lookup(HInstruction* instruction) const { - size_t hash_code = instruction->ComputeHashCode(); - size_t index = hash_code % kDefaultNumberOfEntries; - HInstruction* existing = table_[index]; - if (existing != nullptr && existing->Equals(instruction)) { - return existing; - } - - for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { - if (node->GetHashCode() == hash_code) { - existing = node->GetInstruction(); - if (existing->Equals(instruction)) { - return existing; - } - } - } - return nullptr; - } - - // Returns whether `instruction` is in the set. - HInstruction* IdentityLookup(HInstruction* instruction) const { - size_t hash_code = instruction->ComputeHashCode(); - size_t index = hash_code % kDefaultNumberOfEntries; - HInstruction* existing = table_[index]; - if (existing != nullptr && existing == instruction) { - return existing; - } - - for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { - if (node->GetHashCode() == hash_code) { - existing = node->GetInstruction(); - if (existing == instruction) { - return existing; - } - } - } - return nullptr; - } - - // Removes all instructions in the set that are affected by the given side effects. - void Kill(SideEffects side_effects) { - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - HInstruction* instruction = table_[i]; - if (instruction != nullptr && instruction->GetSideEffects().DependsOn(side_effects)) { - table_[i] = nullptr; - --number_of_entries_; - } - } - - for (ValueSetNode* current = collisions_, *previous = nullptr; - current != nullptr; - current = current->GetNext()) { - HInstruction* instruction = current->GetInstruction(); - if (instruction->GetSideEffects().DependsOn(side_effects)) { - if (previous == nullptr) { - collisions_ = current->GetNext(); - } else { - previous->SetNext(current->GetNext()); - } - --number_of_entries_; - } else { - previous = current; - } - } - } - - // Returns a copy of this set. - ValueSet* Copy() const { - ValueSet* copy = new (allocator_) ValueSet(allocator_); - - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - copy->table_[i] = table_[i]; - } - - // Note that the order will be inverted in the copy. This is fine, as the order is not - // relevant for a ValueSet. - for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { - copy->collisions_ = new (allocator_) ValueSetNode( - node->GetInstruction(), node->GetHashCode(), copy->collisions_); - } - - copy->number_of_entries_ = number_of_entries_; - return copy; - } - - void Clear() { - number_of_entries_ = 0; - collisions_ = nullptr; - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - table_[i] = nullptr; - } - } - - // Update this `ValueSet` by intersecting with instructions in `other`. 
- void IntersectionWith(ValueSet* other) { - if (IsEmpty()) { - return; - } else if (other->IsEmpty()) { - Clear(); - } else { - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - if (table_[i] != nullptr && other->IdentityLookup(table_[i]) == nullptr) { - --number_of_entries_; - table_[i] = nullptr; - } - } - for (ValueSetNode* current = collisions_, *previous = nullptr; - current != nullptr; - current = current->GetNext()) { - if (other->IdentityLookup(current->GetInstruction()) == nullptr) { - if (previous == nullptr) { - collisions_ = current->GetNext(); - } else { - previous->SetNext(current->GetNext()); - } - --number_of_entries_; - } else { - previous = current; - } - } - } - } - - bool IsEmpty() const { return number_of_entries_ == 0; } - size_t GetNumberOfEntries() const { return number_of_entries_; } - - private: - static constexpr size_t kDefaultNumberOfEntries = 8; - - ArenaAllocator* const allocator_; - - // The number of entries in the set. - size_t number_of_entries_; - - // The internal implementation of the set. It uses a combination of a hash code based - // fixed-size list, and a linked list to handle hash code collisions. - // TODO: Tune the fixed size list original size, and support growing it. - ValueSetNode* collisions_; - HInstruction* table_[kDefaultNumberOfEntries]; - - DISALLOW_COPY_AND_ASSIGN(ValueSet); -}; - -/** - * Optimization phase that removes redundant instruction. - */ -class GlobalValueNumberer : public ValueObject { - public: - GlobalValueNumberer(ArenaAllocator* allocator, HGraph* graph) - : graph_(graph), - allocator_(allocator), - block_effects_(allocator, graph->GetBlocks().Size()), - loop_effects_(allocator, graph->GetBlocks().Size()), - sets_(allocator, graph->GetBlocks().Size()) { - size_t number_of_blocks = graph->GetBlocks().Size(); - block_effects_.SetSize(number_of_blocks); - loop_effects_.SetSize(number_of_blocks); - sets_.SetSize(number_of_blocks); - - for (size_t i = 0; i < number_of_blocks; ++i) { - block_effects_.Put(i, SideEffects::None()); - loop_effects_.Put(i, SideEffects::None()); - } - } - - void Run(); - - private: - // Per-block GVN. Will also update the ValueSet of the dominated and - // successor blocks. - void VisitBasicBlock(HBasicBlock* block); - - // Compute side effects of individual blocks and loops. The GVN algorithm - // will use these side effects to update the ValueSet of individual blocks. - void ComputeSideEffects(); - - void UpdateLoopEffects(HLoopInformation* info, SideEffects effects); - SideEffects GetLoopEffects(HBasicBlock* block) const; - SideEffects GetBlockEffects(HBasicBlock* block) const; - - HGraph* graph_; - - ArenaAllocator* const allocator_; - - // Side effects of individual blocks, that is the union of the side effects - // of the instructions in the block. - GrowableArray<SideEffects> block_effects_; - - // Side effects of loops, that is the union of the side effects of the - // blocks contained in that loop. - GrowableArray<SideEffects> loop_effects_; - - // ValueSet for blocks. Initially null, but for an individual block they - // are allocated and populated by the dominator, and updated by all blocks - // in the path from the dominator to the block. 
- GrowableArray<ValueSet*> sets_; - - ART_FRIEND_TEST(GVNTest, LoopSideEffects); - DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer); -}; +class SideEffectsAnalysis; class GVNOptimization : public HOptimization { public: - explicit GVNOptimization(HGraph* graph) : HOptimization(graph, true, "GVN") {} + GVNOptimization(HGraph* graph, const SideEffectsAnalysis& side_effects) + : HOptimization(graph, true, "GVN"), side_effects_(side_effects) {} - void Run() OVERRIDE { - GlobalValueNumberer gvn(graph_->GetArena(), graph_); - gvn.Run(); - } + void Run() OVERRIDE; private: + const SideEffectsAnalysis& side_effects_; + DISALLOW_COPY_AND_ASSIGN(GVNOptimization); }; diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index 48f1ea9e15..4a48fee2fb 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -18,6 +18,7 @@ #include "gvn.h" #include "nodes.h" #include "optimizing_unit_test.h" +#include "side_effects_analysis.h" #include "utils/arena_allocator.h" #include "gtest/gtest.h" @@ -64,7 +65,9 @@ TEST(GVNTest, LocalFieldElimination) { ASSERT_EQ(use_after_kill->GetBlock(), block); graph->TryBuildingSsa(); - GlobalValueNumberer(&allocator, graph).Run(); + SideEffectsAnalysis side_effects(graph); + side_effects.Run(); + GVNOptimization(graph, side_effects).Run(); ASSERT_TRUE(to_remove->GetBlock() == nullptr); ASSERT_EQ(different_offset->GetBlock(), block); @@ -116,7 +119,9 @@ TEST(GVNTest, GlobalFieldElimination) { join->AddInstruction(new (&allocator) HExit()); graph->TryBuildingSsa(); - GlobalValueNumberer(&allocator, graph).Run(); + SideEffectsAnalysis side_effects(graph); + side_effects.Run(); + GVNOptimization(graph, side_effects).Run(); // Check that all field get instructions have been GVN'ed. ASSERT_TRUE(then->GetFirstInstruction()->IsGoto()); @@ -184,7 +189,11 @@ TEST(GVNTest, LoopFieldElimination) { ASSERT_EQ(field_get_in_exit->GetBlock(), exit); graph->TryBuildingSsa(); - GlobalValueNumberer(&allocator, graph).Run(); + { + SideEffectsAnalysis side_effects(graph); + side_effects.Run(); + GVNOptimization(graph, side_effects).Run(); + } // Check that all field get instructions are still there. ASSERT_EQ(field_get_in_loop_header->GetBlock(), loop_header); @@ -195,7 +204,11 @@ TEST(GVNTest, LoopFieldElimination) { // Now remove the field set, and check that all field get instructions have been GVN'ed. loop_body->RemoveInstruction(field_set); - GlobalValueNumberer(&allocator, graph).Run(); + { + SideEffectsAnalysis side_effects(graph); + side_effects.Run(); + GVNOptimization(graph, side_effects).Run(); + } ASSERT_TRUE(field_get_in_loop_header->GetBlock() == nullptr); ASSERT_TRUE(field_get_in_loop_body->GetBlock() == nullptr); @@ -256,12 +269,12 @@ TEST(GVNTest, LoopSideEffects) { entry->AddInstruction(new (&allocator) HInstanceFieldSet( parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); - GlobalValueNumberer gvn(&allocator, graph); - gvn.Run(); + SideEffectsAnalysis side_effects(graph); + side_effects.Run(); - ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects()); - ASSERT_FALSE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects()); - ASSERT_FALSE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects()); + ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects()); + ASSERT_FALSE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects()); + ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects()); } // Check that the side effects of the outer loop does not affect the inner loop. 
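The three scenarios in this test pin down the contract that moved into SideEffectsAnalysis: a block's effects are the union of its instructions' effects, a loop's effects are the union of the effects of every block the loop contains (header included), and an inner loop's effects fold into the enclosing loop through its pre-header. The hunk below places the write in the outer loop's body, so only the outer loop reports side effects: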
@@ -271,13 +284,13 @@ TEST(GVNTest, LoopSideEffects) { parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false), outer_loop_body->GetLastInstruction()); - GlobalValueNumberer gvn(&allocator, graph); - gvn.Run(); + SideEffectsAnalysis side_effects(graph); + side_effects.Run(); - ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects()); - ASSERT_TRUE(gvn.GetBlockEffects(outer_loop_body).HasSideEffects()); - ASSERT_TRUE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects()); - ASSERT_FALSE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects()); + ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects()); + ASSERT_TRUE(side_effects.GetBlockEffects(outer_loop_body).HasSideEffects()); + ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects()); + ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects()); } // Check that the side effects of the inner loop affects the outer loop. @@ -288,13 +301,13 @@ TEST(GVNTest, LoopSideEffects) { parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false), inner_loop_body->GetLastInstruction()); - GlobalValueNumberer gvn(&allocator, graph); - gvn.Run(); + SideEffectsAnalysis side_effects(graph); + side_effects.Run(); - ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects()); - ASSERT_FALSE(gvn.GetBlockEffects(outer_loop_body).HasSideEffects()); - ASSERT_TRUE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects()); - ASSERT_TRUE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects()); + ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects()); + ASSERT_FALSE(side_effects.GetBlockEffects(outer_loop_body).HasSideEffects()); + ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects()); + ASSERT_TRUE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects()); } } } // namespace art diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 49ca44331d..63bc4ae671 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -59,10 +59,9 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { equal->ReplaceWith(equal->InputAt(0)); equal->GetBlock()->RemoveInstruction(equal); } else { - // Replace (bool_value == 0) with !bool_value + // We should replace (bool_value == 0) with !bool_value, but we unfortunately + // do not have such instruction. 
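This dovetails with the x86-64 hunk above that drops the kPrimBoolean case from VisitNot: rewriting (bool_value == 0) into HNot(kPrimBoolean, bool_value) would now create an instruction with no code generation path, at least on x86-64. The rewrite is therefore deleted just below, leaving only the sanity check on the constant: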
DCHECK_EQ(input2->AsIntConstant()->GetValue(), 0); - equal->GetBlock()->ReplaceAndRemoveInstructionWith( - equal, new (GetGraph()->GetArena()) HNot(Primitive::kPrimBoolean, input1)); } } } diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 59404dcb14..2ab9b571ff 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -22,6 +22,7 @@ #include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" +#include "driver/compiler_options.h" #include "graph_visualizer.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -44,7 +45,7 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc index 3e4b83b0b1..ac8759c805 100644 --- a/compiler/optimizing/live_interval_test.cc +++ b/compiler/optimizing/live_interval_test.cc @@ -278,4 +278,55 @@ TEST(LiveInterval, SplitAt) { } } +TEST(LiveInterval, AddLoopRange) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + { + // Test when only used in a loop. + static constexpr size_t ranges[][2] = {{0, 4}}; + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + interval->AddLoopRange(0, 8); + LiveRange* range = interval->GetFirstRange(); + ASSERT_TRUE(range->GetNext() == nullptr); + ASSERT_EQ(range->GetStart(), 0u); + ASSERT_EQ(range->GetEnd(), 8u); + } + + { + // Test when only used in a loop. + static constexpr size_t ranges[][2] = {{2, 4}}; + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + interval->AddLoopRange(0, 8); + LiveRange* range = interval->GetFirstRange(); + ASSERT_TRUE(range->GetNext() == nullptr); + ASSERT_EQ(range->GetStart(), 0u); + ASSERT_EQ(range->GetEnd(), 8u); + } + + { + // Test when used just after the loop. + static constexpr size_t ranges[][2] = {{2, 4}, {8, 10}}; + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + interval->AddLoopRange(0, 8); + LiveRange* range = interval->GetFirstRange(); + ASSERT_TRUE(range->GetNext() == nullptr); + ASSERT_EQ(range->GetStart(), 0u); + ASSERT_EQ(range->GetEnd(), 10u); + } + + { + // Test when use after the loop is after a lifetime hole. 
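Read together, the four cases in this new test document AddLoopRange(0, 8): the interval is widened to cover the whole loop [0, 8), absorbing any ranges inside it; a range starting exactly at the loop's end (the {8, 10} case) fuses into a single [0, 10) range; and a use separated from the loop by a lifetime hole, as in the case below, keeps its own [10, 12) range after [0, 8):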
+ static constexpr size_t ranges[][2] = {{2, 4}, {10, 12}}; + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + interval->AddLoopRange(0, 8); + LiveRange* range = interval->GetFirstRange(); + ASSERT_EQ(range->GetStart(), 0u); + ASSERT_EQ(range->GetEnd(), 8u); + range = range->GetNext(); + ASSERT_EQ(range->GetStart(), 10u); + ASSERT_EQ(range->GetEnd(), 12u); + } +} + } // namespace art diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 007c43e218..92742f9a06 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -19,6 +19,7 @@ #include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" @@ -63,7 +64,7 @@ TEST(LiveRangesTest, CFG1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -109,7 +110,7 @@ TEST(LiveRangesTest, CFG2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -158,7 +159,7 @@ TEST(LiveRangesTest, CFG3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -235,7 +236,7 @@ TEST(LiveRangesTest, Loop1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); RemoveSuspendChecks(graph); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -313,7 +314,7 @@ TEST(LiveRangesTest, Loop2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -389,7 +390,7 @@ TEST(LiveRangesTest, CFG4) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -431,7 +432,7 @@ TEST(LiveRangesTest, CFG4) { ASSERT_TRUE(range->GetNext() == nullptr); HPhi* phi = liveness.GetInstructionFromSsaIndex(4)->AsPhi(); - ASSERT_EQ(phi->NumberOfUses(), 1u); + ASSERT_TRUE(phi->GetUses().HasOnlyOneUse()); interval = phi->GetLiveInterval(); range = interval->GetFirstRange(); ASSERT_EQ(26u, range->GetStart()); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 6f706c391d..f2d49ac397 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -19,6 +19,7 @@ #include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" @@ -51,7 +52,7 @@ 
static void TestCode(const uint16_t* data, const char* expected) { graph->TryBuildingSsa(); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 9f2f9ece85..990d662d86 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -27,7 +27,7 @@ LocationSummary::LocationSummary(HInstruction* instruction, temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), environment_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->EnvironmentSize()), - output_overlaps_(true), + output_overlaps_(Location::kOutputOverlap), call_kind_(call_kind), stack_mask_(nullptr), register_mask_(0), diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 68d605957f..6bf8f776fd 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -37,7 +37,10 @@ std::ostream& operator<<(std::ostream& os, const Location& location); */ class Location : public ValueObject { public: - static constexpr bool kNoOutputOverlap = false; + enum OutputOverlap { + kOutputOverlap, + kNoOutputOverlap + }; enum Kind { kInvalid = 0, @@ -428,6 +431,14 @@ class RegisterSet : public ValueObject { return __builtin_popcount(core_registers_) + __builtin_popcount(floating_point_registers_); } + uint32_t GetCoreRegisters() const { + return core_registers_; + } + + uint32_t GetFloatingPointRegisters() const { + return floating_point_registers_; + } + private: uint32_t core_registers_; uint32_t floating_point_registers_; @@ -468,7 +479,7 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return inputs_.Size(); } - void SetOut(Location location, bool overlaps = true) { + void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) { DCHECK(output_.IsUnallocated() || output_.IsInvalid()); output_overlaps_ = overlaps; output_ = location; @@ -526,6 +537,10 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { register_mask_ |= (1 << reg_id); } + uint32_t GetRegisterMask() const { + return register_mask_; + } + bool RegisterContainsObject(uint32_t reg_id) { return RegisterSet::Contains(register_mask_, reg_id); } @@ -554,14 +569,21 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return false; } Location input = inputs_.Get(input_index); - if (input.IsRegister() || input.IsFpuRegister() || input.IsPair()) { + if (input.IsRegister() + || input.IsFpuRegister() + || input.IsPair() + || input.IsStackSlot() + || input.IsDoubleStackSlot()) { + // For fixed locations, the register allocator requires inputs to die before + // the instruction, so that input moves use the location of the input just + // before that instruction (and not potential moves due to splitting). return false; } return true; } bool OutputOverlapsWithInputs() const { - return output_overlaps_; + return output_overlaps_ == Location::kOutputOverlap; } bool Intrinsified() const { @@ -574,7 +596,7 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { GrowableArray<Location> environment_; // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot // share the same register as the inputs.
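// (Usage sketch for the new enum, illustrative only; the LocationSummary setup
// mirrors what the code generators already do, and `arena` and `instruction`
// are assumed to come from the surrounding context:
//
//   LocationSummary* locations =
//       new (arena) LocationSummary(instruction, LocationSummary::kNoCall);
//   locations->SetInAt(0, Location::RequiresRegister());
//   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
//
// An explicit Location::kNoOutputOverlap at the call site reads unambiguously
// where the old bare `false` did not.)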
- bool output_overlaps_; + Location::OutputOverlap output_overlaps_; Location output_; const CallKind call_kind_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index ade31380ec..fe9ce740da 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -15,6 +15,7 @@ */ #include "nodes.h" + #include "ssa_builder.h" #include "utils/growable_array.h" @@ -38,9 +39,11 @@ static void RemoveAsUser(HInstruction* instruction) { HEnvironment* environment = instruction->GetEnvironment(); if (environment != nullptr) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { - HInstruction* vreg = environment->GetInstructionAt(i); - if (vreg != nullptr) { - vreg->RemoveEnvironmentUser(environment, i); + HUseListNode<HEnvironment*>* vreg_env_use = environment->GetInstructionEnvUseAt(i); + if (vreg_env_use != nullptr) { + HInstruction* vreg = environment->GetInstructionAt(i); + DCHECK(vreg != nullptr); + vreg->RemoveEnvironmentUser(vreg_env_use); } } } @@ -60,19 +63,22 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit } } +void HGraph::RemoveBlock(HBasicBlock* block) const { + for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { + block->GetSuccessors().Get(j)->RemovePredecessor(block); + } + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + block->RemovePhi(it.Current()->AsPhi()); + } + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + block->RemoveInstruction(it.Current()); + } +} + void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const { for (size_t i = 0; i < blocks_.Size(); ++i) { if (!visited.IsBitSet(i)) { - HBasicBlock* block = blocks_.Get(i); - for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { - block->GetSuccessors().Get(j)->RemovePredecessor(block); - } - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - block->RemovePhi(it.Current()->AsPhi()); - } - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - block->RemoveInstruction(it.Current()); - } + RemoveBlock(blocks_.Get(i)); } } } @@ -421,8 +427,8 @@ static void Remove(HInstructionList* instruction_list, HBasicBlock* block, HInstruction* instruction) { DCHECK_EQ(block, instruction->GetBlock()); - DCHECK(instruction->GetUses() == nullptr); - DCHECK(instruction->GetEnvUses() == nullptr); + DCHECK(instruction->GetUses().IsEmpty()); + DCHECK(instruction->GetEnvUses().IsEmpty()); instruction->SetBlock(nullptr); instruction_list->RemoveInstruction(instruction); @@ -437,31 +443,49 @@ void HBasicBlock::RemovePhi(HPhi* phi) { Remove(&phis_, this, phi); } +void HEnvironment::CopyFrom(HEnvironment* env) { + for (size_t i = 0; i < env->Size(); i++) { + HInstruction* instruction = env->GetInstructionAt(i); + SetRawEnvAt(i, instruction); + if (instruction != nullptr) { + instruction->AddEnvUseAt(this, i); + } + } +} + template <typename T> -static void RemoveFromUseList(T* user, - size_t input_index, - HUseListNode<T>** list) { - HUseListNode<T>* previous = nullptr; - HUseListNode<T>* current = *list; - while (current != nullptr) { +static void RemoveFromUseList(T user, size_t input_index, HUseList<T>* list) { + HUseListNode<T>* current; + for (HUseIterator<T> use_it(*list); !use_it.Done(); use_it.Advance()) { + current = use_it.Current(); if (current->GetUser() == user && current->GetIndex() == input_index) { - if (previous == NULL) { - *list = current->GetTail(); - } else { -
previous->SetTail(current->GetTail()); - } + list->Remove(current); } - previous = current; - current = current->GetTail(); } } +HInstruction* HInstruction::GetNextDisregardingMoves() const { + HInstruction* next = GetNext(); + while (next != nullptr && next->IsParallelMove()) { + next = next->GetNext(); + } + return next; +} + +HInstruction* HInstruction::GetPreviousDisregardingMoves() const { + HInstruction* previous = GetPrevious(); + while (previous != nullptr && previous->IsParallelMove()) { + previous = previous->GetPrevious(); + } + return previous; +} + void HInstruction::RemoveUser(HInstruction* user, size_t input_index) { RemoveFromUseList(user, input_index, &uses_); } -void HInstruction::RemoveEnvironmentUser(HEnvironment* user, size_t input_index) { - RemoveFromUseList(user, input_index, &env_uses_); +void HInstruction::RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use) { + env_uses_.Remove(use); } void HInstructionList::AddInstruction(HInstruction* instruction) { @@ -553,24 +577,24 @@ bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const { void HInstruction::ReplaceWith(HInstruction* other) { DCHECK(other != nullptr); - for (HUseIterator<HInstruction> it(GetUses()); !it.Done(); it.Advance()) { - HUseListNode<HInstruction>* current = it.Current(); + for (HUseIterator<HInstruction*> it(GetUses()); !it.Done(); it.Advance()) { + HUseListNode<HInstruction*>* current = it.Current(); HInstruction* user = current->GetUser(); size_t input_index = current->GetIndex(); user->SetRawInputAt(input_index, other); other->AddUseAt(user, input_index); } - for (HUseIterator<HEnvironment> it(GetEnvUses()); !it.Done(); it.Advance()) { - HUseListNode<HEnvironment>* current = it.Current(); + for (HUseIterator<HEnvironment*> it(GetEnvUses()); !it.Done(); it.Advance()) { + HUseListNode<HEnvironment*>* current = it.Current(); HEnvironment* user = current->GetUser(); size_t input_index = current->GetIndex(); user->SetRawEnvAt(input_index, other); other->AddEnvUseAt(user, input_index); } - uses_ = nullptr; - env_uses_ = nullptr; + uses_.Clear(); + env_uses_.Clear(); } void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) { @@ -646,7 +670,7 @@ HConstant* HBinaryOperation::TryStaticEvaluation() const { if (GetResultType() == Primitive::kPrimLong) { return new(GetBlock()->GetGraph()->GetArena()) HLongConstant(value); } else { - DCHECK(GetResultType() == Primitive::kPrimInt); + DCHECK_EQ(GetResultType(), Primitive::kPrimInt); return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value); } } @@ -654,11 +678,7 @@ HConstant* HBinaryOperation::TryStaticEvaluation() const { } bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const { - HInstruction* previous = if_->GetPrevious(); - while (previous != nullptr && previous->IsParallelMove()) { - previous = previous->GetPrevious(); - } - return previous == this; + return this == if_->GetPreviousDisregardingMoves(); } bool HInstruction::Equals(HInstruction* other) const { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index fa51f27f0a..2cc021cccf 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -195,6 +195,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { ArenaBitVector* visiting); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited) const; + void RemoveBlock(HBasicBlock* block) const; ArenaAllocator* const arena_; @@ -586,26 +587,104 @@ 
FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) } \ virtual void Accept(HGraphVisitor* visitor) +template <typename T> class HUseList; + template <typename T> class HUseListNode : public ArenaObject<kArenaAllocMisc> { public: - HUseListNode(T* user, size_t index, HUseListNode* tail) - : user_(user), index_(index), tail_(tail) {} - - HUseListNode* GetTail() const { return tail_; } - T* GetUser() const { return user_; } + HUseListNode* GetPrevious() const { return prev_; } + HUseListNode* GetNext() const { return next_; } + T GetUser() const { return user_; } size_t GetIndex() const { return index_; } - void SetTail(HUseListNode<T>* node) { tail_ = node; } - private: - T* const user_; + HUseListNode(T user, size_t index) + : user_(user), index_(index), prev_(nullptr), next_(nullptr) {} + + T const user_; const size_t index_; - HUseListNode<T>* tail_; + HUseListNode<T>* prev_; + HUseListNode<T>* next_; + + friend class HUseList<T>; DISALLOW_COPY_AND_ASSIGN(HUseListNode); }; +template <typename T> +class HUseList : public ValueObject { + public: + HUseList() : first_(nullptr) {} + + void Clear() { + first_ = nullptr; + } + + // Adds a new entry at the beginning of the use list and returns + // the newly created node. + HUseListNode<T>* AddUse(T user, size_t index, ArenaAllocator* arena) { + HUseListNode<T>* new_node = new (arena) HUseListNode<T>(user, index); + if (IsEmpty()) { + first_ = new_node; + } else { + first_->prev_ = new_node; + new_node->next_ = first_; + first_ = new_node; + } + return new_node; + } + + HUseListNode<T>* GetFirst() const { + return first_; + } + + void Remove(HUseListNode<T>* node) { + if (node->prev_ != nullptr) { + node->prev_->next_ = node->next_; + } + if (node->next_ != nullptr) { + node->next_->prev_ = node->prev_; + } + if (node == first_) { + first_ = node->next_; + } + } + + bool IsEmpty() const { + return first_ == nullptr; + } + + bool HasOnlyOneUse() const { + return first_ != nullptr && first_->next_ == nullptr; + } + + private: + HUseListNode<T>* first_; +}; + +template<typename T> +class HUseIterator : public ValueObject { + public: + explicit HUseIterator(const HUseList<T>& uses) : current_(uses.GetFirst()) {} + + bool Done() const { return current_ == nullptr; } + + void Advance() { + DCHECK(!Done()); + current_ = current_->GetNext(); + } + + HUseListNode<T>* Current() const { + DCHECK(!Done()); + return current_; + } + + private: + HUseListNode<T>* current_; + + friend class HValue; +}; + // Represents the side effects an instruction may have. class SideEffects : public ValueObject { public: @@ -669,6 +748,57 @@ class SideEffects : public ValueObject { size_t flags_; }; +// A HEnvironment object contains the values of virtual registers at a given location. +class HEnvironment : public ArenaObject<kArenaAllocMisc> { + public: + HEnvironment(ArenaAllocator* arena, size_t number_of_vregs) + : vregs_(arena, number_of_vregs) { + vregs_.SetSize(number_of_vregs); + for (size_t i = 0; i < number_of_vregs; i++) { + vregs_.Put(i, VRegInfo(nullptr, nullptr)); + } + } + + void CopyFrom(HEnvironment* env); + + void SetRawEnvAt(size_t index, HInstruction* instruction) { + vregs_.Put(index, VRegInfo(instruction, nullptr)); + } + + // Record instructions' use entries of this environment for constant-time removal. 
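// (Sketch of why removal is now constant time, not part of the patch: with the
// doubly-linked HUseList above, HUseList<T>::Remove unlinks a node that the
// caller already holds, without walking the list:
//
//   node->prev_->next_ = node->next_;  // skipped when node is first_
//   node->next_->prev_ = node->prev_;  // skipped when node is the tail
//
// An environment therefore only needs to cache the HUseListNode<HEnvironment*>
// returned by AddUse, which RecordEnvUse below stores per virtual register.)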
+ void RecordEnvUse(HUseListNode<HEnvironment*>* env_use) { + DCHECK(env_use->GetUser() == this); + size_t index = env_use->GetIndex(); + VRegInfo info = vregs_.Get(index); + DCHECK(info.vreg_ != nullptr); + DCHECK(info.node_ == nullptr); + vregs_.Put(index, VRegInfo(info.vreg_, env_use)); + } + + HInstruction* GetInstructionAt(size_t index) const { + return vregs_.Get(index).vreg_; + } + + HUseListNode<HEnvironment*>* GetInstructionEnvUseAt(size_t index) const { + return vregs_.Get(index).node_; + } + + size_t Size() const { return vregs_.Size(); } + + private: + struct VRegInfo { + HInstruction* vreg_; + HUseListNode<HEnvironment*>* node_; + + VRegInfo(HInstruction* instruction, HUseListNode<HEnvironment*>* env_use) + : vreg_(instruction), node_(env_use) {} + }; + + GrowableArray<VRegInfo> vregs_; + + DISALLOW_COPY_AND_ASSIGN(HEnvironment); +}; + class HInstruction : public ArenaObject<kArenaAllocMisc> { public: explicit HInstruction(SideEffects side_effects) @@ -677,8 +807,6 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { block_(nullptr), id_(-1), ssa_index_(-1), - uses_(nullptr), - env_uses_(nullptr), environment_(nullptr), locations_(nullptr), live_interval_(nullptr), @@ -696,6 +824,9 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { HInstruction* GetNext() const { return next_; } HInstruction* GetPrevious() const { return previous_; } + HInstruction* GetNextDisregardingMoves() const; + HInstruction* GetPreviousDisregardingMoves() const; + HBasicBlock* GetBlock() const { return block_; } void SetBlock(HBasicBlock* block) { block_ = block; } bool IsInBlock() const { return block_ != nullptr; } @@ -716,35 +847,27 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { virtual bool CanThrow() const { return false; } bool HasSideEffects() const { return side_effects_.HasSideEffects(); } + virtual bool CanDoImplicitNullCheck() const { return false; } + void AddUseAt(HInstruction* user, size_t index) { - uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HInstruction>(user, index, uses_); + uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena()); } void AddEnvUseAt(HEnvironment* user, size_t index) { DCHECK(user != nullptr); - env_uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HEnvironment>( - user, index, env_uses_); + HUseListNode<HEnvironment*>* env_use = + env_uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena()); + user->RecordEnvUse(env_use); } void RemoveUser(HInstruction* user, size_t index); - void RemoveEnvironmentUser(HEnvironment* user, size_t index); - - HUseListNode<HInstruction>* GetUses() const { return uses_; } - HUseListNode<HEnvironment>* GetEnvUses() const { return env_uses_; } + void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use); - bool HasUses() const { return uses_ != nullptr || env_uses_ != nullptr; } - bool HasEnvironmentUses() const { return env_uses_ != nullptr; } + const HUseList<HInstruction*>& GetUses() { return uses_; } + const HUseList<HEnvironment*>& GetEnvUses() { return env_uses_; } - size_t NumberOfUses() const { - // TODO: Optimize this method if it is used outside of the HGraphVisualizer. - size_t result = 0; - HUseListNode<HInstruction>* current = uses_; - while (current != nullptr) { - current = current->GetTail(); - ++result; - } - return result; - } + bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); } + bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); } // Does this instruction strictly dominate `other_instruction`? 
// Returns false if this instruction and `other_instruction` are the same. @@ -775,10 +898,6 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { // Insert `this` instruction in `cursor`'s graph, just before `cursor`. void InsertBefore(HInstruction* cursor); - bool HasOnlyOneUse() const { - return uses_ != nullptr && uses_->GetTail() == nullptr; - } - #define INSTRUCTION_TYPE_CHECK(type, super) \ bool Is##type() const { return (As##type() != nullptr); } \ virtual const H##type* As##type() const { return nullptr; } \ @@ -841,10 +960,10 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { int ssa_index_; // List of instructions that have this instruction as input. - HUseListNode<HInstruction>* uses_; + HUseList<HInstruction*> uses_; // List of environments that contain this instruction. - HUseListNode<HEnvironment>* env_uses_; + HUseList<HEnvironment*> env_uses_; // The environment associated with this instruction. Not null if the instruction // might jump out of the method. @@ -870,69 +989,6 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { }; std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& rhs); -template<typename T> -class HUseIterator : public ValueObject { - public: - explicit HUseIterator(HUseListNode<T>* uses) : current_(uses) {} - - bool Done() const { return current_ == nullptr; } - - void Advance() { - DCHECK(!Done()); - current_ = current_->GetTail(); - } - - HUseListNode<T>* Current() const { - DCHECK(!Done()); - return current_; - } - - private: - HUseListNode<T>* current_; - - friend class HValue; -}; - -// A HEnvironment object contains the values of virtual registers at a given location. -class HEnvironment : public ArenaObject<kArenaAllocMisc> { - public: - HEnvironment(ArenaAllocator* arena, size_t number_of_vregs) : vregs_(arena, number_of_vregs) { - vregs_.SetSize(number_of_vregs); - for (size_t i = 0; i < number_of_vregs; i++) { - vregs_.Put(i, nullptr); - } - } - - void Populate(const GrowableArray<HInstruction*>& env) { - for (size_t i = 0; i < env.Size(); i++) { - HInstruction* instruction = env.Get(i); - vregs_.Put(i, instruction); - if (instruction != nullptr) { - instruction->AddEnvUseAt(this, i); - } - } - } - - void SetRawEnvAt(size_t index, HInstruction* instruction) { - vregs_.Put(index, instruction); - } - - HInstruction* GetInstructionAt(size_t index) const { - return vregs_.Get(index); - } - - GrowableArray<HInstruction*>* GetVRegs() { - return &vregs_; - } - - size_t Size() const { return vregs_.Size(); } - - private: - GrowableArray<HInstruction*> vregs_; - - DISALLOW_COPY_AND_ASSIGN(HEnvironment); -}; - class HInputIterator : public ValueObject { public: explicit HInputIterator(HInstruction* instruction) : instruction_(instruction), index_(0) {} @@ -1597,7 +1653,7 @@ class HInvoke : public HInstruction { // Runtime needs to walk the stack, so Dex -> Dex calls need to // know their environment. - virtual bool NeedsEnvironment() const { return true; } + bool NeedsEnvironment() const OVERRIDE { return true; } void SetArgumentAt(size_t index, HInstruction* argument) { SetRawInputAt(index, argument); @@ -1659,6 +1715,12 @@ class HInvokeStaticOrDirect : public HInvoke { : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), invoke_type_(invoke_type) {} + bool CanDoImplicitNullCheck() const OVERRIDE { + // We access the method via the dex cache so we can't do an implicit null check. + // TODO: for intrinsics we can generate implicit null checks. 
+ return false; + } + InvokeType GetInvokeType() const { return invoke_type_; } DECLARE_INSTRUCTION(InvokeStaticOrDirect); @@ -1680,6 +1742,11 @@ class HInvokeVirtual : public HInvoke { : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), vtable_index_(vtable_index) {} + bool CanDoImplicitNullCheck() const OVERRIDE { + // TODO: Add implicit null checks in intrinsics. + return !GetLocations()->Intrinsified(); + } + uint32_t GetVTableIndex() const { return vtable_index_; } DECLARE_INSTRUCTION(InvokeVirtual); @@ -1701,6 +1768,11 @@ class HInvokeInterface : public HInvoke { : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), imt_index_(imt_index) {} + bool CanDoImplicitNullCheck() const OVERRIDE { + // TODO: Add implicit null checks in intrinsics. + return !GetLocations()->Intrinsified(); + } + uint32_t GetImtIndex() const { return imt_index_; } uint32_t GetDexMethodIndex() const { return dex_method_index_; } @@ -2180,7 +2252,11 @@ class HInstanceFieldGet : public HExpression<1> { return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } - virtual size_t ComputeHashCode() const { + bool CanDoImplicitNullCheck() const OVERRIDE { + return GetFieldOffset().Uint32Value() < kPageSize; + } + + size_t ComputeHashCode() const OVERRIDE { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } @@ -2210,11 +2286,14 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { SetRawInputAt(1, value); } + bool CanDoImplicitNullCheck() const OVERRIDE { + return GetFieldOffset().Uint32Value() < kPageSize; + } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } bool IsVolatile() const { return field_info_.IsVolatile(); } - HInstruction* GetValue() const { return InputAt(1); } DECLARE_INSTRUCTION(InstanceFieldSet); @@ -2238,6 +2317,15 @@ class HArrayGet : public HExpression<2> { UNUSED(other); return true; } + bool CanDoImplicitNullCheck() const OVERRIDE { + // TODO: We can be smarter here. + // Currently, the array access is always preceded by an ArrayLength or a NullCheck + // which generates the implicit null check. There are cases when these can be removed + // to produce better code. If we ever add optimizations to do so we should allow an + // implicit check here (as long as the address falls in the first page). + return false; + } + void SetType(Primitive::Type type) { type_ = type; } HInstruction* GetArray() const { return InputAt(0); } @@ -2265,12 +2353,17 @@ class HArraySet : public HTemplateInstruction<3> { SetRawInputAt(2, value); } - bool NeedsEnvironment() const { + bool NeedsEnvironment() const OVERRIDE { // We currently always call a runtime method to catch array store // exceptions. return needs_type_check_; } + bool CanDoImplicitNullCheck() const OVERRIDE { + // TODO: Same as for ArrayGet. 
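// (Illustrative note on the kPageSize tests above; a sketch of the rationale,
// not part of the patch: a load or store through a null reference at a small
// offset is guaranteed to fault in the unmapped page at address zero, so the
// fault handler can raise the NullPointerException and the explicit check can
// be elided. Roughly:
//
//   offset <  kPageSize  => access faults on null, implicit check is safe
//   offset >= kPageSize  => access could hit a mapped page, keep the check
// )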
+ return false; + } + void ClearNeedsTypeCheck() { needs_type_check_ = false; } @@ -2313,11 +2406,12 @@ class HArrayLength : public HExpression<1> { SetRawInputAt(0, array); } - virtual bool CanBeMoved() const { return true; } - virtual bool InstructionDataEquals(HInstruction* other) const { + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { UNUSED(other); return true; } + bool CanDoImplicitNullCheck() const OVERRIDE { return true; } DECLARE_INSTRUCTION(ArrayLength); @@ -2802,18 +2896,25 @@ class HParallelMove : public HTemplateInstruction<0> { AddMove(source.ToLow(), destination.ToLow(), instruction); AddMove(source.ToHigh(), destination.ToHigh(), nullptr); } else if (source.IsPair()) { - DCHECK(destination.IsDoubleStackSlot()); + DCHECK(destination.IsDoubleStackSlot()) << destination; AddMove(source.ToLow(), Location::StackSlot(destination.GetStackIndex()), instruction); AddMove(source.ToHigh(), Location::StackSlot(destination.GetHighStackIndex(4)), nullptr); } else if (destination.IsPair()) { - DCHECK(source.IsDoubleStackSlot()); - AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction); - // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to - // always be 4. - static constexpr int kHighOffset = 4; - AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)), - destination.ToHigh(), - nullptr); + if (source.IsConstant()) { + // We put the same constant in the move. The code generator will handle which + // low or high part to use. + AddMove(source, destination.ToLow(), instruction); + AddMove(source, destination.ToHigh(), nullptr); + } else { + DCHECK(source.IsDoubleStackSlot()); + AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction); + // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to + // always be 4. 
+ static constexpr int kHighOffset = 4; + AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)), + destination.ToHigh(), + nullptr); + } } else { if (kIsDebugBuild) { if (instruction != nullptr) { diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 70dd8d7f88..5dbdc74924 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -81,13 +81,12 @@ TEST(Node, InsertInstruction) { entry->AddInstruction(new (&allocator) HExit()); ASSERT_FALSE(parameter1->HasUses()); - ASSERT_EQ(parameter1->NumberOfUses(), 0u); HInstruction* to_insert = new (&allocator) HNullCheck(parameter1, 0); entry->InsertInstructionBefore(to_insert, parameter2); ASSERT_TRUE(parameter1->HasUses()); - ASSERT_EQ(parameter1->NumberOfUses(), 1u); + ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse()); } /** @@ -105,13 +104,12 @@ TEST(Node, AddInstruction) { entry->AddInstruction(parameter); ASSERT_FALSE(parameter->HasUses()); - ASSERT_EQ(parameter->NumberOfUses(), 0u); HInstruction* to_add = new (&allocator) HNullCheck(parameter, 0); entry->AddInstruction(to_add); ASSERT_TRUE(parameter->HasUses()); - ASSERT_EQ(parameter->NumberOfUses(), 1u); + ASSERT_TRUE(parameter->GetUses().HasOnlyOneUse()); } } // namespace art diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index ad4819840f..a590c439a9 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -39,6 +39,7 @@ #include "nodes.h" #include "prepare_for_register_allocation.h" #include "register_allocator.h" +#include "side_effects_analysis.h" #include "ssa_builder.h" #include "ssa_phi_elimination.h" #include "ssa_liveness_analysis.h" @@ -214,7 +215,8 @@ static void RunOptimizations(HGraph* graph, HInliner inliner(graph, dex_compilation_unit, driver, stats); HConstantFolding fold2(graph); - GVNOptimization gvn(graph); + SideEffectsAnalysis side_effects(graph); + GVNOptimization gvn(graph, side_effects); BoundsCheckElimination bce(graph); InstructionSimplifier simplify2(graph); @@ -229,6 +231,7 @@ static void RunOptimizations(HGraph* graph, &simplify1, &inliner, &fold2, + &side_effects, &gvn, &bce, &simplify2 @@ -286,7 +289,7 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, ArrayRef<const uint8_t>(allocator.GetMemory()), codegen->GetFrameSize(), codegen->GetCoreSpillMask(), - 0, /* FPR spill mask, unused */ + codegen->GetFpuSpillMask(), ArrayRef<const uint8_t>(stack_map)); } @@ -313,7 +316,7 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( ArrayRef<const uint8_t>(allocator.GetMemory()), codegen->GetFrameSize(), codegen->GetCoreSpillMask(), - 0, /* FPR spill mask, unused */ + codegen->GetFpuSpillMask(), &src_mapping_table, AlignVectorSize(mapping_table), AlignVectorSize(vmap_table), @@ -330,7 +333,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, const DexFile& dex_file) const { UNUSED(invoke_type); compilation_stats_.RecordStat(MethodCompilationStat::kAttemptCompilation); - InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet(); + CompilerDriver* compiler_driver = GetCompilerDriver(); + InstructionSet instruction_set = compiler_driver->GetInstructionSet(); // Always use the thumb2 assembler: some runtime functionality (like implicit stack // overflow checks) assume thumb2. 
if (instruction_set == kArm) { @@ -351,7 +355,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, DexCompilationUnit dex_compilation_unit( nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item, class_def_idx, method_idx, access_flags, - GetCompilerDriver()->GetVerifiedMethod(&dex_file, method_idx)); + compiler_driver->GetVerifiedMethod(&dex_file, method_idx)); std::string method_name = PrettyMethod(method_idx, dex_file); @@ -366,7 +370,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, &dex_compilation_unit, &dex_compilation_unit, &dex_file, - GetCompilerDriver(), + compiler_driver, &compilation_stats_); VLOG(compiler) << "Building " << PrettyMethod(method_idx, dex_file); @@ -376,9 +380,11 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } - CompilerDriver* compiler_driver = GetCompilerDriver(); std::unique_ptr<CodeGenerator> codegen( - CodeGenerator::Create(graph, instruction_set, *compiler_driver->GetInstructionSetFeatures())); + CodeGenerator::Create(graph, + instruction_set, + *compiler_driver->GetInstructionSetFeatures(), + compiler_driver->GetCompilerOptions())); if (codegen.get() == nullptr) { CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler"; compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 7186dbe85e..12acd0884a 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -55,11 +55,10 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { bool needs_materialization = false; - if (!condition->HasOnlyOneUse()) { + if (!condition->GetUses().HasOnlyOneUse()) { needs_materialization = true; } else { - HUseListNode<HInstruction>* uses = condition->GetUses(); - HInstruction* user = uses->GetUser(); + HInstruction* user = condition->GetUses().GetFirst()->GetUser(); if (!user->IsIf()) { needs_materialization = true; } else { diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h index 2c8166e65b..d2a21c8980 100644 --- a/compiler/optimizing/pretty_printer.h +++ b/compiler/optimizing/pretty_printer.h @@ -55,7 +55,7 @@ class HPrettyPrinter : public HGraphVisitor { if (instruction->HasUses()) { PrintString(" ["); bool first = true; - for (HUseIterator<HInstruction> it(instruction->GetUses()); !it.Done(); it.Advance()) { + for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { if (first) { first = false; } else { diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 1b42e94960..260076acbf 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -58,7 +58,8 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, reserved_out_slots_(0), maximum_number_of_live_core_registers_(0), maximum_number_of_live_fp_registers_(0) { - codegen->SetupBlockedRegisters(); + static constexpr bool kIsBaseline = false; + codegen->SetupBlockedRegisters(kIsBaseline); physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters()); physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters()); // Always 
reserve for the current method and the graph's max out registers. @@ -71,7 +72,10 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, if (!Supports(instruction_set)) { return false; } - if (instruction_set == kArm64 || instruction_set == kX86_64) { + if (instruction_set == kArm64 + || instruction_set == kX86_64 + || instruction_set == kArm + || instruction_set == kThumb2) { return true; } for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) { @@ -85,10 +89,6 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, current->GetType() == Primitive::kPrimDouble) { return false; } - } else if (instruction_set == kArm || instruction_set == kThumb2) { - if (current->GetType() == Primitive::kPrimLong) { - return false; - } } } } @@ -279,14 +279,18 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { if (locations->WillCall()) { // Block all registers. for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - BlockRegister(Location::RegisterLocation(i), - position, - position + 1); + if (!codegen_->IsCoreCalleeSaveRegister(i)) { + BlockRegister(Location::RegisterLocation(i), + position, + position + 1); + } } for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - BlockRegister(Location::FpuRegisterLocation(i), - position, - position + 1); + if (!codegen_->IsFloatingPointCalleeSaveRegister(i)) { + BlockRegister(Location::FpuRegisterLocation(i), + position, + position + 1); + } } } @@ -628,6 +632,9 @@ void RegisterAllocator::LinearScan() { // (6) If the interval had a register allocated, add it to the list of active // intervals. if (success) { + codegen_->AddAllocatedRegister(processing_core_registers_ + ? Location::RegisterLocation(current->GetRegister()) + : Location::FpuRegisterLocation(current->GetRegister())); active_.Add(current); if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) { current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister())); @@ -680,7 +687,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } } - int reg = -1; + int reg = kNoRegister; if (current->HasRegister()) { // Some instructions have a fixed register output. reg = current->GetRegister(); @@ -696,13 +703,13 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { DCHECK(!IsBlocked(hint)); reg = hint; } else if (current->IsLowInterval()) { - reg = FindAvailableRegisterPair(free_until); + reg = FindAvailableRegisterPair(free_until, current->GetStart()); } else { reg = FindAvailableRegister(free_until); } } - DCHECK_NE(reg, -1); + DCHECK_NE(reg, kNoRegister); // If we could not find a register, we need to spill. if (free_until[reg] == 0) { return false; @@ -730,8 +737,8 @@ bool RegisterAllocator::IsBlocked(int reg) const { : blocked_fp_registers_[reg]; } -int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use) const { - int reg = -1; +int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const { + int reg = kNoRegister; // Pick the register pair that is used the last. 
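// (Sketch of the intent behind the new `starting_at` parameter, not part of
// the patch: if the best pair recorded so far is already in use at the
// interval's start, it can never serve this interval, so any other free
// candidate is taken unconditionally. In simplified form:
//
//   if (next_use[reg] <= starting_at) reg = i;
//
// This matches the `else if` branch added below.)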
for (size_t i = 0; i < number_of_registers_; ++i) { if (IsBlocked(i)) continue; @@ -739,24 +746,28 @@ int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use) const { int high_register = GetHighForLowRegister(i); if (IsBlocked(high_register)) continue; int existing_high_register = GetHighForLowRegister(reg); - if ((reg == -1) || (next_use[i] >= next_use[reg] + if ((reg == kNoRegister) || (next_use[i] >= next_use[reg] && next_use[high_register] >= next_use[existing_high_register])) { reg = i; if (next_use[i] == kMaxLifetimePosition && next_use[high_register] == kMaxLifetimePosition) { break; } + } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) { + // If one of the current registers is known to be unavailable, just unconditionally + // try a new one. + reg = i; } } return reg; } int RegisterAllocator::FindAvailableRegister(size_t* next_use) const { - int reg = -1; + int reg = kNoRegister; // Pick the register that is used the last. for (size_t i = 0; i < number_of_registers_; ++i) { if (IsBlocked(i)) continue; - if (reg == -1 || next_use[i] > next_use[reg]) { + if (reg == kNoRegister || next_use[i] > next_use[reg]) { reg = i; if (next_use[i] == kMaxLifetimePosition) break; } @@ -764,6 +775,28 @@ int RegisterAllocator::FindAvailableRegister(size_t* next_use) const { return reg; } +bool RegisterAllocator::TrySplitNonPairIntervalAt(size_t position, + size_t first_register_use, + size_t* next_use) { + for (size_t i = 0, e = active_.Size(); i < e; ++i) { + LiveInterval* active = active_.Get(i); + DCHECK(active->HasRegister()); + // Split the first interval found. + if (first_register_use <= next_use[active->GetRegister()] + && !active->IsLowInterval() + && !active->IsHighInterval()) { + LiveInterval* split = Split(active, position); + active_.DeleteAt(i); + if (split != active) { + handled_.Add(active); + } + AddSorted(unhandled_, split); + return true; + } + } + return false; +} + // Find the register that is used the last, and spill the interval // that holds it. If the first use of `current` is after that register // we spill `current` instead. @@ -824,27 +857,50 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { } } - int reg = -1; + int reg = kNoRegister; + bool should_spill = false; if (current->HasRegister()) { DCHECK(current->IsHighInterval()); reg = current->GetRegister(); + // When allocating the low part, we made sure the high register was available. + DCHECK_LT(first_register_use, next_use[reg]); } else if (current->IsLowInterval()) { - reg = FindAvailableRegisterPair(next_use); + reg = FindAvailableRegisterPair(next_use, current->GetStart()); + // We should spill if either register of the pair is unavailable. + should_spill = (first_register_use >= next_use[reg]) + || (first_register_use >= next_use[GetHighForLowRegister(reg)]); } else { DCHECK(!current->IsHighInterval()); reg = FindAvailableRegister(next_use); + should_spill = (first_register_use >= next_use[reg]); } - if ((first_register_use >= next_use[reg]) - || (current->IsLowInterval() && first_register_use >= next_use[GetHighForLowRegister(reg)])) { + DCHECK_NE(reg, kNoRegister); + if (should_spill) { DCHECK(!current->IsHighInterval()); - // If the first use of that instruction is after the last use of the found - // register, we split this interval just before its first register use.
- AllocateSpillSlotFor(current); - LiveInterval* split = Split(current, first_register_use - 1); - DCHECK_NE(current, split) << "There are not enough registers available for " - << split->GetParent()->GetDefinedBy()->DebugName(); - AddSorted(unhandled_, split); + bool is_allocation_at_use_site = (current->GetStart() == (first_register_use - 1)); + if (current->IsLowInterval() + && is_allocation_at_use_site + && TrySplitNonPairIntervalAt(current->GetStart(), first_register_use, next_use)) { + // If we're allocating a register for `current` because the instruction at + // that position requires it, but we think we should spill, then there are + // non-pair intervals blocking the allocation. We split the first + // interval found, and put ourselves first in the `unhandled_` list. + LiveInterval* existing = unhandled_->Peek(); + DCHECK(existing->IsHighInterval()); + DCHECK_EQ(existing->GetLowInterval(), current); + unhandled_->Add(current); + } else { + // If the first use of that instruction is after the last use of the found + // register, we split this interval just before its first register use. + AllocateSpillSlotFor(current); + LiveInterval* split = Split(current, first_register_use - 1); + DCHECK_NE(current, split) << "There are not enough registers available for " + << split->GetParent()->GetDefinedBy()->DebugName() << " " + << split->GetParent()->GetDefinedBy()->GetId() + << " at " << first_register_use - 1; + AddSorted(unhandled_, split); + } return false; } else { // Use this register and spill the active and inactive intervals that @@ -861,6 +917,23 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { handled_.Add(active); } AddSorted(unhandled_, split); + + if (active->IsLowInterval() || active->IsHighInterval()) { + LiveInterval* other_half = active->IsLowInterval() + ? active->GetHighInterval() + : active->GetLowInterval(); + // We also need to remove the other half from the list of actives. + bool found = false; + for (size_t j = 0; j < active_.Size(); ++j) { + if (active_.Get(j) == other_half) { + found = true; + active_.DeleteAt(j); + handled_.Add(other_half); + break; + } + } + DCHECK(found); + } break; } } @@ -893,6 +966,25 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { --e; handled_.Add(inactive); AddSorted(unhandled_, split); + + if (inactive->IsLowInterval() || inactive->IsHighInterval()) { + LiveInterval* other_half = inactive->IsLowInterval() + ? inactive->GetHighInterval() + : inactive->GetLowInterval(); + + // We also need to remove the other half from the list of inactives. + bool found = false; + for (size_t j = 0; j < inactive_.Size(); ++j) { + if (inactive_.Get(j) == other_half) { + found = true; + inactive_.DeleteAt(j); + --e; + handled_.Add(other_half); + break; + } + } + DCHECK(found); + } } } } @@ -907,7 +999,8 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter size_t insert_at = 0; for (size_t i = array->Size(); i > 0; --i) { LiveInterval* current = array->Get(i - 1); - if (current->StartsAfter(interval)) { + // High intervals must be processed right after their low equivalent.
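// (Ordering sketch, illustrative only: `unhandled_` is popped from the end, so
// the two halves of a pair must stay adjacent, with the low part on top:
//
//   unhandled_: ... | highB | lowB | highA | lowA   <-- popped first
//
// The extra !IsHighInterval() test below keeps AddSorted from wedging another
// interval between a high interval and its low twin.)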
+ if (current->StartsAfter(interval) && !current->IsHighInterval()) { insert_at = i; break; } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) { @@ -1026,6 +1119,7 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { static bool IsValidDestination(Location destination) { return destination.IsRegister() + || destination.IsRegisterPair() || destination.IsFpuRegister() || destination.IsFpuRegisterPair() || destination.IsStackSlot() @@ -1066,7 +1160,7 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; HInstruction* at = liveness_.GetInstructionFromPosition(position / 2); @@ -1130,7 +1224,7 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; DCHECK_EQ(block->GetSuccessors().Size(), 1u); @@ -1160,7 +1254,7 @@ void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; HInstruction* first = block->GetFirstInstruction(); @@ -1178,7 +1272,7 @@ void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; if (instruction->IsPhi()) { @@ -1271,9 +1365,11 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { switch (source.GetKind()) { case Location::kRegister: { locations->AddLiveRegister(source); - DCHECK_LE(locations->GetNumberOfLiveRegisters(), - maximum_number_of_live_core_registers_ + - maximum_number_of_live_fp_registers_); + if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) { + DCHECK_LE(locations->GetNumberOfLiveRegisters(), + maximum_number_of_live_core_registers_ + + maximum_number_of_live_fp_registers_); + } if (current->GetType() == Primitive::kPrimNot) { locations->SetRegisterBit(source.reg()); } @@ -1283,6 +1379,8 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { locations->AddLiveRegister(source); break; } + + case Location::kRegisterPair: case Location::kFpuRegisterPair: { locations->AddLiveRegister(source.ToLow()); locations->AddLiveRegister(source.ToHigh()); diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index ec46a776b5..b8f70bdc18 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -128,9 +128,13 @@ class RegisterAllocator { bool ValidateInternal(bool log_fatal_on_failure) const; void DumpInterval(std::ostream& stream, LiveInterval* interval) const; void DumpAllIntervals(std::ostream& stream) const; - int FindAvailableRegisterPair(size_t* next_use) const; + int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const; int FindAvailableRegister(size_t* next_use) const; + // Try splitting an active non-pair interval at the given 
`position`. + // Returns whether it was successful at finding such an interval. + bool TrySplitNonPairIntervalAt(size_t position, size_t first_register_use, size_t* next_use); + ArenaAllocator* const allocator_; CodeGenerator* const codegen_; const SsaLivenessAnalysis& liveness_; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 0948643355..cb5010afcd 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -19,6 +19,7 @@ #include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "register_allocator.h" @@ -40,7 +41,7 @@ static bool Check(const uint16_t* data) { const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); HGraph* graph = builder.BuildGraph(*item); graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -56,7 +57,7 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); GrowableArray<LiveInterval*> intervals(&allocator, 0); // Test with two intervals of the same range. @@ -295,7 +296,7 @@ TEST(RegisterAllocatorTest, Loop3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -327,7 +328,7 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -380,7 +381,7 @@ TEST(RegisterAllocatorTest, DeadPhi) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -402,7 +403,7 @@ TEST(RegisterAllocatorTest, FreeUntil) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -504,7 +505,7 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -519,7 +520,7 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, 
&input2); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -536,7 +537,7 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -553,7 +554,7 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -603,7 +604,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -616,7 +617,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -665,7 +666,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -679,7 +680,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -726,7 +727,7 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { { HGraph* graph = BuildDiv(&allocator, &div); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -814,7 +815,7 @@ TEST(RegisterAllocatorTest, SpillInactive) { locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); RegisterAllocator register_allocator(&allocator, &codegen, liveness); diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc new file mode 100644 index 0000000000..96e1c8f8eb --- /dev/null +++ b/compiler/optimizing/side_effects_analysis.cc @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "side_effects_analysis.h" + +namespace art { + +void SideEffectsAnalysis::Run() { + if (kIsDebugBuild) { + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + SideEffects effects = GetBlockEffects(block); + DCHECK(!effects.HasSideEffects() && !effects.HasDependencies()); + if (block->IsLoopHeader()) { + effects = GetLoopEffects(block); + DCHECK(!effects.HasSideEffects() && !effects.HasDependencies()); + } + } + } + + // Do a post order visit to ensure we visit a loop header after its loop body. + for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + + SideEffects effects = SideEffects::None(); + // Update `effects` with the side effects of all instructions in this block. + for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); + inst_it.Advance()) { + HInstruction* instruction = inst_it.Current(); + effects = effects.Union(instruction->GetSideEffects()); + if (effects.HasAllSideEffects()) { + break; + } + } + + block_effects_.Put(block->GetBlockId(), effects); + + if (block->IsLoopHeader()) { + // The side effects of the loop header are part of the loop. + UpdateLoopEffects(block->GetLoopInformation(), effects); + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + if (pre_header->IsInLoop()) { + // Update the side effects of the outer loop with the side effects of the inner loop. + // Note that this works because we know all the blocks of the inner loop are visited + // before the loop header of the outer loop. + UpdateLoopEffects(pre_header->GetLoopInformation(), GetLoopEffects(block)); + } + } else if (block->IsInLoop()) { + // Update the side effects of the loop with the side effects of this block. + UpdateLoopEffects(block->GetLoopInformation(), effects); + } + } + has_run_ = true; +} + +SideEffects SideEffectsAnalysis::GetLoopEffects(HBasicBlock* block) const { + DCHECK(block->IsLoopHeader()); + return loop_effects_.Get(block->GetBlockId()); +} + +SideEffects SideEffectsAnalysis::GetBlockEffects(HBasicBlock* block) const { + return block_effects_.Get(block->GetBlockId()); +} + +void SideEffectsAnalysis::UpdateLoopEffects(HLoopInformation* info, SideEffects effects) { + int id = info->GetHeader()->GetBlockId(); + loop_effects_.Put(id, loop_effects_.Get(id).Union(effects)); +} + +} // namespace art diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h new file mode 100644 index 0000000000..f1c98ac218 --- /dev/null +++ b/compiler/optimizing/side_effects_analysis.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h
new file mode 100644
index 0000000000..f1c98ac218
--- /dev/null
+++ b/compiler/optimizing/side_effects_analysis.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_
+#define ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class SideEffectsAnalysis : public HOptimization {
+ public:
+  explicit SideEffectsAnalysis(HGraph* graph)
+      : HOptimization(graph, true, "SideEffects"),
+        graph_(graph),
+        block_effects_(graph->GetArena(), graph->GetBlocks().Size(), SideEffects::None()),
+        loop_effects_(graph->GetArena(), graph->GetBlocks().Size(), SideEffects::None()) {}
+
+  SideEffects GetLoopEffects(HBasicBlock* block) const;
+  SideEffects GetBlockEffects(HBasicBlock* block) const;
+
+  // Compute side effects of individual blocks and loops.
+  void Run();
+
+  bool HasRun() const { return has_run_; }
+
+ private:
+  void UpdateLoopEffects(HLoopInformation* info, SideEffects effects);
+
+  HGraph* graph_;
+
+  // Checked in debug build, to ensure the pass has been run prior to
+  // running a pass that depends on it.
+  bool has_run_ = false;
+
+  // Side effects of individual blocks, that is the union of the side effects
+  // of the instructions in the block.
+  GrowableArray<SideEffects> block_effects_;
+
+  // Side effects of loops, that is the union of the side effects of the
+  // blocks contained in that loop.
+  GrowableArray<SideEffects> loop_effects_;
+
+  ART_FRIEND_TEST(GVNTest, LoopSideEffects);
+  DISALLOW_COPY_AND_ASSIGN(SideEffectsAnalysis);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index edfafcdd83..4f9c3b87c1 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -68,7 +68,7 @@ void SsaBuilder::BuildSsa() {
 }
 
 HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
-  return GetLocalsFor(block)->Get(local);
+  return GetLocalsFor(block)->GetInstructionAt(local);
 }
 
 void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
@@ -85,7 +85,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
       HPhi* phi = new (GetGraph()->GetArena()) HPhi(
           GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid);
       block->AddPhi(phi);
-      current_locals_->Put(local, phi);
+      current_locals_->SetRawEnvAt(local, phi);
     }
   }
   // Save the loop header so that the last phase of the analysis knows which
@@ -125,7 +125,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
         block->AddPhi(phi);
         value = phi;
       }
-      current_locals_->Put(local, value);
+      current_locals_->SetRawEnvAt(local, value);
   }
 }
@@ -235,7 +235,7 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
 }
 
 void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
-  HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber());
+  HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber());
   if (load->GetType() != value->GetType()
       && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) {
     // If the operation requests a specific type, we make sure its input is of that type.
@@ -246,7 +246,7 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
 }
 
 void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1));
+  current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1));
   store->GetBlock()->RemoveInstruction(store);
 }
 
@@ -256,7 +256,7 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) {
   }
   HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
       GetGraph()->GetArena(), current_locals_->Size());
-  environment->Populate(*current_locals_);
+  environment->CopyFrom(current_locals_);
   instruction->SetEnvironment(environment);
 }
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 2cbd51aa10..2eec87b618 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -36,14 +36,14 @@ class SsaBuilder : public HGraphVisitor {
 
   void BuildSsa();
 
-  GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) {
+  HEnvironment* GetLocalsFor(HBasicBlock* block) {
     HEnvironment* env = locals_for_.Get(block->GetBlockId());
     if (env == nullptr) {
       env = new (GetGraph()->GetArena()) HEnvironment(
           GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs());
       locals_for_.Put(block->GetBlockId(), env);
     }
-    return env->GetVRegs();
+    return env;
   }
 
   HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
@@ -60,7 +60,7 @@ class SsaBuilder : public HGraphVisitor {
 
  private:
   // Locals for the current block being visited.
-  GrowableArray<HInstruction*>* current_locals_;
+  HEnvironment* current_locals_;
 
   // Keep track of loop headers found. The last phase of the analysis iterates
   // over these blocks to set the inputs of their phis.
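Taken together, these SsaBuilder hunks swap the raw GrowableArray<HInstruction*> of locals for an HEnvironment. A condensed sketch of the resulting flow, assuming `block`, `reg`, `value` and `instruction` come from the enclosing visitor (only the calls appearing in the hunks above are real):

    // Illustrative only: the surrounding driver code is assumed.
    HEnvironment* locals = GetLocalsFor(block);             // lazily allocated per block
    locals->SetRawEnvAt(reg, value);                        // VisitStoreLocal records a vreg
    HInstruction* current = locals->GetInstructionAt(reg);  // VisitLoadLocal reads it back
    // VisitInstruction snapshots the current locals into a fresh environment:
    HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
        GetGraph()->GetArena(), locals->Size());
    environment->CopyFrom(locals);  // replaces the old Populate(*locals)
    instruction->SetEnvironment(environment);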
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index d41157b8d8..1b06315fce 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -232,9 +232,9 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
 
       if (current->HasEnvironment()) {
         // All instructions in the environment must be live.
-        GrowableArray<HInstruction*>* environment = current->GetEnvironment()->GetVRegs();
+        HEnvironment* environment = current->GetEnvironment();
         for (size_t i = 0, e = environment->Size(); i < e; ++i) {
-          HInstruction* instruction = environment->Get(i);
+          HInstruction* instruction = environment->GetInstructionAt(i);
           if (instruction != nullptr) {
             DCHECK(instruction->HasSsaIndex());
             live_in->SetBit(instruction->GetSsaIndex());
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index a123313426..b0d38531e9 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -254,16 +254,28 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
 
   void AddLoopRange(size_t start, size_t end) {
     DCHECK(first_range_ != nullptr);
-    while (first_range_ != nullptr && first_range_->GetEnd() < end) {
-      DCHECK_LE(start, first_range_->GetStart());
-      first_range_ = first_range_->GetNext();
+    DCHECK_LE(start, first_range_->GetStart());
+    // Find the range that covers the positions after the loop.
+    LiveRange* after_loop = first_range_;
+    LiveRange* last_in_loop = nullptr;
+    while (after_loop != nullptr && after_loop->GetEnd() < end) {
+      DCHECK_LE(start, after_loop->GetStart());
+      last_in_loop = after_loop;
+      after_loop = after_loop->GetNext();
     }
-    if (first_range_ == nullptr) {
+    if (after_loop == nullptr) {
       // Uses are only in the loop.
       first_range_ = last_range_ = new (allocator_) LiveRange(start, end, nullptr);
-    } else {
+    } else if (after_loop->GetStart() <= end) {
+      first_range_ = after_loop;
       // There are uses after the loop.
       first_range_->start_ = start;
+    } else {
+      // The use after the loop is after a lifetime hole.
+      DCHECK(last_in_loop != nullptr);
+      first_range_ = last_in_loop;
+      first_range_->start_ = start;
+      first_range_->end_ = end;
     }
   }
@@ -479,10 +491,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
   void Dump(std::ostream& stream) const {
     stream << "ranges: { ";
     LiveRange* current = first_range_;
-    do {
+    while (current != nullptr) {
       current->Dump(stream);
       stream << " ";
-    } while ((current = current->GetNext()) != nullptr);
+      current = current->GetNext();
+    }
     stream << "}, uses: { ";
     UsePosition* use = first_use_;
     if (use != nullptr) {
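The new AddLoopRange logic is easiest to see on concrete positions. Here is a self-contained toy model of the three cases, with a plain struct standing in for LiveRange and made-up lifetime positions (illustrative only, not ART code):

    #include <cassert>
    #include <cstddef>

    // Toy model of LiveInterval::AddLoopRange above.
    struct Range { size_t start; size_t end; Range* next; };

    Range* AddLoopRange(Range* first, size_t start, size_t end) {
      Range* after_loop = first;
      Range* last_in_loop = nullptr;
      while (after_loop != nullptr && after_loop->end < end) {
        last_in_loop = after_loop;
        after_loop = after_loop->next;
      }
      if (after_loop == nullptr) {
        // Case 1: uses are only in the loop; one range covers it.
        return new Range{start, end, nullptr};
      } else if (after_loop->start <= end) {
        // Case 2: a use follows the loop immediately; extend that range backwards.
        after_loop->start = start;
        return after_loop;
      } else {
        // Case 3 (the new branch): the next use sits behind a lifetime hole, so
        // cover exactly the loop and keep the hole instead of merging over it.
        // Like the DCHECK above, this assumes last_in_loop is non-null here.
        last_in_loop->start = start;
        last_in_loop->end = end;
        last_in_loop->next = after_loop;
        return last_in_loop;
      }
    }

    int main() {
      // Interval [14,18) -> [30,34), extended over a loop spanning [10,26).
      Range second{30, 34, nullptr};
      Range first{14, 18, &second};
      Range* result = AddLoopRange(&first, 10, 26);
      assert(result->start == 10 && result->end == 26);  // covers the loop...
      assert(result->next->start == 30);                 // ...and keeps the hole
      return 0;
    }

Before this change, the loop would have walked past [14,18) and produced a single [10,34) range, wiping out the lifetime hole between 26 and 30.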
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 58cea771b9..fd30c1bc76 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -26,8 +26,8 @@ void SsaDeadPhiElimination::Run() {
       HPhi* phi = inst_it.Current()->AsPhi();
       // Set dead ahead of running through uses. The phi may have no use.
       phi->SetDead();
-      for (HUseIterator<HInstruction> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
-        HUseListNode<HInstruction>* current = use_it.Current();
+      for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
+        HUseListNode<HInstruction*>* current = use_it.Current();
         HInstruction* user = current->GetUser();
         if (!user->IsPhi()) {
           worklist_.Add(phi);
@@ -61,9 +61,9 @@ void SsaDeadPhiElimination::Run() {
       next = current->GetNext();
       if (current->AsPhi()->IsDead()) {
         if (current->HasUses()) {
-          for (HUseIterator<HInstruction> use_it(current->GetUses()); !use_it.Done();
+          for (HUseIterator<HInstruction*> use_it(current->GetUses()); !use_it.Done();
                use_it.Advance()) {
-            HUseListNode<HInstruction>* user_node = use_it.Current();
+            HUseListNode<HInstruction*>* user_node = use_it.Current();
             HInstruction* user = user_node->GetUser();
             DCHECK(user->IsLoopHeaderPhi()) << user->GetId();
             DCHECK(user->AsPhi()->IsDead()) << user->GetId();
@@ -73,12 +73,12 @@ void SsaDeadPhiElimination::Run() {
           }
         }
         if (current->HasEnvironmentUses()) {
-          for (HUseIterator<HEnvironment> use_it(current->GetEnvUses()); !use_it.Done();
+          for (HUseIterator<HEnvironment*> use_it(current->GetEnvUses()); !use_it.Done();
                use_it.Advance()) {
-            HUseListNode<HEnvironment>* user_node = use_it.Current();
+            HUseListNode<HEnvironment*>* user_node = use_it.Current();
             HEnvironment* user = user_node->GetUser();
             user->SetRawEnvAt(user_node->GetIndex(), nullptr);
-            current->RemoveEnvironmentUser(user, user_node->GetIndex());
+            current->RemoveEnvironmentUser(user_node);
           }
         }
         block->RemovePhi(current->AsPhi());
@@ -132,8 +132,8 @@ void SsaRedundantPhiElimination::Run() {
     // Because we're updating the users of this phi, we may have new
     // phis candidate for elimination if this phi is in a loop. Add phis that
     // used this phi to the worklist.
-    for (HUseIterator<HInstruction> it(phi->GetUses()); !it.Done(); it.Advance()) {
-      HUseListNode<HInstruction>* current = it.Current();
+    for (HUseIterator<HInstruction*> it(phi->GetUses()); !it.Done(); it.Advance()) {
+      HUseListNode<HInstruction*>* current = it.Current();
       HInstruction* user = current->GetUser();
       if (user->IsPhi()) {
         worklist_.Add(user->AsPhi());
diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc
index cb5ce20c46..947427b67f 100644
--- a/compiler/optimizing/ssa_type_propagation.cc
+++ b/compiler/optimizing/ssa_type_propagation.cc
@@ -114,7 +114,7 @@ void SsaTypePropagation::AddToWorklist(HPhi* instruction) {
 }
 
 void SsaTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) {
-  for (HUseIterator<HInstruction> it(instruction->GetUses()); !it.Done(); it.Advance()) {
+  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->GetUser()->AsPhi();
     if (phi != nullptr) {
       AddToWorklist(phi);
diff --git a/compiler/optimizing/test/ConstantFolding.java b/compiler/optimizing/test/ConstantFolding.java
deleted file mode 100644
index d08006b4d5..0000000000
--- a/compiler/optimizing/test/ConstantFolding.java
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
-* Copyright (C) 2014 The Android Open Source Project
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-public class ConstantFolding {
-
-  /**
-   * Tiny three-register program exercising int constant folding
-   * on negation.
-   */
-
-  // CHECK-START: int ConstantFolding.IntNegation() constant_folding (before)
-  // CHECK-DAG: [[Const42:i\d+]] IntConstant 42
-  // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Const42]] ]
-  // CHECK-DAG: Return [ [[Neg]] ]
-
-  // CHECK-START: int ConstantFolding.IntNegation() constant_folding (after)
-  // CHECK-DAG: [[ConstN42:i\d+]] IntConstant -42
-  // CHECK-DAG: Return [ [[ConstN42]] ]
-
-  public static int IntNegation() {
-    int x, y;
-    x = 42;
-    y = -x;
-    return y;
-  }
-
-  /**
-   * Tiny three-register program exercising int constant folding
-   * on addition.
-   */
-
-  // CHECK-START: int ConstantFolding.IntAddition1() constant_folding (before)
-  // CHECK-DAG: [[Const1:i\d+]] IntConstant 1
-  // CHECK-DAG: [[Const2:i\d+]] IntConstant 2
-  // CHECK-DAG: [[Add:i\d+]] Add [ [[Const1]] [[Const2]] ]
-  // CHECK-DAG: Return [ [[Add]] ]
-
-  // CHECK-START: int ConstantFolding.IntAddition1() constant_folding (after)
-  // CHECK-DAG: [[Const3:i\d+]] IntConstant 3
-  // CHECK-DAG: Return [ [[Const3]] ]
-
-  public static int IntAddition1() {
-    int a, b, c;
-    a = 1;
-    b = 2;
-    c = a + b;
-    return c;
-  }
-
-  /**
-   * Small three-register program exercising int constant folding
-   * on addition.
-   */
-
-  // CHECK-START: int ConstantFolding.IntAddition2() constant_folding (before)
-  // CHECK-DAG: [[Const1:i\d+]] IntConstant 1
-  // CHECK-DAG: [[Const2:i\d+]] IntConstant 2
-  // CHECK-DAG: [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG: [[Const6:i\d+]] IntConstant 6
-  // CHECK-DAG: [[Add1:i\d+]] Add [ [[Const1]] [[Const2]] ]
-  // CHECK-DAG: [[Add2:i\d+]] Add [ [[Const5]] [[Const6]] ]
-  // CHECK-DAG: [[Add3:i\d+]] Add [ [[Add1]] [[Add2]] ]
-  // CHECK-DAG: Return [ [[Add3]] ]
-
-  // CHECK-START: int ConstantFolding.IntAddition2() constant_folding (after)
-  // CHECK-DAG: [[Const14:i\d+]] IntConstant 14
-  // CHECK-DAG: Return [ [[Const14]] ]
-
-  public static int IntAddition2() {
-    int a, b, c;
-    a = 1;
-    b = 2;
-    a += b;
-    b = 5;
-    c = 6;
-    b += c;
-    c = a + b;
-    return c;
-  }
-
-  /**
-   * Tiny three-register program exercising int constant folding
-   * on subtraction.
-   */
-
-  // CHECK-START: int ConstantFolding.IntSubtraction() constant_folding (before)
-  // CHECK-DAG: [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG: [[Const2:i\d+]] IntConstant 2
-  // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG: Return [ [[Sub]] ]
-
-  // CHECK-START: int ConstantFolding.IntSubtraction() constant_folding (after)
-  // CHECK-DAG: [[Const3:i\d+]] IntConstant 3
-  // CHECK-DAG: Return [ [[Const3]] ]
-
-  public static int IntSubtraction() {
-    int a, b, c;
-    a = 5;
-    b = 2;
-    c = a - b;
-    return c;
-  }
-
-  /**
-   * Tiny three-register program exercising long constant folding
-   * on addition.
-   */
-
-  // CHECK-START: long ConstantFolding.LongAddition() constant_folding (before)
-  // CHECK-DAG: [[Const1:j\d+]] LongConstant 1
-  // CHECK-DAG: [[Const2:j\d+]] LongConstant 2
-  // CHECK-DAG: [[Add:j\d+]] Add [ [[Const1]] [[Const2]] ]
-  // CHECK-DAG: Return [ [[Add]] ]
-
-  // CHECK-START: long ConstantFolding.LongAddition() constant_folding (after)
-  // CHECK-DAG: [[Const3:j\d+]] LongConstant 3
-  // CHECK-DAG: Return [ [[Const3]] ]
-
-  public static long LongAddition() {
-    long a, b, c;
-    a = 1L;
-    b = 2L;
-    c = a + b;
-    return c;
-  }
-
-  /**
-   * Tiny three-register program exercising long constant folding
-   * on subtraction.
-   */
-
-  // CHECK-START: long ConstantFolding.LongSubtraction() constant_folding (before)
-  // CHECK-DAG: [[Const5:j\d+]] LongConstant 5
-  // CHECK-DAG: [[Const2:j\d+]] LongConstant 2
-  // CHECK-DAG: [[Sub:j\d+]] Sub [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG: Return [ [[Sub]] ]
-
-  // CHECK-START: long ConstantFolding.LongSubtraction() constant_folding (after)
-  // CHECK-DAG: [[Const3:j\d+]] LongConstant 3
-  // CHECK-DAG: Return [ [[Const3]] ]
-
-  public static long LongSubtraction() {
-    long a, b, c;
-    a = 5L;
-    b = 2L;
-    c = a - b;
-    return c;
-  }
-
-  /**
-   * Three-register program with a constant (static) condition.
-   */
-
-  // CHECK-START: int ConstantFolding.StaticCondition() constant_folding (before)
-  // CHECK-DAG: [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG: [[Const2:i\d+]] IntConstant 2
-  // CHECK-DAG: [[Cond:z\d+]] GreaterThanOrEqual [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG: If [ [[Cond]] ]
-
-  // CHECK-START: int ConstantFolding.StaticCondition() constant_folding (after)
-  // CHECK-DAG: [[Const1:i\d+]] IntConstant 1
-  // CHECK-DAG: If [ [[Const1]] ]
-
-  public static int StaticCondition() {
-    int a, b, c;
-    a = 5;
-    b = 2;
-    if (a < b)
-      c = a + b;
-    else
-      c = a - b;
-    return c;
-  }
-
-  /**
-   * Four-variable program with jumps leading to the creation of many
-   * blocks.
-   *
-   * The intent of this test is to ensure that all constant expressions
-   * are actually evaluated at compile-time, thanks to the reverse
-   * (forward) post-order traversal of the the dominator tree.
-   */
-
-  // CHECK-START: int ConstantFolding.JumpsAndConditionals(boolean) constant_folding (before)
-  // CHECK-DAG: [[Const2:i\d+]] IntConstant 2
-  // CHECK-DAG: [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG: [[Add:i\d+]] Add [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG: [[Phi:i\d+]] Phi [ [[Add]] [[Sub]] ]
-  // CHECK-DAG: Return [ [[Phi]] ]
-
-  // CHECK-START: int ConstantFolding.JumpsAndConditionals(boolean) constant_folding (after)
-  // CHECK-DAG: [[Const3:i\d+]] IntConstant 3
-  // CHECK-DAG: [[Const7:i\d+]] IntConstant 7
-  // CHECK-DAG: [[Phi:i\d+]] Phi [ [[Const7]] [[Const3]] ]
-  // CHECK-DAG: Return [ [[Phi]] ]
-
-  public static int JumpsAndConditionals(boolean cond) {
-    int a, b, c;
-    a = 5;
-    b = 2;
-    if (cond)
-      c = a + b;
-    else
-      c = a - b;
-    return c;
-  }
-}
diff --git a/compiler/optimizing/test/Inliner.java b/compiler/optimizing/test/Inliner.java
deleted file mode 100644
index 54cce62a57..0000000000
--- a/compiler/optimizing/test/Inliner.java
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
-* Copyright (C) 2014 The Android Open Source Project
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-public class Inliner {
-
-  // CHECK-START: void Inliner.InlineVoid() inliner (before)
-  // CHECK-DAG: [[Const42:i\d+]] IntConstant 42
-  // CHECK-DAG: InvokeStaticOrDirect
-  // CHECK-DAG: InvokeStaticOrDirect [ [[Const42]] ]
-
-  // CHECK-START: void Inliner.InlineVoid() inliner (after)
-  // CHECK-NOT: InvokeStaticOrDirect
-
-  public static void InlineVoid() {
-    returnVoid();
-    returnVoidWithOneParameter(42);
-  }
-
-  // CHECK-START: int Inliner.InlineParameter(int) inliner (before)
-  // CHECK-DAG: [[Param:i\d+]] ParameterValue
-  // CHECK-DAG: [[Result:i\d+]] InvokeStaticOrDirect [ [[Param]] ]
-  // CHECK-DAG: Return [ [[Result]] ]
-
-  // CHECK-START: int Inliner.InlineParameter(int) inliner (after)
-  // CHECK-DAG: [[Param:i\d+]] ParameterValue
-  // CHECK-DAG: Return [ [[Param]] ]
-
-  public static int InlineParameter(int a) {
-    return returnParameter(a);
-  }
-
-  // CHECK-START: long Inliner.InlineWideParameter(long) inliner (before)
-  // CHECK-DAG: [[Param:j\d+]] ParameterValue
-  // CHECK-DAG: [[Result:j\d+]] InvokeStaticOrDirect [ [[Param]] ]
-  // CHECK-DAG: Return [ [[Result]] ]
-
-  // CHECK-START: long Inliner.InlineWideParameter(long) inliner (after)
-  // CHECK-DAG: [[Param:j\d+]] ParameterValue
-  // CHECK-DAG: Return [ [[Param]] ]
-
-  public static long InlineWideParameter(long a) {
-    return returnWideParameter(a);
-  }
-
-  // CHECK-START: java.lang.Object Inliner.InlineReferenceParameter(java.lang.Object) inliner (before)
-  // CHECK-DAG: [[Param:l\d+]] ParameterValue
-  // CHECK-DAG: [[Result:l\d+]] InvokeStaticOrDirect [ [[Param]] ]
-  // CHECK-DAG: Return [ [[Result]] ]
-
-  // CHECK-START: java.lang.Object Inliner.InlineReferenceParameter(java.lang.Object) inliner (after)
-  // CHECK-DAG: [[Param:l\d+]] ParameterValue
-  // CHECK-DAG: Return [ [[Param]] ]
-
-  public static Object InlineReferenceParameter(Object o) {
-    return returnReferenceParameter(o);
-  }
-
-  // CHECK-START: int Inliner.InlineInt() inliner (before)
-  // CHECK-DAG: [[Result:i\d+]] InvokeStaticOrDirect
-  // CHECK-DAG: Return [ [[Result]] ]
-
-  // CHECK-START: int Inliner.InlineInt() inliner (after)
-  // CHECK-DAG: [[Const4:i\d+]] IntConstant 4
-  // CHECK-DAG: Return [ [[Const4]] ]
-
-  public static int InlineInt() {
-    return returnInt();
-  }
-
-  // CHECK-START: long Inliner.InlineWide() inliner (before)
-  // CHECK-DAG: [[Result:j\d+]] InvokeStaticOrDirect
-  // CHECK-DAG: Return [ [[Result]] ]
-
-  // CHECK-START: long Inliner.InlineWide() inliner (after)
-  // CHECK-DAG: [[Const8:j\d+]] LongConstant 8
-  // CHECK-DAG: Return [ [[Const8]] ]
-
-  public static long InlineWide() {
-    return returnWide();
-  }
-
-  // CHECK-START: int Inliner.InlineAdd() inliner (before)
-  // CHECK-DAG: [[Const3:i\d+]] IntConstant 3
-  // CHECK-DAG: [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG: [[Result:i\d+]] InvokeStaticOrDirect
-  // CHECK-DAG: Return [ [[Result]] ]
-
-  // CHECK-START: int Inliner.InlineAdd() inliner (after)
-  // CHECK-DAG: [[Const3:i\d+]] IntConstant 3
-  // CHECK-DAG: [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG: [[Add:i\d+]] Add [ [[Const3]] [[Const5]] ]
-  // CHECK-DAG: Return [ [[Add]] ]
-
-  public static int InlineAdd() {
-    return returnAdd(3, 5);
-  }
-
-  // CHECK-START: int Inliner.InlineFieldAccess() inliner (before)
-  // CHECK-DAG: [[After:i\d+]] InvokeStaticOrDirect
-  // CHECK-DAG: Return [ [[After]] ]
-
-  // CHECK-START: int Inliner.InlineFieldAccess() inliner (after)
-  // CHECK-DAG: [[Const1:i\d+]] IntConstant 1
-  // CHECK-DAG: [[Before:i\d+]] StaticFieldGet
-  // CHECK-DAG: [[After:i\d+]] Add [ [[Before]] [[Const1]] ]
-  // CHECK-DAG: StaticFieldSet [ {{l\d+}} [[After]] ]
-  // CHECK-DAG: Return [ [[After]] ]
-
-  // CHECK-START: int Inliner.InlineFieldAccess() inliner (after)
-  // CHECK-NOT: InvokeStaticOrDirect
-
-  public static int InlineFieldAccess() {
-    return incCounter();
-  }
-
-  // CHECK-START: int Inliner.InlineWithControlFlow(boolean) inliner (before)
-  // CHECK-DAG: [[Const1:i\d+]] IntConstant 1
-  // CHECK-DAG: [[Const3:i\d+]] IntConstant 3
-  // CHECK-DAG: [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG: [[Add:i\d+]] InvokeStaticOrDirect [ [[Const1]] [[Const3]] ]
-  // CHECK-DAG: [[Sub:i\d+]] InvokeStaticOrDirect [ [[Const5]] [[Const3]] ]
-  // CHECK-DAG: [[Phi:i\d+]] Phi [ [[Add]] [[Sub]] ]
-  // CHECK-DAG: Return [ [[Phi]] ]
-
-  // CHECK-START: int Inliner.InlineWithControlFlow(boolean) inliner (after)
-  // CHECK-DAG: [[Const1:i\d+]] IntConstant 1
-  // CHECK-DAG: [[Const3:i\d+]] IntConstant 3
-  // CHECK-DAG: [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG: [[Add:i\d+]] Add [ [[Const1]] [[Const3]] ]
-  // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Const5]] [[Const3]] ]
-  // CHECK-DAG: [[Phi:i\d+]] Phi [ [[Add]] [[Sub]] ]
-  // CHECK-DAG: Return [ [[Phi]] ]
-
-  public static int InlineWithControlFlow(boolean cond) {
-    int x, const1, const3, const5;
-    const1 = 1;
-    const3 = 3;
-    const5 = 5;
-    if (cond) {
-      x = returnAdd(const1, const3);
-    } else {
-      x = returnSub(const5, const3);
-    }
-    return x;
-  }
-
-
-  private static void returnVoid() {
-    return;
-  }
-
-  private static void returnVoidWithOneParameter(int a) {
-    return;
-  }
-
-  private static int returnParameter(int a) {
-    return a;
-  }
-
-  private static long returnWideParameter(long a) {
-    return a;
-  }
-
-  private static Object returnReferenceParameter(Object o) {
-    return o;
-  }
-
-  private static int returnInt() {
-    return 4;
-  }
-
-  private static long returnWide() {
-    return 8L;
-  }
-
-  private static int returnAdd(int a, int b) {
-    return a + b;
-  }
-
-  private static int returnSub(int a, int b) {
-    return a - b;
-  }
-
-  private static int counter = 42;
-
-  private static int incCounter() {
-    return ++counter;
-  }
-}