Diffstat (limited to 'compiler'): 21 files changed, 316 insertions, 268 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index c532e72465..6a4ad5c92a 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -283,8 +283,7 @@ void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches A } void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, - size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fpu_registers, + size_t maximum_safepoint_spill_size, size_t number_of_out_slots, const ArenaVector<HBasicBlock*>& block_order) { block_order_ = &block_order; @@ -298,14 +297,12 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, && !HasAllocatedCalleeSaveRegisters() && IsLeafMethod() && !RequiresCurrentMethod()) { - DCHECK_EQ(maximum_number_of_live_core_registers, 0u); - DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u); + DCHECK_EQ(maximum_safepoint_spill_size, 0u); SetFrameSize(CallPushesPC() ? GetWordSize() : 0); } else { SetFrameSize(RoundUp( first_register_slot_in_slow_path_ - + maximum_number_of_live_core_registers * GetWordSize() - + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize() + + maximum_safepoint_spill_size + FrameEntrySpillSize(), kStackAlignment)); } @@ -765,21 +762,16 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, LocationSummary* locations = instruction->GetLocations(); uint32_t register_mask = locations->GetRegisterMask(); - if (instruction->IsSuspendCheck()) { - // Suspend check has special ABI that saves the caller-save registers in callee, - // so we want to emit stack maps containing the registers. - // TODO: Register allocator still reserves space for the caller-save registers. - // We should add slow-path-specific caller-save information into LocationSummary - // and refactor the code here as well as in the register allocator to use it. + DCHECK_EQ(register_mask & ~locations->GetLiveRegisters()->GetCoreRegisters(), 0u); + if (locations->OnlyCallsOnSlowPath()) { + // In case of slow path, we currently set the location of caller-save registers + // to register (instead of their stack location when pushed before the slow-path + // call). Therefore register_mask contains both callee-save and caller-save + // registers that hold objects. We must remove the spilled caller-save from the + // mask, since they will be overwritten by the callee. + uint32_t spills = GetSlowPathSpills(locations, /* core_registers */ true); + register_mask &= ~spills; } else { - if (locations->OnlyCallsOnSlowPath()) { - // In case of slow path, we currently set the location of caller-save registers - // to register (instead of their stack location when pushed before the slow-path - // call). Therefore register_mask contains both callee-save and caller-save - // registers that hold objects. We must remove the caller-save from the mask, since - // they will be overwritten by the callee. - register_mask &= core_callee_save_mask_; - } // The register mask must be a subset of callee-save registers. 
DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); } @@ -1235,58 +1227,44 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in } void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* live_registers = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); - for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { - if (!codegen->IsCoreCalleeSaveRegister(i)) { - if (live_registers->ContainsCoreRegister(i)) { - // If the register holds an object, update the stack mask. - if (locations->RegisterContainsObject(i)) { - locations->SetStackBit(stack_offset / kVRegSize); - } - DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); - DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); - saved_core_stack_offsets_[i] = stack_offset; - stack_offset += codegen->SaveCoreRegister(stack_offset, i); - } + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + for (uint32_t i : LowToHighBits(core_spills)) { + // If the register holds an object, update the stack mask. + if (locations->RegisterContainsObject(i)) { + locations->SetStackBit(stack_offset / kVRegSize); } + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_core_stack_offsets_[i] = stack_offset; + stack_offset += codegen->SaveCoreRegister(stack_offset, i); } - for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { - if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { - if (live_registers->ContainsFloatingPointRegister(i)) { - DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); - DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); - saved_fpu_stack_offsets_[i] = stack_offset; - stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i); - } - } + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + for (size_t i : LowToHighBits(fp_spills)) { + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_fpu_stack_offsets_[i] = stack_offset; + stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i); } } void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* live_registers = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); - for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { - if (!codegen->IsCoreCalleeSaveRegister(i)) { - if (live_registers->ContainsCoreRegister(i)) { - DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); - DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); - stack_offset += codegen->RestoreCoreRegister(stack_offset, i); - } - } + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + for (uint32_t i : LowToHighBits(core_spills)) { + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + stack_offset += codegen->RestoreCoreRegister(stack_offset, i); } - for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { - if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { - if (live_registers->ContainsFloatingPointRegister(i)) { - 
DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); - DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); - stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i); - } - } + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + for (size_t i : LowToHighBits(fp_spills)) { + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i); } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index fd396c474c..072d8cf31a 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -22,6 +22,7 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/bit_field.h" +#include "base/bit_utils.h" #include "base/enums.h" #include "compiled_method.h" #include "driver/compiler_options.h" @@ -212,8 +213,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual size_t GetFloatingPointSpillSlotSize() const = 0; virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0; void InitializeCodeGeneration(size_t number_of_spill_slots, - size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fpu_registers, + size_t maximum_safepoint_spill_size, size_t number_of_out_slots, const ArenaVector<HBasicBlock*>& block_order); // Backends can override this as necessary. For most, no special alignment is required. @@ -279,6 +279,30 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { return (fpu_callee_save_mask_ & (1 << reg)) != 0; } + uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const { + DCHECK(locations->OnlyCallsOnSlowPath() || + (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() && + !locations->HasCustomSlowPathCallingConvention())); + uint32_t live_registers = core_registers + ? locations->GetLiveRegisters()->GetCoreRegisters() + : locations->GetLiveRegisters()->GetFloatingPointRegisters(); + if (locations->HasCustomSlowPathCallingConvention()) { + // Save only the live registers that the custom calling convention wants us to save. + uint32_t caller_saves = core_registers + ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters() + : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters(); + return live_registers & caller_saves; + } else { + // Default ABI, we need to spill non-callee-save live registers. + uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_; + return live_registers & ~callee_saves; + } + } + + size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const { + return POPCOUNT(GetSlowPathSpills(locations, core_registers)); + } + // Record native to dex mapping for a suspend point. Required by runtime. void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); // Check whether we have already recorded mapping at this PC. diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index e60b781b78..7c2e80e741 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -3996,6 +3996,9 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI object_field_get_with_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); bool volatile_for_double = field_info.IsVolatile() @@ -4472,6 +4475,9 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { @@ -5024,7 +5030,9 @@ void InstructionCodeGeneratorARM::VisitParallelMove(HParallelMove* instruction) } void LocationsBuilderARM::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. } void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -5355,6 +5363,10 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kDexCacheViaMethod || @@ -5548,6 +5560,10 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) { ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + if (kUseBakerReadBarrier && !load->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } + HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod || load_kind == HLoadString::LoadKind::kDexCachePcRelative) { @@ -5646,6 +5662,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -5653,6 +5670,7 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -5662,6 +5680,9 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // The "out" register is used as a temporary, so it overlaps with the inputs. @@ -5832,6 +5853,7 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -5840,6 +5862,7 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. + baker_read_barrier_slow_path = kUseBakerReadBarrier && !throws_into_catch; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -5849,6 +5872,9 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // Note that TypeCheckSlowPathARM uses this "temp" register too. diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index a4fc044f83..4fcad510a3 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1635,6 +1635,9 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { object_field_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); @@ -2060,6 +2063,9 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. 
+ } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { @@ -3101,6 +3107,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -3108,6 +3115,7 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -3117,6 +3125,9 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // The "out" register is used as a temporary, so it overlaps with the inputs. @@ -3288,6 +3299,7 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -3296,6 +3308,7 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. + baker_read_barrier_slow_path = kUseBakerReadBarrier && !throws_into_catch; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -3305,6 +3318,9 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // Note that TypeCheckSlowPathARM64 uses this "temp" register too. @@ -3991,6 +4007,10 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { @@ -4181,6 +4201,10 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + if (kUseBakerReadBarrier && !load->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } + if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -4711,7 +4735,9 @@ void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet( } void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. } void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) { diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 8a2f90d541..af998e9729 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -5220,7 +5220,9 @@ void InstructionCodeGeneratorMIPS::VisitUnresolvedStaticFieldSet( } void LocationsBuilderMIPS::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. } void InstructionCodeGeneratorMIPS::VisitSuspendCheck(HSuspendCheck* instruction) { diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 4a5755c925..fbf7b3339c 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -3782,7 +3782,9 @@ void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldSet( } void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. } void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index f6e8ee1d48..6da882868d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -4613,6 +4613,9 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { @@ -5077,6 +5080,9 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { @@ -5639,7 +5645,9 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) } void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. } void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -6006,6 +6014,10 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kDexCacheViaMethod || @@ -6187,6 +6199,10 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + if (kUseBakerReadBarrier && !load->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } + HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod || load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || @@ -6282,6 +6298,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -6289,6 +6306,7 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -6298,6 +6316,9 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); // Note that TypeCheckSlowPathX86 uses this "out" register too. 
@@ -6495,6 +6516,7 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -6503,6 +6525,7 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. + baker_read_barrier_slow_path = kUseBakerReadBarrier && !throws_into_catch; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -6511,6 +6534,9 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { break; } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); // Note that TypeCheckSlowPathX86 uses this "temp" register too. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index cb227a1906..9a9cb26ccb 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -4146,6 +4146,9 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { object_field_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); @@ -4575,6 +4578,9 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { @@ -5127,7 +5133,9 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio } void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. } void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -5437,6 +5445,10 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { @@ -5601,6 +5613,10 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + if (kUseBakerReadBarrier && !load->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } + if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -5687,6 +5703,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -5694,6 +5711,7 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -5703,6 +5721,9 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); // Note that TypeCheckSlowPathX86_64 uses this "out" register too. @@ -5905,6 +5926,7 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -5913,6 +5935,7 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. + baker_read_barrier_slow_path = kUseBakerReadBarrier && !throws_into_catch; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -5921,6 +5944,9 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { break; } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. 
+ } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 0bbc0e54bc..78924ba954 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -651,6 +651,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 7482057b12..082076d79b 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -894,6 +894,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index cf4a040551..9c24db9f76 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -1984,6 +1984,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index a4ee546237..c79a3e37cb 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2123,6 +2123,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 83596da41a..1b1b3a79ab 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -27,12 +27,14 @@ LocationSummary::LocationSummary(HInstruction* instruction, : inputs_(instruction->InputCount(), instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)), temps_(instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)), - output_overlaps_(Location::kOutputOverlap), call_kind_(call_kind), + intrinsified_(intrinsified), + has_custom_slow_path_calling_convention_(false), + output_overlaps_(Location::kOutputOverlap), stack_mask_(nullptr), register_mask_(0), live_registers_(), - intrinsified_(intrinsified) { + custom_slow_path_caller_saves_() { instruction->SetLocations(this); if (NeedsSafepoint()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 5fdfb9b6ca..43840422ca 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -20,6 +20,7 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/bit_field.h" +#include "base/bit_utils.h" #include "base/bit_vector.h" #include "base/value_object.h" @@ -452,7 +453,7 @@ class RegisterSet : public ValueObject { } size_t GetNumberOfRegisters() const { - return __builtin_popcount(core_registers_) + __builtin_popcount(floating_point_registers_); + return POPCOUNT(core_registers_) + POPCOUNT(floating_point_registers_); } uint32_t GetCoreRegisters() const { @@ -466,8 +467,6 @@ class RegisterSet : public ValueObject { private: uint32_t core_registers_; uint32_t floating_point_registers_; - - DISALLOW_COPY_AND_ASSIGN(RegisterSet); }; static constexpr bool kIntrinsified = true; @@ -569,6 +568,21 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { return CanCall(); } + void SetCustomSlowPathCallerSaves(const RegisterSet& caller_saves) { + DCHECK(OnlyCallsOnSlowPath()); + has_custom_slow_path_calling_convention_ = true; + custom_slow_path_caller_saves_ = caller_saves; + } + + bool HasCustomSlowPathCallingConvention() const { + return has_custom_slow_path_calling_convention_; + } + + const RegisterSet& GetCustomSlowPathCallerSaves() const { + DCHECK(HasCustomSlowPathCallingConvention()); + return custom_slow_path_caller_saves_; + } + void SetStackBit(uint32_t index) { stack_mask_->SetBit(index); } @@ -628,18 +642,18 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { return intrinsified_; } - void SetIntrinsified(bool intrinsified) { - intrinsified_ = intrinsified; - } - private: ArenaVector<Location> inputs_; ArenaVector<Location> temps_; + const CallKind call_kind_; + // Whether these are locations for an intrinsified call. + const bool intrinsified_; + // Whether the slow path has default or custom calling convention. + bool has_custom_slow_path_calling_convention_; // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot // share the same register as the inputs. Location::OutputOverlap output_overlaps_; Location output_; - const CallKind call_kind_; // Mask of objects that live in the stack. BitVector* stack_mask_; @@ -650,8 +664,8 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { // Registers that are in use at this position. 
RegisterSet live_registers_; - // Whether these are locations for an intrinsified call. - bool intrinsified_; + // Custom slow path caller saves. Valid only if indicated by slow_path_calling_convention_. + RegisterSet custom_slow_path_caller_saves_; friend class RegisterAllocatorTest; DISALLOW_COPY_AND_ASSIGN(LocationSummary); diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 34502869e4..ad7a8a393e 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -28,8 +28,7 @@ RegisterAllocationResolver::RegisterAllocationResolver(ArenaAllocator* allocator codegen_(codegen), liveness_(liveness) {} -void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs, - size_t max_safepoint_live_fp_regs, +void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoints, size_t reserved_out_slots, size_t int_spill_slots, size_t long_spill_slots, @@ -43,10 +42,13 @@ void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs, + double_spill_slots + catch_phi_spill_slots; + // Update safepoints and calculate the size of the spills. + UpdateSafepointLiveRegisters(); + size_t maximum_safepoint_spill_size = CalculateMaximumSafepointSpillSize(safepoints); + // Computes frame size and spill mask. codegen_->InitializeCodeGeneration(spill_slots, - max_safepoint_live_core_regs, - max_safepoint_live_fp_regs, + maximum_safepoint_spill_size, reserved_out_slots, // Includes slot(s) for the art method. codegen_->GetGraph()->GetLinearOrder()); @@ -135,8 +137,7 @@ void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs, // Connect siblings and resolve inputs. for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); - ConnectSiblings(instruction->GetLiveInterval(), - max_safepoint_live_core_regs + max_safepoint_live_fp_regs); + ConnectSiblings(instruction->GetLiveInterval()); } // Resolve non-linear control flow across branches. Order does not matter. @@ -222,8 +223,73 @@ void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs, } } -void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval, - size_t max_safepoint_live_regs) { +void RegisterAllocationResolver::UpdateSafepointLiveRegisters() { + for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); + for (LiveInterval* current = instruction->GetLiveInterval(); + current != nullptr; + current = current->GetNextSibling()) { + if (!current->HasRegister()) { + continue; + } + Location source = current->ToLocation(); + for (SafepointPosition* safepoint_position = current->GetFirstSafepoint(); + safepoint_position != nullptr; + safepoint_position = safepoint_position->GetNext()) { + DCHECK(current->CoversSlow(safepoint_position->GetPosition())); + LocationSummary* locations = safepoint_position->GetLocations(); + switch (source.GetKind()) { + case Location::kRegister: + case Location::kFpuRegister: { + locations->AddLiveRegister(source); + break; + } + case Location::kRegisterPair: + case Location::kFpuRegisterPair: { + locations->AddLiveRegister(source.ToLow()); + locations->AddLiveRegister(source.ToHigh()); + break; + } + case Location::kStackSlot: // Fall-through + case Location::kDoubleStackSlot: // Fall-through + case Location::kConstant: { + // Nothing to do. 
+ break; + } + default: { + LOG(FATAL) << "Unexpected location for object"; + } + } + } + } + } +} + +size_t RegisterAllocationResolver::CalculateMaximumSafepointSpillSize( + ArrayRef<HInstruction* const> safepoints) { + size_t core_register_spill_size = codegen_->GetWordSize(); + size_t fp_register_spill_size = codegen_->GetFloatingPointSpillSlotSize(); + size_t maximum_safepoint_spill_size = 0u; + for (HInstruction* instruction : safepoints) { + LocationSummary* locations = instruction->GetLocations(); + if (locations->OnlyCallsOnSlowPath()) { + size_t core_spills = + codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true); + size_t fp_spills = + codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false); + size_t spill_size = + core_register_spill_size * core_spills + fp_register_spill_size * fp_spills; + maximum_safepoint_spill_size = std::max(maximum_safepoint_spill_size, spill_size); + } else if (locations->CallsOnMainAndSlowPath()) { + // Nothing to spill on the slow path if the main path already clobbers caller-saves. + DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true)); + DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false)); + } + } + return maximum_safepoint_spill_size; +} + +void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { LiveInterval* current = interval; if (current->HasSpillSlot() && current->HasRegister() @@ -306,48 +372,16 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval, safepoint_position = safepoint_position->GetNext()) { DCHECK(current->CoversSlow(safepoint_position->GetPosition())); - LocationSummary* locations = safepoint_position->GetLocations(); - if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) { + if (current->GetType() == Primitive::kPrimNot) { DCHECK(interval->GetDefinedBy()->IsActualObject()) << interval->GetDefinedBy()->DebugName() << "@" << safepoint_position->GetInstruction()->DebugName(); - locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); - } - - switch (source.GetKind()) { - case Location::kRegister: { - locations->AddLiveRegister(source); - if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) { - DCHECK_LE(locations->GetNumberOfLiveRegisters(), - max_safepoint_live_regs); - } - if (current->GetType() == Primitive::kPrimNot) { - DCHECK(interval->GetDefinedBy()->IsActualObject()) - << interval->GetDefinedBy()->DebugName() - << "@" << safepoint_position->GetInstruction()->DebugName(); - locations->SetRegisterBit(source.reg()); - } - break; - } - case Location::kFpuRegister: { - locations->AddLiveRegister(source); - break; - } - - case Location::kRegisterPair: - case Location::kFpuRegisterPair: { - locations->AddLiveRegister(source.ToLow()); - locations->AddLiveRegister(source.ToHigh()); - break; - } - case Location::kStackSlot: // Fall-through - case Location::kDoubleStackSlot: // Fall-through - case Location::kConstant: { - // Nothing to do. 
- break; + LocationSummary* locations = safepoint_position->GetLocations(); + if (current->GetParent()->HasSpillSlot()) { + locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); } - default: { - LOG(FATAL) << "Unexpected location for object"; + if (source.GetKind() == Location::kRegister) { + locations->SetRegisterBit(source.reg()); } } } diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h index 6ceb9bc955..a70ceae076 100644 --- a/compiler/optimizing/register_allocation_resolver.h +++ b/compiler/optimizing/register_allocation_resolver.h @@ -20,6 +20,7 @@ #include "base/arena_containers.h" #include "base/value_object.h" #include "primitive.h" +#include "utils/array_ref.h" namespace art { @@ -43,8 +44,7 @@ class RegisterAllocationResolver : ValueObject { CodeGenerator* codegen, const SsaLivenessAnalysis& liveness); - void Resolve(size_t max_safepoint_live_core_regs, - size_t max_safepoint_live_fp_regs, + void Resolve(ArrayRef<HInstruction* const> safepoints, size_t reserved_out_slots, // Includes slot(s) for the art method. size_t int_spill_slots, size_t long_spill_slots, @@ -54,10 +54,14 @@ class RegisterAllocationResolver : ValueObject { const ArenaVector<LiveInterval*>& temp_intervals); private: + // Update live registers of safepoint location summary. + void UpdateSafepointLiveRegisters(); + + // Calculate the maximum size of the spill area for safepoints. + size_t CalculateMaximumSafepointSpillSize(ArrayRef<HInstruction* const> safepoints); + // Connect adjacent siblings within blocks, and resolve inputs along the way. - // Uses max_safepoint_live_regs to check that we did not underestimate the - // number of live registers at safepoints. - void ConnectSiblings(LiveInterval* interval, size_t max_safepoint_live_regs); + void ConnectSiblings(LiveInterval* interval); // Connect siblings between block entries and exits. void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const; diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index a21595fe03..717839914d 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -438,8 +438,7 @@ class ColoringIteration { // track of live intervals across safepoints. // TODO: Should build safepoints elsewhere. void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals, - const ArenaVector<InterferenceNode*>& physical_nodes, - ArenaVector<InterferenceNode*>* safepoints); + const ArenaVector<InterferenceNode*>& physical_nodes); // Add coalesce opportunities to interference nodes. void FindCoalesceOpportunities(); @@ -566,11 +565,7 @@ RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocat num_long_spill_slots_(0), catch_phi_spill_slot_counter_(0), reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)), - reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()), - number_of_globally_blocked_core_regs_(0), - number_of_globally_blocked_fp_regs_(0), - max_safepoint_live_core_regs_(0), - max_safepoint_live_fp_regs_(0) { + reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) { // Before we ask for blocked registers, set them up in the code generator. 
codegen->SetupBlockedRegisters(); @@ -584,7 +579,6 @@ RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocat physical_core_nodes_[i]->stage = NodeStage::kPrecolored; core_intervals_.push_back(interval); if (codegen_->IsBlockedCoreRegister(i)) { - ++number_of_globally_blocked_core_regs_; interval->AddRange(0, liveness.GetMaxLifetimePosition()); } } @@ -597,7 +591,6 @@ RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocat physical_fp_nodes_[i]->stage = NodeStage::kPrecolored; fp_intervals_.push_back(interval); if (codegen_->IsBlockedFloatingPointRegister(i)) { - ++number_of_globally_blocked_fp_regs_; interval->AddRange(0, liveness.GetMaxLifetimePosition()); } } @@ -638,7 +631,7 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs ? physical_core_nodes_ : physical_fp_nodes_; - iteration.BuildInterferenceGraph(intervals, physical_nodes, &safepoints); + iteration.BuildInterferenceGraph(intervals, physical_nodes); // (3) Add coalesce opportunities. // If we have tried coloring the graph a suspiciously high number of times, give @@ -667,19 +660,6 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { // Assign spill slots. AllocateSpillSlots(iteration.GetPrunableNodes()); - // Compute the maximum number of live registers across safepoints. - // Notice that we do not count globally blocked registers, such as the stack pointer. - if (safepoints.size() > 0) { - size_t max_safepoint_live_regs = ComputeMaxSafepointLiveRegisters(safepoints); - if (processing_core_regs) { - max_safepoint_live_core_regs_ = - max_safepoint_live_regs - number_of_globally_blocked_core_regs_; - } else { - max_safepoint_live_fp_regs_= - max_safepoint_live_regs - number_of_globally_blocked_fp_regs_; - } - } - // Tell the code generator which registers were allocated. // We only look at prunable_nodes because we already told the code generator about // fixed intervals while processing instructions. We also ignore the fixed intervals @@ -711,8 +691,7 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { // (6) Resolve locations and deconstruct SSA form. RegisterAllocationResolver(allocator_, codegen_, liveness_) - .Resolve(max_safepoint_live_core_regs_, - max_safepoint_live_fp_regs_, + .Resolve(ArrayRef<HInstruction* const>(safepoints_), reserved_art_method_slots_ + reserved_out_slots_, num_int_spill_slots_, num_long_spill_slots_, @@ -989,24 +968,9 @@ void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instru void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) { LocationSummary* locations = instruction->GetLocations(); - size_t position = instruction->GetLifetimePosition(); if (locations->NeedsSafepoint()) { safepoints_.push_back(instruction); - if (locations->OnlyCallsOnSlowPath()) { - // We add a synthesized range at this position to record the live registers - // at this position. Ideally, we could just update the safepoints when locations - // are updated, but we currently need to know the full stack size before updating - // locations (because of parameters and the fact that we don't have a frame pointer). - // And knowing the full stack size requires to know the maximum number of live - // registers at calls in slow paths. - // By adding the following interval in the algorithm, we can compute this - // maximum before updating locations. 
- LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction); - interval->AddRange(position, position + 1); - core_intervals_.push_back(interval); - fp_intervals_.push_back(interval); - } } } @@ -1110,11 +1074,6 @@ void ColoringIteration::AddPotentialInterference(InterferenceNode* from, bool both_directions) { if (from->IsPrecolored()) { // We save space by ignoring outgoing edges from fixed nodes. - } else if (to->GetInterval()->IsSlowPathSafepoint()) { - // Safepoint intervals are only there to count max live registers, - // so no need to give them incoming interference edges. - // This is also necessary for correctness, because we don't want nodes - // to remove themselves from safepoint adjacency sets when they're pruned. } else if (to->IsPrecolored()) { // It is important that only a single node represents a given fixed register in the // interference graph. We retrieve that node here. @@ -1200,8 +1159,7 @@ static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNo void ColoringIteration::BuildInterferenceGraph( const ArenaVector<LiveInterval*>& intervals, - const ArenaVector<InterferenceNode*>& physical_nodes, - ArenaVector<InterferenceNode*>* safepoints) { + const ArenaVector<InterferenceNode*>& physical_nodes) { DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty()); // Build the interference graph efficiently by ordering range endpoints // by position and doing a linear sweep to find interferences. (That is, we @@ -1236,11 +1194,6 @@ void ColoringIteration::BuildInterferenceGraph( node->SetAlias(physical_node); DCHECK_EQ(node->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister()); - } else if (sibling->IsSlowPathSafepoint()) { - // Safepoint intervals are synthesized to count max live registers. - // They will be processed separately after coloring. - node->stage = NodeStage::kSafepoint; - safepoints->push_back(node); } else { node->stage = NodeStage::kPrunable; prunable_nodes_.push_back(node); @@ -1494,7 +1447,6 @@ void ColoringIteration::PruneInterferenceGraph() { // filled by FindCoalesceOpportunities(). for (InterferenceNode* node : prunable_nodes_) { DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned"; - DCHECK(!node->GetInterval()->IsSlowPathSafepoint()) << "Safepoint nodes should never be pruned"; if (IsLowDegreeNode(node, num_regs_)) { if (node->GetCoalesceOpportunities().empty()) { // Simplify Worklist. 
@@ -1577,8 +1529,6 @@ void ColoringIteration::PruneNode(InterferenceNode* node) { pruned_nodes_.push(node); for (InterferenceNode* adj : node->GetAdjacentNodes()) { - DCHECK(!adj->GetInterval()->IsSlowPathSafepoint()) - << "Nodes should never interfere with synthesized safepoint nodes"; DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes"; if (adj->IsPrecolored()) { @@ -1938,18 +1888,6 @@ bool ColoringIteration::ColorInterferenceGraph() { return successful; } -size_t RegisterAllocatorGraphColor::ComputeMaxSafepointLiveRegisters( - const ArenaVector<InterferenceNode*>& safepoints) { - size_t max_safepoint_live_regs = 0; - for (InterferenceNode* safepoint : safepoints) { - DCHECK(safepoint->GetInterval()->IsSlowPathSafepoint()); - std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(safepoint->GetAdjacentNodes()); - size_t live_regs = conflict_mask.count(); - max_safepoint_live_regs = std::max(max_safepoint_live_regs, live_regs); - } - return max_safepoint_live_regs; -} - void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) { // The register allocation resolver will organize the stack based on value type, // so we assign stack slots for each value type separately. diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h index ed12561d2c..548687f784 100644 --- a/compiler/optimizing/register_allocator_graph_color.h +++ b/compiler/optimizing/register_allocator_graph_color.h @@ -140,10 +140,6 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { bool IsCallerSave(size_t reg, bool processing_core_regs); - // Return the maximum number of registers live at safepoints, - // based on the outgoing interference edges of safepoint nodes. - size_t ComputeMaxSafepointLiveRegisters(const ArenaVector<InterferenceNode*>& safepoints); - // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not // assigned the same stack slot. void ColorSpillSlots(ArenaVector<LiveInterval*>* nodes, @@ -187,14 +183,6 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { // Number of stack slots needed for outgoing arguments. const size_t reserved_out_slots_; - // The number of globally blocked core and floating point registers, such as the stack pointer. - size_t number_of_globally_blocked_core_regs_; - size_t number_of_globally_blocked_fp_regs_; - - // The maximum number of registers live at safe points. Needed by the code generator. 
-  size_t max_safepoint_live_core_regs_;
-  size_t max_safepoint_live_fp_regs_;
-
   friend class ColoringIteration;
 
   DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor);
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 768ed2d26a..6910c71ead 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -63,9 +63,7 @@ RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocat
       registers_array_(nullptr),
       blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
       blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
-      reserved_out_slots_(0),
-      maximum_number_of_live_core_registers_(0),
-      maximum_number_of_live_fp_registers_(0) {
+      reserved_out_slots_(0) {
   temp_intervals_.reserve(4);
   int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
   long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
@@ -92,8 +90,7 @@ static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval
 void RegisterAllocatorLinearScan::AllocateRegisters() {
   AllocateRegistersInternal();
   RegisterAllocationResolver(allocator_, codegen_, liveness_)
-      .Resolve(maximum_number_of_live_core_registers_,
-               maximum_number_of_live_fp_registers_,
+      .Resolve(ArrayRef<HInstruction* const>(safepoints_),
               reserved_out_slots_,
               int_spill_slots_.size(),
               long_spill_slots_.size(),
@@ -283,20 +280,6 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction)
       return;
     }
     safepoints_.push_back(instruction);
-    if (locations->OnlyCallsOnSlowPath()) {
-      // We add a synthesized range at this position to record the live registers
-      // at this position. Ideally, we could just update the safepoints when locations
-      // are updated, but we currently need to know the full stack size before updating
-      // locations (because of parameters and the fact that we don't have a frame pointer).
-      // And knowing the full stack size requires to know the maximum number of live
-      // registers at calls in slow paths.
-      // By adding the following interval in the algorithm, we can compute this
-      // maximum before updating locations.
-      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
-      interval->AddRange(position, position + 1);
-      AddSorted(&unhandled_core_intervals_, interval);
-      AddSorted(&unhandled_fp_intervals_, interval);
-    }
   }
 
   if (locations->WillCall()) {
@@ -569,20 +552,6 @@ void RegisterAllocatorLinearScan::LinearScan() {
                                            });
     inactive_.erase(inactive_kept_end, inactive_to_handle_end);
 
-    if (current->IsSlowPathSafepoint()) {
-      // Synthesized interval to record the maximum number of live registers
-      // at safepoints. No need to allocate a register for it.
-      if (processing_core_registers_) {
-        maximum_number_of_live_core_registers_ =
-            std::max(maximum_number_of_live_core_registers_, active_.size());
-      } else {
-        maximum_number_of_live_fp_registers_ =
-            std::max(maximum_number_of_live_fp_registers_, active_.size());
-      }
-      DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
-      continue;
-    }
-
     if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
       DCHECK(!current->HasRegister());
-      // Allocating the low part was unsucessful. The splitted interval for the high part
+      // Allocating the low part was unsuccessful. The split interval for the high part
@@ -685,7 +654,7 @@ bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
   // the next intersection with `current`.
   for (LiveInterval* inactive : inactive_) {
-    // Temp/Slow-path-safepoint interval has no holes.
-    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+    // Temp interval has no holes.
+    DCHECK(!inactive->IsTemp());
     if (!current->IsSplit() && !inactive->IsFixed()) {
       // Neither current nor inactive are fixed.
       // Thanks to SSA, a non-split interval starting in a hole of an
@@ -933,7 +902,7 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
   // start of current.
   for (LiveInterval* inactive : inactive_) {
-    // Temp/Slow-path-safepoint interval has no holes.
-    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+    // Temp interval has no holes.
+    DCHECK(!inactive->IsTemp());
     if (!current->IsSplit() && !inactive->IsFixed()) {
       // Neither current nor inactive are fixed.
       // Thanks to SSA, a non-split interval starting in a hole of an
@@ -1085,12 +1054,6 @@ void RegisterAllocatorLinearScan::AddSorted(ArenaVector<LiveInterval*>* array, L
     if (current->StartsAfter(interval) && !current->IsHighInterval()) {
       insert_at = i;
       break;
-    } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
-      // Ensure the slow path interval is the last to be processed at its location: we want the
-      // interval to know all live registers at this location.
-      DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
-      insert_at = i;
-      break;
     }
   }
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
index 1a643a0d1a..b3834f45e4 100644
--- a/compiler/optimizing/register_allocator_linear_scan.h
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -171,12 +171,6 @@ class RegisterAllocatorLinearScan : public RegisterAllocator {
   // Slots reserved for out arguments.
   size_t reserved_out_slots_;
 
-  // The maximum live core registers at safepoints.
-  size_t maximum_number_of_live_core_registers_;
-
-  // The maximum live FP registers at safepoints.
-  size_t maximum_number_of_live_fp_registers_;
-
   ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
   ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 92788fe6b8..06785e15fb 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -208,11 +208,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
     return new (allocator) LiveInterval(allocator, type, instruction);
   }
 
-  static LiveInterval* MakeSlowPathInterval(ArenaAllocator* allocator, HInstruction* instruction) {
-    return new (allocator) LiveInterval(
-        allocator, Primitive::kPrimVoid, instruction, false, kNoRegister, false, true);
-  }
-
   static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) {
     return new (allocator) LiveInterval(allocator, type, nullptr, true, reg, false);
   }
@@ -223,7 +218,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
 
   bool IsFixed() const { return is_fixed_; }
   bool IsTemp() const { return is_temp_; }
-  bool IsSlowPathSafepoint() const { return is_slow_path_safepoint_; }
 
   // This interval is the result of a split.
   bool IsSplit() const { return parent_ != this; }
@@ -790,7 +784,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
     DCHECK(!HasHighInterval());
     DCHECK(!HasLowInterval());
     high_or_low_interval_ = new (allocator_) LiveInterval(
-        allocator_, type_, defined_by_, false, kNoRegister, is_temp, false, true);
+        allocator_, type_, defined_by_, false, kNoRegister, is_temp, true);
     high_or_low_interval_->high_or_low_interval_ = this;
     if (first_range_ != nullptr) {
       high_or_low_interval_->first_range_ = first_range_->Dup(allocator_);
@@ -919,7 +913,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
                bool is_fixed = false,
                int reg = kNoRegister,
                bool is_temp = false,
-               bool is_slow_path_safepoint = false,
                bool is_high_interval = false)
       : allocator_(allocator),
         first_range_(nullptr),
@@ -936,7 +929,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
         spill_slot_(kNoSpillSlot),
         is_fixed_(is_fixed),
         is_temp_(is_temp),
-        is_slow_path_safepoint_(is_slow_path_safepoint),
         is_high_interval_(is_high_interval),
        high_or_low_interval_(nullptr),
        defined_by_(defined_by) {}
@@ -1121,9 +1113,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
   // Whether the interval is for a temporary.
   const bool is_temp_;
 
-  // Whether the interval is for a safepoint that calls on slow path.
-  const bool is_slow_path_safepoint_;
-
   // Whether this interval is a synthesized interval for register pair.
   const bool is_high_interval_;
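Taken together, these hunks retire every "maximum number of live registers" mechanism: the synthesized slow-path intervals, the two per-kind counters in the linear-scan allocator, and ComputeMaxSafepointLiveRegisters in the graph-coloring allocator. The resolver now receives the safepoint instructions themselves (the new ArrayRef<HInstruction* const>(safepoints_) argument to Resolve), so the registers to preserve around a slow-path call can be derived directly from each safepoint's live caller-save registers, and the frame reserves only the worst-case spill size in bytes. A minimal standalone sketch of that arithmetic; every name here (SlowPathSpills, MaxSafepointSpillSize, kWordSize, kFpSpillSize, SafepointLiveness) is invented for illustration and is not ART's actual API:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr size_t kWordSize = 8;     // assumed spill slot size of a core register
constexpr size_t kFpSpillSize = 8;  // assumed spill slot size of an FP register

// Only live caller-save registers must be spilled around a slow-path call;
// callee-save registers are preserved by the callee and stay where they are.
uint32_t SlowPathSpills(uint32_t live_registers, uint32_t callee_save_mask) {
  return live_registers & ~callee_save_mask;
}

struct SafepointLiveness {
  uint32_t live_core_registers;  // bit i set <=> core register i is live
  uint32_t live_fp_registers;    // bit i set <=> FP register i is live
};

// The frame only has to reserve the worst-case spill area over all safepoints,
// measured in bytes instead of "maximum number of live registers".
size_t MaxSafepointSpillSize(const std::vector<SafepointLiveness>& safepoints,
                             uint32_t core_callee_saves,
                             uint32_t fp_callee_saves) {
  size_t max_size = 0u;
  for (const SafepointLiveness& sp : safepoints) {
    uint32_t core = SlowPathSpills(sp.live_core_registers, core_callee_saves);
    uint32_t fp = SlowPathSpills(sp.live_fp_registers, fp_callee_saves);
    size_t size = __builtin_popcount(core) * kWordSize +
                  __builtin_popcount(fp) * kFpSpillSize;
    max_size = std::max(max_size, size);
  }
  return max_size;
}

// A slow path then saves (and later restores) exactly the registers in the
// spill mask, visiting set bits from lowest to highest while bumping a stack
// offset, which mirrors the rewritten save/restore loops in this change.
size_t VisitSpills(uint32_t spills, size_t stack_offset, size_t slot_size) {
  while (spills != 0u) {
    uint32_t reg = __builtin_ctz(spills);  // lowest set bit = register number
    (void)reg;  // a real implementation would emit a store/load for `reg` here
    stack_offset += slot_size;
    spills &= spills - 1u;  // clear the lowest set bit
  }
  return stack_offset;
}

Two properties of this scheme explain the deletions above: counting only caller-save spills stops reserving space for callee-save registers that never needed saving, and taking one byte-sized maximum per safepoint (rather than independent core and FP maxima that could come from different safepoints) stops the frame from reserving the sum of two separate worst cases.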