From f7bd87edf3b80ce3bbd6e571fd119c878cb79992 Mon Sep 17 00:00:00 2001 From: Nicolas Geoffray Date: Thu, 23 Dec 2021 18:07:38 +0000 Subject: Add branch profiling in baseline compiler. Currently unused. Follow-up CLs will make use of the data. Test: test.py Bug: 304969871 Change-Id: I486faba3de030061715d06ab9fdb33970d319d9b --- compiler/optimizing/code_generator_arm64.cc | 29 ++++++++++++++ compiler/optimizing/code_generator_arm_vixl.cc | 33 ++++++++++++++++ compiler/optimizing/code_generator_x86.cc | 39 ++++++++++++++++++- compiler/optimizing/code_generator_x86_64.cc | 37 +++++++++++++++++- compiler/optimizing/graph_visualizer.cc | 5 +++ compiler/optimizing/instruction_builder.cc | 45 ++++++++++++++-------- compiler/optimizing/instruction_builder.h | 4 +- compiler/optimizing/nodes.h | 21 +++++++++- .../optimizing/prepare_for_register_allocation.cc | 5 +++ 9 files changed, 194 insertions(+), 24 deletions(-) (limited to 'compiler/optimizing') diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 89172aaebc..997d7a48c0 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -3867,6 +3867,35 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) { false_target = nullptr; } + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + DCHECK(if_instr->InputAt(0)->IsCondition()); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc()); + // Currently, not all If branches are profiled. 
+ if (cache != nullptr) { + uint64_t address = + reinterpret_cast64(cache) + BranchCache::FalseOffset().Int32Value(); + static_assert( + BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2, + "Unexpected offsets for BranchCache"); + vixl::aarch64::Label done; + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireX(); + Register counter = temps.AcquireW(); + Register condition = InputRegisterAt(if_instr, 0).X(); + __ Mov(temp, address); + __ Ldrh(counter, MemOperand(temp, condition, LSL, 1)); + __ Add(counter, counter, 1); + __ Tbnz(counter, 16, &done); + __ Strh(counter, MemOperand(temp, condition, LSL, 1)); + __ Bind(&done); + } + } + } else { + DCHECK(!GetGraph()->IsCompilingBaseline()) << if_instr->InputAt(0)->DebugName(); + } GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 78bf316c17..b9496ebbe0 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -3013,6 +3013,9 @@ void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -3023,6 +3026,36 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
nullptr : codegen_->GetLabelOf(false_successor); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + DCHECK(if_instr->InputAt(0)->IsCondition()); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc()); + // Currently, not all If branches are profiled. + if (cache != nullptr) { + uint32_t address = + reinterpret_cast32(cache) + BranchCache::FalseOffset().Int32Value(); + static_assert( + BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2, + "Unexpected offsets for BranchCache"); + vixl32::Label done; + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + vixl32::Register counter = RegisterFrom(if_instr->GetLocations()->GetTemp(0)); + vixl32::Register condition = InputRegisterAt(if_instr, 0); + __ Mov(temp, address); + __ Ldrh(counter, MemOperand(temp, condition, LSL, 1)); + __ Adds(counter, counter, 1); + __ Uxth(counter, counter); + __ CompareAndBranchIfZero(counter, &done); + __ Strh(counter, MemOperand(temp, condition, LSL, 1)); + __ Bind(&done); + } + } + } else { + DCHECK(!GetGraph()->IsCompilingBaseline()) << if_instr->InputAt(0)->DebugName(); + } GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index b8c8d9f73d..5296ed071e 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -2115,7 +2115,8 @@ static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { return cond->IsCondition() && cond->GetNext() == branch && cond->InputAt(0)->GetType() != DataType::Type::kInt64 && - !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()); + 
!DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) && + !cond->GetBlock()->GetGraph()->IsCompilingBaseline(); } template @@ -2206,7 +2207,13 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio void LocationsBuilderX86::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { - locations->SetInAt(0, Location::Any()); + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else { + locations->SetInAt(0, Location::Any()); + } } } @@ -2217,6 +2224,34 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + DCHECK(if_instr->InputAt(0)->IsCondition()); + Register temp = if_instr->GetLocations()->GetTemp(0).AsRegister(); + Register counter = if_instr->GetLocations()->GetTemp(1).AsRegister(); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc()); + // Currently, not all If branches are profiled. 
+ if (cache != nullptr) { + uint64_t address = + reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value(); + static_assert( + BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2, + "Unexpected offsets for BranchCache"); + NearLabel done; + Location lhs = if_instr->GetLocations()->InAt(0); + __ movl(temp, Immediate(address)); + __ movzxw(counter, Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0)); + __ addw(counter, Immediate(1)); + __ j(kEqual, &done); + __ movw(Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0), counter); + __ Bind(&done); + } + } + } else { + DCHECK(!GetGraph()->IsCompilingBaseline()) << if_instr->InputAt(0)->DebugName(); + } GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index f61a1f04c3..8f8690ddf6 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -2178,7 +2178,8 @@ static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { // conditions if they are materialized due to the complex branching.
return cond->IsCondition() && cond->GetNext() == branch && - !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()); + !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) && + !cond->GetBlock()->GetGraph()->IsCompilingBaseline(); } template @@ -2268,7 +2269,12 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { - locations->SetInAt(0, Location::Any()); + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else { + locations->SetInAt(0, Location::Any()); + } } } @@ -2279,6 +2285,33 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + DCHECK(if_instr->InputAt(0)->IsCondition()); + CpuRegister temp = if_instr->GetLocations()->GetTemp(0).AsRegister(); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc()); + // Currently, not all If branches are profiled. 
+ if (cache != nullptr) { + uint64_t address = + reinterpret_cast64(cache) + BranchCache::FalseOffset().Int32Value(); + static_assert( + BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2, + "Unexpected offsets for BranchCache"); + NearLabel done; + Location lhs = if_instr->GetLocations()->InAt(0); + __ movq(CpuRegister(TMP), Immediate(address)); + __ movzxw(temp, Address(CpuRegister(TMP), lhs.AsRegister(), TIMES_2, 0)); + __ addw(temp, Immediate(1)); + __ j(kZero, &done); + __ movw(Address(CpuRegister(TMP), lhs.AsRegister(), TIMES_2, 0), temp); + __ Bind(&done); + } + } + } else { + DCHECK(!GetGraph()->IsCompilingBaseline()) << if_instr->InputAt(0)->DebugName(); + } GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index c2c0953044..e5aa5d30df 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -496,6 +496,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("bias") << condition->GetBias(); } + void VisitIf(HIf* if_instr) override { + StartAttributeStream("true_count") << if_instr->GetTrueCount(); + StartAttributeStream("false_count") << if_instr->GetFalseCount(); + } + void VisitInvoke(HInvoke* invoke) override { StartAttributeStream("dex_file_index") << invoke->GetMethodReference().index; ArtMethod* method = invoke->GetResolvedMethod(); diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index fd599f789e..281da6f9ec 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -665,22 +665,31 @@ void HInstructionBuilder::InitializeParameters() { } } -template -void HInstructionBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) { - HInstruction* first = LoadLocal(instruction.VRegA(), 
DataType::Type::kInt32); - HInstruction* second = LoadLocal(instruction.VRegB(), DataType::Type::kInt32); - T* comparison = new (allocator_) T(first, second, dex_pc); - AppendInstruction(comparison); - AppendInstruction(new (allocator_) HIf(comparison, dex_pc)); - current_block_ = nullptr; -} - -template<typename T> -void HInstructionBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) { +template<typename T, bool kCompareWithZero> +void HInstructionBuilder::If_21_22t(const Instruction& instruction, uint32_t dex_pc) { HInstruction* value = LoadLocal(instruction.VRegA(), DataType::Type::kInt32); - T* comparison = new (allocator_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc); + T* comparison = nullptr; + if (kCompareWithZero) { + comparison = new (allocator_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc); + } else { + HInstruction* second = LoadLocal(instruction.VRegB(), DataType::Type::kInt32); + comparison = new (allocator_) T(value, second, dex_pc); + } AppendInstruction(comparison); - AppendInstruction(new (allocator_) HIf(comparison, dex_pc)); + HIf* if_instr = new (allocator_) HIf(comparison, dex_pc); + + ProfilingInfo* info = graph_->GetProfilingInfo(); + if (info != nullptr && !graph_->IsCompilingBaseline()) { + BranchCache* cache = info->GetBranchCache(dex_pc); + if (cache != nullptr) { + if_instr->SetTrueCount(cache->GetTrue()); + if_instr->SetFalseCount(cache->GetFalse()); + } + } + + // Append after setting true/false count, so that the builder knows if the + // instruction needs an environment.
+ AppendInstruction(if_instr); current_block_ = nullptr; } @@ -2879,8 +2888,12 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, } #define IF_XX(comparison, cond) \ - case Instruction::IF_##cond: If_22t(instruction, dex_pc); break; \ - case Instruction::IF_##cond##Z: If_21t(instruction, dex_pc); break + case Instruction::IF_##cond: \ + If_21_22t(instruction, dex_pc); \ + break; \ + case Instruction::IF_##cond##Z: \ + If_21_22t(instruction, dex_pc); \ + break; IF_XX(HEqual, EQ); IF_XX(HNotEqual, NE); diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 3d65d8fb54..5c165d7bf9 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -116,8 +116,8 @@ class HInstructionBuilder : public ValueObject { template void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc); - template void If_21t(const Instruction& instruction, uint32_t dex_pc); - template void If_22t(const Instruction& instruction, uint32_t dex_pc); + template + void If_21_22t(const Instruction& instruction, uint32_t dex_pc); void Conversion_12x(const Instruction& instruction, DataType::Type input_type, diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 3613b9519b..04d51328f2 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2663,7 +2663,12 @@ class HInstruction : public ArenaObject { void RemoveEnvironmentUsers(); bool IsEmittedAtUseSite() const { return GetPackedFlag(); } - void MarkEmittedAtUseSite() { SetPackedFlag(true); } + void MarkEmittedAtUseSite() { + // When compiling baseline, in order to do branch profiling, we don't want to + // emit conditions at use site. 
+ DCHECK(!IsCondition() || !GetBlock()->GetGraph()->IsCompilingBaseline()); + SetPackedFlag(true); + } protected: // If set, the machine code for this instruction is assumed to be generated by @@ -3519,7 +3524,9 @@ class HDoubleConstant final : public HConstant { class HIf final : public HExpression<1> { public: explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HExpression(kIf, SideEffects::None(), dex_pc) { + : HExpression(kIf, SideEffects::None(), dex_pc), + true_count_(std::numeric_limits<uint16_t>::max()), + false_count_(std::numeric_limits<uint16_t>::max()) { SetRawInputAt(0, input); } @@ -3534,10 +3541,20 @@ class HIf final : public HExpression<1> { return GetBlock()->GetSuccessors()[1]; } + void SetTrueCount(uint16_t count) { true_count_ = count; } + uint16_t GetTrueCount() const { return true_count_; } + + void SetFalseCount(uint16_t count) { false_count_ = count; } + uint16_t GetFalseCount() const { return false_count_; } + DECLARE_INSTRUCTION(If); protected: DEFAULT_COPY_CONSTRUCTOR(If); + + private: + uint16_t true_count_; + uint16_t false_count_; }; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 398b10abf3..59282a7222 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -180,6 +180,11 @@ bool PrepareForRegisterAllocation::CanEmitConditionAt(HCondition* condition, return false; } + if (GetGraph()->IsCompilingBaseline()) { + // To do branch profiling, we cannot emit conditions at use site. + return false; + } + if (user->IsIf() || user->IsDeoptimize()) { return true; } -- cgit v1.2.3-59-g8ed1b