author Nicolas Geoffray <ngeoffray@google.com> 2021-12-23 18:07:38 +0000
committer Nicolas Geoffray <ngeoffray@google.com> 2023-10-17 08:34:49 +0000
commit f7bd87edf3b80ce3bbd6e571fd119c878cb79992 (patch)
tree b190e125c6e9f12040632644bf45ce7ab6fe82f7 /compiler/optimizing
parent b983874f2296c4d5a063d9e3d33f8a50fc865a09 (diff)
Add branch profiling in baseline compiler.
Currently unused. Follow-up CLs will make use of the data.

Test: test.py
Bug: 304969871
Change-Id: I486faba3de030061715d06ab9fdb33970d319d9b
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc             | 29
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc          | 33
-rw-r--r--  compiler/optimizing/code_generator_x86.cc               | 39
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc            | 37
-rw-r--r--  compiler/optimizing/graph_visualizer.cc                 |  5
-rw-r--r--  compiler/optimizing/instruction_builder.cc              | 45
-rw-r--r--  compiler/optimizing/instruction_builder.h               |  4
-rw-r--r--  compiler/optimizing/nodes.h                             | 21
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc  |  5
9 files changed, 194 insertions(+), 24 deletions(-)
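All four code-generator hunks below bake the same BranchCache layout into the emitted code: the static_asserts require the true counter to sit exactly 2 bytes after the false counter, so the materialized condition (0 or 1), scaled by 2, indexes the right 16-bit slot. A minimal sketch of the layout those asserts imply follows; the field names and plain-integer offsets are assumptions, since the real class lives in the runtime and, as the Int32Value() calls in the diff show, returns MemberOffset values from these helpers.

    // Sketch of the layout the static_asserts pin down; illustrative only.
    #include <cstdint>

    class BranchCache {
     public:
      // Stand-ins for offsetof(BranchCache, false_) / offsetof(BranchCache, true_).
      static constexpr uint32_t FalseOffset() { return 0u; }
      static constexpr uint32_t TrueOffset() { return 2u; }

      uint16_t GetFalse() const { return false_; }
      uint16_t GetTrue() const { return true_; }

     private:
      uint16_t false_;  // times the branch condition evaluated to false
      uint16_t true_;   // times the branch condition evaluated to true
    };

    // The invariant every backend below relies on: condition * 2 selects
    // the counter, because the two uint16_t slots are adjacent.
    static_assert(BranchCache::TrueOffset() - BranchCache::FalseOffset() == 2,
                  "Unexpected offsets for BranchCache");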
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 89172aaebc..997d7a48c0 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -3867,6 +3867,35 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
false_target = nullptr;
}
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(if_instr->InputAt(0)->IsCondition());
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
+ // Currently, not all If branches are profiled.
+ if (cache != nullptr) {
+ uint64_t address =
+ reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
+ static_assert(
+ BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
+ "Unexpected offsets for BranchCache");
+ vixl::aarch64::Label done;
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireX();
+ Register counter = temps.AcquireW();
+ Register condition = InputRegisterAt(if_instr, 0).X();
+ __ Mov(temp, address);
+ __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
+ __ Add(counter, counter, 1);
+ __ Tbnz(counter, 16, &done);
+ __ Strh(counter, MemOperand(temp, condition, LSL, 1));
+ __ Bind(&done);
+ }
+ }
+ } else {
+ DCHECK(!GetGraph()->IsCompilingBaseline()) << if_instr->InputAt(0)->DebugName();
+ }
GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
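The arm, x86, and x86-64 hunks that follow emit the same update with their own idioms (Adds/Uxth plus a zero check on arm, addw plus j(kEqual)/j(kZero) on x86). In plain C++, the arm64 sequence above (Ldrh/Add/Tbnz/Strh) computes the following; names are illustrative, and the bit-16 test is what makes the counter saturate at 0xFFFF instead of wrapping:

    #include <cstdint>

    // counters points at the false counter; condition is 0 or 1, so
    // counters[condition] is the slot Ldrh reaches via the LSL 1 scaling.
    inline void UpdateBranchCounter(uint16_t* counters, int condition) {
      uint32_t counter = counters[condition];   // Ldrh: zero-extending load
      counter += 1;                             // Add
      if ((counter & (1u << 16)) == 0) {        // Tbnz on bit 16
        counters[condition] = static_cast<uint16_t>(counter);  // Strh
      }
      // If bit 16 is set, the counter was already 0xFFFF; skipping the
      // store saturates it there rather than wrapping back to zero.
    }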
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 78bf316c17..b9496ebbe0 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -3013,6 +3013,9 @@ void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -3023,6 +3026,36 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
nullptr : codegen_->GetLabelOf(true_successor);
vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
nullptr : codegen_->GetLabelOf(false_successor);
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(if_instr->InputAt(0)->IsCondition());
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
+ // Currently, not all If branches are profiled.
+ if (cache != nullptr) {
+ uint32_t address =
+ reinterpret_cast32<uint32_t>(cache) + BranchCache::FalseOffset().Int32Value();
+ static_assert(
+ BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
+ "Unexpected offsets for BranchCache");
+ vixl32::Label done;
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ vixl32::Register counter = RegisterFrom(if_instr->GetLocations()->GetTemp(0));
+ vixl32::Register condition = InputRegisterAt(if_instr, 0);
+ __ Mov(temp, address);
+ __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
+ __ Adds(counter, counter, 1);
+ __ Uxth(counter, counter);
+ __ CompareAndBranchIfZero(counter, &done);
+ __ Strh(counter, MemOperand(temp, condition, LSL, 1));
+ __ Bind(&done);
+ }
+ }
+ } else {
+ DCHECK(!GetGraph()->IsCompilingBaseline()) << if_instr->InputAt(0)->DebugName();
+ }
GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index b8c8d9f73d..5296ed071e 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2115,7 +2115,8 @@ static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
return cond->IsCondition() &&
cond->GetNext() == branch &&
cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
- !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
+ !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
+ !cond->GetBlock()->GetGraph()->IsCompilingBaseline();
}
template<class LabelType>
@@ -2206,7 +2207,13 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio
void LocationsBuilderX86::VisitIf(HIf* if_instr) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
- locations->SetInAt(0, Location::Any());
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ } else {
+ locations->SetInAt(0, Location::Any());
+ }
}
}
@@ -2217,6 +2224,34 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
nullptr : codegen_->GetLabelOf(true_successor);
Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
nullptr : codegen_->GetLabelOf(false_successor);
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(if_instr->InputAt(0)->IsCondition());
+ Register temp = if_instr->GetLocations()->GetTemp(0).AsRegister<Register>();
+ Register counter = if_instr->GetLocations()->GetTemp(1).AsRegister<Register>();
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
+ // Currently, not all If branches are profiled.
+ if (cache != nullptr) {
+ uint64_t address =
+ reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
+ static_assert(
+ BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
+ "Unexpected offsets for BranchCache");
+ NearLabel done;
+ Location lhs = if_instr->GetLocations()->InAt(0);
+ __ movl(temp, Immediate(address));
+ __ movzxw(counter, Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0));
+ __ addw(counter, Immediate(1));
+ __ j(kEqual, &done);
+ __ movw(Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0), counter);
+ __ Bind(&done);
+ }
+ }
+ } else {
+ DCHECK(!GetGraph()->IsCompilingBaseline()) << if_instr->InputAt(0)->DebugName();
+ }
GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index f61a1f04c3..8f8690ddf6 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2178,7 +2178,8 @@ static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
// conditions if they are materialized due to the complex branching.
return cond->IsCondition() &&
cond->GetNext() == branch &&
- !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
+ !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
+ !cond->GetBlock()->GetGraph()->IsCompilingBaseline();
}
template<class LabelType>
@@ -2268,7 +2269,12 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc
void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
- locations->SetInAt(0, Location::Any());
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ } else {
+ locations->SetInAt(0, Location::Any());
+ }
}
}
@@ -2279,6 +2285,33 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
nullptr : codegen_->GetLabelOf(true_successor);
Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
nullptr : codegen_->GetLabelOf(false_successor);
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(if_instr->InputAt(0)->IsCondition());
+ CpuRegister temp = if_instr->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
+ // Currently, not all If branches are profiled.
+ if (cache != nullptr) {
+ uint64_t address =
+ reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
+ static_assert(
+ BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
+ "Unexpected offsets for BranchCache");
+ NearLabel done;
+ Location lhs = if_instr->GetLocations()->InAt(0);
+ __ movq(CpuRegister(TMP), Immediate(address));
+ __ movzxw(temp, Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0));
+ __ addw(temp, Immediate(1));
+ __ j(kZero, &done);
+ __ movw(Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0), temp);
+ __ Bind(&done);
+ }
+ }
+ } else {
+ DCHECK(!GetGraph()->IsCompilingBaseline()) << if_instr->InputAt(0)->DebugName();
+ }
GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index c2c0953044..e5aa5d30df 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -496,6 +496,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("bias") << condition->GetBias();
}
+ void VisitIf(HIf* if_instr) override {
+ StartAttributeStream("true_count") << if_instr->GetTrueCount();
+ StartAttributeStream("false_count") << if_instr->GetFalseCount();
+ }
+
void VisitInvoke(HInvoke* invoke) override {
StartAttributeStream("dex_file_index") << invoke->GetMethodReference().index;
ArtMethod* method = invoke->GetResolvedMethod();
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index fd599f789e..281da6f9ec 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -665,22 +665,31 @@ void HInstructionBuilder::InitializeParameters() {
}
}
-template<typename T>
-void HInstructionBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
- HInstruction* first = LoadLocal(instruction.VRegA(), DataType::Type::kInt32);
- HInstruction* second = LoadLocal(instruction.VRegB(), DataType::Type::kInt32);
- T* comparison = new (allocator_) T(first, second, dex_pc);
- AppendInstruction(comparison);
- AppendInstruction(new (allocator_) HIf(comparison, dex_pc));
- current_block_ = nullptr;
-}
-
-template<typename T>
-void HInstructionBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
+template<typename T, bool kCompareWithZero>
+void HInstructionBuilder::If_21_22t(const Instruction& instruction, uint32_t dex_pc) {
HInstruction* value = LoadLocal(instruction.VRegA(), DataType::Type::kInt32);
- T* comparison = new (allocator_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
+ T* comparison = nullptr;
+ if (kCompareWithZero) {
+ comparison = new (allocator_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
+ } else {
+ HInstruction* second = LoadLocal(instruction.VRegB(), DataType::Type::kInt32);
+ comparison = new (allocator_) T(value, second, dex_pc);
+ }
AppendInstruction(comparison);
- AppendInstruction(new (allocator_) HIf(comparison, dex_pc));
+ HIf* if_instr = new (allocator_) HIf(comparison, dex_pc);
+
+ ProfilingInfo* info = graph_->GetProfilingInfo();
+ if (info != nullptr && !graph_->IsCompilingBaseline()) {
+ BranchCache* cache = info->GetBranchCache(dex_pc);
+ if (cache != nullptr) {
+ if_instr->SetTrueCount(cache->GetTrue());
+ if_instr->SetFalseCount(cache->GetFalse());
+ }
+ }
+
+ // Append after setting true/false count, so that the builder knows if the
+ // instruction needs an environment.
+ AppendInstruction(if_instr);
current_block_ = nullptr;
}
@@ -2879,8 +2888,12 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
}
#define IF_XX(comparison, cond) \
- case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
- case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
+ case Instruction::IF_##cond: \
+ If_21_22t<comparison, /* kCompareWithZero= */ false>(instruction, dex_pc); \
+ break; \
+ case Instruction::IF_##cond##Z: \
+ If_21_22t<comparison, /* kCompareWithZero= */ true>(instruction, dex_pc); \
+ break;
IF_XX(HEqual, EQ);
IF_XX(HNotEqual, NE);
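The rewrite folds If_21t and If_22t into one template, selecting the zero-compare form at compile time. A stand-alone reproduction of the macro mechanics, with stub types in place of HInstructionBuilder and the dex Instruction, shows how each IF_XX line fans out into the two instantiations:

    // Illustrative scaffolding only; the real macro dispatches on dex
    // opcodes inside ProcessDexInstruction.
    #include <cstdint>
    #include <cstdio>

    enum class Op { IF_EQ, IF_EQZ };

    template <bool kCompareWithZero>
    void If_21_22t(uint32_t dex_pc) {
      std::printf("dex_pc=%u compare_with_zero=%d\n", dex_pc,
                  kCompareWithZero ? 1 : 0);
    }

    #define IF_XX(cond)                                   \
      case Op::IF_##cond:                                 \
        If_21_22t</* kCompareWithZero= */ false>(dex_pc); \
        break;                                            \
      case Op::IF_##cond##Z:                              \
        If_21_22t</* kCompareWithZero= */ true>(dex_pc);  \
        break

    void Dispatch(Op op, uint32_t dex_pc) {
      switch (op) {
        IF_XX(EQ);
      }
    }

    int main() {
      Dispatch(Op::IF_EQ, 10);   // one IF_XX line covers both forms
      Dispatch(Op::IF_EQZ, 12);
      return 0;
    }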
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 3d65d8fb54..5c165d7bf9 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -116,8 +116,8 @@ class HInstructionBuilder : public ValueObject {
template<typename T>
void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc);
- template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
- template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
+ template<typename T, bool kCompareWithZero>
+ void If_21_22t(const Instruction& instruction, uint32_t dex_pc);
void Conversion_12x(const Instruction& instruction,
DataType::Type input_type,
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 3613b9519b..04d51328f2 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2663,7 +2663,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
void RemoveEnvironmentUsers();
bool IsEmittedAtUseSite() const { return GetPackedFlag<kFlagEmittedAtUseSite>(); }
- void MarkEmittedAtUseSite() { SetPackedFlag<kFlagEmittedAtUseSite>(true); }
+ void MarkEmittedAtUseSite() {
+ // When compiling baseline, in order to do branch profiling, we don't want to
+ // emit conditions at use site.
+ DCHECK(!IsCondition() || !GetBlock()->GetGraph()->IsCompilingBaseline());
+ SetPackedFlag<kFlagEmittedAtUseSite>(true);
+ }
protected:
// If set, the machine code for this instruction is assumed to be generated by
@@ -3519,7 +3524,9 @@ class HDoubleConstant final : public HConstant {
class HIf final : public HExpression<1> {
public:
explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc)
- : HExpression(kIf, SideEffects::None(), dex_pc) {
+ : HExpression(kIf, SideEffects::None(), dex_pc),
+ true_count_(std::numeric_limits<uint16_t>::max()),
+ false_count_(std::numeric_limits<uint16_t>::max()) {
SetRawInputAt(0, input);
}
@@ -3534,10 +3541,20 @@ class HIf final : public HExpression<1> {
return GetBlock()->GetSuccessors()[1];
}
+ void SetTrueCount(uint16_t count) { true_count_ = count; }
+ uint16_t GetTrueCount() const { return true_count_; }
+
+ void SetFalseCount(uint16_t count) { false_count_ = count; }
+ uint16_t GetFalseCount() const { return false_count_; }
+
DECLARE_INSTRUCTION(If);
protected:
DEFAULT_COPY_CONSTRUCTOR(If);
+
+ private:
+ uint16_t true_count_;
+ uint16_t false_count_;
};
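HIf's constructor initializes both counters to the uint16_t maximum, and the builder only overwrites them when a BranchCache exists for the dex pc. Since this CL leaves the data unused ("Follow-up CLs will make use of the data"), the sketch below is purely hypothetical: a consumer deriving a taken-probability, treating the 0xFFFF/0xFFFF default as "no profile recorded" (an assumption here, since a fully saturated cache reads the same way):

    // Hypothetical follow-up consumer; not part of this change.
    #include <cstdint>
    #include <limits>

    constexpr uint16_t kNoCount = std::numeric_limits<uint16_t>::max();

    float TrueBranchProbability(uint16_t true_count, uint16_t false_count) {
      if (true_count == kNoCount && false_count == kNoCount) {
        return 0.5f;  // constructor default: assume an unbiased branch
      }
      uint32_t total = static_cast<uint32_t>(true_count) + false_count;
      return total == 0
          ? 0.5f
          : static_cast<float>(true_count) / static_cast<float>(total);
    }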
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 398b10abf3..59282a7222 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -180,6 +180,11 @@ bool PrepareForRegisterAllocation::CanEmitConditionAt(HCondition* condition,
return false;
}
+ if (GetGraph()->IsCompilingBaseline()) {
+ // To do branch profiling, we cannot emit conditions at use site.
+ return false;
+ }
+
if (user->IsIf() || user->IsDeoptimize()) {
return true;
}