ART: Introduce Loop Versioning in SuberblockCloner.
Support Loop Versioning in SuberblockCloner as a tool to
enable further optimization (e.g. Dynamic Loop Unrolling).
The patch brings the feature in without enabling it.
Replace std::cout with LOG(INFO) for debug dumps.
Test: superblock_cloner_test.
Test: test-art-target.
Change-Id: I303cabfb752b8c3c8597abfc0ac261e8616e8cee
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index 9f7a316..b968491 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -20,7 +20,7 @@
#include "induction_var_range.h"
#include "graph_checker.h"
-#include <iostream>
+#include <sstream>
namespace art {
@@ -227,6 +227,40 @@
}
}
+bool SuperblockCloner::IsRemapInfoForVersioning() const {
+ return remap_incoming_->empty() &&
+ remap_orig_internal_->empty() &&
+ remap_copy_internal_->empty();
+}
+
+void SuperblockCloner::CopyIncomingEdgesForVersioning() {
+ for (uint32_t orig_block_id : orig_bb_set_.Indexes()) {
+ HBasicBlock* orig_block = GetBlockById(orig_block_id);
+ size_t incoming_edge_count = 0;
+ for (HBasicBlock* orig_pred : orig_block->GetPredecessors()) {
+ uint32_t orig_pred_id = orig_pred->GetBlockId();
+ if (IsInOrigBBSet(orig_pred_id)) {
+ continue;
+ }
+
+ HBasicBlock* copy_block = GetBlockCopy(orig_block);
+ // This corresponds to the requirement on the order of predecessors: all the incoming
+ // edges must be seen before the internal ones. This is always true for natural loops.
+ // TODO: remove this requirement.
+ DCHECK_EQ(orig_block->GetPredecessorIndexOf(orig_pred), incoming_edge_count);
+ for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* orig_phi = it.Current()->AsPhi();
+ HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+ HInstruction* orig_phi_input = orig_phi->InputAt(incoming_edge_count);
+ // Add the corresponding input of the original phi to the copy one.
+ copy_phi->AddInput(orig_phi_input);
+ }
+ copy_block->AddPredecessor(orig_pred);
+ incoming_edge_count++;
+ }
+ }
+}
+
//
// Local versions of CF calculation/adjustment routines.
//
@@ -452,6 +486,12 @@
}
void SuperblockCloner::RemapEdgesSuccessors() {
+ // By this stage all the blocks have been copied, copy phis - created with no inputs;
+ // no copy edges have been created so far.
+ if (IsRemapInfoForVersioning()) {
+ CopyIncomingEdgesForVersioning();
+ }
+
// Redirect incoming edges.
for (HEdge e : *remap_incoming_) {
HBasicBlock* orig_block = GetBlockById(e.GetFrom());
@@ -646,25 +686,26 @@
if (bb == nullptr) {
continue;
}
- std::cout << bb->GetBlockId();
- std::cout << " <- ";
+ std::ostringstream oss;
+ oss << bb->GetBlockId();
+ oss << " <- ";
for (HBasicBlock* pred : bb->GetPredecessors()) {
- std::cout << pred->GetBlockId() << " ";
+ oss << pred->GetBlockId() << " ";
}
- std::cout << " -> ";
+ oss << " -> ";
for (HBasicBlock* succ : bb->GetSuccessors()) {
- std::cout << succ->GetBlockId() << " ";
+ oss << succ->GetBlockId() << " ";
}
if (bb->GetDominator()) {
- std::cout << " dom " << bb->GetDominator()->GetBlockId();
+ oss << " dom " << bb->GetDominator()->GetBlockId();
}
if (bb->GetLoopInformation()) {
- std::cout << "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId();
+ oss << "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId();
}
- std::cout << std::endl;
+ LOG(INFO) << oss.str();
}
}
@@ -747,35 +788,35 @@
checker.Run();
if (!checker.IsValid()) {
for (const std::string& error : checker.GetErrors()) {
- std::cout << error << std::endl;
+ LOG(ERROR) << error;
}
- LOG(FATAL) << "GraphChecker failed: superblock cloner\n";
+ LOG(FATAL) << "GraphChecker failed: superblock cloner";
}
}
}
void DumpBBSet(const ArenaBitVector* set) {
for (uint32_t idx : set->Indexes()) {
- std::cout << idx << "\n";
+ LOG(INFO) << idx;
}
}
void SuperblockCloner::DumpInputSets() {
- std::cout << "orig_bb_set:\n";
+ LOG(INFO) << "orig_bb_set:";
for (uint32_t idx : orig_bb_set_.Indexes()) {
- std::cout << idx << "\n";
+ LOG(INFO) << idx;
}
- std::cout << "remap_orig_internal:\n";
+ LOG(INFO) << "remap_orig_internal:";
for (HEdge e : *remap_orig_internal_) {
- std::cout << e << "\n";
+ LOG(INFO) << e;
}
- std::cout << "remap_copy_internal:\n";
+ LOG(INFO) << "remap_copy_internal:";
for (auto e : *remap_copy_internal_) {
- std::cout << e << "\n";
+ LOG(INFO) << e;
}
- std::cout << "remap_incoming:\n";
+ LOG(INFO) << "remap_incoming:";
for (auto e : *remap_incoming_) {
- std::cout << e << "\n";
+ LOG(INFO) << e;
}
}
@@ -837,8 +878,8 @@
return true;
}
+// Checks that loop unrolling/peeling/versioning is being conducted.
bool SuperblockCloner::IsFastCase() const {
- // Check that loop unrolling/loop peeling is being conducted.
// Check that all the basic blocks belong to the same loop.
bool flag = false;
HLoopInformation* common_loop_info = nullptr;
@@ -854,11 +895,15 @@
}
}
- // Check that orig_bb_set_ corresponds to loop peeling/unrolling.
+ // Check that orig_bb_set_ corresponds to loop peeling/unrolling/versioning.
if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) {
return false;
}
+ if (IsRemapInfoForVersioning()) {
+ return true;
+ }
+
bool peeling_or_unrolling = false;
HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
@@ -1012,8 +1057,7 @@
HBasicBlock* copy_block = CloneBasicBlock(orig_block);
bb_map_->Put(orig_block, copy_block);
if (kSuperblockClonerLogging) {
- std::cout << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId() <<
- std::endl;
+ LOG(INFO) << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId();
}
}
}
@@ -1089,14 +1133,14 @@
return current;
}
-bool PeelUnrollHelper::IsLoopClonable(HLoopInformation* loop_info) {
- PeelUnrollHelper helper(
+bool LoopClonerHelper::IsLoopClonable(HLoopInformation* loop_info) {
+ LoopClonerHelper helper(
loop_info, /* bb_map= */ nullptr, /* hir_map= */ nullptr, /* induction_range= */ nullptr);
return helper.IsLoopClonable();
}
-HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) {
- // For now do peeling only for natural loops.
+HBasicBlock* LoopClonerHelper::DoLoopTransformationImpl(TransformationKind transformation) {
+ // For now do transformations only for natural loops.
DCHECK(!loop_info_->IsIrreducible());
HBasicBlock* loop_header = loop_info_->GetHeader();
@@ -1105,9 +1149,25 @@
HGraph* graph = loop_header->GetGraph();
if (kSuperblockClonerLogging) {
- std::cout << "Method: " << graph->PrettyMethod() << std::endl;
- std::cout << "Scalar loop " << (to_unroll ? "unrolling" : "peeling") <<
- " was applied to the loop <" << loop_header->GetBlockId() << ">." << std::endl;
+ LOG(INFO) << "Method: " << graph->PrettyMethod();
+ std::ostringstream oss;
+ oss << "Scalar loop ";
+ switch (transformation) {
+ case TransformationKind::kPeeling:
+ oss << "peeling";
+ break;
+ case TransformationKind::kUnrolling:
+ oss<< "unrolling";
+ break;
+ case TransformationKind::kVersioning:
+ oss << "versioning";
+ break;
+ default:
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+ }
+ oss << " was applied to the loop <" << loop_header->GetBlockId() << ">.";
+ LOG(INFO) << oss.str();
}
ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool());
@@ -1116,11 +1176,14 @@
HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
- CollectRemappingInfoForPeelUnroll(to_unroll,
- loop_info_,
- &remap_orig_internal,
- &remap_copy_internal,
- &remap_incoming);
+ // No remapping needed for loop versioning.
+ if (transformation != TransformationKind::kVersioning) {
+ CollectRemappingInfoForPeelUnroll(transformation == TransformationKind::kUnrolling,
+ loop_info_,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+ }
cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
cloner_.Run();
@@ -1132,7 +1195,7 @@
return loop_header;
}
-PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info,
+LoopClonerSimpleHelper::LoopClonerSimpleHelper(HLoopInformation* info,
InductionVarRange* induction_range)
: bb_map_(std::less<HBasicBlock*>(),
info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),