Finally implement Location::kNoOutputOverlap.
The [i, i + 1) interval scheme we chose for representing
lifetime positions is not optimal for doing this optimization.
It however doesn't prevent recognizing a non-split interval
during the TryAllocateFreeReg phase, and try to re-use
its inputs' registers.
Change-Id: I80a2823b0048d3310becfc5f5fb7b1230dfd8201
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index dc2446d..fd4e391 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -290,7 +290,7 @@
result_location = locations->InAt(0);
break;
}
- locations->SetOut(result_location);
+ locations->UpdateOut(result_location);
}
}
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index b0cd7ba..78fd181 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1296,13 +1296,14 @@
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
switch (neg->GetResultType()) {
- case Primitive::kPrimInt:
- case Primitive::kPrimLong: {
- Location::OutputOverlap output_overlaps = (neg->GetResultType() == Primitive::kPrimLong)
- ? Location::kOutputOverlap
- : Location::kNoOutputOverlap;
+ case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), output_overlaps);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
break;
}
@@ -1837,7 +1838,7 @@
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -1914,7 +1915,7 @@
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
case Primitive::kPrimFloat:
@@ -2297,7 +2298,7 @@
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1)));
- locations->SetOut(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
case Primitive::kPrimLong: {
@@ -2492,7 +2493,8 @@
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // Output overlaps because it is written before doing the low comparison.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
break;
}
case Primitive::kPrimFloat:
@@ -2765,12 +2767,14 @@
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- bool generate_volatile = field_info.IsVolatile()
+ bool volatile_for_double = field_info.IsVolatile()
&& (field_info.GetFieldType() == Primitive::kPrimDouble)
&& !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
- if (generate_volatile) {
+ bool overlap = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong);
+ locations->SetOut(Location::RequiresRegister(),
+ (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+ if (volatile_for_double) {
// Arm encoding have some additional constraints for ldrexd/strexd:
// - registers need to be consecutive
// - the first register should be even but not R14.
@@ -3614,7 +3618,8 @@
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister());
+ // The out register is used as a temporary, so it overlaps with the inputs.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}
void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -3710,10 +3715,7 @@
|| instruction->GetResultType() == Primitive::kPrimLong);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- Location::OutputOverlap output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong)
- ? Location::kOutputOverlap
- : Location::kNoOutputOverlap;
- locations->SetOut(Location::RequiresRegister(), output_overlaps);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
void InstructionCodeGeneratorARM::VisitAnd(HAnd* instruction) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 8b06d60..68e3a30 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -482,16 +482,17 @@
}
void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) {
- DCHECK(output_.IsUnallocated() || output_.IsInvalid());
+ DCHECK(output_.IsInvalid());
output_overlaps_ = overlaps;
output_ = location;
}
void UpdateOut(Location location) {
- // The only reason for updating an output is for parameters where
- // we only know the exact stack slot after doing full register
- // allocation.
- DCHECK(output_.IsStackSlot() || output_.IsDoubleStackSlot());
+ // There are two reasons for updating an output:
+ // 1) Parameters, where we only know the exact stack slot after
+ // doing full register allocation.
+ // 2) Unallocated location.
+ DCHECK(output_.IsStackSlot() || output_.IsDoubleStackSlot() || output_.IsUnallocated());
output_ = location;
}
@@ -563,28 +564,22 @@
return live_registers_.GetNumberOfRegisters();
}
- bool InputOverlapsWithOutputOrTemp(uint32_t input_index, bool is_environment) const {
- if (is_environment) return true;
- if ((input_index == 0)
+ bool OutputUsesSameAs(uint32_t input_index) const {
+ return (input_index == 0)
&& output_.IsUnallocated()
- && (output_.GetPolicy() == Location::kSameAsFirstInput)) {
- return false;
- }
+ && (output_.GetPolicy() == Location::kSameAsFirstInput);
+ }
+
+ bool IsFixedInput(uint32_t input_index) const {
Location input = inputs_.Get(input_index);
- if (input.IsRegister()
+ return input.IsRegister()
|| input.IsFpuRegister()
|| input.IsPair()
|| input.IsStackSlot()
- || input.IsDoubleStackSlot()) {
- // For fixed locations, the register allocator requires to have inputs die before
- // the instruction, so that input moves use the location of the input just
- // before that instruction (and not potential moves due to splitting).
- return false;
- }
- return true;
+ || input.IsDoubleStackSlot();
}
- bool OutputOverlapsWithInputs() const {
+ bool OutputCanOverlapWithInputs() const {
return output_overlaps_ == Location::kOutputOverlap;
}
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 0a3f24b..3809720 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -485,6 +485,9 @@
BitVector* liveness_of_register = liveness_of_values.Get(current->GetRegister());
for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
if (liveness_of_register->IsBitSet(j)) {
+ if (current->IsUsingInputRegister() && current->CanUseInputRegister()) {
+ continue;
+ }
if (log_fatal_on_failure) {
std::ostringstream message;
message << "Register conflict at " << j << " ";
@@ -639,6 +642,29 @@
}
}
+static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
+ DCHECK(!interval->IsHighInterval());
+ // Note that the same instruction may occur multiple times in the input list,
+ // so `free_until` may have changed already.
+ if (interval->IsDeadAt(position)) {
+ // Set the register to be free. Note that inactive intervals might later
+ // update this.
+ free_until[interval->GetRegister()] = kMaxLifetimePosition;
+ if (interval->HasHighInterval()) {
+ DCHECK(interval->GetHighInterval()->IsDeadAt(position));
+ free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
+ }
+ } else if (!interval->Covers(position)) {
+ // The interval becomes inactive at `defined_by`. We make its register
+ // available only until the next use strictly after `defined_by`.
+ free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
+ if (interval->HasHighInterval()) {
+ DCHECK(!interval->GetHighInterval()->Covers(position));
+ free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
+ }
+ }
+}
+
// Find a free register. If multiple are found, pick the register that
// is free the longest.
bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) {
@@ -656,6 +682,32 @@
free_until[interval->GetRegister()] = 0;
}
+ // An interval that starts an instruction (that is, it is not split), may
+ // re-use the registers used by the inputs of that instruciton, based on the
+ // location summary.
+ HInstruction* defined_by = current->GetDefinedBy();
+ if (defined_by != nullptr && !current->IsSplit()) {
+ LocationSummary* locations = defined_by->GetLocations();
+ if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
+ for (HInputIterator it(defined_by); !it.Done(); it.Advance()) {
+ // Take the last interval of the input. It is the location of that interval
+ // that will be used at `defined_by`.
+ LiveInterval* interval = it.Current()->GetLiveInterval()->GetLastSibling();
+ // Note that interval may have not been processed yet.
+ // TODO: Handle non-split intervals last in the work list.
+ if (interval->HasRegister() && interval->SameRegisterKind(*current)) {
+ // The input must be live until the end of `defined_by`, to comply to
+ // the linear scan algorithm. So we use `defined_by`'s end lifetime
+ // position to check whether the input is dead or is inactive after
+ // `defined_by`.
+ DCHECK(interval->Covers(defined_by->GetLifetimePosition()));
+ size_t position = defined_by->GetLifetimePosition() + 1;
+ FreeIfNotCoverAt(interval, position, free_until);
+ }
+ }
+ }
+ }
+
// For each inactive interval, set its register to be free until
// the next intersection with `current`.
for (size_t i = 0, e = inactive_.Size(); i < e; ++i) {
@@ -1497,7 +1549,7 @@
DCHECK(locations->InAt(0).Equals(source));
}
}
- locations->SetOut(source);
+ locations->UpdateOut(source);
} else {
DCHECK(source.Equals(location));
}
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index b0d3853..0e68a61 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
#include "nodes.h"
+#include <iostream>
namespace art {
@@ -181,12 +182,21 @@
void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) {
// Set the use within the instruction.
- size_t position = instruction->GetLifetimePosition();
- if (instruction->GetLocations()->InputOverlapsWithOutputOrTemp(input_index, is_environment)) {
- // If it overlaps, we need to make sure the user will not try to allocate a temp
- // or its output to the same register.
- ++position;
+ size_t position = instruction->GetLifetimePosition() + 1;
+ LocationSummary* locations = instruction->GetLocations();
+ if (!is_environment) {
+ if (locations->IsFixedInput(input_index) || locations->OutputUsesSameAs(input_index)) {
+ // For fixed inputs and output same as input, the register allocator
+ // requires to have inputs die at the instruction, so that input moves use the
+ // location of the input just before that instruction (and not potential moves due
+ // to splitting).
+ position = instruction->GetLifetimePosition();
+ }
}
+
+ DCHECK(position == instruction->GetLifetimePosition()
+ || position == instruction->GetLifetimePosition() + 1);
+
if ((first_use_ != nullptr)
&& (first_use_->GetUser() == instruction)
&& (first_use_->GetPosition() < position)) {
@@ -301,6 +311,7 @@
LiveInterval* GetParent() const { return parent_; }
LiveRange* GetFirstRange() const { return first_range_; }
+ LiveRange* GetLastRange() const { return last_range_; }
int GetRegister() const { return register_; }
void SetRegister(int reg) { register_ = reg; }
@@ -403,6 +414,23 @@
return FirstRegisterUseAfter(GetStart());
}
+ size_t FirstUseAfter(size_t position) const {
+ if (is_temp_) {
+ return position == GetStart() ? position : kNoLifetime;
+ }
+
+ UsePosition* use = first_use_;
+ size_t end = GetEnd();
+ while (use != nullptr && use->GetPosition() <= end) {
+ size_t use_position = use->GetPosition();
+ if (use_position > position) {
+ return use_position;
+ }
+ use = use->GetNext();
+ }
+ return kNoLifetime;
+ }
+
UsePosition* GetFirstUse() const {
return first_use_;
}
@@ -511,6 +539,13 @@
}
LiveInterval* GetNextSibling() const { return next_sibling_; }
+ LiveInterval* GetLastSibling() {
+ LiveInterval* result = this;
+ while (result->next_sibling_ != nullptr) {
+ result = result->next_sibling_;
+ }
+ return result;
+ }
// Returns the first register hint that is at least free before
// the value contained in `free_until`. If none is found, returns
@@ -541,6 +576,9 @@
// Returns whether `other` and `this` share the same kind of register.
bool SameRegisterKind(Location other) const;
+ bool SameRegisterKind(const LiveInterval& other) const {
+ return IsFloatingPoint() == other.IsFloatingPoint();
+ }
bool HasHighInterval() const {
return IsLowInterval();
@@ -594,6 +632,60 @@
}
}
+ // Returns whether an interval, when it is non-split, is using
+ // the same register of one of its input.
+ bool IsUsingInputRegister() const {
+ if (defined_by_ != nullptr && !IsSplit()) {
+ for (HInputIterator it(defined_by_); !it.Done(); it.Advance()) {
+ LiveInterval* interval = it.Current()->GetLiveInterval();
+
+ // Find the interval that covers `defined_by`_.
+ while (interval != nullptr && !interval->Covers(defined_by_->GetLifetimePosition())) {
+ interval = interval->GetNextSibling();
+ }
+
+ // Check if both intervals have the same register of the same kind.
+ if (interval != nullptr
+ && interval->SameRegisterKind(*this)
+ && interval->GetRegister() == GetRegister()) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ // Returns whether an interval, when it is non-split, can safely use
+ // the same register of one of its input. Note that this method requires
+ // IsUsingInputRegister() to be true.
+ bool CanUseInputRegister() const {
+ DCHECK(IsUsingInputRegister());
+ if (defined_by_ != nullptr && !IsSplit()) {
+ LocationSummary* locations = defined_by_->GetLocations();
+ if (locations->OutputCanOverlapWithInputs()) {
+ return false;
+ }
+ for (HInputIterator it(defined_by_); !it.Done(); it.Advance()) {
+ LiveInterval* interval = it.Current()->GetLiveInterval();
+
+ // Find the interval that covers `defined_by`_.
+ while (interval != nullptr && !interval->Covers(defined_by_->GetLifetimePosition())) {
+ interval = interval->GetNextSibling();
+ }
+
+ if (interval != nullptr
+ && interval->SameRegisterKind(*this)
+ && interval->GetRegister() == GetRegister()) {
+ // We found the input that has the same register. Check if it is live after
+ // `defined_by`_.
+ return !interval->Covers(defined_by_->GetLifetimePosition() + 1);
+ }
+ }
+ }
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+ }
+
private:
LiveInterval(ArenaAllocator* allocator,
Primitive::Type type,