path: root/compiler/optimizing
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/boolean_simplifier.cc | 4
-rw-r--r--  compiler/optimizing/code_generator.cc | 11
-rw-r--r--  compiler/optimizing/code_generator.h | 7
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 30
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 77
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 7
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 209
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 1
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 60
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 1
-rw-r--r--  compiler/optimizing/codegen_test.cc | 2
-rw-r--r--  compiler/optimizing/common_arm64.h | 3
-rw-r--r--  compiler/optimizing/dominator_test.cc | 8
-rw-r--r--  compiler/optimizing/graph_checker.cc | 32
-rw-r--r--  compiler/optimizing/graph_checker.h | 3
-rw-r--r--  compiler/optimizing/inliner.cc | 4
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 2
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 2
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 2
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 2
-rw-r--r--  compiler/optimizing/linearize_test.cc | 6
-rw-r--r--  compiler/optimizing/live_ranges_test.cc | 12
-rw-r--r--  compiler/optimizing/liveness_test.cc | 2
-rw-r--r--  compiler/optimizing/nodes.cc | 69
-rw-r--r--  compiler/optimizing/nodes.h | 96
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 2
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc | 32
-rw-r--r--  compiler/optimizing/parallel_move_resolver.h | 25
-rw-r--r--  compiler/optimizing/parallel_move_test.cc | 75
-rw-r--r--  compiler/optimizing/primitive_type_propagation.cc | 8
-rw-r--r--  compiler/optimizing/primitive_type_propagation.h | 2
-rw-r--r--  compiler/optimizing/register_allocator.cc | 68
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 30
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc | 26
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h | 131
35 files changed, 598 insertions, 453 deletions
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index be432c5a20..06328f2490 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -73,8 +73,8 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) {
}
} else {
// General case when 'cond' is another instruction of type boolean.
- // Negate with 'cond == 0'.
- return new (allocator) HEqual(cond, graph->GetIntConstant(0));
+ DCHECK_EQ(cond->GetType(), Primitive::Type::kPrimBoolean);
+ return new (allocator) HBooleanNot(cond);
}
}
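
A note on why this rewrite is sound: boolean_simplifier only reaches this branch for values already typed boolean, so the operand is 0 or 1 and logical negation reduces to XOR with 1, which is exactly what the HBooleanNot backends further down emit (eor / Eor / xorl). A minimal standalone sketch of the identity (plain C++, not ART code):

#include <cassert>

// A value already known to be boolean is 0 or 1, so logical negation
// is XOR with 1 -- the instruction each backend emits for HBooleanNot.
int BooleanNot(int b) {
  assert(b == 0 || b == 1);  // Mirrors HBooleanNot::Evaluate's DCHECK(IsUint<1>(x)).
  return b ^ 1;
}

int main() {
  assert(BooleanNot(0) == 1);
  assert(BooleanNot(1) == 0);
  return 0;
}
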
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 8736374306..f7fa5db8d5 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -802,10 +802,15 @@ void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend
}
}
-void CodeGenerator::EmitParallelMoves(Location from1, Location to1, Location from2, Location to2) {
+void CodeGenerator::EmitParallelMoves(Location from1,
+ Location to1,
+ Primitive::Type type1,
+ Location from2,
+ Location to2,
+ Primitive::Type type2) {
HParallelMove parallel_move(GetGraph()->GetArena());
- parallel_move.AddMove(from1, to1, nullptr);
- parallel_move.AddMove(from2, to2, nullptr);
+ parallel_move.AddMove(from1, to1, type1, nullptr);
+ parallel_move.AddMove(from2, to2, type2, nullptr);
GetMoveResolver()->EmitNativeCode(&parallel_move);
}
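
Threading a Primitive::Type through EmitParallelMoves lets the parallel move resolver classify a move as 64-bit even when neither location is a register pair (e.g. a stack-slot-to-stack-slot move). A toy model of the new MoveOperands shape, with names simplified from the nodes.h hunk further down (a sketch, not the real class):

#include <cassert>

// Each move now records its primitive type, so the resolver can tell
// 64-bit moves from 32-bit ones without inspecting the locations.
enum class PrimType { kInt, kNot, kLong, kDouble };

struct ToyMove {
  int source_slot;
  int destination_slot;
  PrimType type;
  bool Is64BitMove() const {
    return type == PrimType::kLong || type == PrimType::kDouble;
  }
};

int main() {
  ToyMove narrow{0, 4, PrimType::kInt};
  ToyMove wide{8, 16, PrimType::kLong};
  assert(!narrow.Is64BitMove());
  assert(wide.Is64BitMove());
  return 0;
}
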
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index b888aca264..e536b2d0ee 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -244,7 +244,12 @@ class CodeGenerator {
// of the architecture.
static size_t GetCacheOffset(uint32_t index);
- void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2);
+ void EmitParallelMoves(Location from1,
+ Location to1,
+ Primitive::Type type1,
+ Location from2,
+ Location to2,
+ Primitive::Type type2);
static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
// Check that null value is not represented as an integer constant.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 332c99ae64..2ea920310a 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -141,8 +141,10 @@ class BoundsCheckSlowPathARM : public SlowPathCodeARM {
codegen->EmitParallelMoves(
index_location_,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimInt,
length_location_,
- Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimInt);
arm_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
}
@@ -262,8 +264,10 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM {
codegen->EmitParallelMoves(
class_to_check_,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
object_class_,
- Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimNot);
if (instruction_->IsInstanceOf()) {
arm_codegen->InvokeRuntime(
@@ -750,8 +754,10 @@ void CodeGeneratorARM::Move64(Location destination, Location source) {
EmitParallelMoves(
Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
+ Primitive::kPrimInt,
Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
- Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
+ Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
+ Primitive::kPrimInt);
} else if (source.IsFpuRegister()) {
UNIMPLEMENTED(FATAL);
} else {
@@ -789,8 +795,10 @@ void CodeGeneratorARM::Move64(Location destination, Location source) {
EmitParallelMoves(
Location::StackSlot(source.GetStackIndex()),
Location::StackSlot(destination.GetStackIndex()),
+ Primitive::kPrimInt,
Location::StackSlot(source.GetHighStackIndex(kArmWordSize)),
- Location::StackSlot(destination.GetHighStackIndex(kArmWordSize)));
+ Location::StackSlot(destination.GetHighStackIndex(kArmWordSize)),
+ Primitive::kPrimInt);
}
}
}
@@ -2657,6 +2665,20 @@ void InstructionCodeGeneratorARM::VisitNot(HNot* not_) {
}
}
+void LocationsBuilderARM::VisitBooleanNot(HBooleanNot* bool_not) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitBooleanNot(HBooleanNot* bool_not) {
+ LocationSummary* locations = bool_not->GetLocations();
+ Location out = locations->Out();
+ Location in = locations->InAt(0);
+ __ eor(out.AsRegister<Register>(), in.AsRegister<Register>(), ShifterOperand(1));
+}
+
void LocationsBuilderARM::VisitCompare(HCompare* compare) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 33eacbaf08..efc41e7c06 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -122,8 +122,8 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
// move resolver.
InvokeRuntimeCallingConvention calling_convention;
codegen->EmitParallelMoves(
- index_location_, LocationFrom(calling_convention.GetRegisterAt(0)),
- length_location_, LocationFrom(calling_convention.GetRegisterAt(1)));
+ index_location_, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
+ length_location_, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
@@ -322,8 +322,8 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
// move resolver.
InvokeRuntimeCallingConvention calling_convention;
codegen->EmitParallelMoves(
- class_to_check_, LocationFrom(calling_convention.GetRegisterAt(0)),
- object_class_, LocationFrom(calling_convention.GetRegisterAt(1)));
+ class_to_check_, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot,
+ object_class_, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot);
if (instruction_->IsInstanceOf()) {
arm64_codegen->InvokeRuntime(
@@ -466,8 +466,10 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
// sp[0] : current method.
__ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
GetAssembler()->cfi().AdjustCFAOffset(frame_size);
- SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
- SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+ GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
+ frame_size - GetCoreSpillSize());
+ GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
+ frame_size - FrameEntrySpillSize());
}
}
@@ -475,8 +477,10 @@ void CodeGeneratorARM64::GenerateFrameExit() {
GetAssembler()->cfi().RememberState();
if (!HasEmptyFrame()) {
int frame_size = GetFrameSize();
- UnspillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
- UnspillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+ GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(),
+ frame_size - FrameEntrySpillSize());
+ GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(),
+ frame_size - GetCoreSpillSize());
__ Drop(frame_size);
GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
}
@@ -485,51 +489,6 @@ void CodeGeneratorARM64::GenerateFrameExit() {
GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
}
-static inline dwarf::Reg DWARFReg(CPURegister reg) {
- if (reg.IsFPRegister()) {
- return dwarf::Reg::Arm64Fp(reg.code());
- } else {
- DCHECK_LT(reg.code(), 31u); // X0 - X30.
- return dwarf::Reg::Arm64Core(reg.code());
- }
-}
-
-void CodeGeneratorARM64::SpillRegisters(vixl::CPURegList registers, int offset) {
- int size = registers.RegisterSizeInBytes();
- while (registers.Count() >= 2) {
- const CPURegister& dst0 = registers.PopLowestIndex();
- const CPURegister& dst1 = registers.PopLowestIndex();
- __ Stp(dst0, dst1, MemOperand(__ StackPointer(), offset));
- GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset);
- GetAssembler()->cfi().RelOffset(DWARFReg(dst1), offset + size);
- offset += 2 * size;
- }
- if (!registers.IsEmpty()) {
- const CPURegister& dst0 = registers.PopLowestIndex();
- __ Str(dst0, MemOperand(__ StackPointer(), offset));
- GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset);
- }
- DCHECK(registers.IsEmpty());
-}
-
-void CodeGeneratorARM64::UnspillRegisters(vixl::CPURegList registers, int offset) {
- int size = registers.RegisterSizeInBytes();
- while (registers.Count() >= 2) {
- const CPURegister& dst0 = registers.PopLowestIndex();
- const CPURegister& dst1 = registers.PopLowestIndex();
- __ Ldp(dst0, dst1, MemOperand(__ StackPointer(), offset));
- GetAssembler()->cfi().Restore(DWARFReg(dst0));
- GetAssembler()->cfi().Restore(DWARFReg(dst1));
- offset += 2 * size;
- }
- if (!registers.IsEmpty()) {
- const CPURegister& dst0 = registers.PopLowestIndex();
- __ Ldr(dst0, MemOperand(__ StackPointer(), offset));
- GetAssembler()->cfi().Restore(DWARFReg(dst0));
- }
- DCHECK(registers.IsEmpty());
-}
-
void CodeGeneratorARM64::Bind(HBasicBlock* block) {
__ Bind(GetLabelOf(block));
}
@@ -1380,7 +1339,7 @@ void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
if (instruction->HasUses()) {
locations->SetOut(Location::SameAsFirstInput());
}
@@ -2320,6 +2279,16 @@ void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
}
}
+void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
+ __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::Operand(1));
+}
+
void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 9430e31037..07c6dd059a 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -46,14 +46,11 @@ static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegi
const vixl::Register tr = vixl::x18; // Thread Register
static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke.
-const vixl::Register kQuickSuspendRegister = vixl::x19;
const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
-// TODO: When the runtime does not use kQuickSuspendRegister as a suspend
-// counter remove it from the reserved registers list.
-const vixl::CPURegList runtime_reserved_core_registers(tr, kQuickSuspendRegister, vixl::lr);
+const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
// Callee-saved registers defined by AAPCS64.
const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister,
@@ -227,8 +224,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
void GenerateFrameEntry() OVERRIDE;
void GenerateFrameExit() OVERRIDE;
- void SpillRegisters(vixl::CPURegList registers, int offset);
- void UnspillRegisters(vixl::CPURegList registers, int offset);
vixl::CPURegList GetFramePreservedCoreRegisters() const {
return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 38f9ef8efe..879216d59b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -113,8 +113,10 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 {
x86_codegen->EmitParallelMoves(
index_location_,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimInt,
length_location_,
- Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimInt);
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowArrayBounds)));
RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
}
@@ -266,8 +268,10 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 {
x86_codegen->EmitParallelMoves(
class_to_check_,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
object_class_,
- Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimNot);
if (instruction_->IsInstanceOf()) {
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize,
@@ -655,8 +659,10 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
EmitParallelMoves(
Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
+ Primitive::kPrimInt,
Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
- Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
+ Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
+ Primitive::kPrimInt);
} else if (source.IsFpuRegister()) {
LOG(FATAL) << "Unimplemented";
} else {
@@ -699,8 +705,10 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
EmitParallelMoves(
Location::StackSlot(source.GetStackIndex()),
Location::StackSlot(destination.GetStackIndex()),
+ Primitive::kPrimInt,
Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
- Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)));
+ Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
+ Primitive::kPrimInt);
}
}
}
@@ -2915,6 +2923,21 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
}
}
+void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
+ LocationSummary* locations = bool_not->GetLocations();
+ Location in = locations->InAt(0);
+ Location out = locations->Out();
+ DCHECK(in.Equals(out));
+ __ xorl(out.AsRegister<Register>(), Immediate(1));
+}
+
void LocationsBuilderX86::VisitCompare(HCompare* compare) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
@@ -3841,43 +3864,23 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
}
void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) {
- ScratchRegisterScope possible_scratch(
- this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
- int temp = possible_scratch.GetRegister();
- if (temp == kNoRegister) {
- // Use the stack.
- __ pushl(Address(ESP, src));
- __ popl(Address(ESP, dst));
- } else {
- Register temp_reg = static_cast<Register>(temp);
- __ movl(temp_reg, Address(ESP, src));
- __ movl(Address(ESP, dst), temp_reg);
- }
+ ScratchRegisterScope ensure_scratch(
+ this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+ Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
+ int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
+ __ movl(temp_reg, Address(ESP, src + stack_offset));
+ __ movl(Address(ESP, dst + stack_offset), temp_reg);
}
void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) {
- ScratchRegisterScope possible_scratch(
- this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
- int temp = possible_scratch.GetRegister();
- if (temp == kNoRegister) {
- // Use the stack instead.
- // Push src low word.
- __ pushl(Address(ESP, src));
- // Push src high word. Stack offset = 4.
- __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */));
-
- // Pop into dst high word. Stack offset = 8.
- // Pop with ESP address uses the 'after increment' value of ESP.
- __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */));
- // Finally dst low word. Stack offset = 4.
- __ popl(Address(ESP, dst));
- } else {
- Register temp_reg = static_cast<Register>(temp);
- __ movl(temp_reg, Address(ESP, src));
- __ movl(Address(ESP, dst), temp_reg);
- __ movl(temp_reg, Address(ESP, src + kX86WordSize));
- __ movl(Address(ESP, dst + kX86WordSize), temp_reg);
- }
+ ScratchRegisterScope ensure_scratch(
+ this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+ Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
+ int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
+ __ movl(temp_reg, Address(ESP, src + stack_offset));
+ __ movl(Address(ESP, dst + stack_offset), temp_reg);
+ __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize));
+ __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg);
}
void ParallelMoveResolverX86::EmitMove(size_t index) {
@@ -3942,18 +3945,10 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
__ xorps(dest, dest);
} else {
ScratchRegisterScope ensure_scratch(
- this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
- int temp_reg = ensure_scratch.GetRegister();
- if (temp_reg == kNoRegister) {
- // Avoid spilling/restoring a scratch register by using the stack.
- __ pushl(Immediate(value));
- __ movss(dest, Address(ESP, 0));
- __ addl(ESP, Immediate(4));
- } else {
- Register temp = static_cast<Register>(temp_reg);
- __ movl(temp, Immediate(value));
- __ movd(dest, temp);
- }
+ this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+ Register temp = static_cast<Register>(ensure_scratch.GetRegister());
+ __ movl(temp, Immediate(value));
+ __ movd(dest, temp);
}
} else {
DCHECK(destination.IsStackSlot()) << destination;
@@ -4002,96 +3997,42 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
}
}
-void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) {
- // Prefer to avoid xchg as it isn't speedy on smaller processors.
- ScratchRegisterScope possible_scratch(
- this, reg1, codegen_->GetNumberOfCoreRegisters());
- int temp_reg = possible_scratch.GetRegister();
- if (temp_reg == kNoRegister || temp_reg == reg2) {
- __ pushl(reg1);
- __ movl(reg1, reg2);
- __ popl(reg2);
- } else {
- Register temp = static_cast<Register>(temp_reg);
- __ movl(temp, reg1);
- __ movl(reg1, reg2);
- __ movl(reg2, temp);
- }
-}
-
void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
- ScratchRegisterScope possible_scratch(
- this, reg, codegen_->GetNumberOfCoreRegisters());
- int temp_reg = possible_scratch.GetRegister();
- if (temp_reg == kNoRegister) {
- __ pushl(Address(ESP, mem));
- __ movl(Address(ESP, mem + kX86WordSize), reg);
- __ popl(reg);
- } else {
- Register temp = static_cast<Register>(temp_reg);
- __ movl(temp, Address(ESP, mem));
- __ movl(Address(ESP, mem), reg);
- __ movl(reg, temp);
- }
+ Register suggested_scratch = reg == EAX ? EBX : EAX;
+ ScratchRegisterScope ensure_scratch(
+ this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+
+ int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
+ __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
+ __ movl(Address(ESP, mem + stack_offset), reg);
+ __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
}
void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
- ScratchRegisterScope possible_scratch(
- this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
- int temp_reg = possible_scratch.GetRegister();
- if (temp_reg == kNoRegister) {
- __ pushl(Address(ESP, mem));
- __ movss(Address(ESP, mem + kX86WordSize), reg);
- __ movss(reg, Address(ESP, 0));
- __ addl(ESP, Immediate(kX86WordSize));
- } else {
- Register temp = static_cast<Register>(temp_reg);
- __ movl(temp, Address(ESP, mem));
- __ movss(Address(ESP, mem), reg);
- __ movd(reg, temp);
- }
+ ScratchRegisterScope ensure_scratch(
+ this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+
+ Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
+ int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
+ __ movl(temp_reg, Address(ESP, mem + stack_offset));
+ __ movss(Address(ESP, mem + stack_offset), reg);
+ __ movd(reg, temp_reg);
}
void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
- ScratchRegisterScope possible_scratch1(
- this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
- int temp_reg1 = possible_scratch1.GetRegister();
- if (temp_reg1 == kNoRegister) {
- // No free registers. Use the stack.
- __ pushl(Address(ESP, mem1));
- __ pushl(Address(ESP, mem2 + kX86WordSize));
- // Pop with ESP address uses the 'after increment' value of ESP.
- __ popl(Address(ESP, mem1 + kX86WordSize));
- __ popl(Address(ESP, mem2));
- } else {
- // Got the first one. Try for a second.
- ScratchRegisterScope possible_scratch2(
- this, temp_reg1, codegen_->GetNumberOfCoreRegisters());
- int temp_reg2 = possible_scratch2.GetRegister();
- if (temp_reg2 == kNoRegister) {
- Register temp = static_cast<Register>(temp_reg1);
- // Bummer. Only have one free register to use.
- // Save mem1 on the stack.
- __ pushl(Address(ESP, mem1));
-
- // Copy mem2 into mem1.
- __ movl(temp, Address(ESP, mem2 + kX86WordSize));
- __ movl(Address(ESP, mem1 + kX86WordSize), temp);
-
- // Now pop mem1 into mem2.
- // Pop with ESP address uses the 'after increment' value of ESP.
- __ popl(Address(ESP, mem2));
- } else {
- // Great. We have 2 registers to play with.
- Register temp1 = static_cast<Register>(temp_reg1);
- Register temp2 = static_cast<Register>(temp_reg2);
- DCHECK_NE(temp1, temp2);
- __ movl(temp1, Address(ESP, mem1));
- __ movl(temp2, Address(ESP, mem2));
- __ movl(Address(ESP, mem2), temp1);
- __ movl(Address(ESP, mem1), temp2);
- }
- }
+ ScratchRegisterScope ensure_scratch1(
+ this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+
+ Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
+ ScratchRegisterScope ensure_scratch2(
+ this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+
+ int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
+ stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
+ __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
+ __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
+ __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
+ __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
}
void ParallelMoveResolverX86::EmitSwap(size_t index) {
@@ -4100,7 +4041,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) {
Location destination = move->GetDestination();
if (source.IsRegister() && destination.IsRegister()) {
- Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>());
+ __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>());
} else if (source.IsRegister() && destination.IsStackSlot()) {
Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsRegister()) {
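
The rewritten x86 helpers all follow one rule: ScratchRegisterScope may now spill a register to free it, and the push moves ESP down one word, so every ESP-relative offset computed before entering the scope must be rebased by kX86WordSize while the scope is active. A toy restatement of that rule (assumed simplification, not the real scope class):

#include <cassert>

constexpr int kWordSize = 4;  // Stands in for kX86WordSize.

// Spilling the scratch register pushes one word, so the same stack slot
// is one word further from the new ESP inside the scope.
int AdjustedOffset(int offset, bool scratch_spilled) {
  return offset + (scratch_spilled ? kWordSize : 0);
}

int main() {
  assert(AdjustedOffset(8, /*scratch_spilled=*/false) == 8);
  assert(AdjustedOffset(8, /*scratch_spilled=*/true) == 12);
  return 0;
}
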
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 00a4323a54..368ae0fb0e 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -106,7 +106,6 @@ class ParallelMoveResolverX86 : public ParallelMoveResolver {
X86Assembler* GetAssembler() const;
private:
- void Exchange(Register reg1, Register Reg2);
void Exchange(Register reg, int mem);
void Exchange(int mem1, int mem2);
void Exchange32(XmmRegister reg, int mem);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 7a928d4d7d..a3d3490357 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -161,8 +161,10 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
codegen->EmitParallelMoves(
index_location_,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimInt,
length_location_,
- Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimInt);
__ gs()->call(Address::Absolute(
QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowArrayBounds), true));
RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
@@ -285,8 +287,10 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
codegen->EmitParallelMoves(
class_to_check_,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
object_class_,
- Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimNot);
if (instruction_->IsInstanceOf()) {
__ gs()->call(
@@ -2974,6 +2978,21 @@ void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
}
}
+void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
+ LocationSummary* locations = bool_not->GetLocations();
+ DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
+ locations->Out().AsRegister<CpuRegister>().AsRegister());
+ Location out = locations->Out();
+ __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
+}
+
void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -3817,27 +3836,15 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
ScratchRegisterScope ensure_scratch(
- this, TMP, codegen_->GetNumberOfCoreRegisters());
-
- int temp_reg = ensure_scratch.GetRegister();
- if (temp_reg == kNoRegister) {
- // Use the stack as a temporary.
- // Save mem1 on the stack.
- __ pushq(Address(CpuRegister(RSP), mem1));
-
- // Copy mem2 into mem1.
- __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize));
- __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP));
+ this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
- // Now pop mem1 into mem2.
- __ popq(Address(CpuRegister(RSP), mem2));
- } else {
- CpuRegister temp = CpuRegister(temp_reg);
- __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1));
- __ movq(temp, Address(CpuRegister(RSP), mem2));
- __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP));
- __ movq(Address(CpuRegister(RSP), mem1), temp);
- }
+ int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
+ __ movq(CpuRegister(ensure_scratch.GetRegister()),
+ Address(CpuRegister(RSP), mem2 + stack_offset));
+ __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
+ __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
+ CpuRegister(ensure_scratch.GetRegister()));
}
void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
@@ -3846,13 +3853,6 @@ void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
__ movd(reg, CpuRegister(TMP));
}
-void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
- // Prefer to avoid xchg as it isn't speedy on smaller processors.
- __ movq(CpuRegister(TMP), reg1);
- __ movq(reg1, reg2);
- __ movq(reg2, CpuRegister(TMP));
-}
-
void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
__ movsd(Address(CpuRegister(RSP), mem), reg);
@@ -3865,7 +3865,7 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
Location destination = move->GetDestination();
if (source.IsRegister() && destination.IsRegister()) {
- Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+ __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
} else if (source.IsRegister() && destination.IsStackSlot()) {
Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 61bf6ac71d..b4876ef161 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -118,7 +118,6 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
void Exchange32(CpuRegister reg, int mem);
void Exchange32(XmmRegister reg, int mem);
void Exchange32(int mem1, int mem2);
- void Exchange64(CpuRegister reg1, CpuRegister reg2);
void Exchange64(CpuRegister reg, int mem);
void Exchange64(XmmRegister reg, int mem);
void Exchange64(int mem1, int mem2);
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 2be117bf38..afcff1ef65 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -152,7 +152,7 @@ static void RunCodeOptimized(CodeGenerator* codegen,
bool has_result,
Expected expected) {
graph->BuildDominatorTree();
- SsaLivenessAnalysis liveness(*graph, codegen);
+ SsaLivenessAnalysis liveness(graph, codegen);
liveness.Analyze();
RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness);
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 966165bf4c..53f1f3c45c 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -194,7 +194,8 @@ static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst
int64_t value = CodeGenerator::GetInt64ValueOf(constant);
- if (instr->IsAdd() || instr->IsSub() || instr->IsCondition() || instr->IsCompare()) {
+ if (instr->IsAdd() || instr->IsSub() || instr->IsCondition() ||
+ instr->IsCompare() || instr->IsBoundsCheck()) {
// Uses aliases of ADD/SUB instructions.
return vixl::Assembler::IsImmAddSub(value);
} else if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
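
BoundsCheck joins the add/sub family here because the comparison it generates is an alias of SUBS, so its constant operand must satisfy vixl::Assembler::IsImmAddSub. A rough model of that predicate, assuming the standard AArch64 ADD/SUB immediate format (an unsigned 12-bit value, optionally shifted left by 12):

#include <cassert>
#include <cstdint>

// Assumed model of vixl::Assembler::IsImmAddSub, based on the AArch64
// ADD/SUB immediate encoding: imm12, or imm12 LSL #12.
bool IsImmAddSubToy(int64_t value) {
  if (value < 0) return false;
  return (value & ~INT64_C(0xfff)) == 0 ||        // plain imm12
         (value & ~(INT64_C(0xfff) << 12)) == 0;  // imm12 LSL #12
}

int main() {
  assert(IsImmAddSubToy(42));        // fits directly
  assert(IsImmAddSubToy(0x7ff000));  // fits shifted
  assert(!IsImmAddSubToy(0x1001));   // would need both halves
  return 0;
}
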
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 7623e421fd..61a7697301 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -36,7 +36,13 @@ static void TestCode(const uint16_t* data, const int* blocks, size_t blocks_leng
ASSERT_EQ(graph->GetBlocks().Size(), blocks_length);
for (size_t i = 0, e = blocks_length; i < e; ++i) {
if (blocks[i] == -1) {
- ASSERT_EQ(nullptr, graph->GetBlocks().Get(i)->GetDominator());
+ if (graph->GetBlocks().Get(i) == nullptr) {
+ // Dead block.
+ } else {
+ // Only the entry block has no dominator.
+ ASSERT_EQ(nullptr, graph->GetBlocks().Get(i)->GetDominator());
+ ASSERT_TRUE(graph->GetBlocks().Get(i)->IsEntryBlock());
+ }
} else {
ASSERT_NE(nullptr, graph->GetBlocks().Get(i)->GetDominator());
ASSERT_EQ(blocks[i], graph->GetBlocks().Get(i)->GetDominator()->GetBlockId());
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 7c3c2bf03d..906c8e8c76 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -369,26 +369,40 @@ void SSAChecker::VisitPhi(HPhi* phi) {
}
}
-void SSAChecker::VisitIf(HIf* instruction) {
- VisitInstruction(instruction);
- HInstruction* input = instruction->InputAt(0);
+void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_index) {
+ HInstruction* input = instruction->InputAt(input_index);
if (input->IsIntConstant()) {
- int value = input->AsIntConstant()->GetValue();
+ int32_t value = input->AsIntConstant()->GetValue();
if (value != 0 && value != 1) {
AddError(StringPrintf(
- "If instruction %d has a non-Boolean constant input "
- "whose value is: %d.",
+ "%s instruction %d has a non-Boolean constant input %d whose value is: %d.",
+ instruction->DebugName(),
instruction->GetId(),
+ static_cast<int>(input_index),
value));
}
- } else if (instruction->InputAt(0)->GetType() != Primitive::kPrimBoolean) {
+ } else if (input->GetType() == Primitive::kPrimInt && input->IsPhi()) {
+ // TODO: We need a data-flow analysis which determines if the Phi is boolean.
+ } else if (input->GetType() != Primitive::kPrimBoolean) {
AddError(StringPrintf(
- "If instruction %d has a non-Boolean input type: %s.",
+ "%s instruction %d has a non-Boolean input %d whose type is: %s.",
+ instruction->DebugName(),
instruction->GetId(),
- Primitive::PrettyDescriptor(instruction->InputAt(0)->GetType())));
+ static_cast<int>(input_index),
+ Primitive::PrettyDescriptor(input->GetType())));
}
}
+void SSAChecker::VisitIf(HIf* instruction) {
+ VisitInstruction(instruction);
+ HandleBooleanInput(instruction, 0);
+}
+
+void SSAChecker::VisitBooleanNot(HBooleanNot* instruction) {
+ VisitInstruction(instruction);
+ HandleBooleanInput(instruction, 0);
+}
+
void SSAChecker::VisitCondition(HCondition* op) {
VisitInstruction(op);
if (op->GetType() != Primitive::kPrimBoolean) {
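
HandleBooleanInput accepts three shapes of input: the constants 0 and 1, an int-typed phi (deferred to a future data-flow pass, per the TODO), and anything statically typed boolean. A condensed restatement with toy types (a sketch mirroring the checks above, not the checker's real API):

#include <cassert>

enum class Type { kBoolean, kInt, kOther };

struct ToyInput {
  bool is_int_constant;
  int constant_value;
  bool is_phi;
  Type type;
};

// Returns true exactly when HandleBooleanInput would add no error.
bool IsBooleanCompatible(const ToyInput& in) {
  if (in.is_int_constant) {
    return in.constant_value == 0 || in.constant_value == 1;
  }
  if (in.type == Type::kInt && in.is_phi) {
    return true;  // Deferred to a future data-flow analysis, per the TODO.
  }
  return in.type == Type::kBoolean;
}

int main() {
  assert(IsBooleanCompatible({true, 1, false, Type::kInt}));    // constant 1
  assert(!IsBooleanCompatible({true, 2, false, Type::kInt}));   // constant 2
  assert(IsBooleanCompatible({false, 0, true, Type::kInt}));    // int-typed phi
  assert(!IsBooleanCompatible({false, 0, false, Type::kInt}));  // plain int
  return 0;
}
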
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 89fea0a07f..4568a5ef9d 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -107,8 +107,11 @@ class SSAChecker : public GraphChecker {
void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE;
void VisitCondition(HCondition* op) OVERRIDE;
void VisitIf(HIf* instruction) OVERRIDE;
+ void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE;
void VisitConstant(HConstant* instruction) OVERRIDE;
+ void HandleBooleanInput(HInstruction* instruction, size_t input_index);
+
private:
DISALLOW_COPY_AND_ASSIGN(SSAChecker);
};
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 87a8d33193..6d2a8d77e2 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -262,10 +262,6 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
graph_->SetHasArrayAccesses(true);
}
- // Now that we have inlined the callee, we need to update the next
- // instruction id of the caller, so that new instructions added
- // after optimizations get a unique id.
- graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId());
return true;
}
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 94e27e912e..9a6062fedf 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -94,7 +94,7 @@ static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorA
Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
Location actual_loc = locations->InAt(i);
- parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+ parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
}
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d1176c460f..d3a4e6ca15 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -103,7 +103,7 @@ static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorA
Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
Location actual_loc = locations->InAt(i);
- parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+ parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
}
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index aec2d19b1d..3c7a2660db 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -128,7 +128,7 @@ static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX
Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
Location actual_loc = locations->InAt(i);
- parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+ parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
}
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index cbf94f0f81..d9a1c31c77 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -120,7 +120,7 @@ static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX
Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
Location actual_loc = locations->InAt(i);
- parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+ parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
}
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index 28c5555d57..7818c606db 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -50,12 +50,12 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- ASSERT_EQ(liveness.GetLinearOrder().Size(), number_of_blocks);
+ ASSERT_EQ(graph->GetLinearOrder().Size(), number_of_blocks);
for (size_t i = 0; i < number_of_blocks; ++i) {
- ASSERT_EQ(liveness.GetLinearOrder().Get(i)->GetBlockId(), expected_order[i]);
+ ASSERT_EQ(graph->GetLinearOrder().Get(i)->GetBlockId(), expected_order[i]);
}
}
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 61d6593f2b..52367730ed 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -69,7 +69,7 @@ TEST(LiveRangesTest, CFG1) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
@@ -117,7 +117,7 @@ TEST(LiveRangesTest, CFG2) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
@@ -168,7 +168,7 @@ TEST(LiveRangesTest, CFG3) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
// Test for the 4 constant.
@@ -247,7 +247,7 @@ TEST(LiveRangesTest, Loop1) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
// Test for the 0 constant.
@@ -327,7 +327,7 @@ TEST(LiveRangesTest, Loop2) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
// Test for the 0 constant.
@@ -405,7 +405,7 @@ TEST(LiveRangesTest, CFG4) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
// Test for the 0 constant.
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 81250ca133..8a96ee9ace 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -57,7 +57,7 @@ static void TestCode(const uint16_t* data, const char* expected) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
std::ostringstream buffer;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d8a8554610..5fca4fab22 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -51,9 +51,7 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit
for (size_t i = 0; i < blocks_.Size(); ++i) {
if (!visited.IsBitSet(i)) {
HBasicBlock* block = blocks_.Get(i);
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- RemoveAsUser(it.Current());
- }
+ DCHECK(block->GetPhis().IsEmpty()) << "Phis are not inserted at this stage";
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
RemoveAsUser(it.Current());
}
@@ -61,19 +59,17 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit
}
}
-void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const {
+void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) {
for (size_t i = 0; i < blocks_.Size(); ++i) {
if (!visited.IsBitSet(i)) {
HBasicBlock* block = blocks_.Get(i);
+ // We only need to update the successors, which might be live.
for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
block->GetSuccessors().Get(j)->RemovePredecessor(block);
}
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- block->RemovePhi(it.Current()->AsPhi(), /*ensure_safety=*/ false);
- }
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- block->RemoveInstruction(it.Current(), /*ensure_safety=*/ false);
- }
+ // Remove the block from the list of blocks, so that further analyses
+ // never see it.
+ blocks_.Put(i, nullptr);
}
}
}
@@ -258,6 +254,7 @@ void HGraph::SimplifyCFG() {
// (2): Simplify loops by having only one back edge, and one preheader.
for (size_t i = 0; i < blocks_.Size(); ++i) {
HBasicBlock* block = blocks_.Get(i);
+ if (block == nullptr) continue;
if (block->GetSuccessors().Size() > 1) {
for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
HBasicBlock* successor = block->GetSuccessors().Get(j);
@@ -274,8 +271,9 @@ void HGraph::SimplifyCFG() {
}
bool HGraph::AnalyzeNaturalLoops() const {
- for (size_t i = 0; i < blocks_.Size(); ++i) {
- HBasicBlock* block = blocks_.Get(i);
+ // Order does not matter.
+ for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
if (block->IsLoopHeader()) {
HLoopInformation* info = block->GetLoopInformation();
if (!info->Populate()) {
@@ -964,23 +962,6 @@ static void MakeRoomFor(GrowableArray<HBasicBlock*>* blocks,
}
void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
- // Walk over the entry block and:
- // - Move constants from the entry block to the outer_graph's entry block,
- // - Replace HParameterValue instructions with their real value.
- // - Remove suspend checks, that hold an environment.
- int parameter_index = 0;
- for (HInstructionIterator it(entry_block_->GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* current = it.Current();
- if (current->IsConstant()) {
- current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction());
- } else if (current->IsParameterValue()) {
- current->ReplaceWith(invoke->InputAt(parameter_index++));
- } else {
- DCHECK(current->IsGoto() || current->IsSuspendCheck());
- entry_block_->RemoveInstruction(current);
- }
- }
-
if (GetBlocks().Size() == 3) {
// Simple case of an entry block, a body block, and an exit block.
// Put the body block's instruction into `invoke`'s block.
@@ -1106,6 +1087,36 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
}
}
+ // Update the next instruction id of the outer graph, so that instructions
+ // added later get bigger ids than those in the inner graph.
+ outer_graph->SetCurrentInstructionId(GetNextInstructionId());
+
+ // Walk over the entry block and:
+ // - Move constants from the entry block to the outer_graph's entry block,
+ // - Replace HParameterValue instructions with their real value.
+ // - Remove suspend checks, which hold an environment.
+ // We must do this after the other blocks have been inlined, otherwise ids of
+ // constants could overlap with the inner graph.
+ int parameter_index = 0;
+ for (HInstructionIterator it(entry_block_->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* current = it.Current();
+ if (current->IsNullConstant()) {
+ current->ReplaceWith(outer_graph->GetNullConstant());
+ } else if (current->IsIntConstant()) {
+ current->ReplaceWith(outer_graph->GetIntConstant(current->AsIntConstant()->GetValue()));
+ } else if (current->IsLongConstant()) {
+ current->ReplaceWith(outer_graph->GetLongConstant(current->AsLongConstant()->GetValue()));
+ } else if (current->IsFloatConstant() || current->IsDoubleConstant()) {
+ // TODO: Don't duplicate floating-point constants.
+ current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction());
+ } else if (current->IsParameterValue()) {
+ current->ReplaceWith(invoke->InputAt(parameter_index++));
+ } else {
+ DCHECK(current->IsGoto() || current->IsSuspendCheck());
+ entry_block_->RemoveInstruction(current);
+ }
+ }
+
// Finally remove the invoke from the caller.
invoke->GetBlock()->RemoveInstruction(invoke);
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 5f50494482..fe47939359 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -112,6 +112,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
: arena_(arena),
blocks_(arena, kDefaultNumberOfBlocks),
reverse_post_order_(arena, kDefaultNumberOfBlocks),
+ linear_order_(arena, kDefaultNumberOfBlocks),
entry_block_(nullptr),
exit_block_(nullptr),
maximum_number_of_out_vregs_(0),
@@ -216,6 +217,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
return reverse_post_order_;
}
+ const GrowableArray<HBasicBlock*>& GetLinearOrder() const {
+ return linear_order_;
+ }
+
bool HasArrayAccesses() const {
return has_array_accesses_;
}
@@ -248,7 +253,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
ArenaBitVector* visited,
ArenaBitVector* visiting);
void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
- void RemoveDeadBlocks(const ArenaBitVector& visited) const;
+ void RemoveDeadBlocks(const ArenaBitVector& visited);
template <class InstType, typename ValueType>
InstType* CreateConstant(ValueType value, ArenaSafeMap<ValueType, InstType*>* cache);
@@ -262,6 +267,9 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
// List of blocks to perform a reverse post order tree traversal.
GrowableArray<HBasicBlock*> reverse_post_order_;
+ // List of blocks to perform a linear order tree traversal.
+ GrowableArray<HBasicBlock*> linear_order_;
+
HBasicBlock* entry_block_;
HBasicBlock* exit_block_;
@@ -293,6 +301,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_;
ArenaSafeMap<int64_t, HLongConstant*> cached_long_constants_;
+ friend class SsaLivenessAnalysis; // For the linear order.
ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
DISALLOW_COPY_AND_ASSIGN(HGraph);
};
@@ -676,6 +685,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(ArrayGet, Instruction) \
M(ArrayLength, Instruction) \
M(ArraySet, Instruction) \
+ M(BooleanNot, UnaryOperation) \
M(BoundsCheck, Instruction) \
M(BoundType, Instruction) \
M(CheckCast, Instruction) \
@@ -2643,6 +2653,33 @@ class HNot : public HUnaryOperation {
DISALLOW_COPY_AND_ASSIGN(HNot);
};
+class HBooleanNot : public HUnaryOperation {
+ public:
+ explicit HBooleanNot(HInstruction* input)
+ : HUnaryOperation(Primitive::Type::kPrimBoolean, input) {}
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+ UNUSED(other);
+ return true;
+ }
+
+ int32_t Evaluate(int32_t x) const OVERRIDE {
+ DCHECK(IsUint<1>(x));
+ return !x;
+ }
+
+ int64_t Evaluate(int64_t x ATTRIBUTE_UNUSED) const OVERRIDE {
+ LOG(FATAL) << DebugName() << " cannot be used with 64-bit values";
+ UNREACHABLE();
+ }
+
+ DECLARE_INSTRUCTION(BooleanNot);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HBooleanNot);
+};
+
class HTypeConversion : public HExpression<1> {
public:
// Instantiate a type conversion of `input` to `result_type`.
@@ -3418,8 +3455,11 @@ class HMonitorOperation : public HTemplateInstruction<1> {
class MoveOperands : public ArenaObject<kArenaAllocMisc> {
public:
- MoveOperands(Location source, Location destination, HInstruction* instruction)
- : source_(source), destination_(destination), instruction_(instruction) {}
+ MoveOperands(Location source,
+ Location destination,
+ Primitive::Type type,
+ HInstruction* instruction)
+ : source_(source), destination_(destination), type_(type), instruction_(instruction) {}
Location GetSource() const { return source_; }
Location GetDestination() const { return destination_; }
@@ -3467,11 +3507,17 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> {
return source_.IsInvalid();
}
+ bool Is64BitMove() const {
+ return Primitive::Is64BitType(type_);
+ }
+
HInstruction* GetInstruction() const { return instruction_; }
private:
Location source_;
Location destination_;
+ // The type this move is for.
+ Primitive::Type type_;
// The instruction this move is associated with. Null when this move is
// for moving an input into the expected locations of its user (including a phi user).
// This is only used in debug mode, to ensure we do not connect interval siblings
@@ -3486,7 +3532,10 @@ class HParallelMove : public HTemplateInstruction<0> {
explicit HParallelMove(ArenaAllocator* arena)
: HTemplateInstruction(SideEffects::None()), moves_(arena, kDefaultNumberOfMoves) {}
- void AddMove(Location source, Location destination, HInstruction* instruction) {
+ void AddMove(Location source,
+ Location destination,
+ Primitive::Type type,
+ HInstruction* instruction) {
DCHECK(source.IsValid());
DCHECK(destination.IsValid());
if (kIsDebugBuild) {
@@ -3512,7 +3561,7 @@ class HParallelMove : public HTemplateInstruction<0> {
<< "Same destination for two moves in a parallel move.";
}
}
- moves_.Add(MoveOperands(source, destination, instruction));
+ moves_.Add(MoveOperands(source, destination, type, instruction));
}
MoveOperands* MoveOperandsAt(size_t index) const {
@@ -3628,6 +3677,43 @@ class HPostOrderIterator : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(HPostOrderIterator);
};
+class HLinearPostOrderIterator : public ValueObject {
+ public:
+ explicit HLinearPostOrderIterator(const HGraph& graph)
+ : order_(graph.GetLinearOrder()), index_(graph.GetLinearOrder().Size()) {}
+
+ bool Done() const { return index_ == 0; }
+
+ HBasicBlock* Current() const { return order_.Get(index_ - 1); }
+
+ void Advance() {
+ --index_;
+ DCHECK_GE(index_, 0U);
+ }
+
+ private:
+ const GrowableArray<HBasicBlock*>& order_;
+ size_t index_;
+
+ DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator);
+};
+
+class HLinearOrderIterator : public ValueObject {
+ public:
+ explicit HLinearOrderIterator(const HGraph& graph)
+ : order_(graph.GetLinearOrder()), index_(0) {}
+
+ bool Done() const { return index_ == order_.Size(); }
+ HBasicBlock* Current() const { return order_.Get(index_); }
+ void Advance() { ++index_; }
+
+ private:
+ const GrowableArray<HBasicBlock*>& order_;
+ size_t index_;
+
+ DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
+};
+
// Iterator over the blocks that are part of the loop. Includes blocks part
// of an inner loop. The order in which the blocks are iterated is on their
// block id.
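
The two new iterators walk the same linear_order_ array in opposite directions: HLinearOrderIterator front-to-back, HLinearPostOrderIterator back-to-front. A toy mirror with std::vector standing in for GrowableArray:

#include <cassert>
#include <cstddef>
#include <vector>

// Forward walk, as in HLinearOrderIterator.
class ToyLinearOrderIterator {
 public:
  explicit ToyLinearOrderIterator(const std::vector<int>& order)
      : order_(order), index_(0) {}
  bool Done() const { return index_ == order_.size(); }
  int Current() const { return order_[index_]; }
  void Advance() { ++index_; }

 private:
  const std::vector<int>& order_;
  size_t index_;
};

// Backward walk, as in HLinearPostOrderIterator.
class ToyLinearPostOrderIterator {
 public:
  explicit ToyLinearPostOrderIterator(const std::vector<int>& order)
      : order_(order), index_(order.size()) {}
  bool Done() const { return index_ == 0; }
  int Current() const { return order_[index_ - 1]; }
  void Advance() { --index_; }

 private:
  const std::vector<int>& order_;
  size_t index_;
};

int main() {
  std::vector<int> order = {1, 2, 3};
  int first_forward = -1;
  int first_backward = -1;
  for (ToyLinearOrderIterator it(order); !it.Done(); it.Advance()) {
    if (first_forward < 0) first_forward = it.Current();
  }
  for (ToyLinearPostOrderIterator it(order); !it.Done(); it.Advance()) {
    if (first_backward < 0) first_backward = it.Current();
  }
  assert(first_forward == 1);
  assert(first_backward == 3);
  return 0;
}
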
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 64a066dea9..a17d6e1822 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -363,7 +363,7 @@ static void AllocateRegisters(HGraph* graph,
CodeGenerator* codegen,
PassInfoPrinter* pass_info_printer) {
PrepareForRegisterAllocation(graph).Run();
- SsaLivenessAnalysis liveness(*graph, codegen);
+ SsaLivenessAnalysis liveness(graph, codegen);
{
PassInfo pass_info(SsaLivenessAnalysis::kLivenessPassName, pass_info_printer);
liveness.Analyze();
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 4936685367..ad92ca59a1 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -189,9 +189,9 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
const MoveOperands& other_move = *moves_.Get(i);
if (other_move.Blocks(destination)) {
DCHECK(other_move.IsPending());
- if (!destination.IsPair() && other_move.GetSource().IsPair()) {
- // We swap pairs before swapping non-pairs. Go back from the
- // cycle by returning the pair that must be swapped.
+ if (!move->Is64BitMove() && other_move.Is64BitMove()) {
+ // We swap 64-bit moves before swapping 32-bit moves. Back out of the
+ // cycle by returning the move that must be swapped first.
return moves_.Get(i);
}
do_swap = true;
@@ -216,7 +216,7 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
UpdateSourceOf(moves_.Get(i), swap_destination, source);
}
}
- // If the swap was required because of a pair in the middle of a cycle,
+ // If the swap was required because of a 64-bit move in the middle of a cycle,
// we return the swapped move, so that the caller knows it needs to re-iterate
// its dependency loop.
return required_swap;
@@ -269,20 +269,6 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked,
}
-int ParallelMoveResolver::AllocateScratchRegister(int blocked,
- int register_count) {
- int scratch = -1;
- for (int reg = 0; reg < register_count; ++reg) {
- if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
- scratch = reg;
- break;
- }
- }
-
- return scratch;
-}
-
-
ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
: resolver_(resolver),
@@ -296,16 +282,6 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
}
-ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
- ParallelMoveResolver* resolver, int blocked, int number_of_registers)
- : resolver_(resolver),
- reg_(kNoRegister),
- spilled_(false) {
- // We don't want to spill a register if none are free.
- reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers);
-}
-
-
ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
if (spilled_) {
resolver_->RestoreScratch(reg_);
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 173cffc71e..95f8ad5b74 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -42,15 +42,10 @@ class ParallelMoveResolver : public ValueObject {
protected:
class ScratchRegisterScope : public ValueObject {
public:
- // Spill a scratch register if no regs are free.
ScratchRegisterScope(ParallelMoveResolver* resolver,
int blocked,
int if_scratch,
int number_of_registers);
- // Grab a scratch register only if available.
- ScratchRegisterScope(ParallelMoveResolver* resolver,
- int blocked,
- int number_of_registers);
~ScratchRegisterScope();
int GetRegister() const { return reg_; }
@@ -67,8 +62,6 @@ class ParallelMoveResolver : public ValueObject {
// Allocate a scratch register for performing a move. The method will try to use
// a register that is the destination of a move, but that move has not been emitted yet.
int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled);
- // As above, but return -1 if no free register.
- int AllocateScratchRegister(int blocked, int register_count);
// Emit a move.
virtual void EmitMove(size_t index) = 0;
@@ -92,12 +85,18 @@ class ParallelMoveResolver : public ValueObject {
// other moves to satisfy dependencies).
//
// Return whether another move in the dependency cycle needs to swap. This
- // is to handle pair swaps, where we want the pair to swap first to avoid
- // building pairs that are unexpected by the code generator. For example, if
- // we were to swap R1 with R2, we would need to update all locations using
- // R2 to R1. So a (R2,R3) pair register could become (R1,R3). We could make
- // the code generator understand such pairs, but it's easier and cleaner to
- // just not create such pairs and exchange pairs in priority.
+ // is to handle 64-bit swaps:
+ // 1) In the case of register pairs, where we want the pair to swap first to avoid
+ // building pairs that are unexpected by the code generator. For example, if
+ // we were to swap R1 with R2, we would need to update all locations using
+ // R2 to R1. So a (R2,R3) pair register could become (R1,R3). We could make
+ // the code generator understand such pairs, but it's easier and cleaner to
+ // just not create such pairs and exchange pairs in priority.
+ // 2) Even when the architecture does not have pairs, we must handle 64-bit swaps
+ // first. Consider the case: (R0->R1) (R1->S) (S->R0), where 'S' is a single
+ // stack slot. If we end up swapping S and R0, S will only contain the low bits
+ // of R0. If R0->R1 is a 64-bit move, R1 will therefore not contain
+ // the right value.
MoveOperands* PerformMove(size_t index);
DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver);
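
To make case (2) concrete, here is an illustrative trace (hand-worked, matching the CyclesWith64BitsMoves test below) for the cycle in the comment, where R0->R1 is a 64-bit move and S is a 32-bit stack slot:

  // moves: (R0 -> R1, 64-bit) (R1 -> S, 32-bit) (S -> R0, 32-bit)
  //
  // Bad order: swapping S <-> R0 first leaves S holding only the low
  // 32 bits of R0, so the pending 64-bit R0 -> R1 move would then read
  // a clobbered source.
  //
  // Order enforced by PerformMove returning the 64-bit move:
  //   swap(R0, R1)   // 64-bit exchange: the full value reaches R1 intact
  //   swap(S, R0)    // 32-bit exchange: R0's 64-bit use is already done
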
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 5c502f7ef4..95cca5172b 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -87,6 +87,7 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator,
moves->AddMove(
Location::RegisterLocation(operands[i][0]),
Location::RegisterLocation(operands[i][1]),
+ Primitive::kPrimInt,
nullptr);
}
return moves;
@@ -145,10 +146,12 @@ TEST(ParallelMoveTest, ConstantLast) {
moves->AddMove(
Location::ConstantLocation(new (&allocator) HIntConstant(0)),
Location::RegisterLocation(0),
+ Primitive::kPrimInt,
nullptr);
moves->AddMove(
Location::RegisterLocation(1),
Location::RegisterLocation(2),
+ Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str());
@@ -164,10 +167,12 @@ TEST(ParallelMoveTest, Pairs) {
moves->AddMove(
Location::RegisterLocation(2),
Location::RegisterLocation(4),
+ Primitive::kPrimInt,
nullptr);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str());
@@ -179,10 +184,12 @@ TEST(ParallelMoveTest, Pairs) {
moves->AddMove(
Location::RegisterPairLocation(0, 1),
Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
nullptr);
moves->AddMove(
Location::RegisterLocation(2),
Location::RegisterLocation(4),
+ Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str());
@@ -194,10 +201,12 @@ TEST(ParallelMoveTest, Pairs) {
moves->AddMove(
Location::RegisterPairLocation(0, 1),
Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
nullptr);
moves->AddMove(
Location::RegisterLocation(2),
Location::RegisterLocation(0),
+ Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
@@ -208,14 +217,17 @@ TEST(ParallelMoveTest, Pairs) {
moves->AddMove(
Location::RegisterLocation(2),
Location::RegisterLocation(7),
+ Primitive::kPrimInt,
nullptr);
moves->AddMove(
Location::RegisterLocation(7),
Location::RegisterLocation(1),
+ Primitive::kPrimInt,
nullptr);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
@@ -226,14 +238,17 @@ TEST(ParallelMoveTest, Pairs) {
moves->AddMove(
Location::RegisterLocation(2),
Location::RegisterLocation(7),
+ Primitive::kPrimInt,
nullptr);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
nullptr);
moves->AddMove(
Location::RegisterLocation(7),
Location::RegisterLocation(1),
+ Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
@@ -244,14 +259,17 @@ TEST(ParallelMoveTest, Pairs) {
moves->AddMove(
Location::RegisterPairLocation(0, 1),
Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
nullptr);
moves->AddMove(
Location::RegisterLocation(2),
Location::RegisterLocation(7),
+ Primitive::kPrimInt,
nullptr);
moves->AddMove(
Location::RegisterLocation(7),
Location::RegisterLocation(1),
+ Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
@@ -262,10 +280,12 @@ TEST(ParallelMoveTest, Pairs) {
moves->AddMove(
Location::RegisterPairLocation(0, 1),
Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
nullptr);
moves->AddMove(
Location::RegisterPairLocation(2, 3),
Location::RegisterPairLocation(0, 1),
+ Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str());
@@ -276,10 +296,12 @@ TEST(ParallelMoveTest, Pairs) {
moves->AddMove(
Location::RegisterPairLocation(2, 3),
Location::RegisterPairLocation(0, 1),
+ Primitive::kPrimLong,
nullptr);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
@@ -292,18 +314,71 @@ TEST(ParallelMoveTest, Pairs) {
moves->AddMove(
Location::RegisterLocation(10),
Location::RegisterLocation(5),
+ Primitive::kPrimInt,
nullptr);
moves->AddMove(
Location::RegisterPairLocation(4, 5),
Location::DoubleStackSlot(32),
+ Primitive::kPrimLong,
nullptr);
moves->AddMove(
Location::DoubleStackSlot(32),
Location::RegisterPairLocation(10, 11),
+ Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str());
}
}
+// Test that we perform 64-bit moves before 32-bit moves.
+TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterLocation(0),
+ Location::RegisterLocation(1),
+ Primitive::kPrimLong,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(1),
+ Location::StackSlot(48),
+ Primitive::kPrimInt,
+ nullptr);
+ moves->AddMove(
+ Location::StackSlot(48),
+ Location::RegisterLocation(0),
+ Primitive::kPrimInt,
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str());
+ }
+
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(2, 3),
+ Location::DoubleStackSlot(32),
+ Primitive::kPrimLong,
+ nullptr);
+ moves->AddMove(
+ Location::DoubleStackSlot(32),
+ Location::RegisterPairLocation(0, 1),
+ Primitive::kPrimLong,
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str());
+ }
+}
+
} // namespace art
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
index c20c8a172d..af93438c9a 100644
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ b/compiler/optimizing/primitive_type_propagation.cc
@@ -65,6 +65,10 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) {
if (equivalent->IsPhi()) {
equivalent->AsPhi()->SetLive();
AddToWorklist(equivalent->AsPhi());
+ } else if (equivalent == input) {
+ // The input has changed its type. It can be an input of other phis,
+ // so we need to add its phi users to the worklist.
+ AddDependentInstructionsToWorklist(equivalent);
}
}
}
@@ -117,10 +121,10 @@ void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) {
worklist_.Add(instruction);
}
-void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) {
+void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->GetUser()->AsPhi();
- if (phi != nullptr && phi->IsLive()) {
+ if (phi != nullptr && phi->IsLive() && phi->GetType() != instruction->GetType()) {
AddToWorklist(phi);
}
}
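
For context, a sketch of the fixed-point loop this feeds (assumed shape of ProcessWorklist, shown only to place the new re-enqueueing step): a phi whose type changes pushes its dependent phis back onto the worklist until no type changes remain.

  void PrimitiveTypePropagation::ProcessWorklist() {
    while (!worklist_.IsEmpty()) {
      HPhi* phi = worklist_.Pop();
      if (UpdateType(phi)) {
        // The phi's type changed, so its phi users may be mistyped too.
        AddDependentInstructionsToWorklist(phi);
      }
    }
  }
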
diff --git a/compiler/optimizing/primitive_type_propagation.h b/compiler/optimizing/primitive_type_propagation.h
index 1374cbb6ab..6d370ed2ab 100644
--- a/compiler/optimizing/primitive_type_propagation.h
+++ b/compiler/optimizing/primitive_type_propagation.h
@@ -33,7 +33,7 @@ class PrimitiveTypePropagation : public ValueObject {
void VisitBasicBlock(HBasicBlock* block);
void ProcessWorklist();
void AddToWorklist(HPhi* phi);
- void AddDependentInstructionsToWorklist(HPhi* phi);
+ void AddDependentInstructionsToWorklist(HInstruction* instruction);
bool UpdateType(HPhi* phi);
HGraph* const graph_;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 4bca43499f..a02b1dacf7 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -103,7 +103,7 @@ void RegisterAllocator::AllocateRegisters() {
// Since only parallel moves have been inserted during the register allocation,
// these checks are mostly for making sure these moves have been added correctly.
size_t current_liveness = 0;
- for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) {
+ for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
HInstruction* instruction = inst_it.Current();
@@ -147,7 +147,7 @@ void RegisterAllocator::BlockRegister(Location location,
void RegisterAllocator::AllocateRegistersInternal() {
// Iterate post-order, to ensure the list is sorted, and the last added interval
// is the one with the lowest start position.
- for (HLinearPostOrderIterator it(liveness_); !it.Done(); it.Advance()) {
+ for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
back_it.Advance()) {
@@ -224,7 +224,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
temp_intervals_.Add(interval);
interval->AddTempUse(instruction, i);
if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
- interval->AddHighInterval(true);
+ interval->AddHighInterval(/* is_temp */ true);
LiveInterval* high = interval->GetHighInterval();
temp_intervals_.Add(high);
unhandled_fp_intervals_.Add(high);
@@ -310,6 +310,29 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
current->AddHighInterval();
}
+ for (size_t safepoint_index = safepoints_.Size(); safepoint_index > 0; --safepoint_index) {
+ HInstruction* safepoint = safepoints_.Get(safepoint_index - 1);
+ size_t safepoint_position = safepoint->GetLifetimePosition();
+
+ // Test that safepoints are ordered in the optimal way.
+ DCHECK(safepoint_index == safepoints_.Size()
+ || safepoints_.Get(safepoint_index)->GetLifetimePosition() < safepoint_position);
+
+ if (safepoint_position == current->GetStart()) {
+ // The safepoint is for this instruction, so the location of the instruction
+ // does not need to be saved.
+ DCHECK_EQ(safepoint_index, safepoints_.Size());
+ DCHECK_EQ(safepoint, instruction);
+ continue;
+ } else if (current->IsDeadAt(safepoint_position)) {
+ break;
+ } else if (!current->Covers(safepoint_position)) {
+ // Hole in the interval.
+ continue;
+ }
+ current->AddSafepoint(safepoint);
+ }
+
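Recording safepoints on the interval while it is being built means ConnectSiblings (further down) no longer rescans the whole safepoint array per sibling; it just walks the cached list. A sketch of the consumer side, using the API added in ssa_liveness_analysis.h:

  for (SafepointPosition* safepoint = current->GetFirstSafepoint();
       safepoint != nullptr;
       safepoint = safepoint->GetNext()) {
    DCHECK(current->Covers(safepoint->GetPosition()));
    LocationSummary* locations = safepoint->GetLocations();
    // ... record stack bits / register masks for `current` here ...
  }
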
// Some instructions define their output in fixed register/stack slot. We need
// to ensure we know these locations before doing register allocation. For a
// given register, we create an interval that covers these locations. The register
@@ -1204,10 +1227,10 @@ void RegisterAllocator::AddMove(HParallelMove* move,
&& codegen_->ShouldSplitLongMoves()
// The parallel move resolver knows how to deal with long constants.
&& !source.IsConstant()) {
- move->AddMove(source.ToLow(), destination.ToLow(), instruction);
- move->AddMove(source.ToHigh(), destination.ToHigh(), nullptr);
+ move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
+ move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
} else {
- move->AddMove(source, destination, instruction);
+ move->AddMove(source, destination, type, instruction);
}
}
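
For illustration (hand-worked from the branch above, assuming a target whose ShouldSplitLongMoves() returns true, which x86 does in this codebase):

  // A kPrimLong move pair(0,1) -> pair(2,3) is recorded as two
  // independent 32-bit moves:
  //   (reg 0 -> reg 2, kPrimInt, instruction)   // low word
  //   (reg 1 -> reg 3, kPrimInt, nullptr)       // high word
  // Long constants are deliberately left whole, since the parallel move
  // resolver knows how to materialize them itself.
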
@@ -1399,7 +1422,6 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
: Location::StackSlot(interval->GetParent()->GetSpillSlot()));
}
UsePosition* use = current->GetFirstUse();
- size_t safepoint_index = safepoints_.Size();
// Walk over all siblings, updating locations of use positions, and
// connecting them when they are adjacent.
@@ -1450,28 +1472,12 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
}
- // At each safepoint, we record stack and register information.
- // We iterate backwards to test safepoints in ascending order of positions,
- // which is what LiveInterval::Covers is optimized for.
- for (; safepoint_index > 0; --safepoint_index) {
- HInstruction* safepoint = safepoints_.Get(safepoint_index - 1);
- size_t position = safepoint->GetLifetimePosition();
-
- // Test that safepoints are ordered in the optimal way.
- DCHECK(safepoint_index == safepoints_.Size()
- || safepoints_.Get(safepoint_index)->GetLifetimePosition() <= position);
-
- if (current->IsDeadAt(position)) {
- break;
- } else if (!current->Covers(position)) {
- continue;
- } else if (interval->GetStart() == position) {
- // The safepoint is for this instruction, so the location of the instruction
- // does not need to be saved.
- continue;
- }
+ for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
+ safepoint_position != nullptr;
+ safepoint_position = safepoint_position->GetNext()) {
+ DCHECK(current->Covers(safepoint_position->GetPosition()));
- LocationSummary* locations = safepoint->GetLocations();
+ LocationSummary* locations = safepoint_position->GetLocations();
if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
}
@@ -1589,7 +1595,7 @@ void RegisterAllocator::Resolve() {
maximum_number_of_live_core_registers_,
maximum_number_of_live_fp_registers_,
reserved_out_slots_,
- liveness_.GetLinearOrder());
+ codegen_->GetGraph()->GetLinearOrder());
// Adjust the Out Location of instructions.
// TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
@@ -1669,7 +1675,7 @@ void RegisterAllocator::Resolve() {
}
// Resolve non-linear control flow across branches. Order does not matter.
- for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) {
+ for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
BitVector* live = liveness_.GetLiveInSet(*block);
for (uint32_t idx : live->Indexes()) {
@@ -1682,7 +1688,7 @@ void RegisterAllocator::Resolve() {
}
// Resolve phi inputs. Order does not matter.
- for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) {
+ for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
HBasicBlock* current = it.Current();
for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
HInstruction* phi = inst_it.Current();
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 3951439881..c307d98555 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -46,7 +46,7 @@ static bool Check(const uint16_t* data) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
register_allocator.AllocateRegisters();
@@ -306,7 +306,7 @@ TEST(RegisterAllocatorTest, Loop3) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
register_allocator.AllocateRegisters();
@@ -340,7 +340,7 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
HXor* first_xor = graph->GetBlocks().Get(1)->GetFirstInstruction()->AsXor();
@@ -395,7 +395,7 @@ TEST(RegisterAllocatorTest, DeadPhi) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
register_allocator.AllocateRegisters();
@@ -419,7 +419,7 @@ TEST(RegisterAllocatorTest, FreeUntil) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -523,7 +523,7 @@ TEST(RegisterAllocatorTest, PhiHint) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
// Check that the register allocator is deterministic.
@@ -540,7 +540,7 @@ TEST(RegisterAllocatorTest, PhiHint) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
// Set the phi to a specific register, and check that the inputs get allocated
@@ -559,7 +559,7 @@ TEST(RegisterAllocatorTest, PhiHint) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
// Set input1 to a specific register, and check that the phi and other input get allocated
@@ -578,7 +578,7 @@ TEST(RegisterAllocatorTest, PhiHint) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
// Set input2 to a specific register, and check that the phi and other input get allocated
@@ -632,7 +632,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -647,7 +647,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
// Check that the field gets put in the register expected by its use.
@@ -699,7 +699,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -715,7 +715,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
// check that both adds get the same register.
@@ -766,7 +766,7 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -856,7 +856,7 @@ TEST(RegisterAllocatorTest, SpillInactive) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(*graph, &codegen);
+ SsaLivenessAnalysis liveness(graph, &codegen);
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
register_allocator.unhandled_core_intervals_.Add(fourth);
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 95da6ef551..302df2a1d2 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -69,9 +69,9 @@ void SsaLivenessAnalysis::LinearizeGraph() {
// current reverse post order in the graph, but it would require making
// order queries to a GrowableArray, which is not the best data structure
// for it.
- GrowableArray<uint32_t> forward_predecessors(graph_.GetArena(), graph_.GetBlocks().Size());
- forward_predecessors.SetSize(graph_.GetBlocks().Size());
- for (HReversePostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+ GrowableArray<uint32_t> forward_predecessors(graph_->GetArena(), graph_->GetBlocks().Size());
+ forward_predecessors.SetSize(graph_->GetBlocks().Size());
+ for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
size_t number_of_forward_predecessors = block->GetPredecessors().Size();
if (block->IsLoopHeader()) {
@@ -86,11 +86,11 @@ void SsaLivenessAnalysis::LinearizeGraph() {
// iterate over the successors. When all non-back edge predecessors of a
// successor block are visited, the successor block is added in the worklist
// following an order that satisfies the requirements to build our linear graph.
- GrowableArray<HBasicBlock*> worklist(graph_.GetArena(), 1);
- worklist.Add(graph_.GetEntryBlock());
+ GrowableArray<HBasicBlock*> worklist(graph_->GetArena(), 1);
+ worklist.Add(graph_->GetEntryBlock());
do {
HBasicBlock* current = worklist.Pop();
- linear_order_.Add(current);
+ graph_->linear_order_.Add(current);
for (size_t i = 0, e = current->GetSuccessors().Size(); i < e; ++i) {
HBasicBlock* successor = current->GetSuccessors().Get(i);
int block_id = successor->GetBlockId();
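
A condensed restatement of the linearization step (pseudocode; the loop-placement details are elided by the hunk above):

  // forward_predecessors[b] = #predecessors of b, with back edges
  //                           pre-subtracted for loop headers
  // worklist = { entry }
  // while worklist not empty:
  //   current = worklist.Pop(); append current to graph->linear_order_
  //   for each successor s of current:
  //     if --forward_predecessors[s] == 0:
  //       add s to the worklist (with loop-aware placement, so that the
  //       blocks of a loop stay contiguous in the linear order)
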
@@ -115,7 +115,7 @@ void SsaLivenessAnalysis::NumberInstructions() {
// to differentiate between the start and end of an instruction. Adding 2 to
// the lifetime position for each instruction ensures the start of an
// instruction is different from the end of the previous instruction.
- for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) {
+ for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
block->SetLifetimeStart(lifetime_position);
@@ -127,7 +127,7 @@ void SsaLivenessAnalysis::NumberInstructions() {
instructions_from_ssa_index_.Add(current);
current->SetSsaIndex(ssa_index++);
current->SetLiveInterval(
- LiveInterval::MakeInterval(graph_.GetArena(), current->GetType(), current));
+ LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current));
}
current->SetLifetimePosition(lifetime_position);
}
@@ -145,7 +145,7 @@ void SsaLivenessAnalysis::NumberInstructions() {
instructions_from_ssa_index_.Add(current);
current->SetSsaIndex(ssa_index++);
current->SetLiveInterval(
- LiveInterval::MakeInterval(graph_.GetArena(), current->GetType(), current));
+ LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current));
}
instructions_from_lifetime_position_.Add(current);
current->SetLifetimePosition(lifetime_position);
@@ -158,11 +158,11 @@ void SsaLivenessAnalysis::NumberInstructions() {
}
void SsaLivenessAnalysis::ComputeLiveness() {
- for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) {
+ for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
block_infos_.Put(
block->GetBlockId(),
- new (graph_.GetArena()) BlockInfo(graph_.GetArena(), *block, number_of_ssa_values_));
+ new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_));
}
// Compute the live ranges, as well as the initial live_in, live_out, and kill sets.
@@ -179,7 +179,7 @@ void SsaLivenessAnalysis::ComputeLiveness() {
void SsaLivenessAnalysis::ComputeLiveRanges() {
// Do a post order visit, adding inputs of instructions live in the block where
// that instruction is defined, and killing instructions that are being visited.
- for (HLinearPostOrderIterator it(*this); !it.Done(); it.Advance()) {
+ for (HLinearPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
BitVector* kill = GetKillSet(*block);
@@ -281,7 +281,7 @@ void SsaLivenessAnalysis::ComputeLiveInAndLiveOutSets() {
do {
changed = false;
- for (HPostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+ for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
const HBasicBlock& block = *it.Current();
// The live_in set depends on the kill set (which does not
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index d2da84c0c0..98f98a29d1 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -149,6 +149,39 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
DISALLOW_COPY_AND_ASSIGN(UsePosition);
};
+class SafepointPosition : public ArenaObject<kArenaAllocMisc> {
+ public:
+ explicit SafepointPosition(HInstruction* instruction)
+ : instruction_(instruction),
+ next_(nullptr) {}
+
+ void SetNext(SafepointPosition* next) {
+ next_ = next;
+ }
+
+ size_t GetPosition() const {
+ return instruction_->GetLifetimePosition();
+ }
+
+ SafepointPosition* GetNext() const {
+ return next_;
+ }
+
+ LocationSummary* GetLocations() const {
+ return instruction_->GetLocations();
+ }
+
+ HInstruction* GetInstruction() const {
+ return instruction_;
+ }
+
+ private:
+ HInstruction* const instruction_;
+ SafepointPosition* next_;
+
+ DISALLOW_COPY_AND_ASSIGN(SafepointPosition);
+};
+
/**
* An interval is a list of disjoint live ranges where an instruction is live.
* Each instruction that has uses gets an interval.
@@ -459,6 +492,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
return defined_by_;
}
+ SafepointPosition* FindSafepointJustBefore(size_t position) const {
+ for (SafepointPosition* safepoint = first_safepoint_, *previous = nullptr;
+ safepoint != nullptr;
+ previous = safepoint, safepoint = safepoint->GetNext()) {
+ if (safepoint->GetPosition() >= position) return previous;
+ }
+ return last_safepoint_;
+ }
+
/**
* Split this interval at `position`. This interval is changed to:
* [start ... position).
@@ -477,6 +519,19 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
}
LiveInterval* new_interval = new (allocator_) LiveInterval(allocator_, type_);
+ SafepointPosition* new_last_safepoint = FindSafepointJustBefore(position);
+ if (new_last_safepoint == nullptr) {
+ new_interval->first_safepoint_ = first_safepoint_;
+ new_interval->last_safepoint_ = last_safepoint_;
+ first_safepoint_ = last_safepoint_ = nullptr;
+ } else if (last_safepoint_ != new_last_safepoint) {
+ new_interval->last_safepoint_ = last_safepoint_;
+ new_interval->first_safepoint_ = new_last_safepoint->GetNext();
+ DCHECK(new_interval->first_safepoint_ != nullptr);
+ last_safepoint_ = new_last_safepoint;
+ last_safepoint_->SetNext(nullptr);
+ }
+
new_interval->next_sibling_ = next_sibling_;
next_sibling_ = new_interval;
new_interval->parent_ = parent_;
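
A worked example of the list hand-off above (illustrative lifetime positions): with safepoints chained 10 -> 20 -> 30 and a split at position 25, FindSafepointJustBefore(25) returns the node at 20, so:

  // this interval keeps:   10 -> 20   (last_safepoint_ = node at 20)
  // new_interval receives: 30         (first == last == node at 30)
  // last_safepoint_->SetNext(nullptr) cuts the old 20 -> 30 link.
  //
  // If instead position <= 10, new_last_safepoint is null and the whole
  // chain migrates to new_interval; if position > 30, nothing moves.
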
@@ -703,6 +758,21 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
UNREACHABLE();
}
+ void AddSafepoint(HInstruction* instruction) {
+ SafepointPosition* safepoint = new (allocator_) SafepointPosition(instruction);
+ if (first_safepoint_ == nullptr) {
+ first_safepoint_ = last_safepoint_ = safepoint;
+ } else {
+ DCHECK_LT(last_safepoint_->GetPosition(), safepoint->GetPosition());
+ last_safepoint_->SetNext(safepoint);
+ last_safepoint_ = safepoint;
+ }
+ }
+
+ SafepointPosition* GetFirstSafepoint() const {
+ return first_safepoint_;
+ }
+
private:
LiveInterval(ArenaAllocator* allocator,
Primitive::Type type,
@@ -715,6 +785,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
: allocator_(allocator),
first_range_(nullptr),
last_range_(nullptr),
+ first_safepoint_(nullptr),
+ last_safepoint_(nullptr),
last_visited_range_(nullptr),
first_use_(nullptr),
type_(type),
@@ -771,6 +843,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
LiveRange* first_range_;
LiveRange* last_range_;
+ // Safepoints where this interval is live.
+ SafepointPosition* first_safepoint_;
+ SafepointPosition* last_safepoint_;
+
// Last visited range. This is a range search optimization leveraging the fact
// that the register allocator does a linear scan through the intervals.
LiveRange* last_visited_range_;
@@ -838,15 +914,14 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
*/
class SsaLivenessAnalysis : public ValueObject {
public:
- SsaLivenessAnalysis(const HGraph& graph, CodeGenerator* codegen)
+ SsaLivenessAnalysis(HGraph* graph, CodeGenerator* codegen)
: graph_(graph),
codegen_(codegen),
- linear_order_(graph.GetArena(), graph.GetBlocks().Size()),
- block_infos_(graph.GetArena(), graph.GetBlocks().Size()),
- instructions_from_ssa_index_(graph.GetArena(), 0),
- instructions_from_lifetime_position_(graph.GetArena(), 0),
+ block_infos_(graph->GetArena(), graph->GetBlocks().Size()),
+ instructions_from_ssa_index_(graph->GetArena(), 0),
+ instructions_from_lifetime_position_(graph->GetArena(), 0),
number_of_ssa_values_(0) {
- block_infos_.SetSize(graph.GetBlocks().Size());
+ block_infos_.SetSize(graph->GetBlocks().Size());
}
void Analyze();
@@ -863,10 +938,6 @@ class SsaLivenessAnalysis : public ValueObject {
return &block_infos_.Get(block.GetBlockId())->kill_;
}
- const GrowableArray<HBasicBlock*>& GetLinearOrder() const {
- return linear_order_;
- }
-
HInstruction* GetInstructionFromSsaIndex(size_t index) const {
return instructions_from_ssa_index_.Get(index);
}
@@ -934,9 +1005,8 @@ class SsaLivenessAnalysis : public ValueObject {
return instruction->GetType() == Primitive::kPrimNot;
}
- const HGraph& graph_;
+ HGraph* const graph_;
CodeGenerator* const codegen_;
- GrowableArray<HBasicBlock*> linear_order_;
GrowableArray<BlockInfo*> block_infos_;
// Temporary array used when computing live_in, live_out, and kill sets.
@@ -949,43 +1019,6 @@ class SsaLivenessAnalysis : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis);
};
-class HLinearPostOrderIterator : public ValueObject {
- public:
- explicit HLinearPostOrderIterator(const SsaLivenessAnalysis& liveness)
- : order_(liveness.GetLinearOrder()), index_(liveness.GetLinearOrder().Size()) {}
-
- bool Done() const { return index_ == 0; }
-
- HBasicBlock* Current() const { return order_.Get(index_ -1); }
-
- void Advance() {
- --index_;
- DCHECK_GE(index_, 0U);
- }
-
- private:
- const GrowableArray<HBasicBlock*>& order_;
- size_t index_;
-
- DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator);
-};
-
-class HLinearOrderIterator : public ValueObject {
- public:
- explicit HLinearOrderIterator(const SsaLivenessAnalysis& liveness)
- : order_(liveness.GetLinearOrder()), index_(0) {}
-
- bool Done() const { return index_ == order_.Size(); }
- HBasicBlock* Current() const { return order_.Get(index_); }
- void Advance() { ++index_; }
-
- private:
- const GrowableArray<HBasicBlock*>& order_;
- size_t index_;
-
- DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
-};
-
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
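
Net effect at call sites (a usage sketch consistent with the constructor change above): the analysis now takes the graph by pointer and publishes the linear order on HGraph itself.

  HGraph* graph = ...;  // built and converted to SSA elsewhere
  SsaLivenessAnalysis liveness(graph, codegen);
  liveness.Analyze();

  // Linear-order walks now go through the graph, not the analysis:
  for (HLinearOrderIterator it(*graph); !it.Done(); it.Advance()) {
    HBasicBlock* block = it.Current();
    // ...
  }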