-rw-r--r--  compiler/optimizing/code_generator_x86.cc            | 18
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc         | 20
-rw-r--r--  compiler/optimizing/graph_visualizer.cc              |  6
-rw-r--r--  compiler/optimizing/locations.h                      | 20
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc  |  5
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc         |  3
6 files changed, 53 insertions(+), 19 deletions(-)
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 958c1a6fdb..4db4796985 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -967,7 +967,7 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
if (GetGraph()->HasSIMD()) {
- __ movupd(Address(ESP, stack_index), XmmRegister(reg_id));
+ __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
} else {
__ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
}
@@ -976,7 +976,7 @@ size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t
size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
if (GetGraph()->HasSIMD()) {
- __ movupd(XmmRegister(reg_id), Address(ESP, stack_index));
+ __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
} else {
__ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
}
@@ -5713,9 +5713,8 @@ void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
// In suspend check slow path, usually there are no caller-save registers at all.
// If SIMD instructions are present, however, we force spilling all live SIMD
// registers in full width (since the runtime only saves/restores lower part).
- locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD()
- ? RegisterSet::AllFpu()
- : RegisterSet::Empty());
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5818,9 +5817,11 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
__ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
} else if (destination.IsStackSlot()) {
__ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
- } else {
- DCHECK(destination.IsDoubleStackSlot());
+ } else if (destination.IsDoubleStackSlot()) {
__ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
+ } else {
+ DCHECK(destination.IsSIMDStackSlot());
+ __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
}
} else if (source.IsStackSlot()) {
if (destination.IsRegister()) {
@@ -5842,6 +5843,9 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
DCHECK(destination.IsDoubleStackSlot()) << destination;
MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex());
}
+ } else if (source.IsSIMDStackSlot()) {
+ DCHECK(destination.IsFpuRegister());
+ __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
if (constant->IsIntConstant() || constant->IsNullConstant()) {
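
The movsd-vs-movups distinction above is the heart of the change: a 64-bit move spills only the low lane of an XMM register, while a 128-bit move preserves the whole vector, which is why the suspend-check comment forces full-width spills when SIMD is live. The movupd-to-movups switch itself is behavior-neutral for raw data movement; movups simply encodes one byte shorter (no 66 operand-size prefix). A minimal standalone sketch, not ART code, assuming an SSE2-capable x86 host (buffer names are illustrative):

// Standalone sketch: contrasts a 64-bit ("movsd"-style) spill with a
// 128-bit ("movups"-style) spill of the same XMM register.
#include <emmintrin.h>
#include <cstdio>

int main() {
  double lanes[2] = {1.0, 2.0};
  __m128d v = _mm_loadu_pd(lanes);  // both 64-bit lanes of the vector are live

  double slot64;                    // a double-width (64-bit) spill slot
  _mm_storel_pd(&slot64, v);        // movsd-style save: low lane only

  double slot128[2];                // a SIMD (128-bit) spill slot
  _mm_storeu_pd(slot128, v);        // movups-style save: full register

  // Only the full-width slot can restore the high lane after the spill.
  std::printf("64-bit slot: lo=%.1f (hi lost)\n", slot64);
  std::printf("128-bit slot: lo=%.1f hi=%.1f\n", slot128[0], slot128[1]);
  return 0;
}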
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index c106d9b06e..2ffc398287 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1162,7 +1162,7 @@ size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
if (GetGraph()->HasSIMD()) {
- __ movupd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
} else {
__ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
}
@@ -1171,7 +1171,7 @@ size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id
size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
if (GetGraph()->HasSIMD()) {
- __ movupd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
} else {
__ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
}
@@ -5166,9 +5166,8 @@ void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
// In suspend check slow path, usually there are no caller-save registers at all.
// If SIMD instructions are present, however, we force spilling all live SIMD
// registers in full width (since the runtime only saves/restores lower part).
- locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD()
- ? RegisterSet::AllFpu()
- : RegisterSet::Empty());
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5257,6 +5256,10 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
+ } else if (source.IsSIMDStackSlot()) {
+ DCHECK(destination.IsFpuRegister());
+ __ movups(destination.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), source.GetStackIndex()));
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -5307,10 +5310,13 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
} else if (destination.IsStackSlot()) {
__ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
source.AsFpuRegister<XmmRegister>());
- } else {
- DCHECK(destination.IsDoubleStackSlot()) << destination;
+ } else if (destination.IsDoubleStackSlot()) {
__ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
source.AsFpuRegister<XmmRegister>());
+ } else {
+ DCHECK(destination.IsSIMDStackSlot());
+ __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
+ source.AsFpuRegister<XmmRegister>());
}
}
}
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 2bf5c53e17..0dfae11465 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -322,9 +322,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
codegen_.DumpCoreRegister(stream, location.high());
} else if (location.IsUnallocated()) {
stream << "unallocated";
- } else {
- DCHECK(location.IsDoubleStackSlot());
+ } else if (location.IsDoubleStackSlot()) {
stream << "2x" << location.GetStackIndex() << "(sp)";
+ } else {
+ DCHECK(location.IsSIMDStackSlot());
+ stream << "4x" << location.GetStackIndex() << "(sp)";
}
}
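
The visualizer hunk extends an existing convention: a multi-slot stack location prints as "<slots>x<index>(sp)", so a double stack slot at index 16 dumps as "2x16(sp)" and the new SIMD slot as "4x16(sp)". A tiny illustrative sketch; DumpStackLocation is a hypothetical helper, not the visualizer's real API:

#include <iostream>

// Hypothetical helper mirroring the dump convention above.
void DumpStackLocation(std::ostream& stream, int slots, int stack_index) {
  if (slots > 1) stream << slots << "x";
  stream << stack_index << "(sp)";
}

int main() {
  DumpStackLocation(std::cout, 2, 16);  // prints "2x16(sp)"
  std::cout << " ";
  DumpStackLocation(std::cout, 4, 16);  // prints "4x16(sp)"
  std::cout << "\n";
  return 0;
}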
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index d391f6913c..6f0dbce2df 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -69,11 +69,13 @@ class Location : public ValueObject {
// We do not use the value 9 because it conflicts with kLocationConstantMask.
kDoNotUse9 = 9,
+ kSIMDStackSlot = 10,  // 128bit stack slot. TODO: generalize with encoded #bytes?
+
// Unallocated location represents a location that is not fixed and can be
// allocated by a register allocator. Each unallocated location has
// a policy that specifies what kind of location is suitable. Payload
// contains register allocation policy.
- kUnallocated = 10,
+ kUnallocated = 11,
};
Location() : ValueObject(), value_(kInvalid) {
@@ -82,6 +84,7 @@ class Location : public ValueObject {
static_assert((kUnallocated & kLocationConstantMask) != kConstant, "TagError");
static_assert((kStackSlot & kLocationConstantMask) != kConstant, "TagError");
static_assert((kDoubleStackSlot & kLocationConstantMask) != kConstant, "TagError");
+ static_assert((kSIMDStackSlot & kLocationConstantMask) != kConstant, "TagError");
static_assert((kRegister & kLocationConstantMask) != kConstant, "TagError");
static_assert((kFpuRegister & kLocationConstantMask) != kConstant, "TagError");
static_assert((kRegisterPair & kLocationConstantMask) != kConstant, "TagError");
@@ -266,8 +269,20 @@ class Location : public ValueObject {
return GetKind() == kDoubleStackSlot;
}
+ static Location SIMDStackSlot(intptr_t stack_index) {
+ uintptr_t payload = EncodeStackIndex(stack_index);
+ Location loc(kSIMDStackSlot, payload);
+ // Ensure that sign is preserved.
+ DCHECK_EQ(loc.GetStackIndex(), stack_index);
+ return loc;
+ }
+
+ bool IsSIMDStackSlot() const {
+ return GetKind() == kSIMDStackSlot;
+ }
+
intptr_t GetStackIndex() const {
- DCHECK(IsStackSlot() || IsDoubleStackSlot());
+ DCHECK(IsStackSlot() || IsDoubleStackSlot() || IsSIMDStackSlot());
// Decode stack index manually to preserve sign.
return GetPayload() - kStackIndexBias;
}
@@ -315,6 +330,7 @@ class Location : public ValueObject {
case kRegister: return "R";
case kStackSlot: return "S";
case kDoubleStackSlot: return "DS";
+ case kSIMDStackSlot: return "SIMD";
case kUnallocated: return "U";
case kConstant: return "C";
case kFpuRegister: return "F";
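
SIMDStackSlot() above reuses the biased stack-index encoding shared by the other stack-slot kinds: a signed index becomes an unsigned payload by adding a bias, and GetStackIndex() subtracts the bias to recover the sign, which is exactly what the "sign is preserved" DCHECK checks. A self-contained sketch of the scheme; the bias width here is illustrative, not ART's actual constant:

#include <stdint.h>
#include <cassert>
#include <initializer_list>

// Illustrative 28-bit payload: indices in [-2^27, 2^27) round-trip.
constexpr intptr_t kStackIndexBias = intptr_t{1} << 27;

uintptr_t EncodeStackIndex(intptr_t stack_index) {
  assert(-kStackIndexBias <= stack_index && stack_index < kStackIndexBias);
  return static_cast<uintptr_t>(kStackIndexBias + stack_index);
}

intptr_t DecodeStackIndex(uintptr_t payload) {
  // Mirrors GetStackIndex(): subtract the bias to restore the sign.
  return static_cast<intptr_t>(payload) - kStackIndexBias;
}

int main() {
  for (intptr_t index : {-16, -4, 0, 4, 128}) {
    assert(DecodeStackIndex(EncodeStackIndex(index)) == index);
  }
  return 0;
}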
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 0d33b49fdb..c6a0b6a0d2 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -303,6 +303,7 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) {
switch (interval->NumberOfSpillSlotsNeeded()) {
case 1: loc = Location::StackSlot(interval->GetParent()->GetSpillSlot()); break;
case 2: loc = Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); break;
+ case 4: loc = Location::SIMDStackSlot(interval->GetParent()->GetSpillSlot()); break;
default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
}
InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc);
@@ -464,6 +465,7 @@ void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
switch (parent->NumberOfSpillSlotsNeeded()) {
case 1: location_source = Location::StackSlot(parent->GetSpillSlot()); break;
case 2: location_source = Location::DoubleStackSlot(parent->GetSpillSlot()); break;
+ case 4: location_source = Location::SIMDStackSlot(parent->GetSpillSlot()); break;
default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
}
}
@@ -496,7 +498,8 @@ static bool IsValidDestination(Location destination) {
|| destination.IsFpuRegister()
|| destination.IsFpuRegisterPair()
|| destination.IsStackSlot()
- || destination.IsDoubleStackSlot();
+ || destination.IsDoubleStackSlot()
+ || destination.IsSIMDStackSlot();
}
void RegisterAllocationResolver::AddMove(HParallelMove* move,
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index c0a045c33e..36ee5a903a 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -470,6 +470,8 @@ bool LiveInterval::SameRegisterKind(Location other) const {
}
size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
+ // TODO: detect vector operation.
+ // Return number of needed spill slots based on type.
return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1;
}
@@ -497,6 +499,7 @@ Location LiveInterval::ToLocation() const {
switch (NumberOfSpillSlotsNeeded()) {
case 1: return Location::StackSlot(GetParent()->GetSpillSlot());
case 2: return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
+ case 4: return Location::SIMDStackSlot(GetParent()->GetSpillSlot());
default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
}
} else {
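
The TODO in NumberOfSpillSlotsNeeded() marks the remaining gap: until intervals holding 128-bit vector values report 4 slots, the new "case 4:" arms above stay unreached. A hedged sketch of the mapping those switches share; SlotsNeeded and PickSpillKind are hypothetical names, with spill slots counted in 32-bit units as in the code above:

#include <cstdlib>

enum class SlotKind { kStackSlot, kDoubleStackSlot, kSIMDStackSlot };

// Stand-in for NumberOfSpillSlotsNeeded(): 1 slot for int/float/reference,
// 2 for long/double, 4 for a 128-bit vector (once vector ops are detected).
int SlotsNeeded(bool is_wide, bool is_simd) {
  if (is_simd) return 4;
  return is_wide ? 2 : 1;
}

SlotKind PickSpillKind(int slots) {
  switch (slots) {
    case 1: return SlotKind::kStackSlot;
    case 2: return SlotKind::kDoubleStackSlot;
    case 4: return SlotKind::kSIMDStackSlot;  // the case this change adds
    default: std::abort();                    // mirrors LOG(FATAL)/UNREACHABLE()
  }
}

int main() {
  // A 128-bit vector value must land in a 4-slot (16-byte) SIMD stack slot.
  return PickSpillKind(SlotsNeeded(false, true)) == SlotKind::kSIMDStackSlot
             ? 0 : 1;
}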