path: root/compiler/optimizing
author Aart Bik <ajcbik@google.com> 2017-03-21 20:14:07 -0700
committer Aart Bik <ajcbik@google.com> 2017-03-22 20:11:05 +0000
commit  b13c65bb46544821a84ff2106d0710d77b0fb463 (patch)
tree    46ef54ce881e32c80901528cf21a7951cf219023 /compiler/optimizing
parent  2b864659d4399de6c17e93b8df8cdbf08c6a7ac9 (diff)
Saves full XMM state along suspend check's slow path.
Rationale:
Break-out CL of the ART Vectorizer. We need to save the full 128 bits of
data (the default ABI of the ART runtime only saves 64 bits). Note that
this is *only* done for XMM registers that are live, so the overhead is
not too big.

Bug: 34083438
Test: test-art-host
Change-Id: Ic89988b0acb0c104634271d0c6c3e29b6596d59b
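
For illustration, here is a minimal standalone sketch (not part of this CL)
using SSE2 intrinsics. It shows why a 64-bit spill (movsd-style, the old
behavior) drops the upper lane of a vectorized XMM register, while a
128-bit spill (movupd-style, as introduced here) preserves it:

// Standalone illustration, not ART code.
#include <emmintrin.h>  // SSE2: __m128d, _mm_store_sd, _mm_storeu_pd
#include <cstdio>

int main() {
  __m128d xmm = _mm_set_pd(2.0, 1.0);   // lanes: low = 1.0, high = 2.0

  double slot64[2] = {0.0, 0.0};        // 8-byte spill slot (old behavior)
  _mm_store_sd(slot64, xmm);            // movsd: stores only the low 64 bits

  double slot128[2] = {0.0, 0.0};       // 16-byte spill slot (new behavior)
  _mm_storeu_pd(slot128, xmm);          // movupd: stores all 128 bits

  std::printf("64-bit spill : low=%.1f high=%.1f (high lane lost)\n",
              slot64[0], slot64[1]);
  std::printf("128-bit spill: low=%.1f high=%.1f\n",
              slot128[0], slot128[1]);
  return 0;
}
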
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_x86.cc    | 22
-rw-r--r--  compiler/optimizing/code_generator_x86.h     |  5
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 26
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h  |  4
-rw-r--r--  compiler/optimizing/locations.h              |  2
-rw-r--r--  compiler/optimizing/nodes.cc                 |  3
-rw-r--r--  compiler/optimizing/nodes.h                  |  9
7 files changed, 60 insertions, 11 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0b50619a66..958c1a6fdb 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -183,10 +183,13 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
: SlowPathCode(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD
x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // only restores full width XMM for SIMD
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -963,12 +966,20 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
}
size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+ if (GetGraph()->HasSIMD()) {
+ __ movupd(Address(ESP, stack_index), XmmRegister(reg_id));
+ } else {
+ __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+ }
return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+ if (GetGraph()->HasSIMD()) {
+ __ movupd(XmmRegister(reg_id), Address(ESP, stack_index));
+ } else {
+ __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+ }
return GetFloatingPointSpillSlotSize();
}
@@ -5699,7 +5710,12 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction)
void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In suspend check slow path, usually there are no caller-save registers at all.
+ // If SIMD instructions are present, however, we force spilling all live SIMD
+ // registers in full width (since the runtime only saves/restores lower part).
+ locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD()
+ ? RegisterSet::AllFpu()
+ : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 65ee383b54..ca3a9eadd2 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -348,8 +348,9 @@ class CodeGeneratorX86 : public CodeGenerator {
}
size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
- // 8 bytes == 2 words for each spill.
- return 2 * kX86WordSize;
+ return GetGraph()->HasSIMD()
+ ? 4 * kX86WordSize // 16 bytes == 4 words for each spill
+ : 2 * kX86WordSize; // 8 bytes == 2 words for each spill
}
HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 08f1adfcff..c106d9b06e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -140,10 +140,13 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
: SlowPathCode(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD
x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // only restores full width XMM for SIMD
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -1158,13 +1161,21 @@ size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg
}
size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
- return kX86_64WordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ movupd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ } else {
+ __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ }
+ return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
- return kX86_64WordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ movupd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ } else {
+ __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ }
+ return GetFloatingPointSpillSlotSize();
}
void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -5152,7 +5163,12 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio
void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In suspend check slow path, usually there are no caller-save registers at all.
+ // If SIMD instructions are present, however, we force spilling all live SIMD
+ // registers in full width (since the runtime only saves/restores lower part).
+ locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD()
+ ? RegisterSet::AllFpu()
+ : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 376c3ce381..c8336dabd9 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -326,7 +326,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
}
size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
- return kX86_64WordSize;
+ return GetGraph()->HasSIMD()
+ ? 2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill
+ : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 word for each spill
}
HGraphVisitor* GetLocationBuilder() OVERRIDE {
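
As a sanity check on the two GetFloatingPointSpillSlotSize() changes above,
here is a small sketch of the slot-size arithmetic. It assumes the ART word
sizes are 4 bytes for kX86WordSize and 8 bytes for kX86_64WordSize; the
constants below are local to the sketch, not the real ART definitions:

// Standalone sketch of the spill-slot sizing, not ART code.
#include <cstddef>

constexpr size_t kX86WordSize = 4;       // assumed 32-bit native word
constexpr size_t kX86_64WordSize = 8;    // assumed 64-bit native word
constexpr size_t kXmmRegisterSize = 16;  // full 128-bit XMM register

// x86: SIMD spills need 4 words (16 bytes); plain FP spills need 2 (8 bytes).
static_assert(4 * kX86WordSize == kXmmRegisterSize, "x86 SIMD slot");
static_assert(2 * kX86WordSize == 8, "x86 double slot");

// x86-64: SIMD spills need 2 words (16 bytes); plain FP spills need 1 (8 bytes).
static_assert(2 * kX86_64WordSize == kXmmRegisterSize, "x86-64 SIMD slot");
static_assert(1 * kX86_64WordSize == 8, "x86-64 double slot");
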
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 091b58a63d..d391f6913c 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -417,6 +417,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs);
class RegisterSet : public ValueObject {
public:
static RegisterSet Empty() { return RegisterSet(); }
+ static RegisterSet AllFpu() { return RegisterSet(0, -1); }
void Add(Location loc) {
if (loc.IsRegister()) {
@@ -462,6 +463,7 @@ class RegisterSet : public ValueObject {
private:
RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+ RegisterSet(uint32_t core, uint32_t fp) : core_registers_(core), floating_point_registers_(fp) {}
uint32_t core_registers_;
uint32_t floating_point_registers_;
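
The new AllFpu() factory above passes -1 for the floating-point mask, which
becomes an all-ones uint32_t, so every FP register is reported as caller-save
along the SIMD suspend-check slow path. A minimal standalone sketch of that
bitmask behavior follows; the class and Contains* helpers here are
illustrative only, not the real ART RegisterSet API:

// Standalone sketch, not the ART RegisterSet class.
#include <cstdint>
#include <cstdio>

class RegisterSetSketch {
 public:
  static RegisterSetSketch Empty() { return RegisterSetSketch(0, 0); }
  static RegisterSetSketch AllFpu() { return RegisterSetSketch(0, -1); }

  bool ContainsCore(uint32_t id) const { return (core_ >> id) & 1u; }
  bool ContainsFpu(uint32_t id) const { return (fp_ >> id) & 1u; }

 private:
  RegisterSetSketch(uint32_t core, uint32_t fp) : core_(core), fp_(fp) {}
  uint32_t core_;
  uint32_t fp_;
};

int main() {
  RegisterSetSketch all_fpu = RegisterSetSketch::AllFpu();
  // With the -1 mask, every XMM id (0..15 on x86-64) reads back as set,
  // while the core-register mask stays empty.
  std::printf("xmm0=%d xmm15=%d eax=%d\n",
              all_fpu.ContainsFpu(0), all_fpu.ContainsFpu(15),
              all_fpu.ContainsCore(0));
  return 0;
}
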
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 020e4463d4..ec706e6694 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2046,6 +2046,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
if (HasTryCatch()) {
outer_graph->SetHasTryCatch(true);
}
+ if (HasSIMD()) {
+ outer_graph->SetHasSIMD(true);
+ }
HInstruction* return_value = nullptr;
if (GetBlocks().size() == 3) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 542b218cf8..6881d8f6ae 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
temporaries_vreg_slots_(0),
has_bounds_checks_(false),
has_try_catch_(false),
+ has_simd_(false),
has_loops_(false),
has_irreducible_loops_(false),
debuggable_(debuggable),
@@ -560,6 +561,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool HasTryCatch() const { return has_try_catch_; }
void SetHasTryCatch(bool value) { has_try_catch_ = value; }
+ bool HasSIMD() const { return has_simd_; }
+ void SetHasSIMD(bool value) { has_simd_ = value; }
+
bool HasLoops() const { return has_loops_; }
void SetHasLoops(bool value) { has_loops_ = value; }
@@ -652,6 +656,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// false positives.
bool has_try_catch_;
+ // Flag whether SIMD instructions appear in the graph. If true, the
+ // code generators may have to be more careful spilling the wider
+ // contents of SIMD registers.
+ bool has_simd_;
+
// Flag whether there are any loops in the graph. We can skip loop
// optimization if it's false. It's only best effort to keep it up
// to date in the presence of code elimination so there might be false