Implement register allocator for floating point registers.
Also:
- Fix incorrect emission of the REX prefix in the x86_64 assembler.
- Fix movaps code generation in the x86_64 assembler.
Change-Id: Ib6dcf6e7c4a9c43368cfc46b02ba50f69ae69cbe
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 408e13e..d5cd490 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -473,8 +473,7 @@
case Location::kRegister : {
int id = location.reg();
stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id);
- if (current->GetType() == Primitive::kPrimDouble
- || current->GetType() == Primitive::kPrimLong) {
+ if (current->GetType() == Primitive::kPrimLong) {
stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id);
++i;
DCHECK_LT(i, environment_size);
@@ -482,52 +481,55 @@
break;
}
+ case Location::kFpuRegister : {
+ int id = location.reg();
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id);
+ if (current->GetType() == Primitive::kPrimDouble) {
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id);
+ ++i;
+ DCHECK_LT(i, environment_size);
+ }
+ break;
+ }
+
default:
LOG(FATAL) << "Unexpected kind " << location.GetKind();
}
}
}
-size_t CodeGenerator::GetStackOffsetOfSavedRegister(size_t index) {
- return first_register_slot_in_slow_path_ + index * GetWordSize();
-}
-
void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) {
RegisterSet* register_set = locations->GetLiveRegisters();
- uint32_t count = 0;
+ size_t stack_offset = first_register_slot_in_slow_path_;
for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
if (register_set->ContainsCoreRegister(i)) {
- size_t stack_offset = GetStackOffsetOfSavedRegister(count);
- ++count;
- SaveCoreRegister(Location::StackSlot(stack_offset), i);
// If the register holds an object, update the stack mask.
if (locations->RegisterContainsObject(i)) {
locations->SetStackBit(stack_offset / kVRegSize);
}
+ stack_offset += SaveCoreRegister(stack_offset, i);
}
}
for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
if (register_set->ContainsFloatingPointRegister(i)) {
- LOG(FATAL) << "Unimplemented";
+ stack_offset += SaveFloatingPointRegister(stack_offset, i);
}
}
}
void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) {
RegisterSet* register_set = locations->GetLiveRegisters();
- uint32_t count = 0;
+ size_t stack_offset = first_register_slot_in_slow_path_;
for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
if (register_set->ContainsCoreRegister(i)) {
- size_t stack_offset = GetStackOffsetOfSavedRegister(count);
- ++count;
- RestoreCoreRegister(Location::StackSlot(stack_offset), i);
+ stack_offset += RestoreCoreRegister(stack_offset, i);
}
}
for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
if (register_set->ContainsFloatingPointRegister(i)) {
- LOG(FATAL) << "Unimplemented";
+ stack_offset += RestoreFloatingPointRegister(stack_offset, i);
}
}
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 7aaf991..220d745 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -110,8 +110,18 @@
virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
virtual InstructionSet GetInstructionSet() const = 0;
- virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) = 0;
- virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) = 0;
+ // Saves the register in the stack. Returns the size taken on stack.
+ virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
+ // Restores the register from the stack. Returns the size taken on stack.
+ virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
+ virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ LOG(FATAL) << "Unimplemented";
+ return 0u;
+ }
+ virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ LOG(FATAL) << "Unimplemented";
+ return 0u;
+ }
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
@@ -145,6 +155,7 @@
void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;
bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
+ bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
protected:
CodeGenerator(HGraph* graph,
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ebb1d6a..24b7c2d 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -210,12 +210,14 @@
stream << ArmManagedRegister::FromDRegister(DRegister(reg));
}
-void CodeGeneratorARM::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
- __ StoreToOffset(kStoreWord, static_cast<Register>(reg_id), SP, stack_location.GetStackIndex());
+size_t CodeGeneratorARM::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ StoreToOffset(kStoreWord, static_cast<Register>(reg_id), SP, stack_index);
+ return kArmWordSize;
}
-void CodeGeneratorARM::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
- __ LoadFromOffset(kLoadWord, static_cast<Register>(reg_id), SP, stack_location.GetStackIndex());
+size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ LoadFromOffset(kLoadWord, static_cast<Register>(reg_id), SP, stack_index);
+ return kArmWordSize;
}
CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
@@ -859,6 +861,26 @@
// Will be generated at use site.
}
+void LocationsBuilderARM::VisitFloatConstant(HFloatConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARM::VisitFloatConstant(HFloatConstant* constant) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderARM::VisitDoubleConstant(HDoubleConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant) {
+ // Will be generated at use site.
+}
+
void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 57b289c..1fe8a7e 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -142,8 +142,8 @@
virtual void GenerateFrameExit() OVERRIDE;
virtual void Bind(HBasicBlock* block) OVERRIDE;
virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
- virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
- virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+ virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
virtual size_t GetWordSize() const OVERRIDE {
return kArmWordSize;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index cc2be82..2550518 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -182,12 +182,14 @@
stream << X86ManagedRegister::FromXmmRegister(XmmRegister(reg));
}
-void CodeGeneratorX86::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
- __ movl(Address(ESP, stack_location.GetStackIndex()), static_cast<Register>(reg_id));
+size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
+ return kX86WordSize;
}
-void CodeGeneratorX86::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
- __ movl(static_cast<Register>(reg_id), Address(ESP, stack_location.GetStackIndex()));
+size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
+ return kX86WordSize;
}
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph)
@@ -795,6 +797,26 @@
// Will be generated at use site.
}
+void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant) {
+ // Will be generated at use site.
+}
+
void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index db8b9ab..fff91d1 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -144,8 +144,8 @@
virtual void GenerateFrameExit() OVERRIDE;
virtual void Bind(HBasicBlock* block) OVERRIDE;
virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
- virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
- virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+ virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
virtual size_t GetWordSize() const OVERRIDE {
return kX86WordSize;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 9df9d41..9e63f8b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -191,12 +191,24 @@
stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg));
}
-void CodeGeneratorX86_64::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
- __ movq(Address(CpuRegister(RSP), stack_location.GetStackIndex()), CpuRegister(reg_id));
+size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
+ return kX86_64WordSize;
}
-void CodeGeneratorX86_64::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
- __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_location.GetStackIndex()));
+size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ return kX86_64WordSize;
+}
+
+size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ return kX86_64WordSize;
+}
+
+size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ return kX86_64WordSize;
}
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph)
@@ -727,6 +739,26 @@
// Will be generated at use site.
}
+void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
+ // Will be generated at use site.
+}
+
void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
@@ -995,7 +1027,7 @@
case Primitive::kPrimDouble:
case Primitive::kPrimFloat: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::Any());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -1032,21 +1064,12 @@
}
case Primitive::kPrimFloat: {
- if (second.IsFpuRegister()) {
- __ addss(first.As<XmmRegister>(), second.As<XmmRegister>());
- } else {
- __ addss(first.As<XmmRegister>(),
- Address(CpuRegister(RSP), second.GetStackIndex()));
- }
+ __ addss(first.As<XmmRegister>(), second.As<XmmRegister>());
break;
}
case Primitive::kPrimDouble: {
- if (second.IsFpuRegister()) {
- __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>());
- } else {
- __ addsd(first.As<XmmRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
- }
+ __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>());
break;
}
@@ -1482,10 +1505,30 @@
break;
}
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
- UNREACHABLE();
+ case Primitive::kPrimFloat: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+ XmmRegister out = locations->Out().As<XmmRegister>();
+ if (index.IsConstant()) {
+ __ movss(out, Address(obj,
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
+ } else {
+ __ movss(out, Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset));
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+ XmmRegister out = locations->Out().As<XmmRegister>();
+ if (index.IsConstant()) {
+ __ movsd(out, Address(obj,
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
+ } else {
+ __ movsd(out, Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset));
+ }
+ break;
+ }
+
case Primitive::kPrimVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
@@ -1509,6 +1552,8 @@
locations->SetInAt(2, Location::RequiresRegister());
if (value_type == Primitive::kPrimLong) {
locations->SetInAt(2, Location::RequiresRegister());
+ } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
} else {
locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
}
@@ -1581,6 +1626,7 @@
__ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
value.As<CpuRegister>());
} else {
+ DCHECK(value.IsConstant()) << value;
__ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
}
@@ -1609,10 +1655,34 @@
break;
}
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
- UNREACHABLE();
+ case Primitive::kPrimFloat: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ DCHECK(value.IsFpuRegister());
+ __ movss(Address(obj, offset), value.As<XmmRegister>());
+ } else {
+ DCHECK(value.IsFpuRegister());
+ __ movss(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
+ value.As<XmmRegister>());
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+ DCHECK(value.IsFpuRegister());
+ __ movsd(Address(obj, offset), value.As<XmmRegister>());
+ } else {
+ DCHECK(value.IsFpuRegister());
+ __ movsd(Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset),
+ value.As<XmmRegister>());
+ }
+ break;
+ }
+
case Primitive::kPrimVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
@@ -1746,6 +1816,9 @@
if (destination.IsRegister()) {
__ movl(destination.As<CpuRegister>(),
Address(CpuRegister(RSP), source.GetStackIndex()));
+ } else if (destination.IsFpuRegister()) {
+ __ movss(destination.As<XmmRegister>(),
+ Address(CpuRegister(RSP), source.GetStackIndex()));
} else {
DCHECK(destination.IsStackSlot());
__ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -1755,6 +1828,8 @@
if (destination.IsRegister()) {
__ movq(destination.As<CpuRegister>(),
Address(CpuRegister(RSP), source.GetStackIndex()));
+ } else if (destination.IsFpuRegister()) {
+ __ movsd(destination.As<XmmRegister>(), Address(CpuRegister(RSP), source.GetStackIndex()));
} else {
DCHECK(destination.IsDoubleStackSlot());
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -1767,6 +1842,7 @@
if (destination.IsRegister()) {
__ movl(destination.As<CpuRegister>(), imm);
} else {
+ DCHECK(destination.IsStackSlot()) << destination;
__ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
}
} else if (constant->IsLongConstant()) {
@@ -1774,14 +1850,42 @@
if (destination.IsRegister()) {
__ movq(destination.As<CpuRegister>(), Immediate(value));
} else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
__ movq(CpuRegister(TMP), Immediate(value));
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
+ } else if (constant->IsFloatConstant()) {
+ Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()));
+ if (destination.IsFpuRegister()) {
+ __ movl(CpuRegister(TMP), imm);
+ __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+ } else {
+ DCHECK(destination.IsStackSlot()) << destination;
+ __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
+ }
} else {
- LOG(FATAL) << "Unimplemented constant type";
+ DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
+ Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()));
+ if (destination.IsFpuRegister()) {
+ __ movq(CpuRegister(TMP), imm);
+ __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ __ movq(CpuRegister(TMP), imm);
+ __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+ }
}
- } else {
- LOG(FATAL) << "Unimplemented";
+ } else if (source.IsFpuRegister()) {
+ if (destination.IsFpuRegister()) {
+ __ movaps(destination.As<XmmRegister>(), source.As<XmmRegister>());
+ } else if (destination.IsStackSlot()) {
+ __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
+ source.As<XmmRegister>());
+ } else {
+ DCHECK(destination.IsDoubleStackSlot());
+ __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
+ source.As<XmmRegister>());
+ }
}
}
@@ -1823,6 +1927,18 @@
CpuRegister(ensure_scratch.GetRegister()));
}
+void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
+ __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
+ __ movss(Address(CpuRegister(RSP), mem), reg);
+ __ movd(reg, CpuRegister(TMP));
+}
+
+void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
+ __ movsd(Address(CpuRegister(RSP), mem), reg);
+ __ movd(reg, CpuRegister(TMP));
+}
+
void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
MoveOperands* move = moves_.Get(index);
Location source = move->GetSource();
@@ -1842,8 +1958,20 @@
Exchange64(destination.As<CpuRegister>(), source.GetStackIndex());
} else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
Exchange64(destination.GetStackIndex(), source.GetStackIndex());
+ } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
+ __ movd(CpuRegister(TMP), source.As<XmmRegister>());
+ __ movaps(source.As<XmmRegister>(), destination.As<XmmRegister>());
+ __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+ } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
+ Exchange32(source.As<XmmRegister>(), destination.GetStackIndex());
+ } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
+ Exchange32(destination.As<XmmRegister>(), source.GetStackIndex());
+ } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
+ Exchange64(source.As<XmmRegister>(), destination.GetStackIndex());
+ } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
+ Exchange64(destination.As<XmmRegister>(), source.GetStackIndex());
} else {
- LOG(FATAL) << "Unimplemented";
+ LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
}
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 5ac0189..e04a8d8 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -80,8 +80,10 @@
private:
void Exchange32(CpuRegister reg, int mem);
+ void Exchange32(XmmRegister reg, int mem);
void Exchange32(int mem1, int mem2);
void Exchange64(CpuRegister reg, int mem);
+ void Exchange64(XmmRegister reg, int mem);
void Exchange64(int mem1, int mem2);
CodeGeneratorX86_64* const codegen_;
@@ -146,8 +148,10 @@
virtual void GenerateFrameExit() OVERRIDE;
virtual void Bind(HBasicBlock* block) OVERRIDE;
virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
- virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
- virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+ virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
virtual size_t GetWordSize() const OVERRIDE {
return kX86_64WordSize;
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 459010d..4ed2156 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -120,13 +120,11 @@
output_<< std::endl;
}
- void DumpLocation(Location location, Primitive::Type type) {
+ void DumpLocation(Location location) {
if (location.IsRegister()) {
- if (type == Primitive::kPrimDouble || type == Primitive::kPrimFloat) {
- codegen_.DumpFloatingPointRegister(output_, location.reg());
- } else {
- codegen_.DumpCoreRegister(output_, location.reg());
- }
+ codegen_.DumpCoreRegister(output_, location.reg());
+ } else if (location.IsFpuRegister()) {
+ codegen_.DumpFloatingPointRegister(output_, location.reg());
} else if (location.IsConstant()) {
output_ << "constant";
HConstant* constant = location.GetConstant();
@@ -150,9 +148,9 @@
output_ << " (";
for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) {
MoveOperands* move = instruction->MoveOperandsAt(i);
- DumpLocation(move->GetSource(), Primitive::kPrimInt);
+ DumpLocation(move->GetSource());
output_ << " -> ";
- DumpLocation(move->GetDestination(), Primitive::kPrimInt);
+ DumpLocation(move->GetDestination());
if (i + 1 != e) {
output_ << ", ";
}
@@ -183,13 +181,13 @@
if (locations != nullptr) {
output_ << " ( ";
for (size_t i = 0; i < instruction->InputCount(); ++i) {
- DumpLocation(locations->InAt(i), instruction->InputAt(i)->GetType());
+ DumpLocation(locations->InAt(i));
output_ << " ";
}
output_ << ")";
if (locations->Out().IsValid()) {
output_ << " -> ";
- DumpLocation(locations->Out(), instruction->GetType());
+ DumpLocation(locations->Out());
}
}
output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index a219b97..0505510 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -363,6 +363,25 @@
Add(&phis_, this, phi);
}
+void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) {
+ DCHECK_EQ(phi->GetId(), -1);
+ DCHECK_NE(cursor->GetId(), -1);
+ DCHECK_EQ(cursor->GetBlock(), this);
+ if (cursor->next_ == nullptr) {
+ cursor->next_ = phi;
+ phi->previous_ = cursor;
+ DCHECK(phi->next_ == nullptr);
+ } else {
+ phi->next_ = cursor->next_;
+ phi->previous_ = cursor;
+ cursor->next_ = phi;
+ phi->next_->previous_ = phi;
+ }
+ phi->SetBlock(this);
+ phi->SetId(GetGraph()->GetNextInstructionId());
+ UpdateInputsUsers(phi);
+}
+
static void Remove(HInstructionList* instruction_list,
HBasicBlock* block,
HInstruction* instruction) {
@@ -531,6 +550,12 @@
env_uses_ = nullptr;
}
+void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
+ InputAt(index)->RemoveUser(this, index);
+ SetRawInputAt(index, replacement);
+ replacement->AddUseAt(this, index);
+}
+
size_t HInstruction::EnvironmentSize() const {
return HasEnvironment() ? environment_->Size() : 0;
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 3f29e53..7c933aa 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -399,6 +399,7 @@
void ReplaceAndRemoveInstructionWith(HInstruction* initial,
HInstruction* replacement);
void AddPhi(HPhi* phi);
+ void InsertPhiAfter(HPhi* instruction, HPhi* cursor);
void RemovePhi(HPhi* phi);
bool IsLoopHeader() const {
@@ -503,7 +504,9 @@
M(Temporary, Instruction) \
M(SuspendCheck, Instruction) \
M(Mul, BinaryOperation) \
- M(Neg, UnaryOperation)
+ M(Neg, UnaryOperation) \
+ M(FloatConstant, Constant) \
+ M(DoubleConstant, Constant) \
#define FOR_EACH_INSTRUCTION(M) \
FOR_EACH_CONCRETE_INSTRUCTION(M) \
@@ -710,6 +713,7 @@
void SetLocations(LocationSummary* locations) { locations_ = locations; }
void ReplaceWith(HInstruction* instruction);
+ void ReplaceInput(HInstruction* replacement, size_t index);
bool HasOnlyOneUse() const {
return uses_ != nullptr && uses_->GetTail() == nullptr;
@@ -995,8 +999,8 @@
virtual Primitive::Type GetType() const { return type_; }
- private:
- const Primitive::Type type_;
+ protected:
+ Primitive::Type type_;
};
// Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow
@@ -1401,6 +1405,48 @@
DISALLOW_COPY_AND_ASSIGN(HConstant);
};
+class HFloatConstant : public HConstant {
+ public:
+ explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {}
+
+ float GetValue() const { return value_; }
+
+ virtual bool InstructionDataEquals(HInstruction* other) const {
+ return bit_cast<float, int32_t>(other->AsFloatConstant()->value_) ==
+ bit_cast<float, int32_t>(value_);
+ }
+
+ virtual size_t ComputeHashCode() const { return static_cast<size_t>(GetValue()); }
+
+ DECLARE_INSTRUCTION(FloatConstant);
+
+ private:
+ const float value_;
+
+ DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
+};
+
+class HDoubleConstant : public HConstant {
+ public:
+ explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {}
+
+ double GetValue() const { return value_; }
+
+ virtual bool InstructionDataEquals(HInstruction* other) const {
+ return bit_cast<double, int64_t>(other->AsDoubleConstant()->value_) ==
+ bit_cast<double, int64_t>(value_);
+ }
+
+ virtual size_t ComputeHashCode() const { return static_cast<size_t>(GetValue()); }
+
+ DECLARE_INSTRUCTION(DoubleConstant);
+
+ private:
+ const double value_;
+
+ DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
+};
+
// Constants of the type int. Those can be from Dex instructions, or
// synthesized (for example with the if-eqz instruction).
class HIntConstant : public HConstant {
@@ -1794,6 +1840,7 @@
virtual bool CanBeMoved() const { return true; }
virtual bool InstructionDataEquals(HInstruction* other) const { return true; }
+ void SetType(Primitive::Type type) { type_ = type; }
DECLARE_INSTRUCTION(ArrayGet);
@@ -1806,11 +1853,11 @@
HArraySet(HInstruction* array,
HInstruction* index,
HInstruction* value,
- Primitive::Type component_type,
+ Primitive::Type expected_component_type,
uint32_t dex_pc)
: HTemplateInstruction(SideEffects::ChangesSomething()),
dex_pc_(dex_pc),
- component_type_(component_type) {
+ expected_component_type_(expected_component_type) {
SetRawInputAt(0, array);
SetRawInputAt(1, index);
SetRawInputAt(2, value);
@@ -1824,13 +1871,24 @@
uint32_t GetDexPc() const { return dex_pc_; }
- Primitive::Type GetComponentType() const { return component_type_; }
+ HInstruction* GetValue() const { return InputAt(2); }
+
+ Primitive::Type GetComponentType() const {
+ // The Dex format does not type floating point index operations. Since
+ // `expected_component_type_` is set during building, it may be incorrect,
+ // so we also check the type of the value. If it is a floating point
+ // type, we must use that type.
+ Primitive::Type value_type = GetValue()->GetType();
+ return ((value_type == Primitive::kPrimFloat) || (value_type == Primitive::kPrimDouble))
+ ? value_type
+ : expected_component_type_;
+ }
DECLARE_INSTRUCTION(ArraySet);
private:
const uint32_t dex_pc_;
- const Primitive::Type component_type_;
+ const Primitive::Type expected_component_type_;
DISALLOW_COPY_AND_ASSIGN(HArraySet);
};
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 719c069..3b51bfb 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -37,18 +37,21 @@
handled_(allocator, 0),
active_(allocator, 0),
inactive_(allocator, 0),
- physical_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()),
+ physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()),
+ physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()),
temp_intervals_(allocator, 4),
spill_slots_(allocator, kDefaultNumberOfSpillSlots),
safepoints_(allocator, 0),
processing_core_registers_(false),
number_of_registers_(-1),
registers_array_(nullptr),
- blocked_registers_(codegen->GetBlockedCoreRegisters()),
+ blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
+ blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
reserved_out_slots_(0),
maximum_number_of_live_registers_(0) {
codegen->SetupBlockedRegisters();
- physical_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
+ physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
+ physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters());
// Always reserve for the current method and the graph's max out registers.
// TODO: compute it instead.
reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
@@ -65,8 +68,10 @@
it.Advance()) {
HInstruction* current = it.Current();
if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false;
- if (current->GetType() == Primitive::kPrimFloat) return false;
- if (current->GetType() == Primitive::kPrimDouble) return false;
+ if ((current->GetType() == Primitive::kPrimFloat || current->GetType() == Primitive::kPrimDouble)
+ && instruction_set != kX86_64) {
+ return false;
+ }
}
}
return true;
@@ -93,14 +98,22 @@
void RegisterAllocator::BlockRegister(Location location,
size_t start,
- size_t end,
- Primitive::Type type) {
+ size_t end) {
int reg = location.reg();
- LiveInterval* interval = physical_register_intervals_.Get(reg);
+ DCHECK(location.IsRegister() || location.IsFpuRegister());
+ LiveInterval* interval = location.IsRegister()
+ ? physical_core_register_intervals_.Get(reg)
+ : physical_fp_register_intervals_.Get(reg);
+ Primitive::Type type = location.IsRegister()
+ ? Primitive::kPrimInt
+ : Primitive::kPrimDouble;
if (interval == nullptr) {
interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
- physical_register_intervals_.Put(reg, interval);
- inactive_.Add(interval);
+ if (location.IsRegister()) {
+ physical_core_register_intervals_.Put(reg, interval);
+ } else {
+ physical_fp_register_intervals_.Put(reg, interval);
+ }
}
DCHECK(interval->GetRegister() == reg);
interval->AddRange(start, end);
@@ -123,8 +136,17 @@
registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_);
processing_core_registers_ = true;
unhandled_ = &unhandled_core_intervals_;
+ for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) {
+ LiveInterval* fixed = physical_core_register_intervals_.Get(i);
+ if (fixed != nullptr) {
+ inactive_.Add(fixed);
+ }
+ }
LinearScan();
+ size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_;
+ maximum_number_of_live_registers_ = 0;
+
inactive_.Reset();
active_.Reset();
handled_.Reset();
@@ -133,9 +155,14 @@
registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_);
processing_core_registers_ = false;
unhandled_ = &unhandled_fp_intervals_;
- // TODO: Enable FP register allocation.
- DCHECK(unhandled_->IsEmpty());
+ for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) {
+ LiveInterval* fixed = physical_fp_register_intervals_.Get(i);
+ if (fixed != nullptr) {
+ inactive_.Add(fixed);
+ }
+ }
LinearScan();
+ maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers;
}
void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
@@ -148,8 +175,9 @@
for (size_t i = 0; i < locations->GetTempCount(); ++i) {
Location temp = locations->GetTemp(i);
if (temp.IsRegister()) {
- BlockRegister(temp, position, position + 1, Primitive::kPrimInt);
+ BlockRegister(temp, position, position + 1);
} else {
+ DCHECK(temp.IsUnallocated());
LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
temp_intervals_.Add(interval);
interval->AddRange(position, position + 1);
@@ -160,10 +188,6 @@
bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
&& (instruction->GetType() != Primitive::kPrimFloat);
- GrowableArray<LiveInterval*>& unhandled = core_register
- ? unhandled_core_intervals_
- : unhandled_fp_intervals_;
-
if (locations->CanCall()) {
if (!instruction->IsSuspendCheck()) {
codegen_->MarkNotLeaf();
@@ -180,7 +204,8 @@
// maximum before updating locations.
LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
interval->AddRange(position, position + 1);
- unhandled.Add(interval);
+ unhandled_core_intervals_.Add(interval);
+ unhandled_fp_intervals_.Add(interval);
}
}
@@ -189,21 +214,29 @@
for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
BlockRegister(Location::RegisterLocation(i),
position,
- position + 1,
- Primitive::kPrimInt);
+ position + 1);
+ }
+ for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+ BlockRegister(Location::FpuRegisterLocation(i),
+ position,
+ position + 1);
}
}
for (size_t i = 0; i < instruction->InputCount(); ++i) {
Location input = locations->InAt(i);
- if (input.IsRegister()) {
- BlockRegister(input, position, position + 1, instruction->InputAt(i)->GetType());
+ if (input.IsRegister() || input.IsFpuRegister()) {
+ BlockRegister(input, position, position + 1);
}
}
LiveInterval* current = instruction->GetLiveInterval();
if (current == nullptr) return;
+ GrowableArray<LiveInterval*>& unhandled = core_register
+ ? unhandled_core_intervals_
+ : unhandled_fp_intervals_;
+
DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek()));
// Some instructions define their output in fixed register/stack slot. We need
// to ensure we know these locations before doing register allocation. For a
@@ -213,11 +246,11 @@
//
// The backwards walking ensures the ranges are ordered on increasing start positions.
Location output = locations->Out();
- if (output.IsRegister()) {
+ if (output.IsRegister() || output.IsFpuRegister()) {
// Shift the interval's start by one to account for the blocked register.
current->SetFrom(position + 1);
current->SetRegister(output.reg());
- BlockRegister(output, position, position + 1, instruction->GetType());
+ BlockRegister(output, position, position + 1);
} else if (!locations->OutputOverlapsWithInputs()) {
// Shift the interval's start by one to not interfere with the inputs.
current->SetFrom(position + 1);
@@ -281,10 +314,19 @@
}
}
- for (size_t i = 0, e = physical_register_intervals_.Size(); i < e; ++i) {
- LiveInterval* fixed = physical_register_intervals_.Get(i);
- if (fixed != nullptr && ShouldProcess(processing_core_registers_, fixed)) {
- intervals.Add(fixed);
+ if (processing_core_registers_) {
+ for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) {
+ LiveInterval* fixed = physical_core_register_intervals_.Get(i);
+ if (fixed != nullptr) {
+ intervals.Add(fixed);
+ }
+ }
+ } else {
+ for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) {
+ LiveInterval* fixed = physical_fp_register_intervals_.Get(i);
+ if (fixed != nullptr) {
+ intervals.Add(fixed);
+ }
}
}
@@ -377,10 +419,10 @@
interval->Dump(stream);
stream << ": ";
if (interval->HasRegister()) {
- if (processing_core_registers_) {
- codegen_->DumpCoreRegister(stream, interval->GetRegister());
- } else {
+ if (interval->IsFloatingPoint()) {
codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
+ } else {
+ codegen_->DumpCoreRegister(stream, interval->GetRegister());
}
} else {
stream << "spilled";
@@ -522,10 +564,9 @@
}
bool RegisterAllocator::IsBlocked(int reg) const {
- // TODO: This only works for core registers and needs to be adjusted for
- // floating point registers.
- DCHECK(processing_core_registers_);
- return blocked_registers_[reg];
+ return processing_core_registers_
+ ? blocked_core_registers_[reg]
+ : blocked_fp_registers_[reg];
}
// Find the register that is used the last, and spill the interval
@@ -727,7 +768,10 @@
}
static bool IsValidDestination(Location destination) {
- return destination.IsRegister() || destination.IsStackSlot() || destination.IsDoubleStackSlot();
+ return destination.IsRegister()
+ || destination.IsFpuRegister()
+ || destination.IsStackSlot()
+ || destination.IsDoubleStackSlot();
}
void RegisterAllocator::AddInputMoveFor(HInstruction* user,
@@ -877,7 +921,9 @@
if (current->HasSpillSlot() && current->HasRegister()) {
// We spill eagerly, so move must be at definition.
InsertMoveAfter(interval->GetDefinedBy(),
- Location::RegisterLocation(interval->GetRegister()),
+ interval->IsFloatingPoint()
+ ? Location::FpuRegisterLocation(interval->GetRegister())
+ : Location::RegisterLocation(interval->GetRegister()),
interval->NeedsTwoSpillSlots()
? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
: Location::StackSlot(interval->GetParent()->GetSpillSlot()));
@@ -935,6 +981,10 @@
}
break;
}
+ case Location::kFpuRegister: {
+ locations->AddLiveRegister(source);
+ break;
+ }
case Location::kStackSlot: // Fall-through
case Location::kDoubleStackSlot: // Fall-through
case Location::kConstant: {
@@ -1098,6 +1148,7 @@
current = at;
}
LocationSummary* locations = at->GetLocations();
+ DCHECK(temp->GetType() == Primitive::kPrimInt);
locations->SetTempAt(
temp_index++, Location::RegisterLocation(temp->GetRegister()));
}
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 0c3a9b3..b881539 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -94,7 +94,7 @@
bool IsBlocked(int reg) const;
// Update the interval for the register in `location` to cover [start, end).
- void BlockRegister(Location location, size_t start, size_t end, Primitive::Type type);
+ void BlockRegister(Location location, size_t start, size_t end);
// Allocate a spill slot for the given interval.
void AllocateSpillSlotFor(LiveInterval* interval);
@@ -156,7 +156,8 @@
// Fixed intervals for physical registers. Such intervals cover the positions
// where an instruction requires a specific register.
- GrowableArray<LiveInterval*> physical_register_intervals_;
+ GrowableArray<LiveInterval*> physical_core_register_intervals_;
+ GrowableArray<LiveInterval*> physical_fp_register_intervals_;
// Intervals for temporaries. Such intervals cover the positions
// where an instruction requires a temporary.
@@ -179,7 +180,8 @@
size_t* registers_array_;
// Blocked registers, as decided by the code generator.
- bool* const blocked_registers_;
+ bool* const blocked_core_registers_;
+ bool* const blocked_fp_registers_;
// Slots reserved for out arguments.
size_t reserved_out_slots_;
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index be2c039..a0cc8a9 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -129,8 +129,112 @@
}
}
+/**
+ * Constants in the Dex format are not typed. So the builder types them as
+ * integers, but when building the SSA form, we might realize the constant
+ * is used for floating point operations. We create a floating-point equivalent
+ * constant to make the operations correctly typed.
+ */
+static HFloatConstant* GetFloatEquivalent(HIntConstant* constant) {
+ // We place the floating point constant next to this constant.
+ HFloatConstant* result = constant->GetNext()->AsFloatConstant();
+ if (result == nullptr) {
+ HGraph* graph = constant->GetBlock()->GetGraph();
+ ArenaAllocator* allocator = graph->GetArena();
+ result = new (allocator) HFloatConstant(bit_cast<int32_t, float>(constant->GetValue()));
+ constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+ } else {
+ // If there is already a constant with the expected type, we know it is
+ // the floating point equivalent of this constant.
+ DCHECK_EQ((bit_cast<float, int32_t>(result->GetValue())), constant->GetValue());
+ }
+ return result;
+}
+
+/**
+ * Wide constants in the Dex format are not typed. So the builder types them as
+ * longs, but when building the SSA form, we might realize the constant
+ * is used for floating point operations. We create a floating-point equivalent
+ * constant to make the operations correctly typed.
+ */
+static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant) {
+ // We place the floating point constant next to this constant.
+ HDoubleConstant* result = constant->GetNext()->AsDoubleConstant();
+ if (result == nullptr) {
+ HGraph* graph = constant->GetBlock()->GetGraph();
+ ArenaAllocator* allocator = graph->GetArena();
+ result = new (allocator) HDoubleConstant(bit_cast<int64_t, double>(constant->GetValue()));
+ constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+ } else {
+ // If there is already a constant with the expected type, we know it is
+ // the floating point equivalent of this constant.
+ DCHECK_EQ((bit_cast<double, int64_t>(result->GetValue())), constant->GetValue());
+ }
+ return result;
+}
+
+/**
+ * Because of the Dex format, we might end up having the same phi being
+ * used for both non floating point and floating point operations. Because
+ * we want the graph to be correctly typed (and thereby avoid moves between
+ * floating point registers and core registers), we need to create a copy of the
+ * phi with a floating point type.
+ */
+static HPhi* GetFloatOrDoubleEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
+ // We place the floating point phi next to this phi.
+ HInstruction* next = phi->GetNext();
+ if (next == nullptr
+ || (next->GetType() != Primitive::kPrimDouble && next->GetType() != Primitive::kPrimFloat)) {
+ ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
+ HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+ // Copy the inputs. Note that the graph may not be correctly typed by doing this copy,
+ // but the type propagation phase will fix it.
+ new_phi->SetRawInputAt(i, phi->InputAt(i));
+ }
+ phi->GetBlock()->InsertPhiAfter(new_phi, phi);
+ return new_phi;
+ } else {
+ // If there is already a phi with the expected type, we know it is the floating
+ // point equivalent of this phi.
+ DCHECK_EQ(next->AsPhi()->GetRegNumber(), phi->GetRegNumber());
+ return next->AsPhi();
+ }
+}
+
+HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
+ HInstruction* value,
+ Primitive::Type type) {
+ if (value->IsArrayGet()) {
+ // The verifier has checked that values in arrays cannot be used for both
+ // floating point and non-floating point operations. It is therefore safe to just
+ // change the type of the operation.
+ value->AsArrayGet()->SetType(type);
+ return value;
+ } else if (value->IsLongConstant()) {
+ return GetDoubleEquivalent(value->AsLongConstant());
+ } else if (value->IsIntConstant()) {
+ return GetFloatEquivalent(value->AsIntConstant());
+ } else if (value->IsPhi()) {
+ return GetFloatOrDoubleEquivalentOfPhi(value->AsPhi(), type);
+ } else {
+ // For other instructions, we assume the verifier has checked that the dex format is correctly
+ // typed and the value in a dex register will not be used for both floating point and
+ // non-floating point operations. So the only reason an instruction would want a floating
+ // point equivalent is for an unused phi that will be removed by the dead phi elimination phase.
+ DCHECK(user->IsPhi());
+ return value;
+ }
+}
+
void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
- load->ReplaceWith(current_locals_->Get(load->GetLocal()->GetRegNumber()));
+ HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber());
+ if (load->GetType() != value->GetType()
+ && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) {
+ // If the operation requests a specific type, we make sure its input is of that type.
+ value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
+ }
+ load->ReplaceWith(value);
load->GetBlock()->RemoveInstruction(load);
}
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 9d8c072..24f5ac5 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -52,6 +52,10 @@
void VisitStoreLocal(HStoreLocal* store);
void VisitInstruction(HInstruction* instruction);
+ static HInstruction* GetFloatOrDoubleEquivalent(HInstruction* user,
+ HInstruction* instruction,
+ Primitive::Type type);
+
private:
// Locals for the current block being visited.
GrowableArray<HInstruction*>* current_locals_;
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index f0edc64..1e34670 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -319,7 +319,7 @@
if (user->IsPhi()) {
// If the phi has a register, try to use the same.
Location phi_location = user->GetLiveInterval()->ToLocation();
- if (phi_location.IsRegister() && free_until[phi_location.reg()] >= use_position) {
+ if (SameRegisterKind(phi_location) && free_until[phi_location.reg()] >= use_position) {
return phi_location.reg();
}
const GrowableArray<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors();
@@ -345,7 +345,7 @@
// We use the user's lifetime position - 1 (and not `use_position`) because the
// register is blocked at the beginning of the user.
size_t position = user->GetLifetimePosition() - 1;
- if (expected.IsRegister() && free_until[expected.reg()] >= position) {
+ if (SameRegisterKind(expected) && free_until[expected.reg()] >= position) {
return expected.reg();
}
}
@@ -368,7 +368,7 @@
// If the input dies at the end of the predecessor, we know its register can
// be reused.
Location input_location = input_interval.ToLocation();
- if (input_location.IsRegister()) {
+ if (SameRegisterKind(input_location)) {
return input_location.reg();
}
}
@@ -384,7 +384,7 @@
// If the input dies at the start of this instruction, we know its register can
// be reused.
Location location = input_interval.ToLocation();
- if (location.IsRegister()) {
+ if (SameRegisterKind(location)) {
return location.reg();
}
}
@@ -393,13 +393,21 @@
return kNoRegister;
}
+bool LiveInterval::SameRegisterKind(Location other) const {
+ return IsFloatingPoint()
+ ? other.IsFpuRegister()
+ : other.IsRegister();
+}
+
bool LiveInterval::NeedsTwoSpillSlots() const {
return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble;
}
Location LiveInterval::ToLocation() const {
if (HasRegister()) {
- return Location::RegisterLocation(GetRegister());
+ return IsFloatingPoint()
+ ? Location::FpuRegisterLocation(GetRegister())
+ : Location::RegisterLocation(GetRegister());
} else {
HInstruction* defined_by = GetParent()->GetDefinedBy();
if (defined_by->IsConstant()) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index d3e1c0e..8ce5ce9 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -358,6 +358,10 @@
|| (location.GetPolicy() == Location::kSameAsFirstInput
&& locations->InAt(0).GetPolicy() == Location::kRequiresRegister)) {
return position;
+ } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+ || (location.GetPolicy() == Location::kSameAsFirstInput
+ && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) {
+ return position;
}
}
}
@@ -368,7 +372,9 @@
size_t use_position = use->GetPosition();
if (use_position >= position && !use->GetIsEnvironment()) {
Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
- if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
+ if (location.IsUnallocated()
+ && (location.GetPolicy() == Location::kRequiresRegister
+ || location.GetPolicy() == Location::kRequiresFpuRegister)) {
// Return the lifetime just before the user, so that the interval has a register
// when entering the user.
return use->GetUser()->GetLifetimePosition() - 1;
@@ -502,6 +508,10 @@
// slots for spilling.
bool NeedsTwoSpillSlots() const;
+ bool IsFloatingPoint() const {
+ return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble;
+ }
+
// Converts the location of the interval to a `Location` object.
Location ToLocation() const;
@@ -513,6 +523,9 @@
bool IsTemp() const { return is_temp_; }
+ // Returns whether `other` and `this` share the same kind of register.
+ bool SameRegisterKind(Location other) const;
+
private:
ArenaAllocator* const allocator_;
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index e02a182..4eda0f3 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -24,18 +24,13 @@
HBasicBlock* block = it.Current();
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
- if (phi->HasEnvironmentUses()) {
- // TODO: Do we want to keep that phi alive?
- worklist_.Add(phi);
- phi->SetLive();
- continue;
- }
for (HUseIterator<HInstruction> it(phi->GetUses()); !it.Done(); it.Advance()) {
HUseListNode<HInstruction>* current = it.Current();
HInstruction* user = current->GetUser();
if (!user->IsPhi()) {
worklist_.Add(phi);
phi->SetLive();
+ break;
} else {
phi->SetDead();
}
@@ -76,6 +71,14 @@
current->RemoveUser(user, user_node->GetIndex());
}
}
+ if (current->HasEnvironmentUses()) {
+ for (HUseIterator<HEnvironment> it(current->GetEnvUses()); !it.Done(); it.Advance()) {
+ HUseListNode<HEnvironment>* user_node = it.Current();
+ HEnvironment* user = user_node->GetUser();
+ user->SetRawEnvAt(user_node->GetIndex(), nullptr);
+ current->RemoveEnvironmentUser(user, user_node->GetIndex());
+ }
+ }
block->RemovePhi(current->AsPhi());
}
current = next;
diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc
index a860cb7..3828142 100644
--- a/compiler/optimizing/ssa_type_propagation.cc
+++ b/compiler/optimizing/ssa_type_propagation.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "ssa_builder.h"
#include "ssa_type_propagation.h"
#include "nodes.h"
@@ -38,15 +39,31 @@
// Re-compute and update the type of the instruction. Returns
// whether or not the type was changed.
-static bool UpdateType(HPhi* phi) {
+bool SsaTypePropagation::UpdateType(HPhi* phi) {
Primitive::Type existing = phi->GetType();
- Primitive::Type new_type = Primitive::kPrimVoid;
+ Primitive::Type new_type = existing;
for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
Primitive::Type input_type = phi->InputAt(i)->GetType();
new_type = MergeTypes(new_type, input_type);
}
phi->SetType(new_type);
+
+ if (new_type == Primitive::kPrimDouble || new_type == Primitive::kPrimFloat) {
+ // If the phi is of floating point type, we need to update its inputs to that
+ // type. For inputs that are phis, we need to recompute their types.
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+ HInstruction* input = phi->InputAt(i);
+ if (input->GetType() != new_type) {
+ HInstruction* equivalent = SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
+ phi->ReplaceInput(equivalent, i);
+ if (equivalent->IsPhi()) {
+ AddToWorklist(equivalent->AsPhi());
+ }
+ }
+ }
+ }
+
return existing != new_type;
}
@@ -63,7 +80,12 @@
HPhi* phi = it.Current()->AsPhi();
// Set the initial type for the phi. Use the non back edge input for reaching
// a fixed point faster.
- phi->SetType(phi->InputAt(0)->GetType());
+ Primitive::Type phi_type = phi->GetType();
+ // We merge with the existing type, which has been set by the SSA builder.
+ DCHECK(phi_type == Primitive::kPrimVoid
+ || phi_type == Primitive::kPrimFloat
+ || phi_type == Primitive::kPrimDouble);
+ phi->SetType(MergeTypes(phi->InputAt(0)->GetType(), phi->GetType()));
AddToWorklist(phi);
}
} else {
diff --git a/compiler/optimizing/ssa_type_propagation.h b/compiler/optimizing/ssa_type_propagation.h
index 5f471a9..f4d3d63 100644
--- a/compiler/optimizing/ssa_type_propagation.h
+++ b/compiler/optimizing/ssa_type_propagation.h
@@ -34,6 +34,7 @@
void ProcessWorklist();
void AddToWorklist(HPhi* phi);
void AddDependentInstructionsToWorklist(HPhi* phi);
+ bool UpdateType(HPhi* phi);
HGraph* const graph_;
GrowableArray<HPhi*> worklist_;
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 3742913..5bfa462 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -373,7 +373,7 @@
}
} else {
// This will output the assembly.
- EXPECT_EQ(*data, *res.code) << "Outputs (and disassembly) not identical.";
+ EXPECT_EQ(*res.code, *data) << "Outputs (and disassembly) not identical.";
}
}
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index db7151c..f4c9862 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -317,7 +317,7 @@
EmitOptionalRex32(dst, src);
EmitUint8(0x0F);
EmitUint8(0x28);
- EmitXmmRegisterOperand(src.LowBits(), dst);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
}
@@ -354,7 +354,7 @@
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
- EmitOptionalRex32(dst, src);
+ EmitRex64(dst, src);
EmitUint8(0x0F);
EmitUint8(0x6E);
EmitOperand(dst.LowBits(), Operand(src));
@@ -364,7 +364,7 @@
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
- EmitOptionalRex32(src, dst);
+ EmitRex64(src, dst);
EmitUint8(0x0F);
EmitUint8(0x7E);
EmitOperand(src.LowBits(), Operand(dst));
@@ -1748,6 +1748,10 @@
EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
+void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
+ EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
+}
+
void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
uint8_t rex = 0x48 | operand.rex(); // REX.W000
if (dst.NeedsRex()) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 4ffb6b5..7e5859c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -666,6 +666,7 @@
void EmitRex64(CpuRegister reg);
void EmitRex64(CpuRegister dst, CpuRegister src);
void EmitRex64(CpuRegister dst, const Operand& operand);
+ void EmitRex64(XmmRegister dst, CpuRegister src);
// Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
@@ -692,7 +693,7 @@
inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) {
CHECK_GE(rm, 0);
CHECK_LT(rm, 8);
- buffer_.Emit<uint8_t>(0xC0 + (rm << 3) + reg);
+ buffer_.Emit<uint8_t>((0xC0 | (reg & 7)) + (rm << 3));
}
inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) {
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 69a5fa0..37a0932 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -134,6 +134,32 @@
DriverStr(RepeatRI(&x86_64::X86_64Assembler::xorq, 4U, "xorq ${imm}, %{reg}"), "xorqi");
}
+TEST_F(AssemblerX86_64Test, Movaps) {
+ GetAssembler()->movaps(x86_64::XmmRegister(x86_64::XMM0), x86_64::XmmRegister(x86_64::XMM8));
+ DriverStr("movaps %xmm8, %xmm0", "movaps");
+}
+
+TEST_F(AssemblerX86_64Test, Movd) {
+ GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM0), x86_64::CpuRegister(x86_64::R11));
+ GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM0), x86_64::CpuRegister(x86_64::RAX));
+ GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM8), x86_64::CpuRegister(x86_64::R11));
+ GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM8), x86_64::CpuRegister(x86_64::RAX));
+ GetAssembler()->movd(x86_64::CpuRegister(x86_64::R11), x86_64::XmmRegister(x86_64::XMM0));
+ GetAssembler()->movd(x86_64::CpuRegister(x86_64::RAX), x86_64::XmmRegister(x86_64::XMM0));
+ GetAssembler()->movd(x86_64::CpuRegister(x86_64::R11), x86_64::XmmRegister(x86_64::XMM8));
+ GetAssembler()->movd(x86_64::CpuRegister(x86_64::RAX), x86_64::XmmRegister(x86_64::XMM8));
+ const char* expected =
+ "movd %r11, %xmm0\n"
+ "movd %rax, %xmm0\n"
+ "movd %r11, %xmm8\n"
+ "movd %rax, %xmm8\n"
+ "movd %xmm0, %r11\n"
+ "movd %xmm0, %rax\n"
+ "movd %xmm8, %r11\n"
+ "movd %xmm8, %rax\n";
+ DriverStr(expected, "movd");
+}
+
TEST_F(AssemblerX86_64Test, Movl) {
GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::CpuRegister(x86_64::R11));
GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::CpuRegister(x86_64::R11));