Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/builder.cc                |  68
-rw-r--r--  compiler/optimizing/builder.h                 |   1
-rw-r--r--  compiler/optimizing/code_generator.cc         | 110
-rw-r--r--  compiler/optimizing/code_generator.h          |   2
-rw-r--r--  compiler/optimizing/code_generator_arm.cc     |   9
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc   |   7
-rw-r--r--  compiler/optimizing/code_generator_mips.cc    |   9
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc  |  18
-rw-r--r--  compiler/optimizing/code_generator_x86.cc     | 161
-rw-r--r--  compiler/optimizing/code_generator_x86.h      |   8
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  | 347
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h   |  11
-rw-r--r--  compiler/optimizing/inliner.cc                |   1
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc |  48
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc         |  12
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc       |  66
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc        |   6
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc      |   6
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc         | 178
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc      | 128
-rw-r--r--  compiler/optimizing/nodes.cc                  |   4
-rw-r--r--  compiler/optimizing/nodes.h                   |  80
-rw-r--r--  compiler/optimizing/nodes_x86.h               |  19
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc    |  42
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc |  77
25 files changed, 893 insertions, 525 deletions
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index c7430e7eb6..8d77daf183 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -72,74 +72,6 @@ class Temporaries : public ValueObject {
size_t index_;
};
-class SwitchTable : public ValueObject {
- public:
- SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
- : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
- int32_t table_offset = instruction.VRegB_31t();
- const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
- if (sparse) {
- CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
- } else {
- CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
- }
- num_entries_ = table[1];
- values_ = reinterpret_cast<const int32_t*>(&table[2]);
- }
-
- uint16_t GetNumEntries() const {
- return num_entries_;
- }
-
- void CheckIndex(size_t index) const {
- if (sparse_) {
- // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
- DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
- } else {
- // In a packed table, we have the starting key and num_entries_ values.
- DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
- }
- }
-
- int32_t GetEntryAt(size_t index) const {
- CheckIndex(index);
- return values_[index];
- }
-
- uint32_t GetDexPcForIndex(size_t index) const {
- CheckIndex(index);
- return dex_pc_ +
- (reinterpret_cast<const int16_t*>(values_ + index) -
- reinterpret_cast<const int16_t*>(&instruction_));
- }
-
- // Index of the first value in the table.
- size_t GetFirstValueIndex() const {
- if (sparse_) {
- // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
- return num_entries_;
- } else {
- // In a packed table, we have the starting key and num_entries_ values.
- return 1;
- }
- }
-
- private:
- const Instruction& instruction_;
- const uint32_t dex_pc_;
-
- // Whether this is a sparse-switch table (or a packed-switch one).
- const bool sparse_;
-
- // This can't be const as it needs to be computed off of the given instruction, and complicated
- // expressions in the initializer list seemed very ugly.
- uint16_t num_entries_;
-
- const int32_t* values_;
-
- DISALLOW_COPY_AND_ASSIGN(SwitchTable);
-};
-
void HGraphBuilder::InitializeLocals(uint16_t count) {
graph_->SetNumberOfVRegs(count);
locals_.resize(count);
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 1d604e7135..93e17d6422 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -30,7 +30,6 @@
namespace art {
class Instruction;
-class SwitchTable;
class HGraphBuilder : public ValueObject {
public:
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index a3bbfdbd27..e1b83f05d6 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -629,8 +629,76 @@ size_t CodeGenerator::ComputeStackMapsSize() {
return stack_map_stream_.PrepareForFillIn();
}
-void CodeGenerator::BuildStackMaps(MemoryRegion region) {
+static void CheckCovers(uint32_t dex_pc,
+ const HGraph& graph,
+ const CodeInfo& code_info,
+ const ArenaVector<HSuspendCheck*>& loop_headers,
+ ArenaVector<size_t>* covered) {
+ StackMapEncoding encoding = code_info.ExtractEncoding();
+ for (size_t i = 0; i < loop_headers.size(); ++i) {
+ if (loop_headers[i]->GetDexPc() == dex_pc) {
+ if (graph.IsCompilingOsr()) {
+ DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid());
+ }
+ ++(*covered)[i];
+ }
+ }
+}
+
+// Debug helper to ensure loop entries in compiled code are matched by
+// dex branch instructions.
+static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
+ const CodeInfo& code_info,
+ const DexFile::CodeItem& code_item) {
+ if (graph.HasTryCatch()) {
+ // One can write loops through try/catch, which we do not support for OSR anyway.
+ return;
+ }
+ ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc));
+ for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) {
+ if (it.Current()->IsLoopHeader()) {
+ HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck();
+ if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
+ loop_headers.push_back(suspend_check);
+ }
+ }
+ }
+ ArenaVector<size_t> covered(loop_headers.size(), 0, graph.GetArena()->Adapter(kArenaAllocMisc));
+ const uint16_t* code_ptr = code_item.insns_;
+ const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
+
+ size_t dex_pc = 0;
+ while (code_ptr < code_end) {
+ const Instruction& instruction = *Instruction::At(code_ptr);
+ if (instruction.IsBranch()) {
+ uint32_t target = dex_pc + instruction.GetTargetOffset();
+ CheckCovers(target, graph, code_info, loop_headers, &covered);
+ } else if (instruction.IsSwitch()) {
+ SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
+ uint16_t num_entries = table.GetNumEntries();
+ size_t offset = table.GetFirstValueIndex();
+
+ // Use a larger loop counter type to avoid overflow issues.
+ for (size_t i = 0; i < num_entries; ++i) {
+ // The target of the case.
+ uint32_t target = dex_pc + table.GetEntryAt(i + offset);
+ CheckCovers(target, graph, code_info, loop_headers, &covered);
+ }
+ }
+ dex_pc += instruction.SizeInCodeUnits();
+ code_ptr += instruction.SizeInCodeUnits();
+ }
+
+ for (size_t i = 0; i < covered.size(); ++i) {
+ DCHECK_NE(covered[i], 0u) << "Loop in compiled code has no dex branch equivalent";
+ }
+}
+
+void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) {
stack_map_stream_.FillIn(region);
+ if (kIsDebugBuild) {
+ CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item);
+ }
}
void CodeGenerator::RecordPcInfo(HInstruction* instruction,
@@ -705,6 +773,46 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
EmitEnvironment(instruction->GetEnvironment(), slow_path);
stack_map_stream_.EndStackMapEntry();
+
+ HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
+ if (instruction->IsSuspendCheck() &&
+ (info != nullptr) &&
+ graph_->IsCompilingOsr() &&
+ (inlining_depth == 0)) {
+ DCHECK_EQ(info->GetSuspendCheck(), instruction);
+ // We duplicate the stack map as a marker that this stack map can be an OSR entry.
+ // Duplicating it avoids having the runtime recognize and skip an OSR stack map.
+ DCHECK(info->IsIrreducible());
+ stack_map_stream_.BeginStackMapEntry(
+ dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0);
+ EmitEnvironment(instruction->GetEnvironment(), slow_path);
+ stack_map_stream_.EndStackMapEntry();
+ if (kIsDebugBuild) {
+ HEnvironment* environment = instruction->GetEnvironment();
+ for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
+ HInstruction* in_environment = environment->GetInstructionAt(i);
+ if (in_environment != nullptr) {
+ DCHECK(in_environment->IsPhi() || in_environment->IsConstant());
+ Location location = environment->GetLocationAt(i);
+ DCHECK(location.IsStackSlot() ||
+ location.IsDoubleStackSlot() ||
+ location.IsConstant() ||
+ location.IsInvalid());
+ if (location.IsStackSlot() || location.IsDoubleStackSlot()) {
+ DCHECK_LT(location.GetStackIndex(), static_cast<int32_t>(GetFrameSize()));
+ }
+ }
+ }
+ }
+ } else if (kIsDebugBuild) {
+ // Ensure stack maps are unique, by checking that the native pc in the stack map
+ // last emitted is different than the native pc of the stack map just emitted.
+ size_t number_of_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
+ if (number_of_stack_maps > 1) {
+ DCHECK_NE(stack_map_stream_.GetStackMap(number_of_stack_maps - 1).native_pc_offset,
+ stack_map_stream_.GetStackMap(number_of_stack_maps - 2).native_pc_offset);
+ }
+ }
}
bool CodeGenerator::HasStackMapAtCurrentPc() {
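
The new CheckLoopEntriesCanBeUsedForOsr walks the dex bytecode and, for every loop-header dex pc, counts the branch and switch targets that land on it, failing if any header is never targeted. A minimal standalone sketch of that counting check, using plain standard containers rather than ART's arena types (names below are illustrative, not ART's):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <map>
    #include <utility>
    #include <vector>

    // Illustrative sketch only (not ART types): every loop-header pc must be
    // the target of at least one branch/switch edge found in the bytecode.
    void CheckLoopHeadersCovered(const std::vector<uint32_t>& loop_header_pcs,
                                 const std::vector<std::pair<uint32_t, uint32_t>>& branch_edges) {
      std::map<uint32_t, size_t> covered;
      for (uint32_t pc : loop_header_pcs) {
        covered[pc] = 0;
      }
      for (const std::pair<uint32_t, uint32_t>& edge : branch_edges) {
        auto it = covered.find(edge.second);  // edge.second is the branch target pc
        if (it != covered.end()) {
          ++it->second;
        }
      }
      for (const auto& entry : covered) {
        assert(entry.second > 0 && "loop header has no dex branch targeting it");
      }
    }
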
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 4f8f146753..0a688cf649 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -288,7 +288,7 @@ class CodeGenerator {
slow_paths_.push_back(slow_path);
}
- void BuildStackMaps(MemoryRegion region);
+ void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
size_t ComputeStackMapsSize();
bool IsLeafMethod() const {
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index c2d9edd43e..e43493280a 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3750,6 +3750,7 @@ void LocationsBuilderARM::VisitCompare(HCompare* compare) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
switch (compare->InputAt(0)->GetType()) {
+ case Primitive::kPrimInt:
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
@@ -3779,6 +3780,13 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
Primitive::Type type = compare->InputAt(0)->GetType();
Condition less_cond;
switch (type) {
+ case Primitive::kPrimInt: {
+ __ LoadImmediate(out, 0);
+ __ cmp(left.AsRegister<Register>(),
+ ShifterOperand(right.AsRegister<Register>())); // Signed compare.
+ less_cond = LT;
+ break;
+ }
case Primitive::kPrimLong: {
__ cmp(left.AsRegisterPairHigh<Register>(),
ShifterOperand(right.AsRegisterPairHigh<Register>())); // Signed compare.
@@ -3808,6 +3816,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
LOG(FATAL) << "Unexpected compare type " << type;
UNREACHABLE();
}
+
__ b(&done, EQ);
__ b(&less, less_cond);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4179fabe48..e20e04400f 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2408,6 +2408,7 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
Primitive::Type in_type = compare->InputAt(0)->GetType();
switch (in_type) {
+ case Primitive::kPrimInt:
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
@@ -2436,14 +2437,14 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
// 1 if: left > right
// -1 if: left < right
switch (in_type) {
+ case Primitive::kPrimInt:
case Primitive::kPrimLong: {
Register result = OutputRegister(compare);
Register left = InputRegisterAt(compare, 0);
Operand right = InputOperandAt(compare, 1);
-
__ Cmp(left, right);
- __ Cset(result, ne);
- __ Cneg(result, result, lt);
+ __ Cset(result, ne); // result == +1 if NE or 0 otherwise
+ __ Cneg(result, result, lt); // result == -1 if LT or unchanged otherwise
break;
}
case Primitive::kPrimFloat:
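
The Cset/Cneg pair commented above turns the flags set by Cmp into the -1/0/+1 value HCompare requires. Roughly, in C++ (illustrative, not ART code):

    #include <cstdint>

    int CompareViaCsetCneg(int64_t left, int64_t right) {
      int result = (left != right) ? 1 : 0;  // Cset result, ne
      if (left < right) {
        result = -result;                    // Cneg result, result, lt
      }
      return result;                         // -1, 0, or +1
    }
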
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 961fe62932..e9c0b6ae79 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -2123,6 +2123,7 @@ void LocationsBuilderMIPS::VisitCompare(HCompare* compare) {
new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
switch (in_type) {
+ case Primitive::kPrimInt:
case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
@@ -2153,6 +2154,14 @@ void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) {
// 1 if: left > right
// -1 if: left < right
switch (in_type) {
+ case Primitive::kPrimInt: {
+ Register lhs = locations->InAt(0).AsRegister<Register>();
+ Register rhs = locations->InAt(1).AsRegister<Register>();
+ __ Slt(TMP, lhs, rhs);
+ __ Slt(res, rhs, lhs);
+ __ Subu(res, res, TMP);
+ break;
+ }
case Primitive::kPrimLong: {
MipsLabel done;
Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
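
The new int case builds its result without branches from two set-less-than operations. A plain C++ sketch of the same idea:

    #include <cstdint>

    // Illustrative only: branch-free three-way compare via two slt results.
    int32_t CompareViaSlt(int32_t lhs, int32_t rhs) {
      int32_t tmp = (lhs < rhs) ? 1 : 0;  // Slt TMP, lhs, rhs
      int32_t res = (rhs < lhs) ? 1 : 0;  // Slt res, rhs, lhs
      return res - tmp;                   // Subu res, res, TMP  ->  -1, 0, or +1
    }
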
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3e1563c66b..da98a89f65 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1763,6 +1763,7 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
switch (in_type) {
+ case Primitive::kPrimInt:
case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
@@ -1791,16 +1792,25 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) {
// 1 if: left > right
// -1 if: left < right
switch (in_type) {
+ case Primitive::kPrimInt:
case Primitive::kPrimLong: {
GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
Location rhs_location = locations->InAt(1);
bool use_imm = rhs_location.IsConstant();
GpuRegister rhs = ZERO;
if (use_imm) {
- int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
- if (value != 0) {
- rhs = AT;
- __ LoadConst64(rhs, value);
+ if (in_type == Primitive::kPrimInt) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()->AsConstant());
+ if (value != 0) {
+ rhs = AT;
+ __ LoadConst32(rhs, value);
+ }
+ } else {
+ int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+ if (value != 0) {
+ rhs = AT;
+ __ LoadConst64(rhs, value);
+ }
}
} else {
rhs = rhs_location.AsRegister<GpuRegister>();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index da054baa1c..de62010102 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1350,11 +1350,7 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
int32_t val_high = High32Bits(value);
int32_t val_low = Low32Bits(value);
- if (val_high == 0) {
- __ testl(left_high, left_high);
- } else {
- __ cmpl(left_high, Immediate(val_high));
- }
+ codegen_->Compare32BitValue(left_high, val_high);
if (if_cond == kCondNE) {
__ j(X86Condition(true_high_cond), true_label);
} else if (if_cond == kCondEQ) {
@@ -1364,11 +1360,7 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
__ j(X86Condition(false_high_cond), false_label);
}
// Must be equal high, so compare the lows.
- if (val_low == 0) {
- __ testl(left_low, left_low);
- } else {
- __ cmpl(left_low, Immediate(val_low));
- }
+ codegen_->Compare32BitValue(left_low, val_low);
} else {
Register right_high = right.AsRegisterPairHigh<Register>();
Register right_low = right.AsRegisterPairLow<Register>();
@@ -1389,6 +1381,40 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
__ j(final_condition, true_label);
}
+void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
+ Location rhs,
+ HInstruction* insn,
+ bool is_double) {
+ HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
+ if (is_double) {
+ if (rhs.IsFpuRegister()) {
+ __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+ } else if (const_area != nullptr) {
+ DCHECK(const_area->IsEmittedAtUseSite());
+ __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(
+ const_area->GetConstant()->AsDoubleConstant()->GetValue(),
+ const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+ } else {
+ DCHECK(rhs.IsDoubleStackSlot());
+ __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
+ }
+ } else {
+ if (rhs.IsFpuRegister()) {
+ __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+ } else if (const_area != nullptr) {
+ DCHECK(const_area->IsEmittedAtUseSite());
+ __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(
+ const_area->GetConstant()->AsFloatConstant()->GetValue(),
+ const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+ } else {
+ DCHECK(rhs.IsStackSlot());
+ __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
+ }
+ }
+}
+
template<class LabelType>
void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
LabelType* true_target_in,
@@ -1409,11 +1435,11 @@ void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condi
GenerateLongComparesAndJumps(condition, true_target, false_target);
break;
case Primitive::kPrimFloat:
- __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+ GenerateFPCompare(left, right, condition, false);
GenerateFPJumps(condition, true_target, false_target);
break;
case Primitive::kPrimDouble:
- __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+ GenerateFPCompare(left, right, condition, true);
GenerateFPJumps(condition, true_target, false_target);
break;
default:
@@ -1513,11 +1539,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio
__ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
} else if (rhs.IsConstant()) {
int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- if (constant == 0) {
- __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
- } else {
- __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
- }
+ codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
} else {
__ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
}
@@ -1665,7 +1687,13 @@ void LocationsBuilderX86::HandleCondition(HCondition* cond) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
+ DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
+ } else if (cond->InputAt(1)->IsConstant()) {
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(1, Location::Any());
+ }
if (!cond->IsEmittedAtUseSite()) {
locations->SetOut(Location::RequiresRegister());
}
@@ -1704,11 +1732,7 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
__ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
} else if (rhs.IsConstant()) {
int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- if (constant == 0) {
- __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
- } else {
- __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
- }
+ codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
} else {
__ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
}
@@ -1719,11 +1743,11 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
GenerateLongComparesAndJumps(cond, &true_label, &false_label);
break;
case Primitive::kPrimFloat:
- __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+ GenerateFPCompare(lhs, rhs, cond, false);
GenerateFPJumps(cond, &true_label, &false_label);
break;
case Primitive::kPrimDouble:
- __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+ GenerateFPCompare(lhs, rhs, cond, true);
GenerateFPJumps(cond, &true_label, &false_label);
break;
}
@@ -2159,6 +2183,32 @@ void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
}
}
+void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
+ DCHECK(Primitive::IsFloatingPointType(neg->GetType()));
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresFpuRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
+ LocationSummary* locations = neg->GetLocations();
+ Location out = locations->Out();
+ DCHECK(locations->InAt(0).Equals(out));
+
+ Register constant_area = locations->InAt(1).AsRegister<Register>();
+ XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ if (neg->GetType() == Primitive::kPrimFloat) {
+ __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000), constant_area));
+ __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
+ } else {
+ __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), constant_area));
+ __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
+ }
+}
+
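
The masks loaded from the constant area are the IEEE-754 sign bits, so the xorps/xorpd negates the value by flipping a single bit. A hedged standalone illustration of the same trick:

    #include <cstdint>
    #include <cstring>

    // Illustrative only: FP negation as a sign-bit flip, the effect of the
    // xorps against the 0x80000000 constant-area mask above.
    float NegateViaSignBit(float value) {
      uint32_t bits;
      std::memcpy(&bits, &value, sizeof(bits));
      bits ^= UINT32_C(0x80000000);  // flip only the IEEE-754 sign bit
      std::memcpy(&value, &bits, sizeof(bits));
      return value;
    }
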
void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
Primitive::Type result_type = conversion->GetResultType();
Primitive::Type input_type = conversion->GetInputType();
@@ -4077,6 +4127,7 @@ void LocationsBuilderX86::VisitCompare(HCompare* compare) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
switch (compare->InputAt(0)->GetType()) {
+ case Primitive::kPrimInt:
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
@@ -4086,7 +4137,13 @@ void LocationsBuilderX86::VisitCompare(HCompare* compare) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
+ DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
+ } else if (compare->InputAt(1)->IsConstant()) {
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(1, Location::Any());
+ }
locations->SetOut(Location::RequiresRegister());
break;
}
@@ -4102,7 +4159,21 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
Location right = locations->InAt(1);
NearLabel less, greater, done;
+ Condition less_cond = kLess;
+
switch (compare->InputAt(0)->GetType()) {
+ case Primitive::kPrimInt: {
+ Register left_reg = left.AsRegister<Register>();
+ if (right.IsConstant()) {
+ int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
+ codegen_->Compare32BitValue(left_reg, value);
+ } else if (right.IsStackSlot()) {
+ __ cmpl(left_reg, Address(ESP, right.GetStackIndex()));
+ } else {
+ __ cmpl(left_reg, right.AsRegister<Register>());
+ }
+ break;
+ }
case Primitive::kPrimLong: {
Register left_low = left.AsRegisterPairLow<Register>();
Register left_high = left.AsRegisterPairHigh<Register>();
@@ -4124,11 +4195,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
__ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
} else {
DCHECK(right_is_const) << right;
- if (val_high == 0) {
- __ testl(left_high, left_high);
- } else {
- __ cmpl(left_high, Immediate(val_high));
- }
+ codegen_->Compare32BitValue(left_high, val_high);
}
__ j(kLess, &less); // Signed compare.
__ j(kGreater, &greater); // Signed compare.
@@ -4138,30 +4205,30 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
__ cmpl(left_low, Address(ESP, right.GetStackIndex()));
} else {
DCHECK(right_is_const) << right;
- if (val_low == 0) {
- __ testl(left_low, left_low);
- } else {
- __ cmpl(left_low, Immediate(val_low));
- }
+ codegen_->Compare32BitValue(left_low, val_low);
}
+ less_cond = kBelow; // for CF (unsigned).
break;
}
case Primitive::kPrimFloat: {
- __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+ GenerateFPCompare(left, right, compare, false);
__ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+ less_cond = kBelow; // for CF (floats).
break;
}
case Primitive::kPrimDouble: {
- __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+ GenerateFPCompare(left, right, compare, true);
__ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+ less_cond = kBelow; // for CF (floats).
break;
}
default:
LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
}
+
__ movl(out, Immediate(0));
__ j(kEqual, &done);
- __ j(kBelow, &less); // kBelow is for CF (unsigned & floats).
+ __ j(less_cond, &less);
__ Bind(&greater);
__ movl(out, Immediate(1));
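
For reference, the epilogue above materializes the usual three-way compare result: 0 on equality, -1 through the less label, +1 through greater, with NaN routed by the kUnordered jump according to the compare's gt/lt bias. A small sketch (not ART code) of that contract for the float case:

    #include <cmath>

    int FpCompareSemantics(float left, float right, bool gt_bias) {
      if (std::isnan(left) || std::isnan(right)) {
        return gt_bias ? 1 : -1;  // the kUnordered jump above
      }
      if (left == right) {
        return 0;
      }
      return (left < right) ? -1 : 1;
    }
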
@@ -7121,6 +7188,22 @@ Address CodeGeneratorX86::LiteralInt64Address(int64_t v, Register reg) {
return Address(reg, kDummy32BitOffset, fixup);
}
+void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
+ if (value == 0) {
+ __ xorl(dest, dest);
+ } else {
+ __ movl(dest, Immediate(value));
+ }
+}
+
+void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
+ if (value == 0) {
+ __ testl(dest, dest);
+ } else {
+ __ cmpl(dest, Immediate(value));
+ }
+}
+
Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
Register reg,
Register value) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 0aef478569..45e8ffa84f 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -296,6 +296,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
HBasicBlock* switch_block,
HBasicBlock* default_block);
+ void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
+
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
@@ -450,6 +452,12 @@ class CodeGeneratorX86 : public CodeGenerator {
Address LiteralInt32Address(int32_t v, Register reg);
Address LiteralInt64Address(int64_t v, Register reg);
+ // Load a 32-bit value into a register in the most efficient manner.
+ void Load32BitValue(Register dest, int32_t value);
+
+ // Compare a register with a 32-bit value in the most efficient manner.
+ void Compare32BitValue(Register dest, int32_t value);
+
Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);
void Finalize(CodeAllocator* allocator) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 6795488769..99396cd983 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1126,30 +1126,43 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
return;
}
if (destination.IsRegister()) {
+ CpuRegister dest = destination.AsRegister<CpuRegister>();
if (source.IsRegister()) {
- __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+ __ movq(dest, source.AsRegister<CpuRegister>());
} else if (source.IsFpuRegister()) {
- __ movd(destination.AsRegister<CpuRegister>(), source.AsFpuRegister<XmmRegister>());
+ __ movd(dest, source.AsFpuRegister<XmmRegister>());
} else if (source.IsStackSlot()) {
- __ movl(destination.AsRegister<CpuRegister>(),
- Address(CpuRegister(RSP), source.GetStackIndex()));
+ __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
+ } else if (source.IsConstant()) {
+ HConstant* constant = source.GetConstant();
+ if (constant->IsLongConstant()) {
+ Load64BitValue(dest, constant->AsLongConstant()->GetValue());
+ } else {
+ Load32BitValue(dest, GetInt32ValueOf(constant));
+ }
} else {
DCHECK(source.IsDoubleStackSlot());
- __ movq(destination.AsRegister<CpuRegister>(),
- Address(CpuRegister(RSP), source.GetStackIndex()));
+ __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
}
} else if (destination.IsFpuRegister()) {
+ XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
if (source.IsRegister()) {
- __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<CpuRegister>());
+ __ movd(dest, source.AsRegister<CpuRegister>());
} else if (source.IsFpuRegister()) {
- __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
+ __ movaps(dest, source.AsFpuRegister<XmmRegister>());
+ } else if (source.IsConstant()) {
+ HConstant* constant = source.GetConstant();
+ int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+ if (constant->IsFloatConstant()) {
+ Load32BitValue(dest, static_cast<int32_t>(value));
+ } else {
+ Load64BitValue(dest, value);
+ }
} else if (source.IsStackSlot()) {
- __ movss(destination.AsFpuRegister<XmmRegister>(),
- Address(CpuRegister(RSP), source.GetStackIndex()));
+ __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
} else {
DCHECK(source.IsDoubleStackSlot());
- __ movsd(destination.AsFpuRegister<XmmRegister>(),
- Address(CpuRegister(RSP), source.GetStackIndex()));
+ __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
}
} else if (destination.IsStackSlot()) {
if (source.IsRegister()) {
@@ -1345,42 +1358,44 @@ void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
__ j(X86_64FPCondition(cond->GetCondition()), true_label);
}
-template<class LabelType>
-void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
- LabelType* true_target_in,
- LabelType* false_target_in) {
- // Generated branching requires both targets to be explicit. If either of the
- // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
- LabelType fallthrough_target;
- LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
- LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
-
+void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
LocationSummary* locations = condition->GetLocations();
+
Location left = locations->InAt(0);
Location right = locations->InAt(1);
-
Primitive::Type type = condition->InputAt(0)->GetType();
switch (type) {
- case Primitive::kPrimLong: {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot: {
CpuRegister left_reg = left.AsRegister<CpuRegister>();
if (right.IsConstant()) {
- int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
- if (IsInt<32>(value)) {
- if (value == 0) {
- __ testq(left_reg, left_reg);
- } else {
- __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
- }
+ int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
+ if (value == 0) {
+ __ testl(left_reg, left_reg);
} else {
- // Value won't fit in a 32-bit integer.
- __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
+ __ cmpl(left_reg, Immediate(value));
}
+ } else if (right.IsStackSlot()) {
+ __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+ } else {
+ __ cmpl(left_reg, right.AsRegister<CpuRegister>());
+ }
+ break;
+ }
+ case Primitive::kPrimLong: {
+ CpuRegister left_reg = left.AsRegister<CpuRegister>();
+ if (right.IsConstant()) {
+ int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+ codegen_->Compare64BitValue(left_reg, value);
} else if (right.IsDoubleStackSlot()) {
__ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
} else {
__ cmpq(left_reg, right.AsRegister<CpuRegister>());
}
- __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
break;
}
case Primitive::kPrimFloat: {
@@ -1395,7 +1410,6 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* co
__ ucomiss(left.AsFpuRegister<XmmRegister>(),
Address(CpuRegister(RSP), right.GetStackIndex()));
}
- GenerateFPJumps(condition, true_target, false_target);
break;
}
case Primitive::kPrimDouble: {
@@ -1410,6 +1424,38 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* co
__ ucomisd(left.AsFpuRegister<XmmRegister>(),
Address(CpuRegister(RSP), right.GetStackIndex()));
}
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected condition type " << type;
+ }
+}
+
+template<class LabelType>
+void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
+ LabelType* true_target_in,
+ LabelType* false_target_in) {
+ // Generated branching requires both targets to be explicit. If either of the
+ // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
+ LabelType fallthrough_target;
+ LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+ LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+
+ // Generate the comparison to set the CC.
+ GenerateCompareTest(condition);
+
+ // Now generate the correct jump(s).
+ Primitive::Type type = condition->InputAt(0)->GetType();
+ switch (type) {
+ case Primitive::kPrimLong: {
+ __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
+ break;
+ }
+ case Primitive::kPrimFloat: {
+ GenerateFPJumps(condition, true_target, false_target);
+ break;
+ }
+ case Primitive::kPrimDouble: {
GenerateFPJumps(condition, true_target, false_target);
break;
}
@@ -1508,11 +1554,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc
__ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
} else if (rhs.IsConstant()) {
int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- if (constant == 0) {
- __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
- } else {
- __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
- }
+ codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
} else {
__ cmpl(lhs.AsRegister<CpuRegister>(),
Address(CpuRegister(RSP), rhs.GetStackIndex()));
@@ -1564,14 +1606,37 @@ void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
/* false_target */ nullptr);
}
+static bool SelectCanUseCMOV(HSelect* select) {
+ // There are no conditional move instructions for XMMs.
+ if (Primitive::IsFloatingPointType(select->GetType())) {
+ return false;
+ }
+
+ // A FP condition doesn't generate the single CC that we need.
+ HInstruction* condition = select->GetCondition();
+ if (condition->IsCondition() &&
+ Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
+ return false;
+ }
+
+ // We can generate a CMOV for this Select.
+ return true;
+}
+
void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
if (Primitive::IsFloatingPointType(select->GetType())) {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ // Since we can't use CMOV, there is no need to force 'true' into a register.
+ locations->SetInAt(1, Location::Any());
} else {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (SelectCanUseCMOV(select)) {
+ locations->SetInAt(1, Location::RequiresRegister());
+ } else {
+ // Since we can't use CMOV, there is no need to force 'true' into a register.
+ locations->SetInAt(1, Location::Any());
+ }
}
if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
locations->SetInAt(2, Location::RequiresRegister());
@@ -1581,13 +1646,52 @@ void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
LocationSummary* locations = select->GetLocations();
- NearLabel false_target;
- GenerateTestAndBranch<NearLabel>(select,
- /* condition_input_index */ 2,
- /* true_target */ nullptr,
- &false_target);
- codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
- __ Bind(&false_target);
+ if (SelectCanUseCMOV(select)) {
+ // If both the condition and the source types are integer, we can generate
+ // a CMOV to implement Select.
+ CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister value_true = locations->InAt(1).AsRegister<CpuRegister>();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+
+ HInstruction* select_condition = select->GetCondition();
+ Condition cond = kNotEqual;
+
+ // Figure out how to test the 'condition'.
+ if (select_condition->IsCondition()) {
+ HCondition* condition = select_condition->AsCondition();
+ if (!condition->IsEmittedAtUseSite()) {
+ // This was a previously materialized condition.
+ // Can we use the existing condition code?
+ if (AreEflagsSetFrom(condition, select)) {
+ // Materialization was the previous instruction. Condition codes are right.
+ cond = X86_64IntegerCondition(condition->GetCondition());
+ } else {
+ // No, we have to recreate the condition code.
+ CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
+ __ testl(cond_reg, cond_reg);
+ }
+ } else {
+ GenerateCompareTest(condition);
+ cond = X86_64IntegerCondition(condition->GetCondition());
+ }
+ } else {
+ // Must be a boolean condition, which needs to be compared to 0.
+ CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
+ __ testl(cond_reg, cond_reg);
+ }
+
+ // If the condition is true, overwrite the output, which already contains false.
+ // Generate the correct sized CMOV.
+ __ cmov(cond, value_false, value_true, select->GetType() == Primitive::kPrimLong);
+ } else {
+ NearLabel false_target;
+ GenerateTestAndBranch<NearLabel>(select,
+ /* condition_input_index */ 2,
+ /* true_target */ nullptr,
+ &false_target);
+ codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+ __ Bind(&false_target);
+ }
}
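
The CMOV path keeps the false input in the output register and conditionally overwrites it with the true input, so no branch is emitted. A minimal sketch of what the generated sequence computes (illustrative C++, not ART code):

    #include <cstdint>

    int64_t SelectViaCmov(bool condition, int64_t value_true, int64_t value_false) {
      int64_t out = value_false;  // output already holds the 'false' input
      if (condition) {            // emitted as a cmovcc, not a branch
        out = value_true;
      }
      return out;
    }
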
void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -1691,11 +1795,7 @@ void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
__ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
} else if (rhs.IsConstant()) {
int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- if (constant == 0) {
- __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
- } else {
- __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
- }
+ codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
} else {
__ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
}
@@ -1709,16 +1809,7 @@ void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
__ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
} else if (rhs.IsConstant()) {
int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
- if (IsInt<32>(value)) {
- if (value == 0) {
- __ testq(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
- } else {
- __ cmpq(lhs.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
- }
- } else {
- // Value won't fit in an int.
- __ cmpq(lhs.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
- }
+ codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
} else {
__ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
}
@@ -1850,6 +1941,7 @@ void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
switch (compare->InputAt(0)->GetType()) {
+ case Primitive::kPrimInt:
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
@@ -1876,21 +1968,26 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
NearLabel less, greater, done;
Primitive::Type type = compare->InputAt(0)->GetType();
+ Condition less_cond = kLess;
+
switch (type) {
+ case Primitive::kPrimInt: {
+ CpuRegister left_reg = left.AsRegister<CpuRegister>();
+ if (right.IsConstant()) {
+ int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
+ codegen_->Compare32BitValue(left_reg, value);
+ } else if (right.IsStackSlot()) {
+ __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+ } else {
+ __ cmpl(left_reg, right.AsRegister<CpuRegister>());
+ }
+ break;
+ }
case Primitive::kPrimLong: {
CpuRegister left_reg = left.AsRegister<CpuRegister>();
if (right.IsConstant()) {
int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
- if (IsInt<32>(value)) {
- if (value == 0) {
- __ testq(left_reg, left_reg);
- } else {
- __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
- }
- } else {
- // Value won't fit in an int.
- __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
- }
+ codegen_->Compare64BitValue(left_reg, value);
} else if (right.IsDoubleStackSlot()) {
__ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
} else {
@@ -1909,6 +2006,7 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
__ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
}
__ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+ less_cond = kBelow; // ucomis{s,d} sets CF
break;
}
case Primitive::kPrimDouble: {
@@ -1922,14 +2020,16 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
__ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
}
__ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+ less_cond = kBelow; // ucomis{s,d} sets CF
break;
}
default:
LOG(FATAL) << "Unexpected compare type " << type;
}
+
__ movl(out, Immediate(0));
__ j(kEqual, &done);
- __ j(type == Primitive::kPrimLong ? kLess : kBelow, &less); // ucomis{s,d} sets CF (kBelow)
+ __ j(less_cond, &less);
__ Bind(&greater);
__ movl(out, Immediate(1));
@@ -2750,11 +2850,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
} else if (in.IsConstant()) {
int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
XmmRegister dest = out.AsFpuRegister<XmmRegister>();
- if (v == 0) {
- __ xorps(dest, dest);
- } else {
- __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
- }
+ codegen_->Load32BitValue(dest, static_cast<float>(v));
} else {
__ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
Address(CpuRegister(RSP), in.GetStackIndex()), false);
@@ -2768,11 +2864,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
} else if (in.IsConstant()) {
int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
XmmRegister dest = out.AsFpuRegister<XmmRegister>();
- if (v == 0) {
- __ xorps(dest, dest);
- } else {
- __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
- }
+ codegen_->Load32BitValue(dest, static_cast<float>(v));
} else {
__ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
Address(CpuRegister(RSP), in.GetStackIndex()), true);
@@ -2786,11 +2878,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
} else if (in.IsConstant()) {
double v = in.GetConstant()->AsDoubleConstant()->GetValue();
XmmRegister dest = out.AsFpuRegister<XmmRegister>();
- if (bit_cast<int64_t, double>(v) == 0) {
- __ xorps(dest, dest);
- } else {
- __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
- }
+ codegen_->Load32BitValue(dest, static_cast<float>(v));
} else {
__ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
Address(CpuRegister(RSP), in.GetStackIndex()));
@@ -2817,11 +2905,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
} else if (in.IsConstant()) {
int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
XmmRegister dest = out.AsFpuRegister<XmmRegister>();
- if (v == 0) {
- __ xorpd(dest, dest);
- } else {
- __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
- }
+ codegen_->Load64BitValue(dest, static_cast<double>(v));
} else {
__ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
Address(CpuRegister(RSP), in.GetStackIndex()), false);
@@ -2835,11 +2919,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
} else if (in.IsConstant()) {
int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
XmmRegister dest = out.AsFpuRegister<XmmRegister>();
- if (v == 0) {
- __ xorpd(dest, dest);
- } else {
- __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
- }
+ codegen_->Load64BitValue(dest, static_cast<double>(v));
} else {
__ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
Address(CpuRegister(RSP), in.GetStackIndex()), true);
@@ -2853,11 +2933,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
} else if (in.IsConstant()) {
float v = in.GetConstant()->AsFloatConstant()->GetValue();
XmmRegister dest = out.AsFpuRegister<XmmRegister>();
- if (bit_cast<int32_t, float>(v) == 0) {
- __ xorpd(dest, dest);
- } else {
- __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
- }
+ codegen_->Load64BitValue(dest, static_cast<double>(v));
} else {
__ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
Address(CpuRegister(RSP), in.GetStackIndex()));
@@ -5196,18 +5272,12 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
}
} else if (constant->IsFloatConstant()) {
float fp_value = constant->AsFloatConstant()->GetValue();
- int32_t value = bit_cast<int32_t, float>(fp_value);
if (destination.IsFpuRegister()) {
XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
- if (value == 0) {
- // easy FP 0.0.
- __ xorps(dest, dest);
- } else {
- __ movss(dest, codegen_->LiteralFloatAddress(fp_value));
- }
+ codegen_->Load32BitValue(dest, fp_value);
} else {
DCHECK(destination.IsStackSlot()) << destination;
- Immediate imm(value);
+ Immediate imm(bit_cast<int32_t, float>(fp_value));
__ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
}
} else {
@@ -5216,11 +5286,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
int64_t value = bit_cast<int64_t, double>(fp_value);
if (destination.IsFpuRegister()) {
XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
- if (value == 0) {
- __ xorpd(dest, dest);
- } else {
- __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value));
- }
+ codegen_->Load64BitValue(dest, fp_value);
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
codegen_->Store64BitValueToStack(destination, value);
@@ -6467,6 +6533,51 @@ void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
}
}
+void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
+ if (value == 0) {
+ __ xorps(dest, dest);
+ } else {
+ __ movss(dest, LiteralInt32Address(value));
+ }
+}
+
+void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
+ if (value == 0) {
+ __ xorpd(dest, dest);
+ } else {
+ __ movsd(dest, LiteralInt64Address(value));
+ }
+}
+
+void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
+ Load32BitValue(dest, bit_cast<int32_t, float>(value));
+}
+
+void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
+ Load64BitValue(dest, bit_cast<int64_t, double>(value));
+}
+
+void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
+ if (value == 0) {
+ __ testl(dest, dest);
+ } else {
+ __ cmpl(dest, Immediate(value));
+ }
+}
+
+void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
+ if (IsInt<32>(value)) {
+ if (value == 0) {
+ __ testq(dest, dest);
+ } else {
+ __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
+ }
+ } else {
+ // Value won't fit in an int.
+ __ cmpq(dest, LiteralInt64Address(value));
+ }
+}
+
void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
DCHECK(dest.IsDoubleStackSlot());
if (IsInt<32>(value)) {
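
Compare64BitValue and Store64BitValueToStack both branch on IsInt<32> because x86-64 compare and store immediates are sign-extended 32-bit values; wider constants have to go through the constant area or be split. A quick sketch of that fits-in-signed-32 test (it mirrors, but is not, ART's IsInt helper):

    #include <cstdint>
    #include <limits>

    constexpr bool FitsInSignExtended32(int64_t value) {
      return value >= std::numeric_limits<int32_t>::min() &&
             value <= std::numeric_limits<int32_t>::max();
    }

    static_assert(FitsInSignExtended32(-1), "small values use an immediate");
    static_assert(!FitsInSignExtended32(INT64_C(1) << 40), "wide values need the literal pool");
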
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 318087eb9c..72dddfddfa 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -264,6 +264,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
void GenerateExplicitNullCheck(HNullCheck* instruction);
void PushOntoFPStack(Location source, uint32_t temp_offset,
uint32_t stack_adjustment, bool is_float);
+ void GenerateCompareTest(HCondition* condition);
template<class LabelType>
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
@@ -478,9 +479,17 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Address LiteralInt32Address(int32_t v);
Address LiteralInt64Address(int64_t v);
- // Load a 32/64 bit value into a register in the most efficient manner.
+ // Load a 32/64-bit value into a register in the most efficient manner.
void Load32BitValue(CpuRegister dest, int32_t value);
void Load64BitValue(CpuRegister dest, int64_t value);
+ void Load32BitValue(XmmRegister dest, int32_t value);
+ void Load64BitValue(XmmRegister dest, int64_t value);
+ void Load32BitValue(XmmRegister dest, float value);
+ void Load64BitValue(XmmRegister dest, double value);
+
+ // Compare a register with a 32/64-bit value in the most efficient manner.
+ void Compare32BitValue(CpuRegister dest, int32_t value);
+ void Compare64BitValue(CpuRegister dest, int64_t value);
Address LiteralCaseTable(HPackedSwitch* switch_instr);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 9b91b53813..a8841d31c5 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -758,6 +758,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
compiler_driver_->GetInstructionSet(),
invoke_type,
graph_->IsDebuggable(),
+ /* osr */ false,
graph_->GetCurrentInstructionId());
callee_graph->SetArtMethod(resolved_method);
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index c1e38633fc..0029cc3650 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -91,6 +91,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
void SimplifyRotate(HInvoke* invoke, bool is_left);
void SimplifySystemArrayCopy(HInvoke* invoke);
void SimplifyStringEquals(HInvoke* invoke);
+ void SimplifyCompare(HInvoke* invoke, bool is_signum);
OptimizingCompilerStats* stats_;
bool simplification_occurred_ = false;
@@ -176,8 +177,8 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation
// We can apply De Morgan's laws if both inputs are Not's and are only used
// by `op`.
- if (left->IsNot() &&
- right->IsNot() &&
+ if (((left->IsNot() && right->IsNot()) ||
+ (left->IsBooleanNot() && right->IsBooleanNot())) &&
left->HasOnlyOneNonEnvironmentUse() &&
right->HasOnlyOneNonEnvironmentUse()) {
// Replace code looking like
@@ -187,8 +188,8 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation
// with
// OR or, a, b (respectively AND)
// NOT dest, or
- HInstruction* src_left = left->AsNot()->GetInput();
- HInstruction* src_right = right->AsNot()->GetInput();
+ HInstruction* src_left = left->InputAt(0);
+ HInstruction* src_right = right->InputAt(0);
uint32_t dex_pc = op->GetDexPc();
// Remove the negations on the inputs.
@@ -204,7 +205,12 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation
} else {
hbin = new (GetGraph()->GetArena()) HAnd(type, src_left, src_right, dex_pc);
}
- HNot* hnot = new (GetGraph()->GetArena()) HNot(type, hbin, dex_pc);
+ HInstruction* hnot;
+ if (left->IsBooleanNot()) {
+ hnot = new (GetGraph()->GetArena()) HBooleanNot(hbin, dex_pc);
+ } else {
+ hnot = new (GetGraph()->GetArena()) HNot(type, hbin, dex_pc);
+ }
op->GetBlock()->InsertInstructionBefore(hbin, op);
op->GetBlock()->ReplaceAndRemoveInstructionWith(op, hnot);
@@ -1308,8 +1314,8 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) {
HInstruction* left = instruction->GetLeft();
HInstruction* right = instruction->GetRight();
- if (left->IsNot() &&
- right->IsNot() &&
+ if (((left->IsNot() && right->IsNot()) ||
+ (left->IsBooleanNot() && right->IsBooleanNot())) &&
left->HasOnlyOneNonEnvironmentUse() &&
right->HasOnlyOneNonEnvironmentUse()) {
// Replace code looking like
@@ -1318,8 +1324,8 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) {
// XOR dst, nota, notb
// with
// XOR dst, a, b
- instruction->ReplaceInput(left->AsNot()->GetInput(), 0);
- instruction->ReplaceInput(right->AsNot()->GetInput(), 1);
+ instruction->ReplaceInput(left->InputAt(0), 0);
+ instruction->ReplaceInput(right->InputAt(0), 1);
left->GetBlock()->RemoveInstruction(left);
right->GetBlock()->RemoveInstruction(right);
RecordSimplification();
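
Both rewrites rest on the same negation identities, now applied to HBooleanNot as well as HNot. A tiny illustrative check (not ART code):

    #include <cassert>
    #include <cstdint>

    void CheckNegationIdentities(uint32_t a, uint32_t b, bool p, bool q) {
      assert((~a & ~b) == ~(a | b));  // De Morgan, used by TryDeMorganNegationFactoring
      assert((~a | ~b) == ~(a & b));
      assert((!p ^ !q) == (p ^ q));   // BooleanNot inputs of an XOR cancel out
    }
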
@@ -1441,6 +1447,24 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
}
}
+void InstructionSimplifierVisitor::SimplifyCompare(HInvoke* invoke, bool is_signum) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ uint32_t dex_pc = invoke->GetDexPc();
+ HInstruction* left = invoke->InputAt(0);
+ HInstruction* right;
+ Primitive::Type type = left->GetType();
+ if (!is_signum) {
+ right = invoke->InputAt(1);
+ } else if (type == Primitive::kPrimLong) {
+ right = GetGraph()->GetLongConstant(0);
+ } else {
+ right = GetGraph()->GetIntConstant(0);
+ }
+ HCompare* compare = new (GetGraph()->GetArena())
+ HCompare(type, left, right, ComparisonBias::kNoBias, dex_pc);
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, compare);
+}
+
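
SimplifyCompare treats signum(x) as compare(x, 0), so the compare and signum intrinsics collapse onto the single HCompare node the backends already lower. The equivalence being relied on, sketched in C++ (illustrative only):

    #include <cstdint>

    int32_t CompareInts(int32_t left, int32_t right) {
      return (left < right) ? -1 : ((left > right) ? 1 : 0);
    }

    int32_t SignumInt(int32_t value) {
      return CompareInts(value, 0);  // what the is_signum path builds, with a zero constant
    }
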
void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) {
SimplifyStringEquals(instruction);
@@ -1452,6 +1476,12 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
} else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateLeft ||
instruction->GetIntrinsic() == Intrinsics::kLongRotateLeft) {
SimplifyRotate(instruction, true);
+ } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerCompare ||
+ instruction->GetIntrinsic() == Intrinsics::kLongCompare) {
+ SimplifyCompare(instruction, /* is_signum */ false);
+ } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerSignum ||
+ instruction->GetIntrinsic() == Intrinsics::kLongSignum) {
+ SimplifyCompare(instruction, /* is_signum */ true);
}
}
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index e8912b39ab..96a3c3c2f1 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1633,20 +1633,20 @@ UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(IntegerSignum)
-UNIMPLEMENTED_INTRINSIC(LongSignum)
-// Rotate operations are handled as HRor instructions.
+// Handled as HIR instructions.
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
#undef UNIMPLEMENTED_INTRINSIC
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 5dce83a69c..4140d94e17 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -284,36 +284,6 @@ static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
-static void GenCompare(LocationSummary* locations, bool is_long, vixl::MacroAssembler* masm) {
- Location op1 = locations->InAt(0);
- Location op2 = locations->InAt(1);
- Location out = locations->Out();
-
- Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
- Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
- Register out_reg = WRegisterFrom(out);
-
- __ Cmp(op1_reg, op2_reg);
- __ Cset(out_reg, gt); // out == +1 if GT or 0 otherwise
- __ Cinv(out_reg, out_reg, lt); // out == -1 if LT or unchanged otherwise
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerCompare(HInvoke* invoke) {
- CreateIntIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerCompare(HInvoke* invoke) {
- GenCompare(invoke->GetLocations(), /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongCompare(HInvoke* invoke) {
- CreateIntIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongCompare(HInvoke* invoke) {
- GenCompare(invoke->GetLocations(), /* is_long */ true, GetVIXLAssembler());
-}
-
static void GenNumberOfLeadingZeros(LocationSummary* locations,
Primitive::Type type,
vixl::MacroAssembler* masm) {
@@ -1456,34 +1426,6 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke
__ Bind(slow_path->GetExitLabel());
}
-static void GenSignum(LocationSummary* locations, bool is_long, vixl::MacroAssembler* masm) {
- Location op1 = locations->InAt(0);
- Location out = locations->Out();
-
- Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
- Register out_reg = WRegisterFrom(out);
-
- __ Cmp(op1_reg, 0);
- __ Cset(out_reg, gt); // out == +1 if GT or 0 otherwise
- __ Cinv(out_reg, out_reg, lt); // out == -1 if LT or unchanged otherwise
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerSignum(HInvoke* invoke) {
- CreateIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerSignum(HInvoke* invoke) {
- GenSignum(invoke->GetLocations(), /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongSignum(HInvoke* invoke) {
- CreateIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongSignum(HInvoke* invoke) {
- GenSignum(invoke->GetLocations(), /* is_long */ true, GetVIXLAssembler());
-}
-
static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
@@ -1684,11 +1626,15 @@ UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
-// Rotate operations are handled as HRor instructions.
+// Handled as HIR instructions.
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
#undef UNIMPLEMENTED_INTRINSIC
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 0d9cf091cc..2294713a3e 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1019,12 +1019,14 @@ UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+
+// Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
UNIMPLEMENTED_INTRINSIC(IntegerSignum)
UNIMPLEMENTED_INTRINSIC(LongSignum)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index f681d1fd56..ac2850342d 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1767,12 +1767,14 @@ UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+
+// Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
UNIMPLEMENTED_INTRINSIC(IntegerSignum)
UNIMPLEMENTED_INTRINSIC(LongSignum)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index acc40bc998..ab4f6f9d28 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -37,10 +37,12 @@ namespace x86 {
static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
-static constexpr int kFloatNaN = 0x7FC00000;
+static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
+static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
- : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
+ : arena_(codegen->GetGraph()->GetArena()),
+ codegen_(codegen) {
}
@@ -256,15 +258,37 @@ static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresFpuRegister());
- // TODO: Allow x86 to work with memory. This requires assembler support, see below.
- // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
locations->SetOut(Location::SameAsFirstInput());
+ HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+ DCHECK(static_or_direct != nullptr);
+ if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+ // We need addressability for the constant area.
+ locations->SetInAt(1, Location::RequiresRegister());
+ // We need a temporary to hold the constant.
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
}
-static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
+static void MathAbsFP(LocationSummary* locations,
+ bool is64bit,
+ X86Assembler* assembler,
+ CodeGeneratorX86* codegen) {
Location output = locations->Out();
- if (output.IsFpuRegister()) {
+ DCHECK(output.IsFpuRegister());
+ if (locations->InAt(1).IsValid()) {
+ DCHECK(locations->InAt(1).IsRegister());
+ // We also have a constant area pointer.
+ Register constant_area = locations->InAt(1).AsRegister<Register>();
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ if (is64bit) {
+ __ movsd(temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF), constant_area));
+ __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
+ } else {
+ __ movss(temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF), constant_area));
+ __ andps(output.AsFpuRegister<XmmRegister>(), temp);
+ }
+ } else {
// Create the right constant on an aligned stack.
if (is64bit) {
__ subl(ESP, Immediate(8));
@@ -277,19 +301,6 @@ static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* as
__ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
}
__ addl(ESP, Immediate(16));
- } else {
- // TODO: update when assember support is available.
- UNIMPLEMENTED(FATAL) << "Needs assembler support.";
-// Once assembler support is available, in-memory operations look like this:
-// if (is64bit) {
-// DCHECK(output.IsDoubleStackSlot());
-// __ andl(Address(Register(RSP), output.GetHighStackIndex(kX86WordSize)),
-// Immediate(0x7FFFFFFF));
-// } else {
-// DCHECK(output.IsStackSlot());
-// // Can use and with a literal directly.
-// __ andl(Address(Register(RSP), output.GetStackIndex()), Immediate(0x7FFFFFFF));
-// }
}
}
@@ -298,7 +309,7 @@ void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
@@ -306,7 +317,7 @@ void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}
static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
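
For reference, a minimal standalone sketch (not part of the patch): the constant-area path above computes the absolute value by ANDing away the IEEE-754 sign bit with the 0x7FFFFFFF / 0x7FFFFFFFFFFFFFFF masks, which is equivalent to the following portable C++.

#include <cassert>
#include <cstdint>
#include <cstring>

static double AbsDouble(double x) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);  // Clear the sign bit (the andpd above).
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}

static float AbsFloat(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= UINT32_C(0x7FFFFFFF);  // Clear the sign bit (the andps above).
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}

int main() {
  assert(AbsDouble(-1.5) == 1.5);
  assert(AbsFloat(-2.25f) == 2.25f);
  assert(AbsDouble(0.0) == 0.0);
  return 0;
}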
@@ -388,8 +399,11 @@ void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
GenAbsLong(invoke->GetLocations(), GetAssembler());
}
-static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
- X86Assembler* assembler) {
+static void GenMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ bool is_double,
+ X86Assembler* assembler,
+ CodeGeneratorX86* codegen) {
Location op1_loc = locations->InAt(0);
Location op2_loc = locations->InAt(1);
Location out_loc = locations->Out();
@@ -450,15 +464,26 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
// NaN handling.
__ Bind(&nan);
- if (is_double) {
- __ pushl(Immediate(kDoubleNaNHigh));
- __ pushl(Immediate(kDoubleNaNLow));
- __ movsd(out, Address(ESP, 0));
- __ addl(ESP, Immediate(8));
+ // Do we have a constant area pointer?
+ if (locations->InAt(2).IsValid()) {
+ DCHECK(locations->InAt(2).IsRegister());
+ Register constant_area = locations->InAt(2).AsRegister<Register>();
+ if (is_double) {
+ __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, constant_area));
+ } else {
+ __ movss(out, codegen->LiteralInt32Address(kFloatNaN, constant_area));
+ }
} else {
- __ pushl(Immediate(kFloatNaN));
- __ movss(out, Address(ESP, 0));
- __ addl(ESP, Immediate(4));
+ if (is_double) {
+ __ pushl(Immediate(kDoubleNaNHigh));
+ __ pushl(Immediate(kDoubleNaNLow));
+ __ movsd(out, Address(ESP, 0));
+ __ addl(ESP, Immediate(8));
+ } else {
+ __ pushl(Immediate(kFloatNaN));
+ __ movss(out, Address(ESP, 0));
+ __ addl(ESP, Immediate(4));
+ }
}
__ jmp(&done);
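
For reference, a minimal standalone sketch (not part of the patch): kDoubleNaN (0x7FF8000000000000) and kFloatNaN (0x7FC00000) are the canonical quiet-NaN bit patterns loaded in the NaN branch above, since min/max must yield NaN whenever either operand is NaN.

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

int main() {
  const uint64_t kDoubleNaN = UINT64_C(0x7FF8000000000000);
  const uint32_t kFloatNaN = UINT32_C(0x7FC00000);

  double d;
  std::memcpy(&d, &kDoubleNaN, sizeof(d));
  assert(std::isnan(d));  // The 64-bit pattern decodes to a quiet NaN.

  float f;
  std::memcpy(&f, &kFloatNaN, sizeof(f));
  assert(std::isnan(f));  // The 32-bit pattern decodes to a quiet NaN.

  // NaN is "sticky" for min/max: if either input is NaN the result is NaN,
  // which is why the intrinsic branches to the NaN label instead of relying
  // on the plain comparison result.
  double a = 1.0, b = std::nan("");
  double max = (std::isnan(a) || std::isnan(b)) ? std::nan("") : (a > b ? a : b);
  assert(std::isnan(max));
  return 0;
}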
@@ -483,6 +508,11 @@ static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
// The following is sub-optimal, but all we can do for now. It would be fine to also accept
// the second input to be the output (we can simply swap inputs).
locations->SetOut(Location::SameAsFirstInput());
+ HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+ DCHECK(static_or_direct != nullptr);
+ if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+ locations->SetInAt(2, Location::RequiresRegister());
+ }
}
void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
@@ -490,7 +520,11 @@ void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(),
+ /* is_min */ true,
+ /* is_double */ true,
+ GetAssembler(),
+ codegen_);
}
void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
@@ -498,7 +532,11 @@ void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(),
+ /* is_min */ true,
+ /* is_double */ false,
+ GetAssembler(),
+ codegen_);
}
void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -506,7 +544,11 @@ void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(),
+ /* is_min */ false,
+ /* is_double */ true,
+ GetAssembler(),
+ codegen_);
}
void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -514,7 +556,11 @@ void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(),
+ /* is_min */ false,
+ /* is_double */ false,
+ GetAssembler(),
+ codegen_);
}
static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -2245,7 +2291,7 @@ static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
}
void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
- X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+ X86Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
Register reg = locations->InAt(0).AsRegister<Register>();
@@ -2276,7 +2322,7 @@ void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
- X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+ X86Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
@@ -2320,7 +2366,9 @@ static void CreateBitCountLocations(
locations->SetOut(Location::RequiresRegister());
}
-static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenBitCount(X86Assembler* assembler,
+ CodeGeneratorX86* codegen,
+ HInvoke* invoke, bool is_long) {
LocationSummary* locations = invoke->GetLocations();
Location src = locations->InAt(0);
Register out = locations->Out().AsRegister<Register>();
@@ -2331,11 +2379,7 @@ static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long)
value = is_long
? POPCOUNT(static_cast<uint64_t>(value))
: POPCOUNT(static_cast<uint32_t>(value));
- if (value == 0) {
- __ xorl(out, out);
- } else {
- __ movl(out, Immediate(value));
- }
+ codegen->Load32BitValue(out, value);
return;
}
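
A minimal sketch (not part of the patch), assuming Load32BitValue simply centralizes the choice the removed branch made explicitly: xorl reg, reg is the shorter, dependency-breaking way to materialize zero, while any other 32-bit constant takes a movl with an immediate. The behavior is inferred from the code it replaces here; the emitter below is a hypothetical stand-in that prints text instead of encoding instructions.

#include <cstdint>
#include <iostream>

// Hypothetical stand-in for the codegen helper; emits text instead of bytes.
static void Load32BitValue(const char* reg, uint32_t value) {
  if (value == 0) {
    std::cout << "xorl " << reg << ", " << reg << "\n";  // No immediate needed.
  } else {
    std::cout << "movl $" << value << ", " << reg << "\n";
  }
}

int main() {
  Load32BitValue("%eax", 0);   // xorl %eax, %eax
  Load32BitValue("%eax", 32);  // movl $32, %eax
  return 0;
}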
@@ -2367,7 +2411,7 @@ void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+ GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
}
void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
@@ -2375,7 +2419,7 @@ void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+ GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
}
static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
@@ -2390,7 +2434,9 @@ static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, b
locations->SetOut(Location::RequiresRegister());
}
-static void GenLeadingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenLeadingZeros(X86Assembler* assembler,
+ CodeGeneratorX86* codegen,
+ HInvoke* invoke, bool is_long) {
LocationSummary* locations = invoke->GetLocations();
Location src = locations->InAt(0);
Register out = locations->Out().AsRegister<Register>();
@@ -2403,11 +2449,7 @@ static void GenLeadingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_lo
} else {
value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
}
- if (value == 0) {
- __ xorl(out, out);
- } else {
- __ movl(out, Immediate(value));
- }
+ codegen->Load32BitValue(out, value);
return;
}
@@ -2474,8 +2516,7 @@ void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* inv
}
void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
- GenLeadingZeros(assembler, invoke, /* is_long */ false);
+ GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}
void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -2483,8 +2524,7 @@ void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke
}
void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
- GenLeadingZeros(assembler, invoke, /* is_long */ true);
+ GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}
static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
@@ -2499,7 +2539,9 @@ static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke,
locations->SetOut(Location::RequiresRegister());
}
-static void GenTrailingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenTrailingZeros(X86Assembler* assembler,
+ CodeGeneratorX86* codegen,
+ HInvoke* invoke, bool is_long) {
LocationSummary* locations = invoke->GetLocations();
Location src = locations->InAt(0);
Register out = locations->Out().AsRegister<Register>();
@@ -2512,11 +2554,7 @@ static void GenTrailingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_l
} else {
value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
}
- if (value == 0) {
- __ xorl(out, out);
- } else {
- __ movl(out, Immediate(value));
- }
+ codegen->Load32BitValue(out, value);
return;
}
@@ -2570,8 +2608,7 @@ void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* in
}
void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
- GenTrailingZeros(assembler, invoke, /* is_long */ false);
+ GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}
void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -2579,8 +2616,7 @@ void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invok
}
void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
- GenTrailingZeros(assembler, invoke, /* is_long */ true);
+ GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}
// Unimplemented intrinsics.
@@ -2600,20 +2636,20 @@ UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(IntegerSignum)
-UNIMPLEMENTED_INTRINSIC(LongSignum)
-// Rotate operations are handled as HRor instructions.
+// Handled as HIR instructions.
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateRight)
-UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
#undef UNIMPLEMENTED_INTRINSIC
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 51fa514cb6..c9a43442b3 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2431,58 +2431,6 @@ void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
}
-static void CreateCompareLocations(ArenaAllocator* arena, HInvoke* invoke) {
- LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister());
-}
-
-static void GenCompare(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
- LocationSummary* locations = invoke->GetLocations();
- CpuRegister src1 = locations->InAt(0).AsRegister<CpuRegister>();
- CpuRegister src2 = locations->InAt(1).AsRegister<CpuRegister>();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
- NearLabel is_lt, done;
-
- __ xorl(out, out);
-
- if (is_long) {
- __ cmpq(src1, src2);
- } else {
- __ cmpl(src1, src2);
- }
- __ j(kEqual, &done);
- __ j(kLess, &is_lt);
-
- __ movl(out, Immediate(1));
- __ jmp(&done);
-
- __ Bind(&is_lt);
- __ movl(out, Immediate(-1));
-
- __ Bind(&done);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerCompare(HInvoke* invoke) {
- CreateCompareLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitIntegerCompare(HInvoke* invoke) {
- GenCompare(GetAssembler(), invoke, /* is_long */ false);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongCompare(HInvoke* invoke) {
- CreateCompareLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongCompare(HInvoke* invoke) {
- GenCompare(GetAssembler(), invoke, /* is_long */ true);
-}
-
static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
@@ -2757,74 +2705,6 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok
GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}
-static void CreateSignLocations(ArenaAllocator* arena, HInvoke* invoke) {
- LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::Any());
- locations->SetOut(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister()); // Need a writeable register.
-}
-
-static void GenSign(X86_64Assembler* assembler,
- CodeGeneratorX86_64* codegen,
- HInvoke* invoke, bool is_long) {
- LocationSummary* locations = invoke->GetLocations();
- Location src = locations->InAt(0);
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
- if (invoke->InputAt(0)->IsConstant()) {
- // Evaluate this at compile time.
- int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
- codegen->Load32BitValue(out, value == 0 ? 0 : (value > 0 ? 1 : -1));
- return;
- }
-
- // Copy input into temporary.
- CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
- if (src.IsRegister()) {
- if (is_long) {
- __ movq(tmp, src.AsRegister<CpuRegister>());
- } else {
- __ movl(tmp, src.AsRegister<CpuRegister>());
- }
- } else if (is_long) {
- DCHECK(src.IsDoubleStackSlot());
- __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
- } else {
- DCHECK(src.IsStackSlot());
- __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
- }
-
- // Do the bit twiddling: basically tmp >> 63/31 | -tmp >>> 63/31 for long/int.
- if (is_long) {
- __ movq(out, tmp);
- __ sarq(out, Immediate(63));
- __ negq(tmp);
- __ shrq(tmp, Immediate(63));
- __ orq(out, tmp);
- } else {
- __ movl(out, tmp);
- __ sarl(out, Immediate(31));
- __ negl(tmp);
- __ shrl(tmp, Immediate(31));
- __ orl(out, tmp);
- }
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerSignum(HInvoke* invoke) {
- CreateSignLocations(arena_, invoke);
-}
-void IntrinsicCodeGeneratorX86_64::VisitIntegerSignum(HInvoke* invoke) {
- GenSign(GetAssembler(), codegen_, invoke, /* is_long */ false);
-}
-void IntrinsicLocationsBuilderX86_64::VisitLongSignum(HInvoke* invoke) {
- CreateSignLocations(arena_, invoke);
-}
-void IntrinsicCodeGeneratorX86_64::VisitLongSignum(HInvoke* invoke) {
- GenSign(GetAssembler(), codegen_, invoke, /* is_long */ true);
-}
-
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -2840,11 +2720,15 @@ UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
-// Rotate operations are handled as HRor instructions.
+// Handled as HIR instructions.
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
#undef UNIMPLEMENTED_INTRINSIC
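
For reference, a minimal standalone sketch (not part of the patch) of the bit-twiddling signum the removed GenSign emitted, sign(x) = (x >> 63) | (-x >>> 63) for longs (31 for ints): the arithmetic shift produces -1 for negative inputs, and the logical shift of the negation produces 1 for positive inputs.

#include <cassert>
#include <cstdint>

static int64_t Signum64(int64_t x) {
  // Arithmetic shift: -1 if x < 0, else 0 (the sarq in the removed code).
  int64_t sar = x >> 63;
  // Negate, then logical shift: 1 if x > 0, else 0 (negq + shrq in the removed code).
  int64_t shr = static_cast<int64_t>((0 - static_cast<uint64_t>(x)) >> 63);
  return sar | shr;  // orq
}

int main() {
  assert(Signum64(-5) == -1);
  assert(Signum64(0) == 0);
  assert(Signum64(5) == 1);
  assert(Signum64(INT64_MIN) == -1);
  return 0;
}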
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 3dda8501d2..f269885907 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -647,6 +647,10 @@ void HLoopInformation::Populate() {
header_->GetGraph()->SetHasIrreducibleLoops(true);
PopulateIrreducibleRecursive(back_edge);
} else {
+ if (header_->GetGraph()->IsCompilingOsr()) {
+ irreducible_ = true;
+ header_->GetGraph()->SetHasIrreducibleLoops(true);
+ }
PopulateRecursive(back_edge);
}
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b8083477cf..daec096f3e 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -274,6 +274,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
InstructionSet instruction_set,
InvokeType invoke_type = kInvalidInvokeType,
bool debuggable = false,
+ bool osr = false,
int start_instruction_id = 0)
: arena_(arena),
blocks_(arena->Adapter(kArenaAllocBlockList)),
@@ -302,7 +303,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_current_method_(nullptr),
- inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) {
+ inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
+ osr_(osr) {
blocks_.reserve(kDefaultNumberOfBlocks);
}
@@ -478,6 +480,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
return instruction_set_;
}
+ bool IsCompilingOsr() const { return osr_; }
+
bool HasTryCatch() const { return has_try_catch_; }
void SetHasTryCatch(bool value) { has_try_catch_ = value; }
@@ -606,6 +610,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// collection pointer to passes which may create NullConstant.
ReferenceTypeInfo inexact_object_rti_;
+ // Whether we are compiling this graph for on stack replacement: this will
+ // make all loops be treated as irreducible and emit special stack maps to
+ // mark compiled code entries which the interpreter can directly jump to.
+ const bool osr_;
+
friend class SsaBuilder; // For caching constants.
friend class SsaLivenessAnalysis; // For the linear order.
ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -1259,6 +1268,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
#define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \
M(X86ComputeBaseMethodAddress, Instruction) \
M(X86LoadFromConstantTable, Instruction) \
+ M(X86FPNeg, Instruction) \
M(X86PackedSwitch, Instruction)
#endif
@@ -6040,6 +6050,74 @@ inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) {
FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
#undef INSTRUCTION_TYPE_CHECK
+class SwitchTable : public ValueObject {
+ public:
+ SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
+ : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
+ int32_t table_offset = instruction.VRegB_31t();
+ const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
+ if (sparse) {
+ CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
+ } else {
+ CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+ }
+ num_entries_ = table[1];
+ values_ = reinterpret_cast<const int32_t*>(&table[2]);
+ }
+
+ uint16_t GetNumEntries() const {
+ return num_entries_;
+ }
+
+ void CheckIndex(size_t index) const {
+ if (sparse_) {
+ // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+ DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
+ } else {
+ // In a packed table, we have the starting key and num_entries_ values.
+ DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
+ }
+ }
+
+ int32_t GetEntryAt(size_t index) const {
+ CheckIndex(index);
+ return values_[index];
+ }
+
+ uint32_t GetDexPcForIndex(size_t index) const {
+ CheckIndex(index);
+ return dex_pc_ +
+ (reinterpret_cast<const int16_t*>(values_ + index) -
+ reinterpret_cast<const int16_t*>(&instruction_));
+ }
+
+ // Index of the first value in the table.
+ size_t GetFirstValueIndex() const {
+ if (sparse_) {
+ // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+ return num_entries_;
+ } else {
+ // In a packed table, we have the starting key and num_entries_ values.
+ return 1;
+ }
+ }
+
+ private:
+ const Instruction& instruction_;
+ const uint32_t dex_pc_;
+
+ // Whether this is a sparse-switch table (or a packed-switch one).
+ const bool sparse_;
+
+ // This can't be const as it needs to be computed off of the given instruction, and complicated
+ // expressions in the initializer list seemed very ugly.
+ uint16_t num_entries_;
+
+ const int32_t* values_;
+
+ DISALLOW_COPY_AND_ASSIGN(SwitchTable);
+};
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_H_
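
For reference, a minimal standalone sketch (not part of the patch) of the dex switch-payload layout SwitchTable parses: table[0] holds the signature, table[1] the entry count, and the 32-bit values start at &table[2]. A packed payload stores the starting key followed by the targets (so GetFirstValueIndex() is 1); a sparse payload stores all keys followed by all targets (so it is num_entries_).

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Packed payload for keys 10, 11, 12 with targets 100, 200, 300.
  // In uint16_t units: signature, size, then the int32 values.
  alignas(int32_t) uint16_t packed[2 + 4 * 2] = {};
  packed[0] = 0x0100;  // Packed-switch signature in the dex format.
  packed[1] = 3;       // num_entries
  const int32_t packed_values[] = {10 /* starting key */, 100, 200, 300};
  std::memcpy(&packed[2], packed_values, sizeof(packed_values));

  // Mirror SwitchTable's access pattern.
  const int32_t* values = reinterpret_cast<const int32_t*>(&packed[2]);
  const size_t first_value_index = 1;            // Packed: index 0 is the key.
  assert(values[0] == 10);                       // Starting key.
  assert(values[first_value_index + 2] == 300);  // Target for key 12.

  // A sparse payload with n entries would instead store n keys followed by
  // n targets, so the first target lives at index n.
  return 0;
}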
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index b1bf939b36..0b3a84d3d3 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -56,6 +56,25 @@ class HX86LoadFromConstantTable : public HExpression<2> {
DISALLOW_COPY_AND_ASSIGN(HX86LoadFromConstantTable);
};
+// Version of HNeg with access to the constant table for FP types.
+class HX86FPNeg : public HExpression<2> {
+ public:
+ HX86FPNeg(Primitive::Type result_type,
+ HInstruction* input,
+ HX86ComputeBaseMethodAddress* method_base,
+ uint32_t dex_pc)
+ : HExpression(result_type, SideEffects::None(), dex_pc) {
+ DCHECK(Primitive::IsFloatingPointType(result_type));
+ SetRawInputAt(0, input);
+ SetRawInputAt(1, method_base);
+ }
+
+ DECLARE_INSTRUCTION(X86FPNeg);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HX86FPNeg);
+};
+
// X86 version of HPackedSwitch that holds a pointer to the base method address.
class HX86PackedSwitch : public HTemplateInstruction<2> {
public:
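
For reference, a minimal standalone sketch (not part of the patch): SSE has no scalar FP negate instruction, so an FP negation on x86 is typically lowered by XORing the sign bit with a mask (0x80000000 / 0x8000000000000000) that has to live in memory, presumably why HX86FPNeg carries the constant-area base as a second input. The same bit operation in portable C++:

#include <cassert>
#include <cstdint>
#include <cstring>

static float NegFloat(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits ^= UINT32_C(0x80000000);  // Flip the sign bit (xorps with the mask).
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}

static double NegDouble(double x) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits ^= UINT64_C(0x8000000000000000);  // Flip the sign bit (xorpd with the mask).
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}

int main() {
  assert(NegFloat(1.5f) == -1.5f);
  assert(NegDouble(-2.5) == 2.5);
  return 0;
}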
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bdc664b3eb..736ac32011 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -300,7 +300,7 @@ class OptimizingCompiler FINAL : public Compiler {
}
}
- bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method)
+ bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr)
OVERRIDE
SHARED_REQUIRES(Locks::mutator_lock_);
@@ -309,7 +309,8 @@ class OptimizingCompiler FINAL : public Compiler {
CompiledMethod* Emit(ArenaAllocator* arena,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
- CompilerDriver* driver) const;
+ CompilerDriver* driver,
+ const DexFile::CodeItem* item) const;
// Try compiling a method and return the code generator used for
// compiling it.
@@ -327,7 +328,8 @@ class OptimizingCompiler FINAL : public Compiler {
uint32_t method_idx,
jobject class_loader,
const DexFile& dex_file,
- Handle<mirror::DexCache> dex_cache) const;
+ Handle<mirror::DexCache> dex_cache,
+ bool osr) const;
std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
@@ -580,11 +582,12 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen)
CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
- CompilerDriver* compiler_driver) const {
+ CompilerDriver* compiler_driver,
+ const DexFile::CodeItem* code_item) const {
ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
stack_map.resize(codegen->ComputeStackMapsSize());
- codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
+ codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item);
CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
compiler_driver,
@@ -615,7 +618,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
uint32_t method_idx,
jobject class_loader,
const DexFile& dex_file,
- Handle<mirror::DexCache> dex_cache) const {
+ Handle<mirror::DexCache> dex_cache,
+ bool osr) const {
MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
CompilerDriver* compiler_driver = GetCompilerDriver();
InstructionSet instruction_set = compiler_driver->GetInstructionSet();
@@ -663,8 +667,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
dex_compilation_unit.GetDexFile(),
dex_compilation_unit.GetClassDefIndex());
HGraph* graph = new (arena) HGraph(
- arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
- kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
+ arena,
+ dex_file,
+ method_idx,
+ requires_barrier,
+ compiler_driver->GetInstructionSet(),
+ kInvalidInvokeType,
+ compiler_driver->GetCompilerOptions().GetDebuggable(),
+ osr);
std::unique_ptr<CodeGenerator> codegen(
CodeGenerator::Create(graph,
@@ -797,10 +807,11 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
method_idx,
jclass_loader,
dex_file,
- dex_cache));
+ dex_cache,
+ /* osr */ false));
if (codegen.get() != nullptr) {
MaybeRecordStat(MethodCompilationStat::kCompiled);
- method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver);
+ method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
}
} else {
if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
@@ -843,7 +854,8 @@ bool IsCompilingWithCoreImage() {
bool OptimizingCompiler::JitCompile(Thread* self,
jit::JitCodeCache* code_cache,
- ArtMethod* method) {
+ ArtMethod* method,
+ bool osr) {
StackHandleScope<2> hs(self);
Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
method->GetDeclaringClass()->GetClassLoader()));
@@ -873,7 +885,8 @@ bool OptimizingCompiler::JitCompile(Thread* self,
method_idx,
jclass_loader,
*dex_file,
- dex_cache));
+ dex_cache,
+ osr));
if (codegen.get() == nullptr) {
return false;
}
@@ -885,7 +898,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
return false;
}
MaybeRecordStat(MethodCompilationStat::kCompiled);
- codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
+ codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
const void* code = code_cache->CommitCode(
self,
method,
@@ -896,7 +909,8 @@ bool OptimizingCompiler::JitCompile(Thread* self,
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
code_allocator.GetMemory().data(),
- code_allocator.GetSize());
+ code_allocator.GetSize(),
+ osr);
if (code == nullptr) {
code_cache->ClearData(self, stack_map_data);
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a2180bc9d7..a6f14616bf 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -53,6 +53,10 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
BinaryFP(div);
}
+ void VisitCompare(HCompare* compare) OVERRIDE {
+ BinaryFP(compare);
+ }
+
void VisitReturn(HReturn* ret) OVERRIDE {
HConstant* value = ret->InputAt(0)->AsConstant();
if ((value != nullptr && Primitive::IsFloatingPointType(value->GetType()))) {
@@ -74,11 +78,50 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
void BinaryFP(HBinaryOperation* bin) {
HConstant* rhs = bin->InputAt(1)->AsConstant();
- if (rhs != nullptr && Primitive::IsFloatingPointType(bin->GetResultType())) {
+ if (rhs != nullptr && Primitive::IsFloatingPointType(rhs->GetType())) {
ReplaceInput(bin, rhs, 1, false);
}
}
+ void VisitEqual(HEqual* cond) OVERRIDE {
+ BinaryFP(cond);
+ }
+
+ void VisitNotEqual(HNotEqual* cond) OVERRIDE {
+ BinaryFP(cond);
+ }
+
+ void VisitLessThan(HLessThan* cond) OVERRIDE {
+ BinaryFP(cond);
+ }
+
+ void VisitLessThanOrEqual(HLessThanOrEqual* cond) OVERRIDE {
+ BinaryFP(cond);
+ }
+
+ void VisitGreaterThan(HGreaterThan* cond) OVERRIDE {
+ BinaryFP(cond);
+ }
+
+ void VisitGreaterThanOrEqual(HGreaterThanOrEqual* cond) OVERRIDE {
+ BinaryFP(cond);
+ }
+
+ void VisitNeg(HNeg* neg) OVERRIDE {
+ if (Primitive::IsFloatingPointType(neg->GetType())) {
+ // We need to replace the HNeg with an HX86FPNeg in order to address the constant area.
+ InitializePCRelativeBasePointer();
+ HGraph* graph = GetGraph();
+ HBasicBlock* block = neg->GetBlock();
+ HX86FPNeg* x86_fp_neg = new (graph->GetArena()) HX86FPNeg(
+ neg->GetType(),
+ neg->InputAt(0),
+ base_,
+ neg->GetDexPc());
+ block->ReplaceAndRemoveInstructionWith(neg, x86_fp_neg);
+ }
+ }
+
void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
if (switch_insn->GetNumEntries() <=
InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
@@ -127,12 +170,23 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
// If this is an invoke-static/-direct with PC-relative dex cache array
// addressing, we need the PC-relative address base.
HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+ // We can't add a pointer to the constant area if we already have a current
+ // method pointer. This may arise when sharpening doesn't remove the current
+ // method pointer from the invoke.
+ if (invoke_static_or_direct != nullptr &&
+ invoke_static_or_direct->HasCurrentMethodInput()) {
+ DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
+ return;
+ }
+
+ bool base_added = false;
if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) {
InitializePCRelativeBasePointer();
// Add the extra parameter base_.
- DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
invoke_static_or_direct->AddSpecialInput(base_);
+ base_added = true;
}
+
// Ensure that we can load FP arguments from the constant area.
for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
HConstant* input = invoke->InputAt(i)->AsConstant();
@@ -140,6 +194,25 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
ReplaceInput(invoke, input, i, true);
}
}
+
+ // These intrinsics need the constant area.
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kMathAbsDouble:
+ case Intrinsics::kMathAbsFloat:
+ case Intrinsics::kMathMaxDoubleDouble:
+ case Intrinsics::kMathMaxFloatFloat:
+ case Intrinsics::kMathMinDoubleDouble:
+ case Intrinsics::kMathMinFloatFloat:
+ if (!base_added) {
+ DCHECK(invoke_static_or_direct != nullptr);
+ DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
+ InitializePCRelativeBasePointer();
+ invoke_static_or_direct->AddSpecialInput(base_);
+ }
+ break;
+ default:
+ break;
+ }
}
// The generated HX86ComputeBaseMethodAddress in the entry block needed as an