Diffstat (limited to 'compiler/optimizing')
25 files changed, 893 insertions, 525 deletions
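The SwitchTable helper removed from builder.cc in the first hunk below (and re-used by the new OSR check in code_generator.cc) walks the dex switch payload directly. As a reference for that code, here is a minimal C++ sketch of the two payload layouts it decodes; the struct names are hypothetical and not part of the patch:

    // Packed-switch payload (ident == Instruction::kPackedSwitchSignature, 0x0100):
    // one starting key followed by `size` branch targets, so GetFirstValueIndex() == 1.
    struct PackedSwitchPayload {
      uint16_t ident;
      uint16_t size;
      int32_t first_key;
      // int32_t targets[size];
    };

    // Sparse-switch payload (ident == Instruction::kSparseSwitchSignature, 0x0200):
    // `size` keys followed by `size` targets, so GetFirstValueIndex() == size.
    struct SparseSwitchPayload {
      uint16_t ident;
      uint16_t size;
      // int32_t keys[size];
      // int32_t targets[size];
    };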
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index c7430e7eb6..8d77daf183 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -72,74 +72,6 @@ class Temporaries : public ValueObject { size_t index_; }; -class SwitchTable : public ValueObject { - public: - SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse) - : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) { - int32_t table_offset = instruction.VRegB_31t(); - const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset; - if (sparse) { - CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature)); - } else { - CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature)); - } - num_entries_ = table[1]; - values_ = reinterpret_cast<const int32_t*>(&table[2]); - } - - uint16_t GetNumEntries() const { - return num_entries_; - } - - void CheckIndex(size_t index) const { - if (sparse_) { - // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. - DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_)); - } else { - // In a packed table, we have the starting key and num_entries_ values. - DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_)); - } - } - - int32_t GetEntryAt(size_t index) const { - CheckIndex(index); - return values_[index]; - } - - uint32_t GetDexPcForIndex(size_t index) const { - CheckIndex(index); - return dex_pc_ + - (reinterpret_cast<const int16_t*>(values_ + index) - - reinterpret_cast<const int16_t*>(&instruction_)); - } - - // Index of the first value in the table. - size_t GetFirstValueIndex() const { - if (sparse_) { - // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. - return num_entries_; - } else { - // In a packed table, we have the starting key and num_entries_ values. - return 1; - } - } - - private: - const Instruction& instruction_; - const uint32_t dex_pc_; - - // Whether this is a sparse-switch table (or a packed-switch one). - const bool sparse_; - - // This can't be const as it needs to be computed off of the given instruction, and complicated - // expressions in the initializer list seemed very ugly. 
- uint16_t num_entries_; - - const int32_t* values_; - - DISALLOW_COPY_AND_ASSIGN(SwitchTable); -}; - void HGraphBuilder::InitializeLocals(uint16_t count) { graph_->SetNumberOfVRegs(count); locals_.resize(count); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 1d604e7135..93e17d6422 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -30,7 +30,6 @@ namespace art { class Instruction; -class SwitchTable; class HGraphBuilder : public ValueObject { public: diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index a3bbfdbd27..e1b83f05d6 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -629,8 +629,76 @@ size_t CodeGenerator::ComputeStackMapsSize() { return stack_map_stream_.PrepareForFillIn(); } -void CodeGenerator::BuildStackMaps(MemoryRegion region) { +static void CheckCovers(uint32_t dex_pc, + const HGraph& graph, + const CodeInfo& code_info, + const ArenaVector<HSuspendCheck*>& loop_headers, + ArenaVector<size_t>* covered) { + StackMapEncoding encoding = code_info.ExtractEncoding(); + for (size_t i = 0; i < loop_headers.size(); ++i) { + if (loop_headers[i]->GetDexPc() == dex_pc) { + if (graph.IsCompilingOsr()) { + DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid()); + } + ++(*covered)[i]; + } + } +} + +// Debug helper to ensure loop entries in compiled code are matched by +// dex branch instructions. +static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, + const CodeInfo& code_info, + const DexFile::CodeItem& code_item) { + if (graph.HasTryCatch()) { + // One can write loops through try/catch, which we do not support for OSR anyway. + return; + } + ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc)); + for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) { + if (it.Current()->IsLoopHeader()) { + HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck(); + if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { + loop_headers.push_back(suspend_check); + } + } + } + ArenaVector<size_t> covered(loop_headers.size(), 0, graph.GetArena()->Adapter(kArenaAllocMisc)); + const uint16_t* code_ptr = code_item.insns_; + const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_; + + size_t dex_pc = 0; + while (code_ptr < code_end) { + const Instruction& instruction = *Instruction::At(code_ptr); + if (instruction.IsBranch()) { + uint32_t target = dex_pc + instruction.GetTargetOffset(); + CheckCovers(target, graph, code_info, loop_headers, &covered); + } else if (instruction.IsSwitch()) { + SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH); + uint16_t num_entries = table.GetNumEntries(); + size_t offset = table.GetFirstValueIndex(); + + // Use a larger loop counter type to avoid overflow issues. + for (size_t i = 0; i < num_entries; ++i) { + // The target of the case. 
+ uint32_t target = dex_pc + table.GetEntryAt(i + offset); + CheckCovers(target, graph, code_info, loop_headers, &covered); + } + } + dex_pc += instruction.SizeInCodeUnits(); + code_ptr += instruction.SizeInCodeUnits(); + } + + for (size_t i = 0; i < covered.size(); ++i) { + DCHECK_NE(covered[i], 0u) << "Loop in compiled code has no dex branch equivalent"; + } +} + +void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) { stack_map_stream_.FillIn(region); + if (kIsDebugBuild) { + CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item); + } } void CodeGenerator::RecordPcInfo(HInstruction* instruction, @@ -705,6 +773,46 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, EmitEnvironment(instruction->GetEnvironment(), slow_path); stack_map_stream_.EndStackMapEntry(); + + HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); + if (instruction->IsSuspendCheck() && + (info != nullptr) && + graph_->IsCompilingOsr() && + (inlining_depth == 0)) { + DCHECK_EQ(info->GetSuspendCheck(), instruction); + // We duplicate the stack map as a marker that this stack map can be an OSR entry. + // Duplicating it avoids having the runtime recognize and skip an OSR stack map. + DCHECK(info->IsIrreducible()); + stack_map_stream_.BeginStackMapEntry( + dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0); + EmitEnvironment(instruction->GetEnvironment(), slow_path); + stack_map_stream_.EndStackMapEntry(); + if (kIsDebugBuild) { + HEnvironment* environment = instruction->GetEnvironment(); + for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) { + HInstruction* in_environment = environment->GetInstructionAt(i); + if (in_environment != nullptr) { + DCHECK(in_environment->IsPhi() || in_environment->IsConstant()); + Location location = environment->GetLocationAt(i); + DCHECK(location.IsStackSlot() || + location.IsDoubleStackSlot() || + location.IsConstant() || + location.IsInvalid()); + if (location.IsStackSlot() || location.IsDoubleStackSlot()) { + DCHECK_LT(location.GetStackIndex(), static_cast<int32_t>(GetFrameSize())); + } + } + } + } + } else if (kIsDebugBuild) { + // Ensure stack maps are unique, by checking that the native pc in the stack map + // last emitted is different than the native pc of the stack map just emitted. 
+ size_t number_of_stack_maps = stack_map_stream_.GetNumberOfStackMaps(); + if (number_of_stack_maps > 1) { + DCHECK_NE(stack_map_stream_.GetStackMap(number_of_stack_maps - 1).native_pc_offset, + stack_map_stream_.GetStackMap(number_of_stack_maps - 2).native_pc_offset); + } + } } bool CodeGenerator::HasStackMapAtCurrentPc() { diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 4f8f146753..0a688cf649 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -288,7 +288,7 @@ class CodeGenerator { slow_paths_.push_back(slow_path); } - void BuildStackMaps(MemoryRegion region); + void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item); size_t ComputeStackMapsSize(); bool IsLeafMethod() const { diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index c2d9edd43e..e43493280a 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -3750,6 +3750,7 @@ void LocationsBuilderARM::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -3779,6 +3780,13 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { Primitive::Type type = compare->InputAt(0)->GetType(); Condition less_cond; switch (type) { + case Primitive::kPrimInt: { + __ LoadImmediate(out, 0); + __ cmp(left.AsRegister<Register>(), + ShifterOperand(right.AsRegister<Register>())); // Signed compare. + less_cond = LT; + break; + } case Primitive::kPrimLong: { __ cmp(left.AsRegisterPairHigh<Register>(), ShifterOperand(right.AsRegisterPairHigh<Register>())); // Signed compare. 
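The kPrimInt cases added to VisitCompare across the backends all lower HCompare to the usual three-way result. A minimal C++ sketch of the semantics being emitted (illustration only, not code from the patch):

    // HCompare on integral inputs produces -1, 0 or +1.
    // The ARM path above does this with LoadImmediate(out, 0), a signed cmp and the
    // shared less/greater branches; the MIPS path uses two Slt plus a Subu.
    static int32_t CompareInts(int32_t left, int32_t right) {
      if (left < right) return -1;  // "less" label
      if (left > right) return 1;   // "greater" label
      return 0;                     // equal: out already holds 0
    }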
@@ -3808,6 +3816,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { LOG(FATAL) << "Unexpected compare type " << type; UNREACHABLE(); } + __ b(&done, EQ); __ b(&less, less_cond); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 4179fabe48..e20e04400f 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -2408,6 +2408,7 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); Primitive::Type in_type = compare->InputAt(0)->GetType(); switch (in_type) { + case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare)); @@ -2436,14 +2437,14 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { // 1 if: left > right // -1 if: left < right switch (in_type) { + case Primitive::kPrimInt: case Primitive::kPrimLong: { Register result = OutputRegister(compare); Register left = InputRegisterAt(compare, 0); Operand right = InputOperandAt(compare, 1); - __ Cmp(left, right); - __ Cset(result, ne); - __ Cneg(result, result, lt); + __ Cset(result, ne); // result == +1 if NE or 0 otherwise + __ Cneg(result, result, lt); // result == -1 if LT or unchanged otherwise break; } case Primitive::kPrimFloat: diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 961fe62932..e9c0b6ae79 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -2123,6 +2123,7 @@ void LocationsBuilderMIPS::VisitCompare(HCompare* compare) { new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); switch (in_type) { + case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -2153,6 +2154,14 @@ void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) { // 1 if: left > right // -1 if: left < right switch (in_type) { + case Primitive::kPrimInt: { + Register lhs = locations->InAt(0).AsRegister<Register>(); + Register rhs = locations->InAt(1).AsRegister<Register>(); + __ Slt(TMP, lhs, rhs); + __ Slt(res, rhs, lhs); + __ Subu(res, res, TMP); + break; + } case Primitive::kPrimLong: { MipsLabel done; Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 3e1563c66b..da98a89f65 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1763,6 +1763,7 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); switch (in_type) { + case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1))); @@ -1791,16 +1792,25 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { // 1 if: left > right // -1 if: left < right switch (in_type) { + case Primitive::kPrimInt: case Primitive::kPrimLong: { GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); Location rhs_location = locations->InAt(1); bool use_imm = rhs_location.IsConstant(); 
GpuRegister rhs = ZERO; if (use_imm) { - int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant()); - if (value != 0) { - rhs = AT; - __ LoadConst64(rhs, value); + if (in_type == Primitive::kPrimInt) { + int32_t value = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()->AsConstant()); + if (value != 0) { + rhs = AT; + __ LoadConst32(rhs, value); + } + } else { + int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant()); + if (value != 0) { + rhs = AT; + __ LoadConst64(rhs, value); + } } } else { rhs = rhs_location.AsRegister<GpuRegister>(); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index da054baa1c..de62010102 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1350,11 +1350,7 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, int32_t val_high = High32Bits(value); int32_t val_low = Low32Bits(value); - if (val_high == 0) { - __ testl(left_high, left_high); - } else { - __ cmpl(left_high, Immediate(val_high)); - } + codegen_->Compare32BitValue(left_high, val_high); if (if_cond == kCondNE) { __ j(X86Condition(true_high_cond), true_label); } else if (if_cond == kCondEQ) { @@ -1364,11 +1360,7 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, __ j(X86Condition(false_high_cond), false_label); } // Must be equal high, so compare the lows. - if (val_low == 0) { - __ testl(left_low, left_low); - } else { - __ cmpl(left_low, Immediate(val_low)); - } + codegen_->Compare32BitValue(left_low, val_low); } else { Register right_high = right.AsRegisterPairHigh<Register>(); Register right_low = right.AsRegisterPairLow<Register>(); @@ -1389,6 +1381,40 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, __ j(final_condition, true_label); } +void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs, + Location rhs, + HInstruction* insn, + bool is_double) { + HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable(); + if (is_double) { + if (rhs.IsFpuRegister()) { + __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>()); + } else if (const_area != nullptr) { + DCHECK(const_area->IsEmittedAtUseSite()); + __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress( + const_area->GetConstant()->AsDoubleConstant()->GetValue(), + const_area->GetLocations()->InAt(0).AsRegister<Register>())); + } else { + DCHECK(rhs.IsDoubleStackSlot()); + __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex())); + } + } else { + if (rhs.IsFpuRegister()) { + __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>()); + } else if (const_area != nullptr) { + DCHECK(const_area->IsEmittedAtUseSite()); + __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress( + const_area->GetConstant()->AsFloatConstant()->GetValue(), + const_area->GetLocations()->InAt(0).AsRegister<Register>())); + } else { + DCHECK(rhs.IsStackSlot()); + __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex())); + } + } +} + template<class LabelType> void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition, LabelType* true_target_in, @@ -1409,11 +1435,11 @@ void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condi GenerateLongComparesAndJumps(condition, true_target, false_target); 
break; case Primitive::kPrimFloat: - __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + GenerateFPCompare(left, right, condition, false); GenerateFPJumps(condition, true_target, false_target); break; case Primitive::kPrimDouble: - __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + GenerateFPCompare(left, right, condition, true); GenerateFPJumps(condition, true_target, false_target); break; default: @@ -1513,11 +1539,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); } else if (rhs.IsConstant()) { int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - if (constant == 0) { - __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); - } else { - __ cmpl(lhs.AsRegister<Register>(), Immediate(constant)); - } + codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant); } else { __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); } @@ -1665,7 +1687,13 @@ void LocationsBuilderX86::HandleCondition(HCondition* cond) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + if (cond->InputAt(1)->IsX86LoadFromConstantTable()) { + DCHECK(cond->InputAt(1)->IsEmittedAtUseSite()); + } else if (cond->InputAt(1)->IsConstant()) { + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(1, Location::Any()); + } if (!cond->IsEmittedAtUseSite()) { locations->SetOut(Location::RequiresRegister()); } @@ -1704,11 +1732,7 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) { __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); } else if (rhs.IsConstant()) { int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - if (constant == 0) { - __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); - } else { - __ cmpl(lhs.AsRegister<Register>(), Immediate(constant)); - } + codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant); } else { __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); } @@ -1719,11 +1743,11 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) { GenerateLongComparesAndJumps(cond, &true_label, &false_label); break; case Primitive::kPrimFloat: - __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>()); + GenerateFPCompare(lhs, rhs, cond, false); GenerateFPJumps(cond, &true_label, &false_label); break; case Primitive::kPrimDouble: - __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>()); + GenerateFPCompare(lhs, rhs, cond, true); GenerateFPJumps(cond, &true_label, &false_label); break; } @@ -2159,6 +2183,32 @@ void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) { } } +void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + DCHECK(Primitive::IsFloatingPointType(neg->GetType())); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); +} + +void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) { + LocationSummary* locations = neg->GetLocations(); + Location out = locations->Out(); + 
DCHECK(locations->InAt(0).Equals(out)); + + Register constant_area = locations->InAt(1).AsRegister<Register>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + if (neg->GetType() == Primitive::kPrimFloat) { + __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000), constant_area)); + __ xorps(out.AsFpuRegister<XmmRegister>(), mask); + } else { + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), constant_area)); + __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); + } +} + void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); @@ -4077,6 +4127,7 @@ void LocationsBuilderX86::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); @@ -4086,7 +4137,13 @@ void LocationsBuilderX86::VisitCompare(HCompare* compare) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + if (compare->InputAt(1)->IsX86LoadFromConstantTable()) { + DCHECK(compare->InputAt(1)->IsEmittedAtUseSite()); + } else if (compare->InputAt(1)->IsConstant()) { + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(1, Location::Any()); + } locations->SetOut(Location::RequiresRegister()); break; } @@ -4102,7 +4159,21 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { Location right = locations->InAt(1); NearLabel less, greater, done; + Condition less_cond = kLess; + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimInt: { + Register left_reg = left.AsRegister<Register>(); + if (right.IsConstant()) { + int32_t value = right.GetConstant()->AsIntConstant()->GetValue(); + codegen_->Compare32BitValue(left_reg, value); + } else if (right.IsStackSlot()) { + __ cmpl(left_reg, Address(ESP, right.GetStackIndex())); + } else { + __ cmpl(left_reg, right.AsRegister<Register>()); + } + break; + } case Primitive::kPrimLong: { Register left_low = left.AsRegisterPairLow<Register>(); Register left_high = left.AsRegisterPairHigh<Register>(); @@ -4124,11 +4195,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize))); } else { DCHECK(right_is_const) << right; - if (val_high == 0) { - __ testl(left_high, left_high); - } else { - __ cmpl(left_high, Immediate(val_high)); - } + codegen_->Compare32BitValue(left_high, val_high); } __ j(kLess, &less); // Signed compare. __ j(kGreater, &greater); // Signed compare. @@ -4138,30 +4205,30 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { __ cmpl(left_low, Address(ESP, right.GetStackIndex())); } else { DCHECK(right_is_const) << right; - if (val_low == 0) { - __ testl(left_low, left_low); - } else { - __ cmpl(left_low, Immediate(val_low)); - } + codegen_->Compare32BitValue(left_low, val_low); } + less_cond = kBelow; // for CF (unsigned). break; } case Primitive::kPrimFloat: { - __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + GenerateFPCompare(left, right, compare, false); __ j(kUnordered, compare->IsGtBias() ? 
&greater : &less); + less_cond = kBelow; // for CF (floats). break; } case Primitive::kPrimDouble: { - __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + GenerateFPCompare(left, right, compare, true); __ j(kUnordered, compare->IsGtBias() ? &greater : &less); + less_cond = kBelow; // for CF (floats). break; } default: LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); } + __ movl(out, Immediate(0)); __ j(kEqual, &done); - __ j(kBelow, &less); // kBelow is for CF (unsigned & floats). + __ j(less_cond, &less); __ Bind(&greater); __ movl(out, Immediate(1)); @@ -7121,6 +7188,22 @@ Address CodeGeneratorX86::LiteralInt64Address(int64_t v, Register reg) { return Address(reg, kDummy32BitOffset, fixup); } +void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) { + if (value == 0) { + __ xorl(dest, dest); + } else { + __ movl(dest, Immediate(value)); + } +} + +void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) { + if (value == 0) { + __ testl(dest, dest); + } else { + __ cmpl(dest, Immediate(value)); + } +} + Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 0aef478569..45e8ffa84f 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -296,6 +296,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { HBasicBlock* switch_block, HBasicBlock* default_block); + void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double); + X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -450,6 +452,12 @@ class CodeGeneratorX86 : public CodeGenerator { Address LiteralInt32Address(int32_t v, Register reg); Address LiteralInt64Address(int64_t v, Register reg); + // Load a 32-bit value into a register in the most efficient manner. + void Load32BitValue(Register dest, int32_t value); + + // Compare a register with a 32-bit value in the most efficient manner. 
+ void Compare32BitValue(Register dest, int32_t value); + Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value); void Finalize(CodeAllocator* allocator) OVERRIDE; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 6795488769..99396cd983 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1126,30 +1126,43 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { return; } if (destination.IsRegister()) { + CpuRegister dest = destination.AsRegister<CpuRegister>(); if (source.IsRegister()) { - __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); + __ movq(dest, source.AsRegister<CpuRegister>()); } else if (source.IsFpuRegister()) { - __ movd(destination.AsRegister<CpuRegister>(), source.AsFpuRegister<XmmRegister>()); + __ movd(dest, source.AsFpuRegister<XmmRegister>()); } else if (source.IsStackSlot()) { - __ movl(destination.AsRegister<CpuRegister>(), - Address(CpuRegister(RSP), source.GetStackIndex())); + __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex())); + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + if (constant->IsLongConstant()) { + Load64BitValue(dest, constant->AsLongConstant()->GetValue()); + } else { + Load32BitValue(dest, GetInt32ValueOf(constant)); + } } else { DCHECK(source.IsDoubleStackSlot()); - __ movq(destination.AsRegister<CpuRegister>(), - Address(CpuRegister(RSP), source.GetStackIndex())); + __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex())); } } else if (destination.IsFpuRegister()) { + XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); if (source.IsRegister()) { - __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<CpuRegister>()); + __ movd(dest, source.AsRegister<CpuRegister>()); } else if (source.IsFpuRegister()) { - __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); + __ movaps(dest, source.AsFpuRegister<XmmRegister>()); + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + int64_t value = CodeGenerator::GetInt64ValueOf(constant); + if (constant->IsFloatConstant()) { + Load32BitValue(dest, static_cast<int32_t>(value)); + } else { + Load64BitValue(dest, value); + } } else if (source.IsStackSlot()) { - __ movss(destination.AsFpuRegister<XmmRegister>(), - Address(CpuRegister(RSP), source.GetStackIndex())); + __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex())); } else { DCHECK(source.IsDoubleStackSlot()); - __ movsd(destination.AsFpuRegister<XmmRegister>(), - Address(CpuRegister(RSP), source.GetStackIndex())); + __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex())); } } else if (destination.IsStackSlot()) { if (source.IsRegister()) { @@ -1345,42 +1358,44 @@ void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, __ j(X86_64FPCondition(cond->GetCondition()), true_label); } -template<class LabelType> -void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition, - LabelType* true_target_in, - LabelType* false_target_in) { - // Generated branching requires both targets to be explicit. If either of the - // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. - LabelType fallthrough_target; - LabelType* true_target = true_target_in == nullptr ? 
&fallthrough_target : true_target_in; - LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; - +void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) { LocationSummary* locations = condition->GetLocations(); + Location left = locations->InAt(0); Location right = locations->InAt(1); - Primitive::Type type = condition->InputAt(0)->GetType(); switch (type) { - case Primitive::kPrimLong: { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: { CpuRegister left_reg = left.AsRegister<CpuRegister>(); if (right.IsConstant()) { - int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); - if (IsInt<32>(value)) { - if (value == 0) { - __ testq(left_reg, left_reg); - } else { - __ cmpq(left_reg, Immediate(static_cast<int32_t>(value))); - } + int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant()); + if (value == 0) { + __ testl(left_reg, left_reg); } else { - // Value won't fit in a 32-bit integer. - __ cmpq(left_reg, codegen_->LiteralInt64Address(value)); + __ cmpl(left_reg, Immediate(value)); } + } else if (right.IsStackSlot()) { + __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); + } else { + __ cmpl(left_reg, right.AsRegister<CpuRegister>()); + } + break; + } + case Primitive::kPrimLong: { + CpuRegister left_reg = left.AsRegister<CpuRegister>(); + if (right.IsConstant()) { + int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); + codegen_->Compare64BitValue(left_reg, value); } else if (right.IsDoubleStackSlot()) { __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); } else { __ cmpq(left_reg, right.AsRegister<CpuRegister>()); } - __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); break; } case Primitive::kPrimFloat: { @@ -1395,7 +1410,6 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* co __ ucomiss(left.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), right.GetStackIndex())); } - GenerateFPJumps(condition, true_target, false_target); break; } case Primitive::kPrimDouble: { @@ -1410,6 +1424,38 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* co __ ucomisd(left.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), right.GetStackIndex())); } + break; + } + default: + LOG(FATAL) << "Unexpected condition type " << type; + } +} + +template<class LabelType> +void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition, + LabelType* true_target_in, + LabelType* false_target_in) { + // Generated branching requires both targets to be explicit. If either of the + // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. + LabelType fallthrough_target; + LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; + LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; + + // Generate the comparison to set the CC. + GenerateCompareTest(condition); + + // Now generate the correct jump(s). 
+ Primitive::Type type = condition->InputAt(0)->GetType(); + switch (type) { + case Primitive::kPrimLong: { + __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); + break; + } + case Primitive::kPrimFloat: { + GenerateFPJumps(condition, true_target, false_target); + break; + } + case Primitive::kPrimDouble: { GenerateFPJumps(condition, true_target, false_target); break; } @@ -1508,11 +1554,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); } else if (rhs.IsConstant()) { int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - if (constant == 0) { - __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); - } else { - __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); - } + codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant); } else { __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); @@ -1564,14 +1606,37 @@ void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +static bool SelectCanUseCMOV(HSelect* select) { + // There are no conditional move instructions for XMMs. + if (Primitive::IsFloatingPointType(select->GetType())) { + return false; + } + + // A FP condition doesn't generate the single CC that we need. + HInstruction* condition = select->GetCondition(); + if (condition->IsCondition() && + Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) { + return false; + } + + // We can generate a CMOV for this Select. + return true; +} + void LocationsBuilderX86_64::VisitSelect(HSelect* select) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); if (Primitive::IsFloatingPointType(select->GetType())) { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + // Since we can't use CMOV, there is no need to force 'true' into a register. + locations->SetInAt(1, Location::Any()); } else { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (SelectCanUseCMOV(select)) { + locations->SetInAt(1, Location::RequiresRegister()); + } else { + // Since we can't use CMOV, there is no need to force 'true' into a register. + locations->SetInAt(1, Location::Any()); + } } if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { locations->SetInAt(2, Location::RequiresRegister()); @@ -1581,13 +1646,52 @@ void LocationsBuilderX86_64::VisitSelect(HSelect* select) { void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { LocationSummary* locations = select->GetLocations(); - NearLabel false_target; - GenerateTestAndBranch<NearLabel>(select, - /* condition_input_index */ 2, - /* true_target */ nullptr, - &false_target); - codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); - __ Bind(&false_target); + if (SelectCanUseCMOV(select)) { + // If both the condition and the source types are integer, we can generate + // a CMOV to implement Select. + CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister value_true = locations->InAt(1).AsRegister<CpuRegister>(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + + HInstruction* select_condition = select->GetCondition(); + Condition cond = kNotEqual; + + // Figure out how to test the 'condition'. 
+ if (select_condition->IsCondition()) { + HCondition* condition = select_condition->AsCondition(); + if (!condition->IsEmittedAtUseSite()) { + // This was a previously materialized condition. + // Can we use the existing condition code? + if (AreEflagsSetFrom(condition, select)) { + // Materialization was the previous instruction. Condition codes are right. + cond = X86_64IntegerCondition(condition->GetCondition()); + } else { + // No, we have to recreate the condition code. + CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>(); + __ testl(cond_reg, cond_reg); + } + } else { + GenerateCompareTest(condition); + cond = X86_64IntegerCondition(condition->GetCondition()); + } + } else { + // Must be a boolean condition, which needs to be compared to 0. + CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>(); + __ testl(cond_reg, cond_reg); + } + + // If the condition is true, overwrite the output, which already contains false. + // Generate the correct sized CMOV. + __ cmov(cond, value_false, value_true, select->GetType() == Primitive::kPrimLong); + } else { + NearLabel false_target; + GenerateTestAndBranch<NearLabel>(select, + /* condition_input_index */ 2, + /* true_target */ nullptr, + &false_target); + codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); + __ Bind(&false_target); + } } void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { @@ -1691,11 +1795,7 @@ void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); } else if (rhs.IsConstant()) { int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - if (constant == 0) { - __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); - } else { - __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); - } + codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant); } else { __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } @@ -1709,16 +1809,7 @@ void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); } else if (rhs.IsConstant()) { int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue(); - if (IsInt<32>(value)) { - if (value == 0) { - __ testq(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); - } else { - __ cmpq(lhs.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value))); - } - } else { - // Value won't fit in an int. 
- __ cmpq(lhs.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value)); - } + codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value); } else { __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } @@ -1850,6 +1941,7 @@ void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); @@ -1876,21 +1968,26 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { NearLabel less, greater, done; Primitive::Type type = compare->InputAt(0)->GetType(); + Condition less_cond = kLess; + switch (type) { + case Primitive::kPrimInt: { + CpuRegister left_reg = left.AsRegister<CpuRegister>(); + if (right.IsConstant()) { + int32_t value = right.GetConstant()->AsIntConstant()->GetValue(); + codegen_->Compare32BitValue(left_reg, value); + } else if (right.IsStackSlot()) { + __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); + } else { + __ cmpl(left_reg, right.AsRegister<CpuRegister>()); + } + break; + } case Primitive::kPrimLong: { CpuRegister left_reg = left.AsRegister<CpuRegister>(); if (right.IsConstant()) { int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); - if (IsInt<32>(value)) { - if (value == 0) { - __ testq(left_reg, left_reg); - } else { - __ cmpq(left_reg, Immediate(static_cast<int32_t>(value))); - } - } else { - // Value won't fit in an int. - __ cmpq(left_reg, codegen_->LiteralInt64Address(value)); - } + codegen_->Compare64BitValue(left_reg, value); } else if (right.IsDoubleStackSlot()) { __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); } else { @@ -1909,6 +2006,7 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>()); } __ j(kUnordered, compare->IsGtBias() ? &greater : &less); + less_cond = kBelow; // ucomis{s,d} sets CF break; } case Primitive::kPrimDouble: { @@ -1922,14 +2020,16 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>()); } __ j(kUnordered, compare->IsGtBias() ? &greater : &less); + less_cond = kBelow; // ucomis{s,d} sets CF break; } default: LOG(FATAL) << "Unexpected compare type " << type; } + __ movl(out, Immediate(0)); __ j(kEqual, &done); - __ j(type == Primitive::kPrimLong ? 
kLess : kBelow, &less); // ucomis{s,d} sets CF (kBelow) + __ j(less_cond, &less); __ Bind(&greater); __ movl(out, Immediate(1)); @@ -2750,11 +2850,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } else if (in.IsConstant()) { int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); XmmRegister dest = out.AsFpuRegister<XmmRegister>(); - if (v == 0) { - __ xorps(dest, dest); - } else { - __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v))); - } + codegen_->Load32BitValue(dest, static_cast<float>(v)); } else { __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), in.GetStackIndex()), false); @@ -2768,11 +2864,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } else if (in.IsConstant()) { int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); XmmRegister dest = out.AsFpuRegister<XmmRegister>(); - if (v == 0) { - __ xorps(dest, dest); - } else { - __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v))); - } + codegen_->Load64BitValue(dest, static_cast<double>(v)); } else { __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), in.GetStackIndex()), true); @@ -2786,11 +2878,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } else if (in.IsConstant()) { double v = in.GetConstant()->AsDoubleConstant()->GetValue(); XmmRegister dest = out.AsFpuRegister<XmmRegister>(); - if (bit_cast<int64_t, double>(v) == 0) { - __ xorps(dest, dest); - } else { - __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v))); - } + codegen_->Load32BitValue(dest, static_cast<float>(v)); } else { __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), in.GetStackIndex())); @@ -2817,11 +2905,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } else if (in.IsConstant()) { int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); XmmRegister dest = out.AsFpuRegister<XmmRegister>(); - if (v == 0) { - __ xorpd(dest, dest); - } else { - __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v))); - } + codegen_->Load64BitValue(dest, static_cast<double>(v)); } else { __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), in.GetStackIndex()), false); @@ -2835,11 +2919,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } else if (in.IsConstant()) { int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); XmmRegister dest = out.AsFpuRegister<XmmRegister>(); - if (v == 0) { - __ xorpd(dest, dest); - } else { - __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v))); - } + codegen_->Load64BitValue(dest, static_cast<double>(v)); } else { __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), in.GetStackIndex()), true); @@ -2853,11 +2933,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } else if (in.IsConstant()) { float v = in.GetConstant()->AsFloatConstant()->GetValue(); XmmRegister dest = out.AsFpuRegister<XmmRegister>(); - if (bit_cast<int32_t, float>(v) == 0) { - __ xorpd(dest, dest); - } else { - __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v))); - } + codegen_->Load64BitValue(dest, static_cast<double>(v)); } else { __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), in.GetStackIndex())); @@ -5196,18 +5272,12 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } } else if (constant->IsFloatConstant()) { float 
fp_value = constant->AsFloatConstant()->GetValue(); - int32_t value = bit_cast<int32_t, float>(fp_value); if (destination.IsFpuRegister()) { XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); - if (value == 0) { - // easy FP 0.0. - __ xorps(dest, dest); - } else { - __ movss(dest, codegen_->LiteralFloatAddress(fp_value)); - } + codegen_->Load32BitValue(dest, fp_value); } else { DCHECK(destination.IsStackSlot()) << destination; - Immediate imm(value); + Immediate imm(bit_cast<int32_t, float>(fp_value)); __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); } } else { @@ -5216,11 +5286,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { int64_t value = bit_cast<int64_t, double>(fp_value); if (destination.IsFpuRegister()) { XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); - if (value == 0) { - __ xorpd(dest, dest); - } else { - __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value)); - } + codegen_->Load64BitValue(dest, fp_value); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; codegen_->Store64BitValueToStack(destination, value); @@ -6467,6 +6533,51 @@ void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { } } +void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) { + if (value == 0) { + __ xorps(dest, dest); + } else { + __ movss(dest, LiteralInt32Address(value)); + } +} + +void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) { + if (value == 0) { + __ xorpd(dest, dest); + } else { + __ movsd(dest, LiteralInt64Address(value)); + } +} + +void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) { + Load32BitValue(dest, bit_cast<int32_t, float>(value)); +} + +void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) { + Load64BitValue(dest, bit_cast<int64_t, double>(value)); +} + +void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) { + if (value == 0) { + __ testl(dest, dest); + } else { + __ cmpl(dest, Immediate(value)); + } +} + +void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) { + if (IsInt<32>(value)) { + if (value == 0) { + __ testq(dest, dest); + } else { + __ cmpq(dest, Immediate(static_cast<int32_t>(value))); + } + } else { + // Value won't fit in an int. + __ cmpq(dest, LiteralInt64Address(value)); + } +} + void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) { DCHECK(dest.IsDoubleStackSlot()); if (IsInt<32>(value)) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 318087eb9c..72dddfddfa 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -264,6 +264,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void GenerateExplicitNullCheck(HNullCheck* instruction); void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_float); + void GenerateCompareTest(HCondition* condition); template<class LabelType> void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, @@ -478,9 +479,17 @@ class CodeGeneratorX86_64 : public CodeGenerator { Address LiteralInt32Address(int32_t v); Address LiteralInt64Address(int64_t v); - // Load a 32/64 bit value into a register in the most efficient manner. + // Load a 32/64-bit value into a register in the most efficient manner. 
void Load32BitValue(CpuRegister dest, int32_t value); void Load64BitValue(CpuRegister dest, int64_t value); + void Load32BitValue(XmmRegister dest, int32_t value); + void Load64BitValue(XmmRegister dest, int64_t value); + void Load32BitValue(XmmRegister dest, float value); + void Load64BitValue(XmmRegister dest, double value); + + // Compare a register with a 32/64-bit value in the most efficient manner. + void Compare32BitValue(CpuRegister dest, int32_t value); + void Compare64BitValue(CpuRegister dest, int64_t value); Address LiteralCaseTable(HPackedSwitch* switch_instr); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 9b91b53813..a8841d31c5 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -758,6 +758,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, compiler_driver_->GetInstructionSet(), invoke_type, graph_->IsDebuggable(), + /* osr */ false, graph_->GetCurrentInstructionId()); callee_graph->SetArtMethod(resolved_method); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index c1e38633fc..0029cc3650 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -91,6 +91,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyRotate(HInvoke* invoke, bool is_left); void SimplifySystemArrayCopy(HInvoke* invoke); void SimplifyStringEquals(HInvoke* invoke); + void SimplifyCompare(HInvoke* invoke, bool has_zero_op); OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; @@ -176,8 +177,8 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation // We can apply De Morgan's laws if both inputs are Not's and are only used // by `op`. - if (left->IsNot() && - right->IsNot() && + if (((left->IsNot() && right->IsNot()) || + (left->IsBooleanNot() && right->IsBooleanNot())) && left->HasOnlyOneNonEnvironmentUse() && right->HasOnlyOneNonEnvironmentUse()) { // Replace code looking like @@ -187,8 +188,8 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation // with // OR or, a, b (respectively AND) // NOT dest, or - HInstruction* src_left = left->AsNot()->GetInput(); - HInstruction* src_right = right->AsNot()->GetInput(); + HInstruction* src_left = left->InputAt(0); + HInstruction* src_right = right->InputAt(0); uint32_t dex_pc = op->GetDexPc(); // Remove the negations on the inputs. 
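The surrounding instruction_simplifier.cc hunks generalize TryDeMorganNegationFactoring (and VisitXor further down) from HNot to HBooleanNot, so the same De Morgan rewrite now also fires on boolean negations. The identity being exploited, as a small C++ sketch rather than compiler IR:

    // Pattern matched:  AND dst, BooleanNot(a), BooleanNot(b)   (respectively OR)
    // Replacement:      BooleanNot(OR tmp, a, b)                (respectively AND)
    static bool AndOfNots(bool a, bool b) { return !a && !b; }   // before
    static bool Factored(bool a, bool b)  { return !(a || b); }  // after: same truth table, one negation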
@@ -204,7 +205,12 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation } else { hbin = new (GetGraph()->GetArena()) HAnd(type, src_left, src_right, dex_pc); } - HNot* hnot = new (GetGraph()->GetArena()) HNot(type, hbin, dex_pc); + HInstruction* hnot; + if (left->IsBooleanNot()) { + hnot = new (GetGraph()->GetArena()) HBooleanNot(hbin, dex_pc); + } else { + hnot = new (GetGraph()->GetArena()) HNot(type, hbin, dex_pc); + } op->GetBlock()->InsertInstructionBefore(hbin, op); op->GetBlock()->ReplaceAndRemoveInstructionWith(op, hnot); @@ -1308,8 +1314,8 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { HInstruction* left = instruction->GetLeft(); HInstruction* right = instruction->GetRight(); - if (left->IsNot() && - right->IsNot() && + if (((left->IsNot() && right->IsNot()) || + (left->IsBooleanNot() && right->IsBooleanNot())) && left->HasOnlyOneNonEnvironmentUse() && right->HasOnlyOneNonEnvironmentUse()) { // Replace code looking like @@ -1318,8 +1324,8 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { // XOR dst, nota, notb // with // XOR dst, a, b - instruction->ReplaceInput(left->AsNot()->GetInput(), 0); - instruction->ReplaceInput(right->AsNot()->GetInput(), 1); + instruction->ReplaceInput(left->InputAt(0), 0); + instruction->ReplaceInput(right->InputAt(0), 1); left->GetBlock()->RemoveInstruction(left); right->GetBlock()->RemoveInstruction(right); RecordSimplification(); @@ -1441,6 +1447,24 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) } } +void InstructionSimplifierVisitor::SimplifyCompare(HInvoke* invoke, bool is_signum) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + uint32_t dex_pc = invoke->GetDexPc(); + HInstruction* left = invoke->InputAt(0); + HInstruction* right; + Primitive::Type type = left->GetType(); + if (!is_signum) { + right = invoke->InputAt(1); + } else if (type == Primitive::kPrimLong) { + right = GetGraph()->GetLongConstant(0); + } else { + right = GetGraph()->GetIntConstant(0); + } + HCompare* compare = new (GetGraph()->GetArena()) + HCompare(type, left, right, ComparisonBias::kNoBias, dex_pc); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, compare); +} + void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) { SimplifyStringEquals(instruction); @@ -1452,6 +1476,12 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateLeft || instruction->GetIntrinsic() == Intrinsics::kLongRotateLeft) { SimplifyRotate(instruction, true); + } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerCompare || + instruction->GetIntrinsic() == Intrinsics::kLongCompare) { + SimplifyCompare(instruction, /* is_signum */ false); + } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerSignum || + instruction->GetIntrinsic() == Intrinsics::kLongSignum) { + SimplifyCompare(instruction, /* is_signum */ true); } } diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index e8912b39ab..96a3c3c2f1 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1633,20 +1633,20 @@ UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(FloatIsNaN) UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) -UNIMPLEMENTED_INTRINSIC(IntegerCompare) -UNIMPLEMENTED_INTRINSIC(LongCompare) UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit) 
UNIMPLEMENTED_INTRINSIC(LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit) UNIMPLEMENTED_INTRINSIC(LongLowestOneBit) -UNIMPLEMENTED_INTRINSIC(IntegerSignum) -UNIMPLEMENTED_INTRINSIC(LongSignum) -// Rotate operations are handled as HRor instructions. +// Handled as HIR instructions. UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) -UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateRight) +UNIMPLEMENTED_INTRINSIC(IntegerCompare) +UNIMPLEMENTED_INTRINSIC(LongCompare) +UNIMPLEMENTED_INTRINSIC(IntegerSignum) +UNIMPLEMENTED_INTRINSIC(LongSignum) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 5dce83a69c..4140d94e17 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -284,36 +284,6 @@ static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } -static void GenCompare(LocationSummary* locations, bool is_long, vixl::MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1); - Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2); - Register out_reg = WRegisterFrom(out); - - __ Cmp(op1_reg, op2_reg); - __ Cset(out_reg, gt); // out == +1 if GT or 0 otherwise - __ Cinv(out_reg, out_reg, lt); // out == -1 if LT or unchanged otherwise -} - -void IntrinsicLocationsBuilderARM64::VisitIntegerCompare(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitIntegerCompare(HInvoke* invoke) { - GenCompare(invoke->GetLocations(), /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitLongCompare(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitLongCompare(HInvoke* invoke) { - GenCompare(invoke->GetLocations(), /* is_long */ true, GetVIXLAssembler()); -} - static void GenNumberOfLeadingZeros(LocationSummary* locations, Primitive::Type type, vixl::MacroAssembler* masm) { @@ -1456,34 +1426,6 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke __ Bind(slow_path->GetExitLabel()); } -static void GenSignum(LocationSummary* locations, bool is_long, vixl::MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location out = locations->Out(); - - Register op1_reg = is_long ? 
XRegisterFrom(op1) : WRegisterFrom(op1); - Register out_reg = WRegisterFrom(out); - - __ Cmp(op1_reg, 0); - __ Cset(out_reg, gt); // out == +1 if GT or 0 otherwise - __ Cinv(out_reg, out_reg, lt); // out == -1 if LT or unchanged otherwise -} - -void IntrinsicLocationsBuilderARM64::VisitIntegerSignum(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitIntegerSignum(HInvoke* invoke) { - GenSignum(invoke->GetLocations(), /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitLongSignum(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitLongSignum(HInvoke* invoke) { - GenSignum(invoke->GetLocations(), /* is_long */ true, GetVIXLAssembler()); -} - static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType())); @@ -1684,11 +1626,15 @@ UNIMPLEMENTED_INTRINSIC(LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit) UNIMPLEMENTED_INTRINSIC(LongLowestOneBit) -// Rotate operations are handled as HRor instructions. +// Handled as HIR instructions. UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) -UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateRight) +UNIMPLEMENTED_INTRINSIC(IntegerCompare) +UNIMPLEMENTED_INTRINSIC(LongCompare) +UNIMPLEMENTED_INTRINSIC(IntegerSignum) +UNIMPLEMENTED_INTRINSIC(LongSignum) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 0d9cf091cc..2294713a3e 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -1019,12 +1019,14 @@ UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(FloatIsNaN) UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) -UNIMPLEMENTED_INTRINSIC(IntegerCompare) -UNIMPLEMENTED_INTRINSIC(LongCompare) UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit) UNIMPLEMENTED_INTRINSIC(LongLowestOneBit) + +// Handled as HIR instructions. +UNIMPLEMENTED_INTRINSIC(IntegerCompare) +UNIMPLEMENTED_INTRINSIC(LongCompare) UNIMPLEMENTED_INTRINSIC(IntegerSignum) UNIMPLEMENTED_INTRINSIC(LongSignum) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index f681d1fd56..ac2850342d 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1767,12 +1767,14 @@ UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(FloatIsNaN) UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) -UNIMPLEMENTED_INTRINSIC(IntegerCompare) -UNIMPLEMENTED_INTRINSIC(LongCompare) UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit) UNIMPLEMENTED_INTRINSIC(LongLowestOneBit) + +// Handled as HIR instructions. 
+UNIMPLEMENTED_INTRINSIC(IntegerCompare) +UNIMPLEMENTED_INTRINSIC(LongCompare) UNIMPLEMENTED_INTRINSIC(IntegerSignum) UNIMPLEMENTED_INTRINSIC(LongSignum) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index acc40bc998..ab4f6f9d28 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -37,10 +37,12 @@ namespace x86 { static constexpr int kDoubleNaNHigh = 0x7FF80000; static constexpr int kDoubleNaNLow = 0x00000000; -static constexpr int kFloatNaN = 0x7FC00000; +static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); +static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) - : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) { + : arena_(codegen->GetGraph()->GetArena()), + codegen_(codegen) { } @@ -256,15 +258,37 @@ static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); - // TODO: Allow x86 to work with memory. This requires assembler support, see below. - // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly. locations->SetOut(Location::SameAsFirstInput()); + HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); + DCHECK(static_or_direct != nullptr); + if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { + // We need addressibility for the constant area. + locations->SetInAt(1, Location::RequiresRegister()); + // We need a temporary to hold the constant. + locations->AddTemp(Location::RequiresFpuRegister()); + } } -static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) { +static void MathAbsFP(LocationSummary* locations, + bool is64bit, + X86Assembler* assembler, + CodeGeneratorX86* codegen) { Location output = locations->Out(); - if (output.IsFpuRegister()) { + DCHECK(output.IsFpuRegister()); + if (locations->InAt(1).IsValid()) { + DCHECK(locations->InAt(1).IsRegister()); + // We also have a constant area pointer. + Register constant_area = locations->InAt(1).AsRegister<Register>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + if (is64bit) { + __ movsd(temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF), constant_area)); + __ andpd(output.AsFpuRegister<XmmRegister>(), temp); + } else { + __ movss(temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF), constant_area)); + __ andps(output.AsFpuRegister<XmmRegister>(), temp); + } + } else { // Create the right constant on an aligned stack. if (is64bit) { __ subl(ESP, Immediate(8)); @@ -277,19 +301,6 @@ static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* as __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); } __ addl(ESP, Immediate(16)); - } else { - // TODO: update when assember support is available. - UNIMPLEMENTED(FATAL) << "Needs assembler support."; -// Once assembler support is available, in-memory operations look like this: -// if (is64bit) { -// DCHECK(output.IsDoubleStackSlot()); -// __ andl(Address(Register(RSP), output.GetHighStackIndex(kX86WordSize)), -// Immediate(0x7FFFFFFF)); -// } else { -// DCHECK(output.IsStackSlot()); -// // Can use and with a literal directly. 
-// __ andl(Address(Register(RSP), output.GetStackIndex()), Immediate(0x7FFFFFFF)); -// } } } @@ -298,7 +309,7 @@ void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { @@ -306,7 +317,7 @@ void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_); } static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) { @@ -388,8 +399,11 @@ void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) { GenAbsLong(invoke->GetLocations(), GetAssembler()); } -static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, - X86Assembler* assembler) { +static void GenMinMaxFP(LocationSummary* locations, + bool is_min, + bool is_double, + X86Assembler* assembler, + CodeGeneratorX86* codegen) { Location op1_loc = locations->InAt(0); Location op2_loc = locations->InAt(1); Location out_loc = locations->Out(); @@ -450,15 +464,26 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, // NaN handling. __ Bind(&nan); - if (is_double) { - __ pushl(Immediate(kDoubleNaNHigh)); - __ pushl(Immediate(kDoubleNaNLow)); - __ movsd(out, Address(ESP, 0)); - __ addl(ESP, Immediate(8)); + // Do we have a constant area pointer? + if (locations->InAt(2).IsValid()) { + DCHECK(locations->InAt(2).IsRegister()); + Register constant_area = locations->InAt(2).AsRegister<Register>(); + if (is_double) { + __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, constant_area)); + } else { + __ movss(out, codegen->LiteralInt32Address(kFloatNaN, constant_area)); + } } else { - __ pushl(Immediate(kFloatNaN)); - __ movss(out, Address(ESP, 0)); - __ addl(ESP, Immediate(4)); + if (is_double) { + __ pushl(Immediate(kDoubleNaNHigh)); + __ pushl(Immediate(kDoubleNaNLow)); + __ movsd(out, Address(ESP, 0)); + __ addl(ESP, Immediate(8)); + } else { + __ pushl(Immediate(kFloatNaN)); + __ movss(out, Address(ESP, 0)); + __ addl(ESP, Immediate(4)); + } } __ jmp(&done); @@ -483,6 +508,11 @@ static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { // The following is sub-optimal, but all we can do for now. It would be fine to also accept // the second input to be the output (we can simply swap inputs). 
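The MathAbsFP and GenMinMaxFP changes above load their bit patterns (the sign-clearing masks 0x7FFFFFFF / 0x7FFFFFFFFFFFFFFF and the canonical NaNs 0x7FC00000 / 0x7FF8000000000000) from the constant area when a base-pointer input is available, and fall back to materializing them on the stack otherwise. A minimal sketch of the abs identity the andps/andpd path relies on, in plain C++ (illustrative only, not part of the patch):

    #include <cstdint>
    #include <cstring>

    double AbsViaSignMask(double x) {
      uint64_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);  // clear only the IEEE-754 sign bit
      std::memcpy(&x, &bits, sizeof(x));
      return x;  // maps -0.0 to +0.0 and leaves NaN payloads intact
    }

The float case is identical with the 32-bit mask 0x7FFFFFFF.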
locations->SetOut(Location::SameAsFirstInput()); + HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); + DCHECK(static_or_direct != nullptr); + if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { + locations->SetInAt(2, Location::RequiresRegister()); + } } void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { @@ -490,7 +520,11 @@ void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ true, + /* is_double */ true, + GetAssembler(), + codegen_); } void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { @@ -498,7 +532,11 @@ void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ true, + /* is_double */ false, + GetAssembler(), + codegen_); } void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { @@ -506,7 +544,11 @@ void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ false, + /* is_double */ true, + GetAssembler(), + codegen_); } void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { @@ -514,7 +556,11 @@ void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ false, + /* is_double */ false, + GetAssembler(), + codegen_); } static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, @@ -2245,7 +2291,7 @@ static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask, } void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) { - X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler()); + X86Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); Register reg = locations->InAt(0).AsRegister<Register>(); @@ -2276,7 +2322,7 @@ void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { - X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler()); + X86Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>(); @@ -2320,7 +2366,9 @@ static void CreateBitCountLocations( locations->SetOut(Location::RequiresRegister()); } -static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) { +static void GenBitCount(X86Assembler* assembler, + CodeGeneratorX86* codegen, + HInvoke* invoke, bool is_long) { LocationSummary* locations = invoke->GetLocations(); Location src = locations->InAt(0); Register out = locations->Out().AsRegister<Register>(); @@ 
-2331,11 +2379,7 @@ static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) value = is_long ? POPCOUNT(static_cast<uint64_t>(value)) : POPCOUNT(static_cast<uint32_t>(value)); - if (value == 0) { - __ xorl(out, out); - } else { - __ movl(out, Immediate(value)); - } + codegen->Load32BitValue(out, value); return; } @@ -2367,7 +2411,7 @@ void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(GetAssembler(), invoke, /* is_long */ false); + GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false); } void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) { @@ -2375,7 +2419,7 @@ void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(GetAssembler(), invoke, /* is_long */ true); + GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); } static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) { @@ -2390,7 +2434,9 @@ static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, b locations->SetOut(Location::RequiresRegister()); } -static void GenLeadingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) { +static void GenLeadingZeros(X86Assembler* assembler, + CodeGeneratorX86* codegen, + HInvoke* invoke, bool is_long) { LocationSummary* locations = invoke->GetLocations(); Location src = locations->InAt(0); Register out = locations->Out().AsRegister<Register>(); @@ -2403,11 +2449,7 @@ static void GenLeadingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_lo } else { value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value)); } - if (value == 0) { - __ xorl(out, out); - } else { - __ movl(out, Immediate(value)); - } + codegen->Load32BitValue(out, value); return; } @@ -2474,8 +2516,7 @@ void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* inv } void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler()); - GenLeadingZeros(assembler, invoke, /* is_long */ false); + GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); } void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -2483,8 +2524,7 @@ void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke } void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler()); - GenLeadingZeros(assembler, invoke, /* is_long */ true); + GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); } static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) { @@ -2499,7 +2539,9 @@ static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, locations->SetOut(Location::RequiresRegister()); } -static void GenTrailingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) { +static void GenTrailingZeros(X86Assembler* assembler, + CodeGeneratorX86* codegen, + HInvoke* invoke, bool is_long) { LocationSummary* locations = invoke->GetLocations(); Location src = locations->InAt(0); Register out = locations->Out().AsRegister<Register>(); @@ -2512,11 +2554,7 @@ static void GenTrailingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_l } else { value 
= is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value)); } - if (value == 0) { - __ xorl(out, out); - } else { - __ movl(out, Immediate(value)); - } + codegen->Load32BitValue(out, value); return; } @@ -2570,8 +2608,7 @@ void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* in } void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler()); - GenTrailingZeros(assembler, invoke, /* is_long */ false); + GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); } void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -2579,8 +2616,7 @@ void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invok } void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler()); - GenTrailingZeros(assembler, invoke, /* is_long */ true); + GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); } // Unimplemented intrinsics. @@ -2600,20 +2636,20 @@ UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(FloatIsNaN) UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) -UNIMPLEMENTED_INTRINSIC(IntegerCompare) -UNIMPLEMENTED_INTRINSIC(LongCompare) UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit) UNIMPLEMENTED_INTRINSIC(LongLowestOneBit) -UNIMPLEMENTED_INTRINSIC(IntegerSignum) -UNIMPLEMENTED_INTRINSIC(LongSignum) -// Rotate operations are handled as HRor instructions. +// Handled as HIR instructions. UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(LongRotateLeft) UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateRight) -UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerCompare) +UNIMPLEMENTED_INTRINSIC(LongCompare) +UNIMPLEMENTED_INTRINSIC(IntegerSignum) +UNIMPLEMENTED_INTRINSIC(LongSignum) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 51fa514cb6..c9a43442b3 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2431,58 +2431,6 @@ void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) { GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); } -static void CreateCompareLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); -} - -static void GenCompare(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) { - LocationSummary* locations = invoke->GetLocations(); - CpuRegister src1 = locations->InAt(0).AsRegister<CpuRegister>(); - CpuRegister src2 = locations->InAt(1).AsRegister<CpuRegister>(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - - NearLabel is_lt, done; - - __ xorl(out, out); - - if (is_long) { - __ cmpq(src1, src2); - } else { - __ cmpl(src1, src2); - } - __ j(kEqual, &done); - __ j(kLess, &is_lt); - - __ movl(out, Immediate(1)); - __ jmp(&done); - - __ Bind(&is_lt); - __ movl(out, Immediate(-1)); - - __ Bind(&done); -} - -void 
IntrinsicLocationsBuilderX86_64::VisitIntegerCompare(HInvoke* invoke) { - CreateCompareLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitIntegerCompare(HInvoke* invoke) { - GenCompare(GetAssembler(), invoke, /* is_long */ false); -} - -void IntrinsicLocationsBuilderX86_64::VisitLongCompare(HInvoke* invoke) { - CreateCompareLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitLongCompare(HInvoke* invoke) { - GenCompare(GetAssembler(), invoke, /* is_long */ true); -} - static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, @@ -2757,74 +2705,6 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); } -static void CreateSignLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::Any()); - locations->SetOut(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); // Need a writeable register. -} - -static void GenSign(X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen, - HInvoke* invoke, bool is_long) { - LocationSummary* locations = invoke->GetLocations(); - Location src = locations->InAt(0); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - - if (invoke->InputAt(0)->IsConstant()) { - // Evaluate this at compile time. - int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); - codegen->Load32BitValue(out, value == 0 ? 0 : (value > 0 ? 1 : -1)); - return; - } - - // Copy input into temporary. - CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); - if (src.IsRegister()) { - if (is_long) { - __ movq(tmp, src.AsRegister<CpuRegister>()); - } else { - __ movl(tmp, src.AsRegister<CpuRegister>()); - } - } else if (is_long) { - DCHECK(src.IsDoubleStackSlot()); - __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); - } else { - DCHECK(src.IsStackSlot()); - __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); - } - - // Do the bit twiddling: basically tmp >> 63/31 | -tmp >>> 63/31 for long/int. - if (is_long) { - __ movq(out, tmp); - __ sarq(out, Immediate(63)); - __ negq(tmp); - __ shrq(tmp, Immediate(63)); - __ orq(out, tmp); - } else { - __ movl(out, tmp); - __ sarl(out, Immediate(31)); - __ negl(tmp); - __ shrl(tmp, Immediate(31)); - __ orl(out, tmp); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitIntegerSignum(HInvoke* invoke) { - CreateSignLocations(arena_, invoke); -} -void IntrinsicCodeGeneratorX86_64::VisitIntegerSignum(HInvoke* invoke) { - GenSign(GetAssembler(), codegen_, invoke, /* is_long */ false); -} -void IntrinsicLocationsBuilderX86_64::VisitLongSignum(HInvoke* invoke) { - CreateSignLocations(arena_, invoke); -} -void IntrinsicCodeGeneratorX86_64::VisitLongSignum(HInvoke* invoke) { - GenSign(GetAssembler(), codegen_, invoke, /* is_long */ true); -} - // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -2840,11 +2720,15 @@ UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(FloatIsNaN) UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) -// Rotate operations are handled as HRor instructions. +// Handled as HIR instructions. 
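The GenSign code removed above computed signum branch-free as "tmp >> 63/31 | -tmp >>> 63/31". A rough C++ equivalent of the 64-bit case, with the negation done in unsigned arithmetic so INT64_MIN does not overflow (illustrative only, not part of the patch):

    #include <cstdint>

    int32_t Signum64(int64_t x) {
      // Arithmetic right shift (as on the compilers ART targets): -1 for negative inputs, 0 otherwise.
      int32_t neg = static_cast<int32_t>(x >> 63);
      // Logical right shift of the negated value: 1 for strictly positive inputs, 0 otherwise.
      int32_t pos = static_cast<int32_t>((UINT64_C(0) - static_cast<uint64_t>(x)) >> 63);
      return neg | pos;
    }

The 32-bit variant uses shift counts of 31; after this change both variants are expressed through HCompare by the instruction simplifier instead.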
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) -UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateRight) +UNIMPLEMENTED_INTRINSIC(IntegerCompare) +UNIMPLEMENTED_INTRINSIC(LongCompare) +UNIMPLEMENTED_INTRINSIC(IntegerSignum) +UNIMPLEMENTED_INTRINSIC(LongSignum) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 3dda8501d2..f269885907 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -647,6 +647,10 @@ void HLoopInformation::Populate() { header_->GetGraph()->SetHasIrreducibleLoops(true); PopulateIrreducibleRecursive(back_edge); } else { + if (header_->GetGraph()->IsCompilingOsr()) { + irreducible_ = true; + header_->GetGraph()->SetHasIrreducibleLoops(true); + } PopulateRecursive(back_edge); } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index b8083477cf..daec096f3e 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -274,6 +274,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { InstructionSet instruction_set, InvokeType invoke_type = kInvalidInvokeType, bool debuggable = false, + bool osr = false, int start_instruction_id = 0) : arena_(arena), blocks_(arena->Adapter(kArenaAllocBlockList)), @@ -302,7 +303,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_current_method_(nullptr), - inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) { + inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()), + osr_(osr) { blocks_.reserve(kDefaultNumberOfBlocks); } @@ -478,6 +480,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return instruction_set_; } + bool IsCompilingOsr() const { return osr_; } + bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } @@ -606,6 +610,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // collection pointer to passes which may create NullConstant. ReferenceTypeInfo inexact_object_rti_; + // Whether we are compiling this graph for on stack replacement: this will + // make all loops seen as irreducible and emit special stack maps to mark + // compiled code entries which the interpreter can directly jump to. + const bool osr_; + friend class SsaBuilder; // For caching constants. friend class SsaLivenessAnalysis; // For the linear order. 
ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); @@ -1259,6 +1268,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ M(X86ComputeBaseMethodAddress, Instruction) \ M(X86LoadFromConstantTable, Instruction) \ + M(X86FPNeg, Instruction) \ M(X86PackedSwitch, Instruction) #endif @@ -6040,6 +6050,74 @@ inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) { FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) #undef INSTRUCTION_TYPE_CHECK +class SwitchTable : public ValueObject { + public: + SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse) + : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) { + int32_t table_offset = instruction.VRegB_31t(); + const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset; + if (sparse) { + CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature)); + } else { + CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature)); + } + num_entries_ = table[1]; + values_ = reinterpret_cast<const int32_t*>(&table[2]); + } + + uint16_t GetNumEntries() const { + return num_entries_; + } + + void CheckIndex(size_t index) const { + if (sparse_) { + // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. + DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_)); + } else { + // In a packed table, we have the starting key and num_entries_ values. + DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_)); + } + } + + int32_t GetEntryAt(size_t index) const { + CheckIndex(index); + return values_[index]; + } + + uint32_t GetDexPcForIndex(size_t index) const { + CheckIndex(index); + return dex_pc_ + + (reinterpret_cast<const int16_t*>(values_ + index) - + reinterpret_cast<const int16_t*>(&instruction_)); + } + + // Index of the first value in the table. + size_t GetFirstValueIndex() const { + if (sparse_) { + // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. + return num_entries_; + } else { + // In a packed table, we have the starting key and num_entries_ values. + return 1; + } + } + + private: + const Instruction& instruction_; + const uint32_t dex_pc_; + + // Whether this is a sparse-switch table (or a packed-switch one). + const bool sparse_; + + // This can't be const as it needs to be computed off of the given instruction, and complicated + // expressions in the initializer list seemed very ugly. + uint16_t num_entries_; + + const int32_t* values_; + + DISALLOW_COPY_AND_ASSIGN(SwitchTable); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_H_ diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index b1bf939b36..0b3a84d3d3 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -56,6 +56,25 @@ class HX86LoadFromConstantTable : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HX86LoadFromConstantTable); }; +// Version of HNeg with access to the constant table for FP types. 
+class HX86FPNeg : public HExpression<2> { + public: + HX86FPNeg(Primitive::Type result_type, + HInstruction* input, + HX86ComputeBaseMethodAddress* method_base, + uint32_t dex_pc) + : HExpression(result_type, SideEffects::None(), dex_pc) { + DCHECK(Primitive::IsFloatingPointType(result_type)); + SetRawInputAt(0, input); + SetRawInputAt(1, method_base); + } + + DECLARE_INSTRUCTION(X86FPNeg); + + private: + DISALLOW_COPY_AND_ASSIGN(HX86FPNeg); +}; + // X86 version of HPackedSwitch that holds a pointer to the base method address. class HX86PackedSwitch : public HTemplateInstruction<2> { public: diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index bdc664b3eb..736ac32011 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -300,7 +300,7 @@ class OptimizingCompiler FINAL : public Compiler { } } - bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method) + bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_); @@ -309,7 +309,8 @@ class OptimizingCompiler FINAL : public Compiler { CompiledMethod* Emit(ArenaAllocator* arena, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - CompilerDriver* driver) const; + CompilerDriver* driver, + const DexFile::CodeItem* item) const; // Try compiling a method and return the code generator used for // compiling it. @@ -327,7 +328,8 @@ class OptimizingCompiler FINAL : public Compiler { uint32_t method_idx, jobject class_loader, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const; + Handle<mirror::DexCache> dex_cache, + bool osr) const; std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -580,11 +582,12 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - CompilerDriver* compiler_driver) const { + CompilerDriver* compiler_driver, + const DexFile::CodeItem* code_item) const { ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps)); stack_map.resize(codegen->ComputeStackMapsSize()); - codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size())); + codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, @@ -615,7 +618,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, uint32_t method_idx, jobject class_loader, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const { + Handle<mirror::DexCache> dex_cache, + bool osr) const { MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); @@ -663,8 +667,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, dex_compilation_unit.GetDexFile(), dex_compilation_unit.GetClassDefIndex()); HGraph* graph = new (arena) HGraph( - arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(), - kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable()); + arena, + dex_file, + method_idx, + requires_barrier, + compiler_driver->GetInstructionSet(), + kInvalidInvokeType, + 
compiler_driver->GetCompilerOptions().GetDebuggable(), + osr); std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, @@ -797,10 +807,11 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, method_idx, jclass_loader, dex_file, - dex_cache)); + dex_cache, + /* osr */ false)); if (codegen.get() != nullptr) { MaybeRecordStat(MethodCompilationStat::kCompiled); - method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver); + method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item); } } else { if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { @@ -843,7 +854,8 @@ bool IsCompilingWithCoreImage() { bool OptimizingCompiler::JitCompile(Thread* self, jit::JitCodeCache* code_cache, - ArtMethod* method) { + ArtMethod* method, + bool osr) { StackHandleScope<2> hs(self); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( method->GetDeclaringClass()->GetClassLoader())); @@ -873,7 +885,8 @@ bool OptimizingCompiler::JitCompile(Thread* self, method_idx, jclass_loader, *dex_file, - dex_cache)); + dex_cache, + osr)); if (codegen.get() == nullptr) { return false; } @@ -885,7 +898,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, return false; } MaybeRecordStat(MethodCompilationStat::kCompiled); - codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size)); + codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item); const void* code = code_cache->CommitCode( self, method, @@ -896,7 +909,8 @@ bool OptimizingCompiler::JitCompile(Thread* self, codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), code_allocator.GetMemory().data(), - code_allocator.GetSize()); + code_allocator.GetSize(), + osr); if (code == nullptr) { code_cache->ClearData(self, stack_map_data); diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index a2180bc9d7..a6f14616bf 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -53,6 +53,10 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { BinaryFP(div); } + void VisitCompare(HCompare* compare) OVERRIDE { + BinaryFP(compare); + } + void VisitReturn(HReturn* ret) OVERRIDE { HConstant* value = ret->InputAt(0)->AsConstant(); if ((value != nullptr && Primitive::IsFloatingPointType(value->GetType()))) { @@ -74,11 +78,50 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void BinaryFP(HBinaryOperation* bin) { HConstant* rhs = bin->InputAt(1)->AsConstant(); - if (rhs != nullptr && Primitive::IsFloatingPointType(bin->GetResultType())) { + if (rhs != nullptr && Primitive::IsFloatingPointType(rhs->GetType())) { ReplaceInput(bin, rhs, 1, false); } } + void VisitEqual(HEqual* cond) OVERRIDE { + BinaryFP(cond); + } + + void VisitNotEqual(HNotEqual* cond) OVERRIDE { + BinaryFP(cond); + } + + void VisitLessThan(HLessThan* cond) OVERRIDE { + BinaryFP(cond); + } + + void VisitLessThanOrEqual(HLessThanOrEqual* cond) OVERRIDE { + BinaryFP(cond); + } + + void VisitGreaterThan(HGreaterThan* cond) OVERRIDE { + BinaryFP(cond); + } + + void VisitGreaterThanOrEqual(HGreaterThanOrEqual* cond) OVERRIDE { + BinaryFP(cond); + } + + void VisitNeg(HNeg* neg) OVERRIDE { + if (Primitive::IsFloatingPointType(neg->GetType())) { + // We need to replace the HNeg with a HX86FPNeg in order to address the constant area. 
+ InitializePCRelativeBasePointer(); + HGraph* graph = GetGraph(); + HBasicBlock* block = neg->GetBlock(); + HX86FPNeg* x86_fp_neg = new (graph->GetArena()) HX86FPNeg( + neg->GetType(), + neg->InputAt(0), + base_, + neg->GetDexPc()); + block->ReplaceAndRemoveInstructionWith(neg, x86_fp_neg); + } + } + void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { if (switch_insn->GetNumEntries() <= InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) { @@ -127,12 +170,23 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // If this is an invoke-static/-direct with PC-relative dex cache array // addressing, we need the PC-relative address base. HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + // We can't add a pointer to the constant area if we already have a current + // method pointer. This may arise when sharpening doesn't remove the current + // method pointer from the invoke. + if (invoke_static_or_direct != nullptr && + invoke_static_or_direct->HasCurrentMethodInput()) { + DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache()); + return; + } + + bool base_added = false; if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) { InitializePCRelativeBasePointer(); // Add the extra parameter base_. - DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); invoke_static_or_direct->AddSpecialInput(base_); + base_added = true; } + // Ensure that we can load FP arguments from the constant area. for (size_t i = 0, e = invoke->InputCount(); i < e; i++) { HConstant* input = invoke->InputAt(i)->AsConstant(); @@ -140,6 +194,25 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { ReplaceInput(invoke, input, i, true); } } + + // These intrinsics need the constant area. + switch (invoke->GetIntrinsic()) { + case Intrinsics::kMathAbsDouble: + case Intrinsics::kMathAbsFloat: + case Intrinsics::kMathMaxDoubleDouble: + case Intrinsics::kMathMaxFloatFloat: + case Intrinsics::kMathMinDoubleDouble: + case Intrinsics::kMathMinFloatFloat: + if (!base_added) { + DCHECK(invoke_static_or_direct != nullptr); + DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); + InitializePCRelativeBasePointer(); + invoke_static_or_direct->AddSpecialInput(base_); + } + break; + default: + break; + } } // The generated HX86ComputeBaseMethodAddress in the entry block needed as an |