From 0c5b18edd1308975804ccf29a02a130a7b6f7fa7 Mon Sep 17 00:00:00 2001
From: Mark Mendell
Date: Sat, 6 Feb 2016 13:58:35 -0500
Subject: Support CMOV for x86 Select

If possible, generate CMOV to implement HSelect. Tricky cases are a
long or FP condition (no single CC generated), FP inputs (no FP CMOV)
and when the condition is a boolean or not emitted at the use site.
In these cases, keep using the existing HSelect code.

Change-Id: I4ff1e152b8ef126fbbabeb3316e9e2b6a6b74aeb
Signed-off-by: Mark Mendell
---
 compiler/optimizing/code_generator_x86.cc          | 146 ++++++++++++++++-----
 compiler/optimizing/code_generator_x86.h           |   1 +
 .../optimizing/prepare_for_register_allocation.cc  |  10 +-
 3 files changed, 117 insertions(+), 40 deletions(-)

(limited to 'compiler/optimizing')

diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 07edd97c1f..ae5679eaf5 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -26,7 +26,6 @@
 #include "intrinsics_x86.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
-#include "pc_relative_fixups_x86.h"
 #include "thread.h"
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
@@ -1505,30 +1504,131 @@ void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
                                /* false_target */ nullptr);
 }

+static bool SelectCanUseCMOV(HSelect* select) {
+  // There are no conditional move instructions for XMMs.
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    return false;
+  }
+
+  // A FP condition doesn't generate the single CC that we need.
+  // In 32 bit mode, a long condition doesn't generate a single CC either.
+  HInstruction* condition = select->GetCondition();
+  if (condition->IsCondition()) {
+    Primitive::Type compare_type = condition->InputAt(0)->GetType();
+    if (compare_type == Primitive::kPrimLong ||
+        Primitive::IsFloatingPointType(compare_type)) {
+      return false;
+    }
+  }
+
+  // We can generate a CMOV for this Select.
+  return true;
+}
+
 void LocationsBuilderX86::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
-  Primitive::Type select_type = select->GetType();
-  HInstruction* cond = select->GetCondition();
-
-  if (Primitive::IsFloatingPointType(select_type)) {
+  if (Primitive::IsFloatingPointType(select->GetType())) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::Any());
   } else {
     locations->SetInAt(0, Location::RequiresRegister());
+    if (SelectCanUseCMOV(select)) {
+      if (select->InputAt(1)->IsConstant()) {
+        // Cmov can't handle a constant value.
+        locations->SetInAt(1, Location::RequiresRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
+    } else {
+      locations->SetInAt(1, Location::Any());
+    }
   }
-  locations->SetInAt(1, Location::Any());
-  if (IsBooleanValueOrMaterializedCondition(cond)) {
-    locations->SetInAt(2, Location::Any());
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
   }
   locations->SetOut(Location::SameAsFirstInput());
 }

+void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+  Register lhs_reg = lhs.AsRegister<Register>();
+  if (rhs.IsConstant()) {
+    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+    codegen_->Compare32BitValue(lhs_reg, value);
+  } else if (rhs.IsStackSlot()) {
+    __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+  } else {
+    __ cmpl(lhs_reg, rhs.AsRegister<Register>());
+  }
+}
+
 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
   LocationSummary* locations = select->GetLocations();
-  NearLabel false_target;
-  GenerateTestAndBranch<NearLabel>(
-      select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  if (SelectCanUseCMOV(select)) {
+    // If both the condition and the source types are integer, we can generate
+    // a CMOV to implement Select.
+
+    HInstruction* select_condition = select->GetCondition();
+    Condition cond = kNotEqual;
+
+    // Figure out how to test the 'condition'.
+    if (select_condition->IsCondition()) {
+      HCondition* condition = select_condition->AsCondition();
+      if (!condition->IsEmittedAtUseSite()) {
+        // This was a previously materialized condition.
+        // Can we use the existing condition code?
+        if (AreEflagsSetFrom(condition, select)) {
+          // Materialization was the previous instruction. Condition codes are right.
+          cond = X86Condition(condition->GetCondition());
+        } else {
+          // No, we have to recreate the condition code.
+          Register cond_reg = locations->InAt(2).AsRegister<Register>();
+          __ testl(cond_reg, cond_reg);
+        }
+      } else {
+        // We can't handle FP or long here.
+        DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
+        DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
+        LocationSummary* cond_locations = condition->GetLocations();
+        GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+        cond = X86Condition(condition->GetCondition());
+      }
+    } else {
+      // Must be a boolean condition, which needs to be compared to 0.
+      Register cond_reg = locations->InAt(2).AsRegister<Register>();
+      __ testl(cond_reg, cond_reg);
+    }
+
+    // If the condition is true, overwrite the output, which already contains false.
+    Location false_loc = locations->InAt(0);
+    Location true_loc = locations->InAt(1);
+    if (select->GetType() == Primitive::kPrimLong) {
+      // 64 bit conditional move.
+      Register false_high = false_loc.AsRegisterPairHigh<Register>();
+      Register false_low = false_loc.AsRegisterPairLow<Register>();
+      if (true_loc.IsRegisterPair()) {
+        __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
+        __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
+      } else {
+        __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
+        __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
+      }
+    } else {
+      // 32 bit conditional move.
+      Register false_reg = false_loc.AsRegister<Register>();
+      if (true_loc.IsRegister()) {
+        __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
+      } else {
+        __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
+      }
+    }
+  } else {
+    NearLabel false_target;
+    GenerateTestAndBranch<NearLabel>(
+        select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
+    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+    __ Bind(&false_target);
+  }
 }

 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -1642,15 +1742,7 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {

       // Clear output register: setb only sets the low byte.
       __ xorl(reg, reg);
-
-      if (rhs.IsRegister()) {
-        __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
-      } else if (rhs.IsConstant()) {
-        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
-      } else {
-        __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
-      }
+      GenerateIntCompare(lhs, rhs);
       __ setb(X86Condition(cond->GetCondition()), reg);
       return;
     }
@@ -4128,15 +4220,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {

   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimInt: {
-      Register left_reg = left.AsRegister<Register>();
-      if (right.IsConstant()) {
-        int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
-        codegen_->Compare32BitValue(left_reg, value);
-      } else if (right.IsStackSlot()) {
-        __ cmpl(left_reg, Address(ESP, right.GetStackIndex()));
-      } else {
-        __ cmpl(left_reg, right.AsRegister<Register>());
-      }
+      GenerateIntCompare(left, right);
       break;
     }
     case Primitive::kPrimLong: {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 2fb6d60ad5..63e9b2fc9c 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -297,6 +297,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
                                    HBasicBlock* default_block);

   void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
+  void GenerateIntCompare(Location lhs, Location rhs);

   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 324d84f3db..0ad104eaa7 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -138,15 +138,7 @@ bool PrepareForRegisterAllocation::CanEmitConditionAt(HCondition* condition,
   }

   if (user->IsSelect() && user->AsSelect()->GetCondition() == condition) {
-    if (GetGraph()->GetInstructionSet() == kX86) {
-      // Long values and long condition inputs result in 8 required core registers.
-      // We don't have that many on x86. Materialize the condition in such case.
-      return user->GetType() != Primitive::kPrimLong ||
-             condition->InputAt(1)->GetType() != Primitive::kPrimLong ||
-             condition->InputAt(1)->IsConstant();
-    } else {
-      return true;
-    }
+    return true;
   }

   return false;
--
cgit v1.2.3-59-g8ed1b
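For readers unfamiliar with the lowering, the key idea in the new VisitSelect path is that the Select's output register already holds the "false" value, so a single CMOV overwrites it with the "true" value only when the condition flags are set and no branch is needed. The short standalone C++ sketch below illustrates that shape. It is not ART code: the function names are invented for illustration, and it uses GCC/Clang extended inline assembly on x86/x86-64 in place of ART's X86Assembler cmovl() emitter.

// Standalone illustration only (not ART code); build with e.g. g++ -O2 on x86/x86-64.
#include <cstdint>
#include <cstdio>

// Source-level shape of an HSelect: pick one of two values based on a condition.
// A naive lowering needs a compare followed by a conditional branch.
static int32_t select_with_branch(bool cond, int32_t true_value, int32_t false_value) {
  return cond ? true_value : false_value;
}

// Shape of the branchless lowering: the result register starts out holding the
// "false" value, and CMOVNE overwrites it with the "true" value only when the
// materialized condition, tested with TEST, is non-zero.
static int32_t select_with_cmov(bool cond, int32_t true_value, int32_t false_value) {
  int32_t result = false_value;
  int32_t cond_reg = cond ? 1 : 0;
  __asm__("testl %1, %1\n\t"   // set EFLAGS from the condition value
          "cmovne %2, %0"      // result = true_value if the condition is non-zero
          : "+r"(result)
          : "r"(cond_reg), "r"(true_value)
          : "cc");
  return result;
}

int main() {
  // Both functions select 7 for a true condition and 9 for a false one.
  std::printf("%d %d\n", select_with_branch(true, 7, 9), select_with_branch(false, 7, 9));
  std::printf("%d %d\n", select_with_cmov(true, 7, 9), select_with_cmov(false, 7, 9));
  return 0;
}

The same restrictions that SelectCanUseCMOV() enforces apply to this sketch: there is no CMOV for the XMM registers, an FP condition does not reduce to a single condition code, and in 32-bit mode neither does a long condition, so those cases keep the existing branching fallback.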