diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 61 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_vector_arm64.cc | 2 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_vector_x86.cc | 2 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_vector_x86_64.cc | 2 | ||||
| -rw-r--r-- | compiler/optimizing/loop_optimization.cc | 104 | ||||
| -rw-r--r-- | compiler/optimizing/loop_optimization.h | 12 |
6 files changed, 51 insertions, 132 deletions
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 2f495fc15f..2452139d42 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -3078,6 +3078,18 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { const Location first = locations->InAt(0); const Location out = locations->Out(); const Location second = locations->InAt(1); + + // In the unlucky case the output of this instruction overlaps + // with an input of an "emitted-at-use-site" condition, and + // the output of this instruction is not one of its inputs, we'll + // need to fallback to branches instead of conditional ARM instructions. + bool output_overlaps_with_condition_inputs = + !IsBooleanValueOrMaterializedCondition(condition) && + !out.Equals(first) && + !out.Equals(second) && + (condition->GetLocations()->InAt(0).Equals(out) || + condition->GetLocations()->InAt(1).Equals(out)); + DCHECK(!output_overlaps_with_condition_inputs || condition->IsCondition()); Location src; if (condition->IsIntConstant()) { @@ -3091,7 +3103,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { return; } - if (!DataType::IsFloatingPointType(type)) { + if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) { bool invert = false; if (out.Equals(second)) { @@ -3163,6 +3175,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { vixl32::Label* false_target = nullptr; vixl32::Label* true_target = nullptr; vixl32::Label select_end; + vixl32::Label other_case; vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end); if (out.Equals(second)) { @@ -3173,12 +3186,21 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { src = second; if (!out.Equals(first)) { - codegen_->MoveLocation(out, first, type); + if (output_overlaps_with_condition_inputs) { + false_target = &other_case; + } else { + codegen_->MoveLocation(out, first, type); + } } } GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false); codegen_->MoveLocation(out, src, type); + if (output_overlaps_with_condition_inputs) { + __ B(target); + __ Bind(&other_case); + codegen_->MoveLocation(out, first, type); + } if (select_end.IsReferenced()) { __ Bind(&select_end); @@ -3277,31 +3299,16 @@ void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition, void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall); - // Handle the long/FP comparisons made in instruction simplification. - switch (cond->InputAt(0)->GetType()) { - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); - if (!cond->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1))); - if (!cond->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } - break; - - default: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); - if (!cond->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } + const DataType::Type type = cond->InputAt(0)->GetType(); + if (DataType::IsFloatingPointType(type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1))); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); + } + if (!cond->IsEmittedAtUseSite()) { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 174efdf115..1cfdf54816 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -1290,6 +1290,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { Register scratch; switch (instruction->GetPackedType()) { + case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt. case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); // Special handling of compressed/uncompressed string load. @@ -1321,7 +1322,6 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - case DataType::Type::kInt16: case DataType::Type::kInt32: case DataType::Type::kFloat32: case DataType::Type::kInt64: diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index f2ffccc887..4945328e2b 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -1141,6 +1141,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { + case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt. case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); // Special handling of compressed/uncompressed string load. @@ -1168,7 +1169,6 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - case DataType::Type::kInt16: case DataType::Type::kInt32: case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index e2b0485f89..a77c7d6838 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -1114,6 +1114,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { + case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt. case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); // Special handling of compressed/uncompressed string load. @@ -1141,7 +1142,6 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - case DataType::Type::kInt16: case DataType::Type::kInt32: case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 899496328e..9f278a9f4e 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -334,29 +334,12 @@ static bool IsAddConst(HInstruction* instruction, // Detect reductions of the following forms, // x = x_phi + .. // x = x_phi - .. -// x = max(x_phi, ..) -// x = min(x_phi, ..) static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { if (reduction->IsAdd()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); } else if (reduction->IsSub()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi); - } else if (reduction->IsInvokeStaticOrDirect()) { - switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: - return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || - (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); - default: - return false; - } } return false; } @@ -365,10 +348,6 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) { if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) { return HVecReduce::kSum; - } else if (reduction->IsVecMin()) { - return HVecReduce::kMin; - } else if (reduction->IsVecMax()) { - return HVecReduce::kMax; } LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId(); UNREACHABLE(); @@ -1124,7 +1103,6 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite(); } -// TODO: saturation arithmetic. bool HLoopOptimization::VectorizeUse(LoopNode* node, HInstruction* instruction, bool generate_code, @@ -1331,43 +1309,6 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } return false; } - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - // Deal with vector restrictions. - HInstruction* opa = instruction->InputAt(0); - HInstruction* opb = instruction->InputAt(1); - HInstruction* r = opa; - HInstruction* s = opb; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoMinMax)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { - return false; // reject, unless all operands are same-extension narrower - } - // Accept MIN/MAX(x, y) for vectorizable operands. - DCHECK(r != nullptr); - DCHECK(s != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - s = opb; - } - if (VectorizeUse(node, r, generate_code, type, restrictions) && - VectorizeUse(node, s, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp( - instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); - } - return true; - } - return false; - } default: return false; } // switch @@ -1426,7 +1367,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict *restrictions |= kNoDiv; return TrySetVectorLength(4); case DataType::Type::kInt64: - *restrictions |= kNoDiv | kNoMul | kNoMinMax; + *restrictions |= kNoDiv | kNoMul; return TrySetVectorLength(2); case DataType::Type::kFloat32: *restrictions |= kNoReduction; @@ -1456,13 +1397,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(4); case DataType::Type::kInt64: - *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax | kNoSAD; + *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoSAD; return TrySetVectorLength(2); case DataType::Type::kFloat32: - *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0 + *restrictions |= kNoReduction; return TrySetVectorLength(4); case DataType::Type::kFloat64: - *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0 + *restrictions |= kNoReduction; return TrySetVectorLength(2); default: break; @@ -1488,10 +1429,10 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict *restrictions |= kNoDiv; return TrySetVectorLength(2); case DataType::Type::kFloat32: - *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) + *restrictions |= kNoReduction; return TrySetVectorLength(4); case DataType::Type::kFloat64: - *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) + *restrictions |= kNoReduction; return TrySetVectorLength(2); default: break; @@ -1517,10 +1458,10 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict *restrictions |= kNoDiv; return TrySetVectorLength(2); case DataType::Type::kFloat32: - *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) + *restrictions |= kNoReduction; return TrySetVectorLength(4); case DataType::Type::kFloat64: - *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) + *restrictions |= kNoReduction; return TrySetVectorLength(2); default: break; @@ -1745,8 +1686,7 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct void HLoopOptimization::GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, - DataType::Type type, - bool is_unsigned) { + DataType::Type type) { uint32_t dex_pc = org->GetDexPc(); HInstruction* vector = nullptr; DataType::Type org_type = org->GetType(); @@ -1823,32 +1763,6 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc); break; - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: { - vector = new (global_allocator_) - HVecMin(global_allocator_, - opa, - opb, - HVecOperation::ToProperType(type, is_unsigned), - vector_length_, - dex_pc); - break; - } - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - vector = new (global_allocator_) - HVecMax(global_allocator_, - opa, - opb, - HVecOperation::ToProperType(type, is_unsigned), - vector_length_, - dex_pc); - break; - } default: LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId(); UNREACHABLE(); diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index a707ad1358..d70751037b 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -75,11 +75,10 @@ class HLoopOptimization : public HOptimization { kNoSignedHAdd = 1 << 5, // no signed halving add kNoUnroundedHAdd = 1 << 6, // no unrounded halving add kNoAbs = 1 << 7, // no absolute value - kNoMinMax = 1 << 8, // no min/max - kNoStringCharAt = 1 << 9, // no StringCharAt - kNoReduction = 1 << 10, // no reduction - kNoSAD = 1 << 11, // no sum of absolute differences (SAD) - kNoWideSAD = 1 << 12, // no sum of absolute differences (SAD) with operand widening + kNoStringCharAt = 1 << 8, // no StringCharAt + kNoReduction = 1 << 9, // no reduction + kNoSAD = 1 << 10, // no sum of absolute differences (SAD) + kNoWideSAD = 1 << 11, // no sum of absolute differences (SAD) with operand widening }; /* @@ -173,8 +172,7 @@ class HLoopOptimization : public HOptimization { void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, - DataType::Type type, - bool is_unsigned = false); + DataType::Type type); // Vectorization idioms. bool VectorizeHalvingAddIdiom(LoopNode* node, |