summary | refs | log | tree | commit | diff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r-- compiler/optimizing/code_generator_arm_vixl.cc 61
-rw-r--r-- compiler/optimizing/code_generator_vector_arm64.cc 2
-rw-r--r-- compiler/optimizing/code_generator_vector_x86.cc 2
-rw-r--r-- compiler/optimizing/code_generator_vector_x86_64.cc 2
-rw-r--r-- compiler/optimizing/loop_optimization.cc 104
-rw-r--r-- compiler/optimizing/loop_optimization.h 12
6 files changed, 51 insertions, 132 deletions
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 2f495fc15f..2452139d42 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -3078,6 +3078,18 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
const Location first = locations->InAt(0);
const Location out = locations->Out();
const Location second = locations->InAt(1);
+
+ // In the unlucky case the output of this instruction overlaps
+ // with an input of an "emitted-at-use-site" condition, and
+ // the output of this instruction is not one of its inputs, we'll
+ // need to fallback to branches instead of conditional ARM instructions.
+ bool output_overlaps_with_condition_inputs =
+ !IsBooleanValueOrMaterializedCondition(condition) &&
+ !out.Equals(first) &&
+ !out.Equals(second) &&
+ (condition->GetLocations()->InAt(0).Equals(out) ||
+ condition->GetLocations()->InAt(1).Equals(out));
+ DCHECK(!output_overlaps_with_condition_inputs || condition->IsCondition());
Location src;
if (condition->IsIntConstant()) {
@@ -3091,7 +3103,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
return;
}
- if (!DataType::IsFloatingPointType(type)) {
+ if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) {
bool invert = false;
if (out.Equals(second)) {
@@ -3163,6 +3175,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
vixl32::Label* false_target = nullptr;
vixl32::Label* true_target = nullptr;
vixl32::Label select_end;
+ vixl32::Label other_case;
vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
if (out.Equals(second)) {
@@ -3173,12 +3186,21 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
src = second;
if (!out.Equals(first)) {
- codegen_->MoveLocation(out, first, type);
+ if (output_overlaps_with_condition_inputs) {
+ false_target = &other_case;
+ } else {
+ codegen_->MoveLocation(out, first, type);
+ }
}
}
GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false);
codegen_->MoveLocation(out, src, type);
+ if (output_overlaps_with_condition_inputs) {
+ __ B(target);
+ __ Bind(&other_case);
+ codegen_->MoveLocation(out, first, type);
+ }
if (select_end.IsReferenced()) {
__ Bind(&select_end);
@@ -3277,31 +3299,16 @@ void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
- // Handle the long/FP comparisons made in instruction simplification.
- switch (cond->InputAt(0)->GetType()) {
- case DataType::Type::kInt64:
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
- if (!cond->IsEmittedAtUseSite()) {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- }
- break;
-
- case DataType::Type::kFloat32:
- case DataType::Type::kFloat64:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
- if (!cond->IsEmittedAtUseSite()) {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- }
- break;
-
- default:
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
- if (!cond->IsEmittedAtUseSite()) {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- }
+ const DataType::Type type = cond->InputAt(0)->GetType();
+ if (DataType::IsFloatingPointType(type)) {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
+ } else {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
+ }
+ if (!cond->IsEmittedAtUseSite()) {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
}
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 174efdf115..1cfdf54816 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -1290,6 +1290,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
Register scratch;
switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
case DataType::Type::kUint16:
DCHECK_EQ(8u, instruction->GetVectorLength());
// Special handling of compressed/uncompressed string load.
@@ -1321,7 +1322,6 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
case DataType::Type::kInt64:
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index f2ffccc887..4945328e2b 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -1141,6 +1141,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
case DataType::Type::kUint16:
DCHECK_EQ(8u, instruction->GetVectorLength());
// Special handling of compressed/uncompressed string load.
@@ -1168,7 +1169,6 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
DCHECK_LE(2u, instruction->GetVectorLength());
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index e2b0485f89..a77c7d6838 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1114,6 +1114,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
case DataType::Type::kUint16:
DCHECK_EQ(8u, instruction->GetVectorLength());
// Special handling of compressed/uncompressed string load.
@@ -1141,7 +1142,6 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
DCHECK_LE(2u, instruction->GetVectorLength());
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 899496328e..9f278a9f4e 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -334,29 +334,12 @@ static bool IsAddConst(HInstruction* instruction,
// Detect reductions of the following forms,
// x = x_phi + ..
// x = x_phi - ..
-// x = max(x_phi, ..)
-// x = min(x_phi, ..)
static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
if (reduction->IsAdd()) {
return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
(reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
} else if (reduction->IsSub()) {
return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi);
- } else if (reduction->IsInvokeStaticOrDirect()) {
- switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble:
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble:
- return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
- (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
- default:
- return false;
- }
}
return false;
}
@@ -365,10 +348,6 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) {
if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) {
return HVecReduce::kSum;
- } else if (reduction->IsVecMin()) {
- return HVecReduce::kMin;
- } else if (reduction->IsVecMax()) {
- return HVecReduce::kMax;
}
LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId();
UNREACHABLE();
@@ -1124,7 +1103,6 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
}
-// TODO: saturation arithmetic.
bool HLoopOptimization::VectorizeUse(LoopNode* node,
HInstruction* instruction,
bool generate_code,
@@ -1331,43 +1309,6 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
}
return false;
}
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble:
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble: {
- // Deal with vector restrictions.
- HInstruction* opa = instruction->InputAt(0);
- HInstruction* opb = instruction->InputAt(1);
- HInstruction* r = opa;
- HInstruction* s = opb;
- bool is_unsigned = false;
- if (HasVectorRestrictions(restrictions, kNoMinMax)) {
- return false;
- } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
- !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
- return false; // reject, unless all operands are same-extension narrower
- }
- // Accept MIN/MAX(x, y) for vectorizable operands.
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
- if (generate_code && vector_mode_ != kVector) { // de-idiom
- r = opa;
- s = opb;
- }
- if (VectorizeUse(node, r, generate_code, type, restrictions) &&
- VectorizeUse(node, s, generate_code, type, restrictions)) {
- if (generate_code) {
- GenerateVecOp(
- instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
- }
- return true;
- }
- return false;
- }
default:
return false;
} // switch
@@ -1426,7 +1367,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
*restrictions |= kNoDiv;
return TrySetVectorLength(4);
case DataType::Type::kInt64:
- *restrictions |= kNoDiv | kNoMul | kNoMinMax;
+ *restrictions |= kNoDiv | kNoMul;
return TrySetVectorLength(2);
case DataType::Type::kFloat32:
*restrictions |= kNoReduction;
@@ -1456,13 +1397,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
*restrictions |= kNoDiv | kNoSAD;
return TrySetVectorLength(4);
case DataType::Type::kInt64:
- *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax | kNoSAD;
+ *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoSAD;
return TrySetVectorLength(2);
case DataType::Type::kFloat32:
- *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
+ *restrictions |= kNoReduction;
return TrySetVectorLength(4);
case DataType::Type::kFloat64:
- *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
+ *restrictions |= kNoReduction;
return TrySetVectorLength(2);
default:
break;
@@ -1488,10 +1429,10 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
*restrictions |= kNoDiv;
return TrySetVectorLength(2);
case DataType::Type::kFloat32:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoReduction;
return TrySetVectorLength(4);
case DataType::Type::kFloat64:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoReduction;
return TrySetVectorLength(2);
default:
break;
@@ -1517,10 +1458,10 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
*restrictions |= kNoDiv;
return TrySetVectorLength(2);
case DataType::Type::kFloat32:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoReduction;
return TrySetVectorLength(4);
case DataType::Type::kFloat64:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoReduction;
return TrySetVectorLength(2);
default:
break;
@@ -1745,8 +1686,7 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct
void HLoopOptimization::GenerateVecOp(HInstruction* org,
HInstruction* opa,
HInstruction* opb,
- DataType::Type type,
- bool is_unsigned) {
+ DataType::Type type) {
uint32_t dex_pc = org->GetDexPc();
HInstruction* vector = nullptr;
DataType::Type org_type = org->GetType();
@@ -1823,32 +1763,6 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
vector = new (global_allocator_)
HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc);
break;
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble: {
- vector = new (global_allocator_)
- HVecMin(global_allocator_,
- opa,
- opb,
- HVecOperation::ToProperType(type, is_unsigned),
- vector_length_,
- dex_pc);
- break;
- }
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble: {
- vector = new (global_allocator_)
- HVecMax(global_allocator_,
- opa,
- opb,
- HVecOperation::ToProperType(type, is_unsigned),
- vector_length_,
- dex_pc);
- break;
- }
default:
LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId();
UNREACHABLE();
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index a707ad1358..d70751037b 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -75,11 +75,10 @@ class HLoopOptimization : public HOptimization {
kNoSignedHAdd = 1 << 5, // no signed halving add
kNoUnroundedHAdd = 1 << 6, // no unrounded halving add
kNoAbs = 1 << 7, // no absolute value
- kNoMinMax = 1 << 8, // no min/max
- kNoStringCharAt = 1 << 9, // no StringCharAt
- kNoReduction = 1 << 10, // no reduction
- kNoSAD = 1 << 11, // no sum of absolute differences (SAD)
- kNoWideSAD = 1 << 12, // no sum of absolute differences (SAD) with operand widening
+ kNoStringCharAt = 1 << 8, // no StringCharAt
+ kNoReduction = 1 << 9, // no reduction
+ kNoSAD = 1 << 10, // no sum of absolute differences (SAD)
+ kNoWideSAD = 1 << 11, // no sum of absolute differences (SAD) with operand widening
};
/*
@@ -173,8 +172,7 @@ class HLoopOptimization : public HOptimization {
void GenerateVecOp(HInstruction* org,
HInstruction* opa,
HInstruction* opb,
- DataType::Type type,
- bool is_unsigned = false);
+ DataType::Type type);
// Vectorization idioms.
bool VectorizeHalvingAddIdiom(LoopNode* node,