Diffstat (limited to 'compiler/optimizing')
 compiler/optimizing/code_generator_vector_arm.cc      |  24
 compiler/optimizing/code_generator_vector_arm64.cc    |  57
 compiler/optimizing/code_generator_vector_arm_vixl.cc |  24
 compiler/optimizing/code_generator_vector_mips.cc     |  24
 compiler/optimizing/code_generator_vector_mips64.cc   |  24
 compiler/optimizing/code_generator_vector_x86.cc      |  45
 compiler/optimizing/code_generator_vector_x86_64.cc   |  41
 compiler/optimizing/graph_visualizer.cc               |   5
 compiler/optimizing/induction_var_range.cc            |  28
 compiler/optimizing/intrinsics_mips64.cc              | 194
 compiler/optimizing/loop_optimization.cc              | 204
 compiler/optimizing/loop_optimization.h               |  23
 compiler/optimizing/nodes.h                           |  19
 compiler/optimizing/nodes_vector.h                    |  80
 14 files changed, 761 insertions(+), 31 deletions(-)
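For orientation, the scalar idiom that the new VectorizeHalvingAddIdiom recognizer in loop_optimization.cc matches looks roughly like the sketch below. The sketch is illustrative only: the function and array names are hypothetical and it is written in C++, whereas the recognizer actually matches HIR built from Java bytecode, where byte/short/char operands are promoted to int for the arithmetic and the result is cast back to the narrow type before the store.

// Rounded, unsigned halving add over bytes (illustrative sketch).
// Each element is zero-extended to int, summed with a +1 rounding term,
// shifted right by one, and narrowed back to uint8_t. The change maps this
// loop body onto HVecHalvingAdd(is_unsigned=true, is_rounded=true), which
// the x86 backends lower to pavgb and the ARM64 backend lowers to urhadd.
#include <cstddef>
#include <cstdint>

void RoundedHalvingAdd(const uint8_t* a, const uint8_t* b, uint8_t* out, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    out[i] = static_cast<uint8_t>((a[i] + b[i] + 1) >> 1);  // (a + b + 1) >> 1
  }
}

Dropping the "+ 1" gives the unrounded variant (a + b) >> 1, and sign-extended operands give the signed variant. The x86/x86_64 code generators below DCHECK IsRounded() and IsUnsigned(), so only the unsigned rounded form (pavgb/pavgw) is lowered there; the kNoSignedHAdd and kNoUnroundedHAdd restrictions added in TrySetVectorType keep the recognizer from selecting the other variants on that path.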
diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc index e7f7b3019c..6e82123e56 100644 --- a/compiler/optimizing/code_generator_vector_arm.cc +++ b/compiler/optimizing/code_generator_vector_arm.cc @@ -124,6 +124,14 @@ void InstructionCodeGeneratorARM::VisitVecAdd(HVecAdd* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderARM::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderARM::VisitVecSub(HVecSub* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } @@ -148,6 +156,22 @@ void InstructionCodeGeneratorARM::VisitVecDiv(HVecDiv* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderARM::VisitVecMin(HVecMin* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecMin(HVecMin* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecMax(HVecMax* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecMax(HVecMax* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderARM::VisitVecAnd(HVecAnd* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 0923920366..2dfccfff85 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -318,6 +318,47 @@ void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { } } +void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + instruction->IsRounded() + ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B()) + : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B()); + } else { + instruction->IsRounded() + ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B()) + : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B()); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + instruction->IsRounded() + ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H()) + : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H()); + } else { + instruction->IsRounded() + ? 
__ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H()) + : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } @@ -420,6 +461,22 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) { } } +void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { + LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); +} + +void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { + LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); +} + void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 74fa584e09..990178b31b 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -124,6 +124,14 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } @@ -148,6 +156,22 @@ void InstructionCodeGeneratorARMVIXL::VisitVecDiv(HVecDiv* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderARMVIXL::VisitVecMin(HVecMin* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecMax(HVecMax* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index 6969abd422..8ea1ca7d90 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -124,6 +124,14 @@ void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) 
{ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } @@ -148,6 +156,22 @@ void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderMIPS::VisitVecMin(HVecMin* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecMax(HVecMax* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 87118cefa5..a484bb4774 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -124,6 +124,14 @@ void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } @@ -148,6 +156,22 @@ void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecMax(HVecMax* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 8dabb4d08f..a86d060821 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -350,6 +350,35 @@ void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { } } +void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + DCHECK(instruction->IsRounded()); + DCHECK(instruction->IsUnsigned()); + + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, 
instruction->GetVectorLength()); + __ pavgb(dst, src); + return; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pavgw(dst, src); + return; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } @@ -448,6 +477,22 @@ void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { } } +void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index e95608839b..696735367e 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -343,6 +343,31 @@ void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { } } +void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ pavgb(dst, src); + return; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pavgw(dst, src); + return; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } @@ -441,6 +466,22 @@ void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { } } +void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index cc3c143b15..1b2b9f80ac 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -509,6 +509,11 @@ class 
HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << deoptimize->GetKind(); } + void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE { + StartAttributeStream("unsigned") << std::boolalpha << hadd->IsUnsigned() << std::noboolalpha; + StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha; + } + #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetOpKind(); diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 1c8674d522..7c833cf70c 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -45,18 +45,6 @@ static bool IsSafeDiv(int32_t c1, int32_t c2) { return c2 != 0 && CanLongValueFitIntoInt(static_cast<int64_t>(c1) / static_cast<int64_t>(c2)); } -/** Returns true for 32/64-bit constant instruction. */ -static bool IsIntAndGet(HInstruction* instruction, int64_t* value) { - if (instruction->IsIntConstant()) { - *value = instruction->AsIntConstant()->GetValue(); - return true; - } else if (instruction->IsLongConstant()) { - *value = instruction->AsLongConstant()->GetValue(); - return true; - } - return false; -} - /** Computes a * b for a,b > 0 (at least until first overflow happens). */ static int64_t SafeMul(int64_t a, int64_t b, /*out*/ bool* overflow) { if (a > 0 && b > 0 && a > (std::numeric_limits<int64_t>::max() / b)) { @@ -106,7 +94,7 @@ static bool IsGEZero(HInstruction* instruction) { } } int64_t value = -1; - return IsIntAndGet(instruction, &value) && value >= 0; + return IsInt64AndGet(instruction, &value) && value >= 0; } /** Hunts "under the hood" for a suitable instruction at the hint. */ @@ -149,7 +137,7 @@ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v, HInstruc int64_t value; if (v.instruction->IsDiv() && v.instruction->InputAt(0)->IsArrayLength() && - IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) { + IsInt64AndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) { return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant); } // If a == 1, the most suitable one suffices as maximum value. @@ -444,7 +432,7 @@ bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, // any of the three requests (kExact, kAtMost, and KAtLeast). if (info->induction_class == HInductionVarAnalysis::kInvariant && info->operation == HInductionVarAnalysis::kFetch) { - if (IsIntAndGet(info->fetch, value)) { + if (IsInt64AndGet(info->fetch, value)) { return true; } } @@ -635,7 +623,7 @@ InductionVarRange::Value InductionVarRange::GetGeometric(HInductionVarAnalysis:: int64_t f = 0; if (IsConstant(info->op_a, kExact, &a) && CanLongValueFitIntoInt(a) && - IsIntAndGet(info->fetch, &f) && f >= 1) { + IsInt64AndGet(info->fetch, &f) && f >= 1) { // Conservative bounds on a * f^-i + b with f >= 1 can be computed without // trip count. Other forms would require a much more elaborate evaluation. const bool is_min_a = a >= 0 ? is_min : !is_min; @@ -663,7 +651,7 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, // Unless at a constant or hint, chase the instruction a bit deeper into the HIR tree, so that // it becomes more likely range analysis will compare the same instructions as terminal nodes. 
int64_t value; - if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) { + if (IsInt64AndGet(instruction, &value) && CanLongValueFitIntoInt(value)) { // Proper constant reveals best information. return Value(static_cast<int32_t>(value)); } else if (instruction == chase_hint_) { @@ -671,10 +659,10 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, return Value(instruction, 1, 0); } else if (instruction->IsAdd()) { // Incorporate suitable constants in the chased value. - if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) { + if (IsInt64AndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) { return AddValue(Value(static_cast<int32_t>(value)), GetFetch(instruction->InputAt(1), trip, in_body, is_min)); - } else if (IsIntAndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) { + } else if (IsInt64AndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) { return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(static_cast<int32_t>(value))); } @@ -1074,7 +1062,7 @@ bool InductionVarRange::GenerateLastValueGeometric(HInductionVarAnalysis::Induct // Detect known base and trip count (always taken). int64_t f = 0; int64_t m = 0; - if (IsIntAndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) { + if (IsInt64AndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) { HInstruction* opa = nullptr; HInstruction* opb = nullptr; if (GenerateCode(info->op_a, nullptr, graph, block, &opa, false, false) && diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 82d0567ef9..b57b41f686 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -2093,6 +2093,199 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Bind(&done); } +// static void java.lang.System.arraycopy(Object src, int srcPos, +// Object dest, int destPos, +// int length) +void IntrinsicLocationsBuilderMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) { + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + + // As long as we are checking, we might as well check to see if the src and dest + // positions are >= 0. + if ((src_pos != nullptr && src_pos->GetValue() < 0) || + (dest_pos != nullptr && dest_pos->GetValue() < 0)) { + // We will have to fail anyways. + return; + } + + // And since we are already checking, check the length too. + if (length != nullptr) { + int32_t len = length->GetValue(); + if (len < 0) { + // Just call as normal. + return; + } + } + + // Okay, it is safe to generate inline code. + LocationSummary* locations = + new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); + // arraycopy(Object src, int srcPos, Object dest, int destPos, int length). 
+ locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3))); + locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4))); + + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +// Utility routine to verify that "length(input) - pos >= length" +static void EnoughItems(Mips64Assembler* assembler, + GpuRegister length_input_minus_pos, + Location length, + SlowPathCodeMIPS64* slow_path) { + if (length.IsConstant()) { + int32_t length_constant = length.GetConstant()->AsIntConstant()->GetValue(); + + if (IsInt<16>(length_constant)) { + __ Slti(TMP, length_input_minus_pos, length_constant); + __ Bnezc(TMP, slow_path->GetEntryLabel()); + } else { + __ LoadConst32(TMP, length_constant); + __ Bltc(length_input_minus_pos, TMP, slow_path->GetEntryLabel()); + } + } else { + __ Bltc(length_input_minus_pos, length.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); + } +} + +static void CheckPosition(Mips64Assembler* assembler, + Location pos, + GpuRegister input, + Location length, + SlowPathCodeMIPS64* slow_path, + bool length_is_input_length = false) { + // Where is the length in the Array? + const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); + + // Calculate length(input) - pos. + if (pos.IsConstant()) { + int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); + if (pos_const == 0) { + if (!length_is_input_length) { + // Check that length(input) >= length. + __ LoadFromOffset(kLoadWord, AT, input, length_offset); + EnoughItems(assembler, AT, length, slow_path); + } + } else { + // Check that (length(input) - pos) >= zero. + __ LoadFromOffset(kLoadWord, AT, input, length_offset); + DCHECK_GT(pos_const, 0); + __ Addiu32(AT, AT, -pos_const); + __ Bltzc(AT, slow_path->GetEntryLabel()); + + // Verify that (length(input) - pos) >= length. + EnoughItems(assembler, AT, length, slow_path); + } + } else if (length_is_input_length) { + // The only way the copy can succeed is if pos is zero. + GpuRegister pos_reg = pos.AsRegister<GpuRegister>(); + __ Bnezc(pos_reg, slow_path->GetEntryLabel()); + } else { + // Verify that pos >= 0. + GpuRegister pos_reg = pos.AsRegister<GpuRegister>(); + __ Bltzc(pos_reg, slow_path->GetEntryLabel()); + + // Check that (length(input) - pos) >= zero. + __ LoadFromOffset(kLoadWord, AT, input, length_offset); + __ Subu(AT, AT, pos_reg); + __ Bltzc(AT, slow_path->GetEntryLabel()); + + // Verify that (length(input) - pos) >= length. 
+ EnoughItems(assembler, AT, length, slow_path); + } +} + +void IntrinsicCodeGeneratorMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); + Location src_pos = locations->InAt(1); + GpuRegister dest = locations->InAt(2).AsRegister<GpuRegister>(); + Location dest_pos = locations->InAt(3); + Location length = locations->InAt(4); + + Mips64Label loop; + + GpuRegister dest_base = locations->GetTemp(0).AsRegister<GpuRegister>(); + GpuRegister src_base = locations->GetTemp(1).AsRegister<GpuRegister>(); + GpuRegister count = locations->GetTemp(2).AsRegister<GpuRegister>(); + + SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke); + codegen_->AddSlowPath(slow_path); + + // Bail out if the source and destination are the same (to handle overlap). + __ Beqc(src, dest, slow_path->GetEntryLabel()); + + // Bail out if the source is null. + __ Beqzc(src, slow_path->GetEntryLabel()); + + // Bail out if the destination is null. + __ Beqzc(dest, slow_path->GetEntryLabel()); + + // Load length into register for count. + if (length.IsConstant()) { + __ LoadConst32(count, length.GetConstant()->AsIntConstant()->GetValue()); + } else { + // If the length is negative, bail out. + // We have already checked in the LocationsBuilder for the constant case. + __ Bltzc(length.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); + + __ Move(count, length.AsRegister<GpuRegister>()); + } + + // Validity checks: source. + CheckPosition(assembler, src_pos, src, Location::RegisterLocation(count), slow_path); + + // Validity checks: dest. + CheckPosition(assembler, dest_pos, dest, Location::RegisterLocation(count), slow_path); + + // If count is zero, we're done. + __ Beqzc(count, slow_path->GetExitLabel()); + + // Okay, everything checks out. Finally time to do the copy. + // Check assumption that sizeof(Char) is 2 (used in scaling below). + const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + DCHECK_EQ(char_size, 2u); + + const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar); + + const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); + + // Calculate source and destination addresses. 
+ if (src_pos.IsConstant()) { + int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue(); + + __ Daddiu64(src_base, src, data_offset + char_size * src_pos_const, TMP); + } else { + __ Daddiu64(src_base, src, data_offset, TMP); + __ Dlsa(src_base, src_pos.AsRegister<GpuRegister>(), src_base, char_shift); + } + if (dest_pos.IsConstant()) { + int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + + __ Daddiu64(dest_base, dest, data_offset + char_size * dest_pos_const, TMP); + } else { + __ Daddiu64(dest_base, dest, data_offset, TMP); + __ Dlsa(dest_base, dest_pos.AsRegister<GpuRegister>(), dest_base, char_shift); + } + + __ Bind(&loop); + __ Lh(TMP, src_base, 0); + __ Daddiu(src_base, src_base, char_size); + __ Daddiu(count, count, -1); + __ Sh(TMP, dest_base, 0); + __ Daddiu(dest_base, dest_base, char_size); + __ Bnezc(count, &loop); + + __ Bind(slow_path->GetExitLabel()); +} + static void GenHighestOneBit(LocationSummary* locations, Primitive::Type type, Mips64Assembler* assembler) { @@ -2372,7 +2565,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathTanh(HInvoke* invoke) { } UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) -UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy) UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 8e88c1ec7f..5a95abdb50 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -63,12 +63,122 @@ static bool IsEarlyExit(HLoopInformation* loop_info) { return false; } +// Detect a sign extension from the given type. Returns the promoted operand on success. +static bool IsSignExtensionAndGet(HInstruction* instruction, + Primitive::Type type, + /*out*/ HInstruction** operand) { + // Accept any already wider constant that would be handled properly by sign + // extension when represented in the *width* of the given narrower data type + // (the fact that char normally zero extends does not matter here). + int64_t value = 0; + if (IsInt64AndGet(instruction, &value)) { + switch (type) { + case Primitive::kPrimByte: + if (std::numeric_limits<int8_t>::min() <= value && + std::numeric_limits<int8_t>::max() >= value) { + *operand = instruction; + return true; + } + return false; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + if (std::numeric_limits<int16_t>::min() <= value && + std::numeric_limits<int16_t>::max() <= value) { + *operand = instruction; + return true; + } + return false; + default: + return false; + } + } + // An implicit widening conversion of a signed integer to an integral type sign-extends + // the two's-complement representation of the integer value to fill the wider format. + if (instruction->GetType() == type && (instruction->IsArrayGet() || + instruction->IsStaticFieldGet() || + instruction->IsInstanceFieldGet())) { + switch (type) { + case Primitive::kPrimByte: + case Primitive::kPrimShort: + *operand = instruction; + return true; + default: + return false; + } + } + // TODO: perhaps explicit conversions later too? + // (this may return something different from instruction) + return false; +} + +// Detect a zero extension from the given type. Returns the promoted operand on success. 
+static bool IsZeroExtensionAndGet(HInstruction* instruction, + Primitive::Type type, + /*out*/ HInstruction** operand) { + // Accept any already wider constant that would be handled properly by zero + // extension when represented in the *width* of the given narrower data type + // (the fact that byte/short normally sign extend does not matter here). + int64_t value = 0; + if (IsInt64AndGet(instruction, &value)) { + switch (type) { + case Primitive::kPrimByte: + if (std::numeric_limits<uint8_t>::min() <= value && + std::numeric_limits<uint8_t>::max() >= value) { + *operand = instruction; + return true; + } + return false; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + if (std::numeric_limits<uint16_t>::min() <= value && + std::numeric_limits<uint16_t>::max() <= value) { + *operand = instruction; + return true; + } + return false; + default: + return false; + } + } + // An implicit widening conversion of a char to an integral type zero-extends + // the representation of the char value to fill the wider format. + if (instruction->GetType() == type && (instruction->IsArrayGet() || + instruction->IsStaticFieldGet() || + instruction->IsInstanceFieldGet())) { + if (type == Primitive::kPrimChar) { + *operand = instruction; + return true; + } + } + // A sign (or zero) extension followed by an explicit removal of just the + // higher sign bits is equivalent to a zero extension of the underlying operand. + if (instruction->IsAnd()) { + int64_t mask = 0; + HInstruction* a = instruction->InputAt(0); + HInstruction* b = instruction->InputAt(1); + // In (a & b) find (mask & b) or (a & mask) with sign or zero extension on the non-mask. + if ((IsInt64AndGet(a, /*out*/ &mask) && (IsSignExtensionAndGet(b, type, /*out*/ operand) || + IsZeroExtensionAndGet(b, type, /*out*/ operand))) || + (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) || + IsZeroExtensionAndGet(a, type, /*out*/ operand)))) { + switch ((*operand)->GetType()) { + case Primitive::kPrimByte: return mask == std::numeric_limits<uint8_t>::max(); + case Primitive::kPrimChar: + case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max(); + default: return false; + } + } + } + // TODO: perhaps explicit conversions later too? + return false; +} + // Test vector restrictions. static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { return (restrictions & tested) != 0; } -// Inserts an instruction. +// Insert an instruction. static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { DCHECK(block != nullptr); DCHECK(instruction != nullptr); @@ -713,6 +823,10 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) { + // Recognize vectorization idioms. + if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) { + return true; + } // Deal with vector restrictions. 
if ((HasVectorRestrictions(restrictions, kNoShift)) || (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) { @@ -806,11 +920,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs; + *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoAbs; + *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd; return TrySetVectorLength(8); case Primitive::kPrimInt: *restrictions |= kNoDiv; @@ -1039,6 +1153,90 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, #undef GENERATE_VEC // +// Vectorization idioms. +// + +// Method recognizes the following idioms: +// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b +// regular halving add (a + b) >> 1 for unsigned/signed operands a, b +// Provided that the operands are promoted to a wider form to do the arithmetic and +// then cast back to narrower form, the idioms can be mapped into efficient SIMD +// implementation that operates directly in narrower form (plus one extra bit). +// TODO: current version recognizes implicit byte/short/char widening only; +// explicit widening from int to long could be added later. +bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + Primitive::Type type, + uint64_t restrictions) { + // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1 + // (note whether the sign bit in higher precision is shifted in has no effect + // on the narrow precision computed by the idiom). + int64_t value = 0; + if ((instruction->IsShr() || + instruction->IsUShr()) && + IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) { + // + // TODO: make following code less sensitive to associativity and commutativity differences. + // + HInstruction* x = instruction->InputAt(0); + // Test for an optional rounding part (x + 1) >> 1. + bool is_rounded = false; + if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) { + x = x->InputAt(0); + is_rounded = true; + } + // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed. + if (x->IsAdd()) { + HInstruction* a = x->InputAt(0); + HInstruction* b = x->InputAt(1); + HInstruction* r = nullptr; + HInstruction* s = nullptr; + bool is_unsigned = false; + if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) { + is_unsigned = true; + } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) { + is_unsigned = false; + } else { + return false; + } + // Deal with vector restrictions. + if ((!is_unsigned && HasVectorRestrictions(restrictions, kNoSignedHAdd)) || + (!is_rounded && HasVectorRestrictions(restrictions, kNoUnroundedHAdd))) { + return false; + } + // Accept recognized halving add for vectorizable operands. Vectorized code uses the + // shorthand idiomatic operation. Sequential code uses the original scalar expressions. 
+ DCHECK(r != nullptr && s != nullptr); + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { + if (generate_code) { + if (vector_mode_ == kVector) { + vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd( + global_allocator_, + vector_map_->Get(r), + vector_map_->Get(s), + type, + vector_length_, + is_unsigned, + is_rounded)); + } else { + VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions); + VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions); + GenerateVecOp(instruction, + vector_map_->Get(instruction->InputAt(0)), + vector_map_->Get(instruction->InputAt(1)), + type); + } + } + return true; + } + } + } + return false; +} + +// // Helpers. // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index d8f50aab28..4a7da86e32 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -62,13 +62,15 @@ class HLoopOptimization : public HOptimization { * Vectorization restrictions (bit mask). */ enum VectorRestrictions { - kNone = 0, // no restrictions - kNoMul = 1, // no multiplication - kNoDiv = 2, // no division - kNoShift = 4, // no shift - kNoShr = 8, // no arithmetic shift right - kNoHiBits = 16, // "wider" operations cannot bring in higher order bits - kNoAbs = 32, // no absolute value + kNone = 0, // no restrictions + kNoMul = 1, // no multiplication + kNoDiv = 2, // no division + kNoShift = 4, // no shift + kNoShr = 8, // no arithmetic shift right + kNoHiBits = 16, // "wider" operations cannot bring in higher order bits + kNoSignedHAdd = 32, // no signed halving add + kNoUnroundedHAdd = 64, // no unrounded halving add + kNoAbs = 128, // no absolute value }; /* @@ -136,6 +138,13 @@ class HLoopOptimization : public HOptimization { Primitive::Type type); void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type); + // Vectorization idioms. + bool VectorizeHalvingAddIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + Primitive::Type type, + uint64_t restrictions); + // Helpers. bool TrySetPhiInduction(HPhi* phi, bool restrict_uses); bool TrySetSimpleLoopHeader(HBasicBlock* block); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index c109369106..6be237e612 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1369,9 +1369,12 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecAbs, VecUnaryOperation) \ M(VecNot, VecUnaryOperation) \ M(VecAdd, VecBinaryOperation) \ + M(VecHalvingAdd, VecBinaryOperation) \ M(VecSub, VecBinaryOperation) \ M(VecMul, VecBinaryOperation) \ M(VecDiv, VecBinaryOperation) \ + M(VecMin, VecBinaryOperation) \ + M(VecMax, VecBinaryOperation) \ M(VecAnd, VecBinaryOperation) \ M(VecAndNot, VecBinaryOperation) \ M(VecOr, VecBinaryOperation) \ @@ -6845,6 +6848,7 @@ class HBlocksInLoopReversePostOrderIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator); }; +// Returns int64_t value of a properly typed constant. inline int64_t Int64FromConstant(HConstant* constant) { if (constant->IsIntConstant()) { return constant->AsIntConstant()->GetValue(); @@ -6856,6 +6860,21 @@ inline int64_t Int64FromConstant(HConstant* constant) { } } +// Returns true iff instruction is an integral constant (and sets value on success). 
+inline bool IsInt64AndGet(HInstruction* instruction, /*out*/ int64_t* value) { + if (instruction->IsIntConstant()) { + *value = instruction->AsIntConstant()->GetValue(); + return true; + } else if (instruction->IsLongConstant()) { + *value = instruction->AsLongConstant()->GetValue(); + return true; + } else if (instruction->IsNullConstant()) { + *value = 0; + return true; + } + return false; +} + #define INSTRUCTION_TYPE_CHECK(type, super) \ inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \ inline const H##type* HInstruction::As##type() const { \ diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 0cbbf2a215..bff58d0910 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -338,6 +338,42 @@ class HVecAdd FINAL : public HVecBinaryOperation { DISALLOW_COPY_AND_ASSIGN(HVecAdd); }; +// Performs halving add on every component in the two vectors, viz. +// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] +// or [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ] +// for signed operands x, y (sign extension) or unsigned operands x, y (zero extension). +class HVecHalvingAdd FINAL : public HVecBinaryOperation { + public: + HVecHalvingAdd(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + bool is_unsigned, + bool is_rounded, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc), + is_unsigned_(is_unsigned), + is_rounded_(is_rounded) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + + bool IsUnsigned() const { return is_unsigned_; } + bool IsRounded() const { return is_rounded_; } + + DECLARE_INSTRUCTION(VecHalvingAdd); + + private: + bool is_unsigned_; + bool is_rounded_; + + DISALLOW_COPY_AND_ASSIGN(HVecHalvingAdd); +}; + // Subtracts every component in the two vectors, // viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ]. class HVecSub FINAL : public HVecBinaryOperation { @@ -404,6 +440,50 @@ class HVecDiv FINAL : public HVecBinaryOperation { DISALLOW_COPY_AND_ASSIGN(HVecDiv); }; +// Takes minimum of every component in the two vectors, +// viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ]. +class HVecMin FINAL : public HVecBinaryOperation { + public: + HVecMin(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecMin); + private: + DISALLOW_COPY_AND_ASSIGN(HVecMin); +}; + +// Takes maximum of every component in the two vectors, +// viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ]. 
+class HVecMax FINAL : public HVecBinaryOperation { + public: + HVecMax(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecMax); + private: + DISALLOW_COPY_AND_ASSIGN(HVecMax); +}; + // Bitwise-ands every component in the two vectors, // viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ]. class HVecAnd FINAL : public HVecBinaryOperation { |
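A minimal scalar model of the per-lane behavior documented on HVecHalvingAdd above (a sketch for clarity, not code from this change): operands are first widened with sign or zero extension according to is_unsigned, so the addition, the optional +1 rounding term, and the shift all happen with a spare bit of precision and cannot overflow the lane type.

#include <cstdint>

// One lane of HVecHalvingAdd for the byte/char/short packed types handled here.
// 'x' and 'y' must already be sign- or zero-extended lane values; 32 bits gives
// ample headroom for 8- and 16-bit lanes.
int32_t HalvingAddLane(int32_t x, int32_t y, bool is_rounded) {
  return (x + y + (is_rounded ? 1 : 0)) >> 1;
}

For example, with unsigned byte lanes 254 and 255 the rounded form yields 255 while the unrounded form yields 254, which is exactly the difference between the Urhadd/Srhadd and Uhadd/Shadd pairs selected in the ARM64 code above.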