diff options
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_generator_vector_x86.cc | 25 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_vector_x86_64.cc | 25 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 12 |
3 files changed, 55 insertions, 7 deletions
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 68aef779f2..ace8e7ab27 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -1201,11 +1201,43 @@ void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instr
 }
 
 void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Inputs 0..2 (the accumulator and the two multiplicands) each need an XMM register.
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetInAt(2, Location::RequiresFpuRegister());
+  // The accumulator is updated in place, so the output shares its register.
+  locations->SetOut(Location::SameAsFirstInput());
+  // Scratch register that receives the pairwise multiply-add result.
+  locations->AddTemp(Location::RequiresFpuRegister());
 }
 
 void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  bool cpu_has_avx = CpuHasAvxFeatureFlag();
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case DataType::Type::kInt32: {
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      if (cpu_has_avx) {
+        __ vpmaddwd(tmp, left, right);
+        __ vpaddd(acc, acc, tmp);
+      } else {
+        // The two-operand SSE forms overwrite their first operand. left and right are
+        // plain inputs (only acc doubles as the output), so multiply into a copy in tmp.
+        __ movaps(tmp, right);
+        __ pmaddwd(tmp, left);
+        __ paddd(acc, tmp);
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unsupported SIMD Type" << instruction->GetPackedType();
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 19dfd1d2a8..f68062cb55 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1174,11 +1174,43 @@ void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* in
 }
 
 void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Inputs 0..2 (the accumulator and the two multiplicands) each need an XMM register.
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetInAt(2, Location::RequiresFpuRegister());
+  // The accumulator is updated in place, so the output shares its register.
+  locations->SetOut(Location::SameAsFirstInput());
+  // Scratch register that receives the pairwise multiply-add result.
+  locations->AddTemp(Location::RequiresFpuRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  bool cpu_has_avx = CpuHasAvxFeatureFlag();
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case DataType::Type::kInt32: {
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      if (cpu_has_avx) {
+        __ vpmaddwd(tmp, left, right);
+        __ vpaddd(acc, acc, tmp);
+      } else {
+        // The two-operand SSE forms overwrite their first operand. left and right are
+        // plain inputs (only acc doubles as the output), so multiply into a copy in tmp.
+        __ movaps(tmp, right);
+        __ pmaddwd(tmp, left);
+        __ paddd(acc, tmp);
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unsupported SIMD Type" << instruction->GetPackedType();
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 9c4e9d25f7..567a41e2fd 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -1623,14 +1623,20 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: - case DataType::Type::kInt16: *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | - kNoSAD| + kNoSAD | kNoDotProd; return TrySetVectorLength(8); + case DataType::Type::kInt16: + *restrictions |= kNoDiv | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD; + return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(4); @@ -2166,7 +2172,7 @@ bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, bool generate_code, DataType::Type reduction_type, uint64_t restrictions) { - if (!instruction->IsAdd() || (reduction_type != DataType::Type::kInt32)) { + if (!instruction->IsAdd() || reduction_type != DataType::Type::kInt32) { return false; } |