diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/optimizing/code_generator_vector_mips.cc | 188 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_vector_mips64.cc | 188 | ||||
| -rw-r--r-- | compiler/optimizing/loop_optimization.cc | 16 | 
3 files changed, 380 insertions, 12 deletions
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index 384b642145..3cf150a6b8 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -1071,11 +1071,195 @@ void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumu  void LocationsBuilderMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {    CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); +  LocationSummary* locations = instruction->GetLocations(); +  // All conversions require at least one temporary register. +  locations->AddTemp(Location::RequiresFpuRegister()); +  // Some conversions require a second temporary register. +  HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); +  HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); +  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), +            HVecOperation::ToSignedType(b->GetPackedType())); +  switch (a->GetPackedType()) { +    case DataType::Type::kInt32: +      if (instruction->GetPackedType() == DataType::Type::kInt32) { +        break; +      } +      FALLTHROUGH_INTENDED; +    case DataType::Type::kUint8: +    case DataType::Type::kInt8: +    case DataType::Type::kUint16: +    case DataType::Type::kInt16: +      locations->AddTemp(Location::RequiresFpuRegister()); +      break; +    default: +      break; +  }  }  void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { -  LOG(FATAL) << "No SIMD for " << instruction->GetId(); -  // TODO: implement this, location helper already filled out (shared with MulAcc). +  LocationSummary* locations = instruction->GetLocations(); +  VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); +  VectorRegister left = VectorRegisterFrom(locations->InAt(1)); +  VectorRegister right = VectorRegisterFrom(locations->InAt(2)); +  VectorRegister tmp = static_cast<VectorRegister>(FTMP); +  VectorRegister tmp1 = VectorRegisterFrom(locations->GetTemp(0)); + +  DCHECK(locations->InAt(0).Equals(locations->Out())); + +  // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S). +  HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); +  HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); +  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), +            HVecOperation::ToSignedType(b->GetPackedType())); +  switch (a->GetPackedType()) { +    case DataType::Type::kUint8: +    case DataType::Type::kInt8: +      DCHECK_EQ(16u, a->GetVectorLength()); +      switch (instruction->GetPackedType()) { +        case DataType::Type::kUint16: +        case DataType::Type::kInt16: { +          DCHECK_EQ(8u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillB(tmp, ZERO); +          __ Hadd_sH(tmp1, left, tmp); +          __ Hadd_sH(tmp2, right, tmp); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ AddvH(acc, acc, tmp1); +          __ Hadd_sH(tmp1, tmp, left); +          __ Hadd_sH(tmp2, tmp, right); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ AddvH(acc, acc, tmp1); +          break; +        } +        case DataType::Type::kInt32: { +          DCHECK_EQ(4u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillB(tmp, ZERO); +          __ Hadd_sH(tmp1, left, tmp); +          __ Hadd_sH(tmp2, right, tmp); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ Hadd_sW(tmp1, tmp1, tmp1); +          __ AddvW(acc, acc, tmp1); +          __ Hadd_sH(tmp1, tmp, left); +          __ Hadd_sH(tmp2, tmp, right); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ Hadd_sW(tmp1, tmp1, tmp1); +          __ AddvW(acc, acc, tmp1); +          break; +        } +        case DataType::Type::kInt64: { +          DCHECK_EQ(2u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillB(tmp, ZERO); +          __ Hadd_sH(tmp1, left, tmp); +          __ Hadd_sH(tmp2, right, tmp); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ Hadd_sW(tmp1, tmp1, tmp1); +          __ Hadd_sD(tmp1, tmp1, tmp1); +          __ AddvD(acc, acc, tmp1); +          __ Hadd_sH(tmp1, tmp, left); +          __ Hadd_sH(tmp2, tmp, right); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ Hadd_sW(tmp1, tmp1, tmp1); +          __ Hadd_sD(tmp1, tmp1, tmp1); +          __ AddvD(acc, acc, tmp1); +          break; +        } +        default: +          LOG(FATAL) << "Unsupported SIMD type"; +          UNREACHABLE(); +      } +      break; +    case DataType::Type::kUint16: +    case DataType::Type::kInt16: +      DCHECK_EQ(8u, a->GetVectorLength()); +      switch (instruction->GetPackedType()) { +        case DataType::Type::kInt32: { +          DCHECK_EQ(4u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillH(tmp, ZERO); +          __ Hadd_sW(tmp1, left, tmp); +          __ Hadd_sW(tmp2, right, tmp); +          __ Asub_sW(tmp1, tmp1, tmp2); +          __ AddvW(acc, acc, tmp1); +          __ Hadd_sW(tmp1, tmp, left); +          __ Hadd_sW(tmp2, tmp, right); +          __ Asub_sW(tmp1, tmp1, tmp2); +          __ AddvW(acc, acc, tmp1); +          break; +        } +        case DataType::Type::kInt64: { +          DCHECK_EQ(2u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillH(tmp, ZERO); +          __ Hadd_sW(tmp1, left, tmp); +          __ Hadd_sW(tmp2, right, tmp); +          __ Asub_sW(tmp1, tmp1, tmp2); +          __ Hadd_sD(tmp1, tmp1, tmp1); +          __ AddvD(acc, acc, tmp1); +          __ Hadd_sW(tmp1, tmp, left); +          __ Hadd_sW(tmp2, tmp, right); +          __ Asub_sW(tmp1, tmp1, tmp2); +          __ Hadd_sD(tmp1, tmp1, tmp1); +          __ AddvD(acc, acc, tmp1); +          break; +        } +        default: +          LOG(FATAL) << "Unsupported SIMD type"; +          UNREACHABLE(); +      } +      break; +    case DataType::Type::kInt32: +      DCHECK_EQ(4u, a->GetVectorLength()); +      switch (instruction->GetPackedType()) { +        case DataType::Type::kInt32: { +          DCHECK_EQ(4u, instruction->GetVectorLength()); +          __ FillW(tmp, ZERO); +          __ SubvW(tmp1, left, right); +          __ Add_aW(tmp1, tmp1, tmp); +          __ AddvW(acc, acc, tmp1); +          break; +        } +        case DataType::Type::kInt64: { +          DCHECK_EQ(2u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillW(tmp, ZERO); +          __ Hadd_sD(tmp1, left, tmp); +          __ Hadd_sD(tmp2, right, tmp); +          __ Asub_sD(tmp1, tmp1, tmp2); +          __ AddvD(acc, acc, tmp1); +          __ Hadd_sD(tmp1, tmp, left); +          __ Hadd_sD(tmp2, tmp, right); +          __ Asub_sD(tmp1, tmp1, tmp2); +          __ AddvD(acc, acc, tmp1); +          break; +        } +        default: +          LOG(FATAL) << "Unsupported SIMD type"; +          UNREACHABLE(); +      } +      break; +    case DataType::Type::kInt64: { +      DCHECK_EQ(2u, a->GetVectorLength()); +      switch (instruction->GetPackedType()) { +        case DataType::Type::kInt64: { +          DCHECK_EQ(2u, instruction->GetVectorLength()); +          __ FillW(tmp, ZERO); +          __ SubvD(tmp1, left, right); +          __ Add_aD(tmp1, tmp1, tmp); +          __ AddvD(acc, acc, tmp1); +          break; +        } +        default: +          LOG(FATAL) << "Unsupported SIMD type"; +          UNREACHABLE(); +      } +      break; +    } +    default: +      LOG(FATAL) << "Unsupported SIMD type"; +      UNREACHABLE(); +  }  }  // Helper to set up locations for vector memory operations. diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 0c59b7344a..2d69533f21 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -1069,11 +1069,195 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu  void LocationsBuilderMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {    CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); +  LocationSummary* locations = instruction->GetLocations(); +  // All conversions require at least one temporary register. +  locations->AddTemp(Location::RequiresFpuRegister()); +  // Some conversions require a second temporary register. +  HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); +  HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); +  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), +            HVecOperation::ToSignedType(b->GetPackedType())); +  switch (a->GetPackedType()) { +    case DataType::Type::kInt32: +      if (instruction->GetPackedType() == DataType::Type::kInt32) { +        break; +      } +      FALLTHROUGH_INTENDED; +    case DataType::Type::kUint8: +    case DataType::Type::kInt8: +    case DataType::Type::kUint16: +    case DataType::Type::kInt16: +      locations->AddTemp(Location::RequiresFpuRegister()); +      break; +    default: +      break; +  }  }  void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { -  LOG(FATAL) << "No SIMD for " << instruction->GetId(); -  // TODO: implement this, location helper already filled out (shared with MulAcc). +  LocationSummary* locations = instruction->GetLocations(); +  VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); +  VectorRegister left = VectorRegisterFrom(locations->InAt(1)); +  VectorRegister right = VectorRegisterFrom(locations->InAt(2)); +  VectorRegister tmp = static_cast<VectorRegister>(FTMP); +  VectorRegister tmp1 = VectorRegisterFrom(locations->GetTemp(0)); + +  DCHECK(locations->InAt(0).Equals(locations->Out())); + +  // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S). +  HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); +  HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); +  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), +            HVecOperation::ToSignedType(b->GetPackedType())); +  switch (a->GetPackedType()) { +    case DataType::Type::kUint8: +    case DataType::Type::kInt8: +      DCHECK_EQ(16u, a->GetVectorLength()); +      switch (instruction->GetPackedType()) { +        case DataType::Type::kUint16: +        case DataType::Type::kInt16: { +          DCHECK_EQ(8u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillB(tmp, ZERO); +          __ Hadd_sH(tmp1, left, tmp); +          __ Hadd_sH(tmp2, right, tmp); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ AddvH(acc, acc, tmp1); +          __ Hadd_sH(tmp1, tmp, left); +          __ Hadd_sH(tmp2, tmp, right); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ AddvH(acc, acc, tmp1); +          break; +        } +        case DataType::Type::kInt32: { +          DCHECK_EQ(4u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillB(tmp, ZERO); +          __ Hadd_sH(tmp1, left, tmp); +          __ Hadd_sH(tmp2, right, tmp); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ Hadd_sW(tmp1, tmp1, tmp1); +          __ AddvW(acc, acc, tmp1); +          __ Hadd_sH(tmp1, tmp, left); +          __ Hadd_sH(tmp2, tmp, right); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ Hadd_sW(tmp1, tmp1, tmp1); +          __ AddvW(acc, acc, tmp1); +          break; +        } +        case DataType::Type::kInt64: { +          DCHECK_EQ(2u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillB(tmp, ZERO); +          __ Hadd_sH(tmp1, left, tmp); +          __ Hadd_sH(tmp2, right, tmp); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ Hadd_sW(tmp1, tmp1, tmp1); +          __ Hadd_sD(tmp1, tmp1, tmp1); +          __ AddvD(acc, acc, tmp1); +          __ Hadd_sH(tmp1, tmp, left); +          __ Hadd_sH(tmp2, tmp, right); +          __ Asub_sH(tmp1, tmp1, tmp2); +          __ Hadd_sW(tmp1, tmp1, tmp1); +          __ Hadd_sD(tmp1, tmp1, tmp1); +          __ AddvD(acc, acc, tmp1); +          break; +        } +        default: +          LOG(FATAL) << "Unsupported SIMD type"; +          UNREACHABLE(); +      } +      break; +    case DataType::Type::kUint16: +    case DataType::Type::kInt16: +      DCHECK_EQ(8u, a->GetVectorLength()); +      switch (instruction->GetPackedType()) { +        case DataType::Type::kInt32: { +          DCHECK_EQ(4u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillH(tmp, ZERO); +          __ Hadd_sW(tmp1, left, tmp); +          __ Hadd_sW(tmp2, right, tmp); +          __ Asub_sW(tmp1, tmp1, tmp2); +          __ AddvW(acc, acc, tmp1); +          __ Hadd_sW(tmp1, tmp, left); +          __ Hadd_sW(tmp2, tmp, right); +          __ Asub_sW(tmp1, tmp1, tmp2); +          __ AddvW(acc, acc, tmp1); +          break; +        } +        case DataType::Type::kInt64: { +          DCHECK_EQ(2u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillH(tmp, ZERO); +          __ Hadd_sW(tmp1, left, tmp); +          __ Hadd_sW(tmp2, right, tmp); +          __ Asub_sW(tmp1, tmp1, tmp2); +          __ Hadd_sD(tmp1, tmp1, tmp1); +          __ AddvD(acc, acc, tmp1); +          __ Hadd_sW(tmp1, tmp, left); +          __ Hadd_sW(tmp2, tmp, right); +          __ Asub_sW(tmp1, tmp1, tmp2); +          __ Hadd_sD(tmp1, tmp1, tmp1); +          __ AddvD(acc, acc, tmp1); +          break; +        } +        default: +          LOG(FATAL) << "Unsupported SIMD type"; +          UNREACHABLE(); +      } +      break; +    case DataType::Type::kInt32: +      DCHECK_EQ(4u, a->GetVectorLength()); +      switch (instruction->GetPackedType()) { +        case DataType::Type::kInt32: { +          DCHECK_EQ(4u, instruction->GetVectorLength()); +          __ FillW(tmp, ZERO); +          __ SubvW(tmp1, left, right); +          __ Add_aW(tmp1, tmp1, tmp); +          __ AddvW(acc, acc, tmp1); +          break; +        } +        case DataType::Type::kInt64: { +          DCHECK_EQ(2u, instruction->GetVectorLength()); +          VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); +          __ FillW(tmp, ZERO); +          __ Hadd_sD(tmp1, left, tmp); +          __ Hadd_sD(tmp2, right, tmp); +          __ Asub_sD(tmp1, tmp1, tmp2); +          __ AddvD(acc, acc, tmp1); +          __ Hadd_sD(tmp1, tmp, left); +          __ Hadd_sD(tmp2, tmp, right); +          __ Asub_sD(tmp1, tmp1, tmp2); +          __ AddvD(acc, acc, tmp1); +          break; +        } +        default: +          LOG(FATAL) << "Unsupported SIMD type"; +          UNREACHABLE(); +      } +      break; +    case DataType::Type::kInt64: { +      DCHECK_EQ(2u, a->GetVectorLength()); +      switch (instruction->GetPackedType()) { +        case DataType::Type::kInt64: { +          DCHECK_EQ(2u, instruction->GetVectorLength()); +          __ FillD(tmp, ZERO); +          __ SubvD(tmp1, left, right); +          __ Add_aD(tmp1, tmp1, tmp); +          __ AddvD(acc, acc, tmp1); +          break; +        } +        default: +          LOG(FATAL) << "Unsupported SIMD type"; +          UNREACHABLE(); +      } +      break; +    } +    default: +      LOG(FATAL) << "Unsupported SIMD type"; +      UNREACHABLE(); +  }  }  // Helper to set up locations for vector memory operations. diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 69c58275b4..fcc59ea3f9 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -1512,17 +1512,17 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict            case DataType::Type::kBool:            case DataType::Type::kUint8:            case DataType::Type::kInt8: -            *restrictions |= kNoDiv | kNoReduction | kNoSAD; +            *restrictions |= kNoDiv;              return TrySetVectorLength(16);            case DataType::Type::kUint16:            case DataType::Type::kInt16: -            *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD; +            *restrictions |= kNoDiv | kNoStringCharAt;              return TrySetVectorLength(8);            case DataType::Type::kInt32: -            *restrictions |= kNoDiv | kNoSAD; +            *restrictions |= kNoDiv;              return TrySetVectorLength(4);            case DataType::Type::kInt64: -            *restrictions |= kNoDiv | kNoSAD; +            *restrictions |= kNoDiv;              return TrySetVectorLength(2);            case DataType::Type::kFloat32:              *restrictions |= kNoMinMax | kNoReduction;  // min/max(x, NaN) @@ -1541,17 +1541,17 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict            case DataType::Type::kBool:            case DataType::Type::kUint8:            case DataType::Type::kInt8: -            *restrictions |= kNoDiv | kNoReduction | kNoSAD; +            *restrictions |= kNoDiv;              return TrySetVectorLength(16);            case DataType::Type::kUint16:            case DataType::Type::kInt16: -            *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD; +            *restrictions |= kNoDiv | kNoStringCharAt;              return TrySetVectorLength(8);            case DataType::Type::kInt32: -            *restrictions |= kNoDiv | kNoSAD; +            *restrictions |= kNoDiv;              return TrySetVectorLength(4);            case DataType::Type::kInt64: -            *restrictions |= kNoDiv | kNoSAD; +            *restrictions |= kNoDiv;              return TrySetVectorLength(2);            case DataType::Type::kFloat32:              *restrictions |= kNoMinMax | kNoReduction;  // min/max(x, NaN)  |