diff options
Diffstat (limited to 'compiler/optimizing/loop_optimization.cc')
| -rw-r--r-- | compiler/optimizing/loop_optimization.cc | 204 | 
1 files changed, 201 insertions, 3 deletions
| diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 8e88c1ec7f..5a95abdb50 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -63,12 +63,122 @@ static bool IsEarlyExit(HLoopInformation* loop_info) {    return false;  } +// Detect a sign extension from the given type. Returns the promoted operand on success. +static bool IsSignExtensionAndGet(HInstruction* instruction, +                                  Primitive::Type type, +                                  /*out*/ HInstruction** operand) { +  // Accept any already wider constant that would be handled properly by sign +  // extension when represented in the *width* of the given narrower data type +  // (the fact that char normally zero extends does not matter here). +  int64_t value = 0; +  if (IsInt64AndGet(instruction, &value)) { +    switch (type) { +      case Primitive::kPrimByte: +        if (std::numeric_limits<int8_t>::min() <= value && +            std::numeric_limits<int8_t>::max() >= value) { +          *operand = instruction; +          return true; +        } +        return false; +      case Primitive::kPrimChar: +      case Primitive::kPrimShort: +        if (std::numeric_limits<int16_t>::min() <= value && +            std::numeric_limits<int16_t>::max() <= value) { +          *operand = instruction; +          return true; +        } +        return false; +      default: +        return false; +    } +  } +  // An implicit widening conversion of a signed integer to an integral type sign-extends +  // the two's-complement representation of the integer value to fill the wider format. +  if (instruction->GetType() == type && (instruction->IsArrayGet() || +                                         instruction->IsStaticFieldGet() || +                                         instruction->IsInstanceFieldGet())) { +    switch (type) { +      case Primitive::kPrimByte: +      case Primitive::kPrimShort: +        *operand = instruction; +        return true; +      default: +        return false; +    } +  } +  // TODO: perhaps explicit conversions later too? +  //       (this may return something different from instruction) +  return false; +} + +// Detect a zero extension from the given type. Returns the promoted operand on success. +static bool IsZeroExtensionAndGet(HInstruction* instruction, +                                  Primitive::Type type, +                                  /*out*/ HInstruction** operand) { +  // Accept any already wider constant that would be handled properly by zero +  // extension when represented in the *width* of the given narrower data type +  // (the fact that byte/short normally sign extend does not matter here). +  int64_t value = 0; +  if (IsInt64AndGet(instruction, &value)) { +    switch (type) { +      case Primitive::kPrimByte: +        if (std::numeric_limits<uint8_t>::min() <= value && +            std::numeric_limits<uint8_t>::max() >= value) { +          *operand = instruction; +          return true; +        } +        return false; +      case Primitive::kPrimChar: +      case Primitive::kPrimShort: +        if (std::numeric_limits<uint16_t>::min() <= value && +            std::numeric_limits<uint16_t>::max() <= value) { +          *operand = instruction; +          return true; +        } +        return false; +      default: +        return false; +    } +  } +  // An implicit widening conversion of a char to an integral type zero-extends +  // the representation of the char value to fill the wider format. +  if (instruction->GetType() == type && (instruction->IsArrayGet() || +                                         instruction->IsStaticFieldGet() || +                                         instruction->IsInstanceFieldGet())) { +    if (type == Primitive::kPrimChar) { +      *operand = instruction; +      return true; +    } +  } +  // A sign (or zero) extension followed by an explicit removal of just the +  // higher sign bits is equivalent to a zero extension of the underlying operand. +  if (instruction->IsAnd()) { +    int64_t mask = 0; +    HInstruction* a = instruction->InputAt(0); +    HInstruction* b = instruction->InputAt(1); +    // In (a & b) find (mask & b) or (a & mask) with sign or zero extension on the non-mask. +    if ((IsInt64AndGet(a, /*out*/ &mask) && (IsSignExtensionAndGet(b, type, /*out*/ operand) || +                                             IsZeroExtensionAndGet(b, type, /*out*/ operand))) || +        (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) || +                                             IsZeroExtensionAndGet(a, type, /*out*/ operand)))) { +      switch ((*operand)->GetType()) { +        case Primitive::kPrimByte:  return mask == std::numeric_limits<uint8_t>::max(); +        case Primitive::kPrimChar: +        case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max(); +        default: return false; +      } +    } +  } +  // TODO: perhaps explicit conversions later too? +  return false; +} +  // Test vector restrictions.  static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {    return (restrictions & tested) != 0;  } -// Inserts an instruction. +// Insert an instruction.  static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {    DCHECK(block != nullptr);    DCHECK(instruction != nullptr); @@ -713,6 +823,10 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,        return true;      }    } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) { +    // Recognize vectorization idioms. +    if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) { +      return true; +    }      // Deal with vector restrictions.      if ((HasVectorRestrictions(restrictions, kNoShift)) ||          (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) { @@ -806,11 +920,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric          switch (type) {            case Primitive::kPrimBoolean:            case Primitive::kPrimByte: -            *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs; +            *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;              return TrySetVectorLength(16);            case Primitive::kPrimChar:            case Primitive::kPrimShort: -            *restrictions |= kNoDiv | kNoAbs; +            *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;              return TrySetVectorLength(8);            case Primitive::kPrimInt:              *restrictions |= kNoDiv; @@ -1039,6 +1153,90 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,  #undef GENERATE_VEC  // +// Vectorization idioms. +// + +// Method recognizes the following idioms: +//   rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b +//   regular  halving add (a + b)     >> 1 for unsigned/signed operands a, b +// Provided that the operands are promoted to a wider form to do the arithmetic and +// then cast back to narrower form, the idioms can be mapped into efficient SIMD +// implementation that operates directly in narrower form (plus one extra bit). +// TODO: current version recognizes implicit byte/short/char widening only; +//       explicit widening from int to long could be added later. +bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, +                                                 HInstruction* instruction, +                                                 bool generate_code, +                                                 Primitive::Type type, +                                                 uint64_t restrictions) { +  // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1 +  // (note whether the sign bit in higher precision is shifted in has no effect +  // on the narrow precision computed by the idiom). +  int64_t value = 0; +  if ((instruction->IsShr() || +       instruction->IsUShr()) && +      IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) { +    // +    // TODO: make following code less sensitive to associativity and commutativity differences. +    // +    HInstruction* x = instruction->InputAt(0); +    // Test for an optional rounding part (x + 1) >> 1. +    bool is_rounded = false; +    if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) { +      x = x->InputAt(0); +      is_rounded = true; +    } +    // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed. +    if (x->IsAdd()) { +      HInstruction* a = x->InputAt(0); +      HInstruction* b = x->InputAt(1); +      HInstruction* r = nullptr; +      HInstruction* s = nullptr; +      bool is_unsigned = false; +      if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) { +        is_unsigned = true; +      } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) { +        is_unsigned = false; +      } else { +        return false; +      } +      // Deal with vector restrictions. +      if ((!is_unsigned && HasVectorRestrictions(restrictions, kNoSignedHAdd)) || +          (!is_rounded && HasVectorRestrictions(restrictions, kNoUnroundedHAdd))) { +        return false; +      } +      // Accept recognized halving add for vectorizable operands. Vectorized code uses the +      // shorthand idiomatic operation. Sequential code uses the original scalar expressions. +      DCHECK(r != nullptr && s != nullptr); +      if (VectorizeUse(node, r, generate_code, type, restrictions) && +          VectorizeUse(node, s, generate_code, type, restrictions)) { +        if (generate_code) { +          if (vector_mode_ == kVector) { +            vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd( +                global_allocator_, +                vector_map_->Get(r), +                vector_map_->Get(s), +                type, +                vector_length_, +                is_unsigned, +                is_rounded)); +          } else { +            VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions); +            VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions); +            GenerateVecOp(instruction, +                          vector_map_->Get(instruction->InputAt(0)), +                          vector_map_->Get(instruction->InputAt(1)), +                          type); +          } +        } +        return true; +      } +    } +  } +  return false; +} + +//  // Helpers.  // |