diff options
84 files changed, 2465 insertions, 455 deletions
diff --git a/build/Android.bp b/build/Android.bp index 5f64c2d9f7..3eb4aaff79 100644 --- a/build/Android.bp +++ b/build/Android.bp @@ -23,6 +23,7 @@ art_clang_tidy_errors = [      "bugprone-virtual-near-miss",      "modernize-use-bool-literals",      "modernize-use-nullptr", +    "modernize-use-using",      "performance-faster-string-find",      "performance-for-range-copy",      "performance-implicit-conversion-in-loop", @@ -37,6 +38,7 @@ art_clang_tidy_errors_str = "bugprone-lambda-function-name"          + ",modernize-redundant-void-arg"          + ",modernize-use-bool-literals"          + ",modernize-use-nullptr" +        + ",modernize-use-using"          + ",performance-faster-string-find"          + ",performance-for-range-copy"          + ",performance-implicit-conversion-in-loop" diff --git a/compiler/dex/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc index fe8b766d0f..183173b298 100644 --- a/compiler/dex/inline_method_analyser.cc +++ b/compiler/dex/inline_method_analyser.cc @@ -41,7 +41,7 @@ namespace {  // anonymous namespace  class Matcher {   public:    // Match function type. 
-  typedef bool MatchFn(Matcher* matcher); +  using MatchFn = bool(Matcher*);    template <size_t size>    static bool Match(const CodeItemDataAccessor* code_item, MatchFn* const (&pattern)[size]); diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 43169ba7eb..e79a96bc2a 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -1277,6 +1277,74 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins    }  } +void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) { +  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); +  DCHECK(instruction->GetPackedType() == DataType::Type::kInt32); +  locations->SetInAt(0, Location::RequiresFpuRegister()); +  locations->SetInAt(1, Location::RequiresFpuRegister()); +  locations->SetInAt(2, Location::RequiresFpuRegister()); +  locations->SetOut(Location::SameAsFirstInput()); + +  // For Int8 and Uint8 we need a temp register. 
+  if (DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) { +    locations->AddTemp(Location::RequiresFpuRegister()); +  } +} + +void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) { +  LocationSummary* locations = instruction->GetLocations(); +  DCHECK(locations->InAt(0).Equals(locations->Out())); +  VRegister acc = VRegisterFrom(locations->InAt(0)); +  VRegister left = VRegisterFrom(locations->InAt(1)); +  VRegister right = VRegisterFrom(locations->InAt(2)); +  HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); +  HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); +  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), +            HVecOperation::ToSignedType(b->GetPackedType())); +  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32); +  DCHECK_EQ(4u, instruction->GetVectorLength()); + +  size_t inputs_data_size = DataType::Size(a->GetPackedType()); +  switch (inputs_data_size) { +    case 1u: { +      DCHECK_EQ(16u, a->GetVectorLength()); +      VRegister tmp = VRegisterFrom(locations->GetTemp(0)); +      if (instruction->IsZeroExtending()) { +        // TODO: Use Armv8.4-A UDOT instruction when it is available. +        __ Umull(tmp.V8H(), left.V8B(), right.V8B()); +        __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H()); +        __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + +        __ Umull2(tmp.V8H(), left.V16B(), right.V16B()); +        __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H()); +        __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H()); +      } else { +        // TODO: Use Armv8.4-A SDOT instruction when it is available. 
+        __ Smull(tmp.V8H(), left.V8B(), right.V8B()); +        __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H()); +        __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + +        __ Smull2(tmp.V8H(), left.V16B(), right.V16B()); +        __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H()); +        __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H()); +      } +      break; +    } +    case 2u: +      DCHECK_EQ(8u, a->GetVectorLength()); +      if (instruction->IsZeroExtending()) { +        __ Umlal(acc.V4S(), left.V4H(), right.V4H()); +        __ Umlal2(acc.V4S(), left.V8H(), right.V8H()); +      } else { +        __ Smlal(acc.V4S(), left.V4H(), right.V4H()); +        __ Smlal2(acc.V4S(), left.V8H(), right.V8H()); +      } +      break; +    default: +      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size; +  } +} +  // Helper to set up locations for vector memory operations.  static void CreateVecMemLocations(ArenaAllocator* allocator,                                    HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 7b66b17983..62b6c4ea01 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -854,6 +854,14 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* i    }  } +void LocationsBuilderARMVIXL::VisitVecDotProd(HVecDotProd* instruction) { +  LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecDotProd(HVecDotProd* instruction) { +  LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} +  // Return whether the vector memory access operation is guaranteed to be word-aligned (ARM word  // size equals to 4).  
static bool IsWordAligned(HVecMemoryOperation* instruction) { diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index df0e1485d6..24f4fb2d7b 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -1274,6 +1274,14 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst    }  } +void LocationsBuilderMIPS::VisitVecDotProd(HVecDotProd* instruction) { +  LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecDotProd(HVecDotProd* instruction) { +  LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} +  // Helper to set up locations for vector memory operations.  static void CreateVecMemLocations(ArenaAllocator* allocator,                                    HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index de354b63a1..972c49ebb1 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -1272,6 +1272,14 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in    }  } +void LocationsBuilderMIPS64::VisitVecDotProd(HVecDotProd* instruction) { +  LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecDotProd(HVecDotProd* instruction) { +  LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} +  // Helper to set up locations for vector memory operations.  
static void CreateVecMemLocations(ArenaAllocator* allocator,                                    HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 2502275b3a..c52ecc77c5 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -1143,6 +1143,14 @@ void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instr    LOG(FATAL) << "No SIMD for " << instruction->GetId();  } +void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) { +  LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) { +  LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} +  // Helper to set up locations for vector memory operations.  static void CreateVecMemLocations(ArenaAllocator* allocator,                                    HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 4a67dafd8a..87d0106c3e 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -1116,6 +1116,14 @@ void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* in    LOG(FATAL) << "No SIMD for " << instruction->GetId();  } +void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) { +  LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) { +  LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} +  // Helper to set up locations for vector memory operations.  
static void CreateVecMemLocations(ArenaAllocator* allocator,                                    HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 6c77232361..39cbe5e850 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -8301,7 +8301,7 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,    uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;    uintptr_t address =        reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); -  typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; +  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;    reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =       dchecked_integral_cast<uint32_t>(address);  } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 39d97899ae..e458dfffb4 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -7542,7 +7542,7 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,    uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;    uintptr_t address =        reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); -  typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; +  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;    reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =       dchecked_integral_cast<uint32_t>(address);  } diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index 5ac6e46003..3cbcc9e0c3 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -231,6 +231,21 @@ class DataType {      }    } +  static Type ToUnsigned(Type type) { +    switch 
(type) { +      case Type::kInt8: +        return Type::kUint8; +      case Type::kInt16: +        return Type::kUint16; +      case Type::kInt32: +        return Type::kUint32; +      case Type::kInt64: +        return Type::kUint64; +      default: +        return type; +    } +  } +    static const char* PrettyDescriptor(Type type);   private: diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 31db8c205f..a1af2be9de 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -106,8 +106,7 @@ std::ostream& operator<<(std::ostream& os, const StringList& list) {    }  } -typedef Disassembler* create_disasm_prototype(InstructionSet instruction_set, -                                              DisassemblerOptions* options); +using create_disasm_prototype = Disassembler*(InstructionSet, DisassemblerOptions*);  class HGraphVisualizerDisassembler {   public:    HGraphVisualizerDisassembler(InstructionSet instruction_set, @@ -564,6 +563,14 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {      StartAttributeStream("kind") << instruction->GetOpKind();    } +  void VisitVecDotProd(HVecDotProd* instruction) override { +    VisitVecOperation(instruction); +    DataType::Type arg_type = instruction->InputAt(1)->AsVecOperation()->GetPackedType(); +    StartAttributeStream("type") << (instruction->IsZeroExtending() ? 
+                                    DataType::ToUnsigned(arg_type) : +                                    DataType::ToSigned(arg_type)); +  } +  #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)    void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) override {      StartAttributeStream("kind") << instruction->GetOpKind(); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 7d66155b39..12b180d5ff 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -351,7 +351,10 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {  // Translates vector operation to reduction kind.  static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) { -  if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) { +  if (reduction->IsVecAdd() || +      reduction->IsVecSub() || +      reduction->IsVecSADAccumulate() || +      reduction->IsVecDotProd()) {      return HVecReduce::kSum;    }    LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId(); @@ -431,6 +434,23 @@ static void PeelByCount(HLoopInformation* loop_info, int count) {    }  } +// Returns the narrower type out of instructions a and b types. +static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) { +  DataType::Type type = a->GetType(); +  if (DataType::Size(b->GetType()) < DataType::Size(type)) { +    type = b->GetType(); +  } +  if (a->IsTypeConversion() && +      DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(type)) { +    type = a->InputAt(0)->GetType(); +  } +  if (b->IsTypeConversion() && +      DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(type)) { +    type = b->InputAt(0)->GetType(); +  } +  return type; +} +  //  // Public methods.  
// @@ -1289,6 +1309,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,      DataType::Type type = instruction->GetType();      // Recognize SAD idiom or direct reduction.      if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) || +        VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) ||          (TrySetVectorType(type, &restrictions) &&           VectorizeUse(node, instruction, generate_code, type, restrictions))) {        if (generate_code) { @@ -1531,11 +1552,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict          case DataType::Type::kBool:          case DataType::Type::kUint8:          case DataType::Type::kInt8: -          *restrictions |= kNoDiv | kNoReduction; +          *restrictions |= kNoDiv | kNoReduction | kNoDotProd;            return TrySetVectorLength(8);          case DataType::Type::kUint16:          case DataType::Type::kInt16: -          *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; +          *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoDotProd;            return TrySetVectorLength(4);          case DataType::Type::kInt32:            *restrictions |= kNoDiv | kNoWideSAD; @@ -1580,12 +1601,23 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict            case DataType::Type::kBool:            case DataType::Type::kUint8:            case DataType::Type::kInt8: -            *restrictions |= -                kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; +            *restrictions |= kNoMul | +                             kNoDiv | +                             kNoShift | +                             kNoAbs | +                             kNoSignedHAdd | +                             kNoUnroundedHAdd | +                             kNoSAD | +                             kNoDotProd;              return TrySetVectorLength(16);            case 
DataType::Type::kUint16:            case DataType::Type::kInt16: -            *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; +            *restrictions |= kNoDiv | +                             kNoAbs | +                             kNoSignedHAdd | +                             kNoUnroundedHAdd | +                             kNoSAD| +                             kNoDotProd;              return TrySetVectorLength(8);            case DataType::Type::kInt32:              *restrictions |= kNoDiv | kNoSAD; @@ -1610,11 +1642,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict            case DataType::Type::kBool:            case DataType::Type::kUint8:            case DataType::Type::kInt8: -            *restrictions |= kNoDiv; +            *restrictions |= kNoDiv | kNoDotProd;              return TrySetVectorLength(16);            case DataType::Type::kUint16:            case DataType::Type::kInt16: -            *restrictions |= kNoDiv | kNoStringCharAt; +            *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd;              return TrySetVectorLength(8);            case DataType::Type::kInt32:              *restrictions |= kNoDiv; @@ -1639,11 +1671,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict            case DataType::Type::kBool:            case DataType::Type::kUint8:            case DataType::Type::kInt8: -            *restrictions |= kNoDiv; +            *restrictions |= kNoDiv | kNoDotProd;              return TrySetVectorLength(16);            case DataType::Type::kUint16:            case DataType::Type::kInt16: -            *restrictions |= kNoDiv | kNoStringCharAt; +            *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd;              return TrySetVectorLength(8);            case DataType::Type::kInt32:              *restrictions |= kNoDiv; @@ -2071,18 +2103,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,    
HInstruction* r = a;    HInstruction* s = b;    bool is_unsigned = false; -  DataType::Type sub_type = a->GetType(); -  if (DataType::Size(b->GetType()) < DataType::Size(sub_type)) { -    sub_type = b->GetType(); -  } -  if (a->IsTypeConversion() && -      DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(sub_type)) { -    sub_type = a->InputAt(0)->GetType(); -  } -  if (b->IsTypeConversion() && -      DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(sub_type)) { -    sub_type = b->InputAt(0)->GetType(); -  } +  DataType::Type sub_type = GetNarrowerType(a, b);    if (reduction_type != sub_type &&        (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) {      return false; @@ -2123,6 +2144,75 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,    return false;  } +// Method recognises the following dot product idiom: +//   q += a * b for operands a, b whose type is narrower than the reduction one. +// Provided that the operands have the same type or are promoted to a wider form. +// Since this may involve a vector length change, the idiom is handled by going directly +// to a dot product node (rather than relying combining finer grained nodes later). 
+bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, +                                              HInstruction* instruction, +                                              bool generate_code, +                                              DataType::Type reduction_type, +                                              uint64_t restrictions) { +  if (!instruction->IsAdd() || (reduction_type != DataType::Type::kInt32)) { +    return false; +  } + +  HInstruction* q = instruction->InputAt(0); +  HInstruction* v = instruction->InputAt(1); +  if (!v->IsMul() || v->GetType() != reduction_type) { +    return false; +  } + +  HInstruction* a = v->InputAt(0); +  HInstruction* b = v->InputAt(1); +  HInstruction* r = a; +  HInstruction* s = b; +  DataType::Type op_type = GetNarrowerType(a, b); +  bool is_unsigned = false; + +  if (!IsNarrowerOperands(a, b, op_type, &r, &s, &is_unsigned)) { +    return false; +  } +  op_type = HVecOperation::ToProperType(op_type, is_unsigned); + +  if (!TrySetVectorType(op_type, &restrictions) || +      HasVectorRestrictions(restrictions, kNoDotProd)) { +    return false; +  } + +  DCHECK(r != nullptr && s != nullptr); +  // Accept dot product idiom for vectorizable operands. Vectorized code uses the shorthand +  // idiomatic operation. Sequential code uses the original scalar expressions. 
+  if (generate_code && vector_mode_ != kVector) {  // de-idiom +    r = a; +    s = b; +  } +  if (VectorizeUse(node, q, generate_code, op_type, restrictions) && +      VectorizeUse(node, r, generate_code, op_type, restrictions) && +      VectorizeUse(node, s, generate_code, op_type, restrictions)) { +    if (generate_code) { +      if (vector_mode_ == kVector) { +        vector_map_->Put(instruction, new (global_allocator_) HVecDotProd( +            global_allocator_, +            vector_map_->Get(q), +            vector_map_->Get(r), +            vector_map_->Get(s), +            reduction_type, +            is_unsigned, +            GetOtherVL(reduction_type, op_type, vector_length_), +            kNoDexPc)); +        MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); +      } else { +        GenerateVecOp(v, vector_map_->Get(r), vector_map_->Get(s), reduction_type); +        GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type); +      } +    } +    return true; +  } +  return false; +} +  //  // Vectorization heuristics.  
// diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 2b202fda75..1a842c4bf3 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -82,6 +82,7 @@ class HLoopOptimization : public HOptimization {      kNoReduction     = 1 << 9,   // no reduction      kNoSAD           = 1 << 10,  // no sum of absolute differences (SAD)      kNoWideSAD       = 1 << 11,  // no sum of absolute differences (SAD) with operand widening +    kNoDotProd       = 1 << 12,  // no dot product    };    /* @@ -217,6 +218,11 @@ class HLoopOptimization : public HOptimization {                           bool generate_code,                           DataType::Type type,                           uint64_t restrictions); +  bool VectorizeDotProdIdiom(LoopNode* node, +                             HInstruction* instruction, +                             bool generate_code, +                             DataType::Type type, +                             uint64_t restrictions);    // Vectorization heuristics.    
Alignment ComputeAlignment(HInstruction* offset, diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 68f1a2406a..76887f9a5b 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1453,6 +1453,7 @@ class HLoopInformationOutwardIterator : public ValueObject {    M(VecSetScalars, VecOperation)                                        \    M(VecMultiplyAccumulate, VecOperation)                                \    M(VecSADAccumulate, VecOperation)                                     \ +  M(VecDotProd, VecOperation)                                           \    M(VecLoad, VecMemoryOperation)                                        \    M(VecStore, VecMemoryOperation)                                       \ diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index c7539f2846..597e399dd1 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -1021,6 +1021,66 @@ class HVecSADAccumulate final : public HVecOperation {    DEFAULT_COPY_CONSTRUCTOR(VecSADAccumulate);  }; +// Performs dot product of two vectors and adds the result to wider precision components in +// the accumulator. +// +// viz. DOT_PRODUCT([ a1, .. , am], [ x1, .. , xn ], [ y1, .. , yn ]) = +//                  [ a1 + sum(xi * yi), .. , am + sum(xj * yj) ], +//      for m <= n, non-overlapping sums, +//      for either both signed or both unsigned operands x, y. +// +// Notes: +//   - packed type reflects the type of sum reduction, not the type of the operands. +//   - IsZeroExtending() is used to determine the kind of signed/zero extension to be +//     performed for the operands. +// +// TODO: Support types other than kInt32 for packed type. 
+class HVecDotProd final : public HVecOperation { + public: +  HVecDotProd(ArenaAllocator* allocator, +              HInstruction* accumulator, +              HInstruction* left, +              HInstruction* right, +              DataType::Type packed_type, +              bool is_zero_extending, +              size_t vector_length, +              uint32_t dex_pc) +    : HVecOperation(kVecDotProd, +                    allocator, +                    packed_type, +                    SideEffects::None(), +                    /* number_of_inputs */ 3, +                    vector_length, +                    dex_pc) { +    DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); +    DCHECK(DataType::IsIntegralType(packed_type)); +    DCHECK(left->IsVecOperation()); +    DCHECK(right->IsVecOperation()); +    DCHECK_EQ(ToSignedType(left->AsVecOperation()->GetPackedType()), +              ToSignedType(right->AsVecOperation()->GetPackedType())); +    SetRawInputAt(0, accumulator); +    SetRawInputAt(1, left); +    SetRawInputAt(2, right); +    SetPackedFlag<kFieldHDotProdIsZeroExtending>(is_zero_extending); +  } + +  bool IsZeroExtending() const { return GetPackedFlag<kFieldHDotProdIsZeroExtending>(); } + +  bool CanBeMoved() const override { return true; } + +  DECLARE_INSTRUCTION(VecDotProd); + + protected: +  DEFAULT_COPY_CONSTRUCTOR(VecDotProd); + + private: +  // Additional packed bits. +  static constexpr size_t kFieldHDotProdIsZeroExtending = +      HVecOperation::kNumberOfVectorOpPackedBits; +  static constexpr size_t kNumberOfHDotProdPackedBits = kFieldHDotProdIsZeroExtending + 1; +  static_assert(kNumberOfHDotProdPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); +}; +  // Loads a vector from memory, viz. load(mem, 1)  // yield the vector [ mem(1), .. , mem(n) ].  
class HVecLoad final : public HVecMemoryOperation { diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 9ae025b3fe..3a550efeb8 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -399,7 +399,8 @@ class OptimizingCompiler final : public Compiler {                              PassObserver* pass_observer,                              VariableSizedHandleScope* handles) const; -  void GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo method_debug_info) +  void GenerateJitDebugInfo(ArtMethod* method, +                            const debug::MethodDebugInfo& method_debug_info)        REQUIRES_SHARED(Locks::mutator_lock_);    std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -1406,7 +1407,8 @@ bool OptimizingCompiler::JitCompile(Thread* self,    return true;  } -void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo info) { +void OptimizingCompiler::GenerateJitDebugInfo( +    ArtMethod* method, const debug::MethodDebugInfo& info) {    const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();    DCHECK(compiler_options.GenerateAnyDebugInfo()); diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 399a6d8cbd..a8ab6cdd0c 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -174,8 +174,8 @@ class ParallelMoveTest : public ::testing::Test {  template<> const bool ParallelMoveTest<TestParallelMoveResolverWithSwap>::has_swap = true;  template<> const bool ParallelMoveTest<TestParallelMoveResolverNoSwap>::has_swap = false; -typedef ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap> -    ParallelMoveResolverTestTypes; +using ParallelMoveResolverTestTypes = +    ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap>;  
TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index dda29a1b4b..db96e41064 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -440,7 +440,10 @@ static bool HasAliasInEnvironments(HInstruction* instruction) {    return false;  } -void SsaBuilder::ReplaceUninitializedStringPhis() { +// Returns whether the analysis succeeded. If it did not, we are going to bail +// to interpreter. +// TODO(ngeoffray): Remove this workaround. +bool SsaBuilder::ReplaceUninitializedStringPhis() {    ScopedArenaHashSet<HInstruction*> seen_instructions(        local_allocator_->Adapter(kArenaAllocGraphBuilder));    ScopedArenaVector<HInstruction*> worklist(local_allocator_->Adapter(kArenaAllocGraphBuilder)); @@ -467,17 +470,23 @@ void SsaBuilder::ReplaceUninitializedStringPhis() {          if (found_instance == nullptr) {            found_instance = current->AsNewInstance();          } else { -          DCHECK(found_instance == current); +          if (found_instance != current) { +            return false; +          }          }        } else if (current->IsPhi()) {          // Push all inputs to the worklist. Those should be Phis or NewInstance.          for (HInstruction* input : current->GetInputs()) { -          DCHECK(input->IsPhi() || input->IsNewInstance()) << input->DebugName(); +          if (!input->IsPhi() && !input->IsNewInstance()) { +            return false; +          }            worklist.push_back(input);          }        } else {          // The verifier prevents any other DEX uses of the uninitialized string. 
-        DCHECK(current->IsEqual() || current->IsNotEqual()); +        if (!current->IsEqual() && !current->IsNotEqual()) { +          return false; +        }          continue;        }        current->ReplaceUsesDominatedBy(invoke, invoke); @@ -487,13 +496,18 @@ void SsaBuilder::ReplaceUninitializedStringPhis() {        // be Phi, or Equal/NotEqual.        for (const HUseListNode<HInstruction*>& use : current->GetUses()) {          HInstruction* user = use.GetUser(); -        DCHECK(user->IsPhi() || user->IsEqual() || user->IsNotEqual()) << user->DebugName(); +        if (!user->IsPhi() && !user->IsEqual() && !user->IsNotEqual()) { +          return false; +        }          worklist.push_back(user);        }      } while (!worklist.empty());      seen_instructions.clear(); -    DCHECK(found_instance != nullptr); +    if (found_instance == nullptr) { +      return false; +    }    } +  return true;  }  void SsaBuilder::RemoveRedundantUninitializedStrings() { @@ -547,7 +561,9 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {    // Replace Phis that feed in a String.<init>, as well as their aliases, with    // the actual String allocation invocation. We do this first, as the phis stored in    // the data structure might get removed from the graph in later stages during `BuildSsa`. -  ReplaceUninitializedStringPhis(); +  if (!ReplaceUninitializedStringPhis()) { +    return kAnalysisSkipped; +  }    // Propagate types of phis. At this point, phis are typed void in the general    // case, or float/double/reference if we created an equivalent phi. 
So we need diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 765544508e..bae15acf98 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -123,7 +123,7 @@ class SsaBuilder : public ValueObject {    HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);    void RemoveRedundantUninitializedStrings(); -  void ReplaceUninitializedStringPhis(); +  bool ReplaceUninitializedStringPhis();    HGraph* const graph_;    Handle<mirror::ClassLoader> class_loader_; diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 1ba535f4c3..a673e3210c 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -179,7 +179,7 @@ void MipsAssembler::PatchCFI(size_t number_of_delayed_adjust_pcs) {      return;    } -  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; +  using DelayedAdvancePC = DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC;    const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();    const std::vector<uint8_t>& old_stream = data.first;    const std::vector<DelayedAdvancePC>& advances = data.second; diff --git a/compiler/utils/mips/assembler_mips32r5_test.cc b/compiler/utils/mips/assembler_mips32r5_test.cc index bd73c12dc5..98fc44ba5d 100644 --- a/compiler/utils/mips/assembler_mips32r5_test.cc +++ b/compiler/utils/mips/assembler_mips32r5_test.cc @@ -38,12 +38,12 @@ class AssemblerMIPS32r5Test : public AssemblerTest<mips::MipsAssembler,                                                     uint32_t,                                                     mips::VectorRegister> {   public: -  typedef AssemblerTest<mips::MipsAssembler, -                        mips::MipsLabel, -                        mips::Register, -                        mips::FRegister, -                        uint32_t, -                        mips::VectorRegister> Base; +  using Base = 
AssemblerTest<mips::MipsAssembler, +                             mips::MipsLabel, +                             mips::Register, +                             mips::FRegister, +                             uint32_t, +                             mips::VectorRegister>;    // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<>    // and reimplement it without the verification against `assembly_string`. b/73903608 diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index 9637c25e7e..723c489f21 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -38,12 +38,12 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler,                                                     uint32_t,                                                     mips::VectorRegister> {   public: -  typedef AssemblerTest<mips::MipsAssembler, -                        mips::MipsLabel, -                        mips::Register, -                        mips::FRegister, -                        uint32_t, -                        mips::VectorRegister> Base; +  using Base = AssemblerTest<mips::MipsAssembler, +                             mips::MipsLabel, +                             mips::Register, +                             mips::FRegister, +                             uint32_t, +                             mips::VectorRegister>;    // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<>    // and reimplement it without the verification against `assembly_string`. 
b/73903608 diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index f137c60eb8..4f8ccee2c2 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -37,11 +37,11 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler,                                                 mips::FRegister,                                                 uint32_t> {   public: -  typedef AssemblerTest<mips::MipsAssembler, -                        mips::MipsLabel, -                        mips::Register, -                        mips::FRegister, -                        uint32_t> Base; +  using Base = AssemblerTest<mips::MipsAssembler, +                             mips::MipsLabel, +                             mips::Register, +                             mips::FRegister, +                             uint32_t>;    // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<>    // and reimplement it without the verification against `assembly_string`. 
b/73903608 diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 6df9562fd5..29d2beda96 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -52,7 +52,7 @@ void Mips64Assembler::PatchCFI() {      return;    } -  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; +  using DelayedAdvancePC = DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC;    const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();    const std::vector<uint8_t>& old_stream = data.first;    const std::vector<DelayedAdvancePC>& advances = data.second; diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 3218ae3a90..66711c3210 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -41,12 +41,12 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,                                                   uint32_t,                                                   mips64::VectorRegister> {   public: -  typedef AssemblerTest<mips64::Mips64Assembler, -                        mips64::Mips64Label, -                        mips64::GpuRegister, -                        mips64::FpuRegister, -                        uint32_t, -                        mips64::VectorRegister> Base; +  using Base = AssemblerTest<mips64::Mips64Assembler, +                             mips64::Mips64Label, +                             mips64::GpuRegister, +                             mips64::FpuRegister, +                             uint32_t, +                             mips64::VectorRegister>;    // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<>    // and reimplement it without the verification against `assembly_string`. 
b/73903608 diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index b03c40aa3e..ad75174d23 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -44,11 +44,11 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler,                                                x86::XmmRegister,                                                x86::Immediate> {   public: -  typedef AssemblerTest<x86::X86Assembler, -                        x86::Address, -                        x86::Register, -                        x86::XmmRegister, -                        x86::Immediate> Base; +  using Base = AssemblerTest<x86::X86Assembler, +                             x86::Address, +                             x86::Register, +                             x86::XmmRegister, +                             x86::Immediate>;   protected:    std::string GetArchitectureString() override { diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 65711e0855..fe42f9b19b 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -137,11 +137,11 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler,                                                   x86_64::XmmRegister,                                                   x86_64::Immediate> {   public: -  typedef AssemblerTest<x86_64::X86_64Assembler, -                        x86_64::Address, -                        x86_64::CpuRegister, -                        x86_64::XmmRegister, -                        x86_64::Immediate> Base; +  using Base = AssemblerTest<x86_64::X86_64Assembler, +                             x86_64::Address, +                             x86_64::CpuRegister, +                             x86_64::XmmRegister, +                             x86_64::Immediate>;   protected:    // Get the typically 
used name for this architecture, e.g., aarch64, x86-64, ... diff --git a/dex2oat/linker/elf_writer_test.cc b/dex2oat/linker/elf_writer_test.cc index ef85fd16ff..1d578ab9d1 100644 --- a/dex2oat/linker/elf_writer_test.cc +++ b/dex2oat/linker/elf_writer_test.cc @@ -164,7 +164,7 @@ TEST_F(ElfWriterTest, EncodeDecodeOatPatches) {      // Patch manually.      std::vector<uint8_t> expected = initial_data;      for (uintptr_t location : patch_locations) { -      typedef __attribute__((__aligned__(1))) uint32_t UnalignedAddress; +      using UnalignedAddress __attribute__((__aligned__(1))) = uint32_t;        *reinterpret_cast<UnalignedAddress*>(expected.data() + location) += delta;      } diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc index e89de84739..acd49d5b45 100644 --- a/dex2oat/linker/oat_writer.cc +++ b/dex2oat/linker/oat_writer.cc @@ -92,10 +92,10 @@ static constexpr bool kOatWriterForceOatCodeLayout = false;  static constexpr bool kOatWriterDebugOatCodeLayout = false; -typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader; +using UnalignedDexFileHeader __attribute__((__aligned__(1))) = DexFile::Header;  const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) { -    return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data); +  return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data);  }  class ChecksumUpdatingOutputStream : public OutputStream { diff --git a/dex2oat/linker/x86/relative_patcher_x86_base.cc b/dex2oat/linker/x86/relative_patcher_x86_base.cc index 6a9690d768..07cd724308 100644 --- a/dex2oat/linker/x86/relative_patcher_x86_base.cc +++ b/dex2oat/linker/x86/relative_patcher_x86_base.cc @@ -50,7 +50,7 @@ void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code,    uint32_t displacement = target_offset - patch_offset;    displacement -= kPcDisplacement;  // The base PC is at the end of the 4-byte patch. 
-  typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; +  using unaligned_int32_t __attribute__((__aligned__(1))) = int32_t;    reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement;  } diff --git a/dex2oat/linker/x86_64/relative_patcher_x86_64.cc b/dex2oat/linker/x86_64/relative_patcher_x86_64.cc index 9633564999..c80f6a92f2 100644 --- a/dex2oat/linker/x86_64/relative_patcher_x86_64.cc +++ b/dex2oat/linker/x86_64/relative_patcher_x86_64.cc @@ -31,7 +31,7 @@ void X86_64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,    uint32_t displacement = target_offset - patch_offset;    displacement -= kPcDisplacement;  // The base PC is at the end of the 4-byte patch. -  typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; +  using unaligned_int32_t __attribute__((__aligned__(1))) = int32_t;    reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement;  } diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc index f09d448493..6b2a1b9a70 100644 --- a/dexdump/dexdump.cc +++ b/dexdump/dexdump.cc @@ -69,14 +69,14 @@ FILE* gOutFile = stdout;  /*   * Data types that match the definitions in the VM specification.   */ -typedef uint8_t  u1; -typedef uint16_t u2; -typedef uint32_t u4; -typedef uint64_t u8; -typedef int8_t   s1; -typedef int16_t  s2; -typedef int32_t  s4; -typedef int64_t  s8; +using u1 = uint8_t; +using u2 = uint16_t; +using u4 = uint32_t; +using u8 = uint64_t; +using s1 = int8_t; +using s2 = int16_t; +using s4 = int32_t; +using s8 = int64_t;  /*   * Basic information about a field or a method. diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc index 23be19dd2e..067daa7842 100644 --- a/dexlist/dexlist.cc +++ b/dexlist/dexlist.cc @@ -55,9 +55,9 @@ static FILE* gOutFile = stdout;  /*   * Data types that match the definitions in the VM specification.   
*/ -typedef uint8_t  u1; -typedef uint32_t u4; -typedef uint64_t u8; +using u1 = uint8_t; +using u4 = uint32_t; +using u8 = uint64_t;  /*   * Returns a newly-allocated string for the "dot version" of the class diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index c1a6f59341..94ea0064e6 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -137,12 +137,12 @@ class DisassemblerArm::CustomDisassembler final : public PrintDisassembler {  void DisassemblerArm::CustomDisassembler::CustomDisassemblerStream::PrintLiteral(LocationType type,                                                                                   int32_t offset) {    // Literal offsets are not required to be aligned, so we may need unaligned access. -  typedef const int16_t unaligned_int16_t __attribute__ ((aligned (1))); -  typedef const uint16_t unaligned_uint16_t __attribute__ ((aligned (1))); -  typedef const int32_t unaligned_int32_t __attribute__ ((aligned (1))); -  typedef const int64_t unaligned_int64_t __attribute__ ((aligned (1))); -  typedef const float unaligned_float __attribute__ ((aligned (1))); -  typedef const double unaligned_double __attribute__ ((aligned (1))); +  using unaligned_int16_t  __attribute__((__aligned__(1))) = const int16_t; +  using unaligned_uint16_t __attribute__((__aligned__(1))) = const uint16_t; +  using unaligned_int32_t  __attribute__((__aligned__(1))) = const int32_t; +  using unaligned_int64_t  __attribute__((__aligned__(1))) = const int64_t; +  using unaligned_float    __attribute__((__aligned__(1))) = const float; +  using unaligned_double   __attribute__((__aligned__(1))) = const double;    // Zeros are used for the LocationType values this function does not care about.    
const size_t literal_size[kVst4Location + 1] = { diff --git a/libartbase/base/mem_map.cc b/libartbase/base/mem_map.cc index 1bf553d293..92551f17b6 100644 --- a/libartbase/base/mem_map.cc +++ b/libartbase/base/mem_map.cc @@ -692,6 +692,24 @@ MemMap MemMap::RemapAtEnd(uint8_t* new_end,                            int tail_prot,                            std::string* error_msg,                            bool use_debug_name) { +  return RemapAtEnd(new_end, +                    tail_name, +                    tail_prot, +                    MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, +                    /* fd */ -1, +                    /* offset */ 0, +                    error_msg, +                    use_debug_name); +} + +MemMap MemMap::RemapAtEnd(uint8_t* new_end, +                          const char* tail_name, +                          int tail_prot, +                          int flags, +                          int fd, +                          off_t offset, +                          std::string* error_msg, +                          bool use_debug_name) {    DCHECK_GE(new_end, Begin());    DCHECK_LE(new_end, End());    DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_); @@ -715,9 +733,6 @@ MemMap MemMap::RemapAtEnd(uint8_t* new_end,    DCHECK_EQ(tail_base_begin + tail_base_size, old_base_end);    DCHECK_ALIGNED(tail_base_size, kPageSize); -  unique_fd fd; -  int flags = MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS; -    MEMORY_TOOL_MAKE_UNDEFINED(tail_base_begin, tail_base_size);    // Note: Do not explicitly unmap the tail region, mmap() with MAP_FIXED automatically    // removes old mappings for the overlapping region. 
This makes the operation atomic @@ -726,13 +741,13 @@ MemMap MemMap::RemapAtEnd(uint8_t* new_end,                                                            tail_base_size,                                                            tail_prot,                                                            flags, -                                                          fd.get(), -                                                          0)); +                                                          fd, +                                                          offset));    if (actual == MAP_FAILED) {      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING); -    *error_msg = StringPrintf("anonymous mmap(%p, %zd, 0x%x, 0x%x, %d, 0) failed. See process " +    *error_msg = StringPrintf("map(%p, %zd, 0x%x, 0x%x, %d, 0) failed. See process "                                "maps in the log.", tail_base_begin, tail_base_size, tail_prot, flags, -                              fd.get()); +                              fd);      return Invalid();    }    // Update *this. diff --git a/libartbase/base/mem_map.h b/libartbase/base/mem_map.h index 20eda324e1..309da27319 100644 --- a/libartbase/base/mem_map.h +++ b/libartbase/base/mem_map.h @@ -261,6 +261,16 @@ class MemMap {                      std::string* error_msg,                      bool use_debug_name = true); +  // Unmap the pages of a file at end and remap them to create another memory map. +  MemMap RemapAtEnd(uint8_t* new_end, +                    const char* tail_name, +                    int tail_prot, +                    int tail_flags, +                    int fd, +                    off_t offset, +                    std::string* error_msg, +                    bool use_debug_name = true); +    // Take ownership of pages at the beginning of the mapping. The mapping must be an    // anonymous reservation mapping, owning entire pages. The `byte_count` must not    // exceed the size of this reservation. 
diff --git a/libartbase/base/mem_map_test.cc b/libartbase/base/mem_map_test.cc index ab3d18ff04..bf143d472d 100644 --- a/libartbase/base/mem_map_test.cc +++ b/libartbase/base/mem_map_test.cc @@ -455,6 +455,53 @@ TEST_F(MemMapTest, RemapAtEnd32bit) {  }  #endif +TEST_F(MemMapTest, RemapFileViewAtEnd) { +  CommonInit(); +  std::string error_msg; +  ScratchFile scratch_file; + +  // Create a scratch file 3 pages large. +  constexpr size_t kMapSize = 3 * kPageSize; +  std::unique_ptr<uint8_t[]> data(new uint8_t[kMapSize]()); +  memset(data.get(), 1, kPageSize); +  memset(&data[0], 0x55, kPageSize); +  memset(&data[kPageSize], 0x5a, kPageSize); +  memset(&data[2 * kPageSize], 0xaa, kPageSize); +  ASSERT_TRUE(scratch_file.GetFile()->WriteFully(&data[0], kMapSize)); + +  MemMap map = MemMap::MapFile(/*byte_count*/kMapSize, +                               PROT_READ, +                               MAP_PRIVATE, +                               scratch_file.GetFd(), +                               /*start*/0, +                               /*low_4gb*/true, +                               scratch_file.GetFilename().c_str(), +                               &error_msg); +  ASSERT_TRUE(map.IsValid()) << error_msg; +  ASSERT_TRUE(error_msg.empty()); +  ASSERT_EQ(map.Size(), kMapSize); +  ASSERT_LT(reinterpret_cast<uintptr_t>(map.BaseBegin()), 1ULL << 32); +  ASSERT_EQ(data[0], *map.Begin()); +  ASSERT_EQ(data[kPageSize], *(map.Begin() + kPageSize)); +  ASSERT_EQ(data[2 * kPageSize], *(map.Begin() + 2 * kPageSize)); + +  for (size_t offset = 2 * kPageSize; offset > 0; offset -= kPageSize) { +    MemMap tail = map.RemapAtEnd(map.Begin() + offset, +                                 "bad_offset_map", +                                 PROT_READ, +                                 MAP_PRIVATE | MAP_FIXED, +                                 scratch_file.GetFd(), +                                 offset, +                                 &error_msg); +    ASSERT_TRUE(tail.IsValid()) << 
error_msg; +    ASSERT_TRUE(error_msg.empty()); +    ASSERT_EQ(offset, map.Size()); +    ASSERT_EQ(static_cast<size_t>(kPageSize), tail.Size()); +    ASSERT_EQ(tail.Begin(), map.Begin() + map.Size()); +    ASSERT_EQ(data[offset], *tail.Begin()); +  } +} +  TEST_F(MemMapTest, MapAnonymousExactAddr32bitHighAddr) {    // Some MIPS32 hardware (namely the Creator Ci20 development board)    // cannot allocate in the 2GB-4GB region. diff --git a/libartbase/base/utils.cc b/libartbase/base/utils.cc index 2242fe877e..0f172fdcfb 100644 --- a/libartbase/base/utils.cc +++ b/libartbase/base/utils.cc @@ -24,6 +24,7 @@  #include <sys/wait.h>  #include <unistd.h> +#include <fstream>  #include <memory>  #include "android-base/file.h" @@ -213,4 +214,25 @@ void SleepForever() {    }  } +std::string GetProcessStatus(const char* key) { +  // Build search pattern of key and separator. +  std::string pattern(key); +  pattern.push_back(':'); + +  // Search for status lines starting with pattern. +  std::ifstream fs("/proc/self/status"); +  std::string line; +  while (std::getline(fs, line)) { +    if (strncmp(pattern.c_str(), line.c_str(), pattern.size()) == 0) { +      // Skip whitespace in matching line (if any). +      size_t pos = line.find_first_not_of(" \t", pattern.size()); +      if (UNLIKELY(pos == std::string::npos)) { +        break; +      } +      return std::string(line, pos); +    } +  } +  return "<unknown>"; +} +  }  // namespace art diff --git a/libartbase/base/utils.h b/libartbase/base/utils.h index e6a0459e27..9c7105599c 100644 --- a/libartbase/base/utils.h +++ b/libartbase/base/utils.h @@ -216,6 +216,11 @@ static inline void CheckedCall(const Func& function, const char* what, Args... a    }  } +// Lookup value for a given key in /proc/self/status. Keys and values are separated by a ':' in +// the status file. Returns value found on success and "<unknown>" if the key is not found or +// there is an I/O error. 
+std::string GetProcessStatus(const char* key); +  }  // namespace art  #endif  // ART_LIBARTBASE_BASE_UTILS_H_ diff --git a/libartbase/base/utils_test.cc b/libartbase/base/utils_test.cc index 892d1fd5bf..9bd50c309a 100644 --- a/libartbase/base/utils_test.cc +++ b/libartbase/base/utils_test.cc @@ -126,4 +126,12 @@ TEST_F(UtilsTest, BoundsCheckedCast) {    EXPECT_EQ(BoundsCheckedCast<const uint64_t*>(buffer + 57, buffer, buffer_end), nullptr);  } +TEST_F(UtilsTest, GetProcessStatus) { +  EXPECT_EQ("utils_test", GetProcessStatus("Name")); +  EXPECT_EQ("R (running)", GetProcessStatus("State")); +  EXPECT_EQ("<unknown>", GetProcessStatus("tate")); +  EXPECT_EQ("<unknown>", GetProcessStatus("e")); +  EXPECT_EQ("<unknown>", GetProcessStatus("Dummy")); +} +  }  // namespace art diff --git a/libdexfile/dex/dex_file_loader.cc b/libdexfile/dex/dex_file_loader.cc index 400c32b519..4aafc665ee 100644 --- a/libdexfile/dex/dex_file_loader.cc +++ b/libdexfile/dex/dex_file_loader.cc @@ -25,10 +25,6 @@  #include "standard_dex_file.h"  #include "ziparchive/zip_archive.h" -// system/core/zip_archive definitions. 
-struct ZipEntry; -typedef void* ZipArchiveHandle; -  namespace art {  namespace { diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 707fc1c9ed..d30ec3157d 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -419,7 +419,7 @@ class OatDumper {      return instruction_set_;    } -  typedef std::vector<std::unique_ptr<const DexFile>> DexFileUniqV; +  using DexFileUniqV = std::vector<std::unique_ptr<const DexFile>>;    bool Dump(std::ostream& os) {      bool success = true; @@ -2480,7 +2480,7 @@ class ImageDumper {        size_t bytes;        size_t count;      }; -    typedef SafeMap<std::string, SizeAndCount> SizeAndCountTable; +    using SizeAndCountTable = SafeMap<std::string, SizeAndCount>;      SizeAndCountTable sizes_and_counts;      void Update(const char* descriptor, size_t object_bytes_in) { diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc index 286b6867a3..f9707d3738 100644 --- a/profman/profile_assistant_test.cc +++ b/profman/profile_assistant_test.cc @@ -116,9 +116,9 @@ class ProfileAssistantTest : public CommonRuntimeTest {    void SetupBasicProfile(const std::string& id,                           uint32_t checksum,                           uint16_t number_of_methods, -                         const std::vector<uint32_t> hot_methods, -                         const std::vector<uint32_t> startup_methods, -                         const std::vector<uint32_t> post_startup_methods, +                         const std::vector<uint32_t>& hot_methods, +                         const std::vector<uint32_t>& startup_methods, +                         const std::vector<uint32_t>& post_startup_methods,                           const ScratchFile& profile,                           ProfileCompilationInfo* info) {      std::string dex_location = "location1" + id; diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index 28b29125cd..b2ddff3f6a 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ 
-227,18 +227,15 @@ void BaseMutex::DumpAll(std::ostream& os) {        // No mutexes have been created yet during at startup.        return;      } -    typedef std::set<BaseMutex*>::const_iterator It;      os << "(Contended)\n"; -    for (It it = all_mutexes->begin(); it != all_mutexes->end(); ++it) { -      BaseMutex* mutex = *it; +    for (const BaseMutex* mutex : *all_mutexes) {        if (mutex->HasEverContended()) {          mutex->Dump(os);          os << "\n";        }      }      os << "(Never contented)\n"; -    for (It it = all_mutexes->begin(); it != all_mutexes->end(); ++it) { -      BaseMutex* mutex = *it; +    for (const BaseMutex* mutex : *all_mutexes) {        if (!mutex->HasEverContended()) {          mutex->Dump(os);          os << "\n"; diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index b8ce1f9859..bcc3a22c86 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -307,7 +307,7 @@ struct FieldGapsComparator {      return lhs.size < rhs.size || (lhs.size == rhs.size && lhs.start_offset > rhs.start_offset);    }  }; -typedef std::priority_queue<FieldGap, std::vector<FieldGap>, FieldGapsComparator> FieldGaps; +using FieldGaps = std::priority_queue<FieldGap, std::vector<FieldGap>, FieldGapsComparator>;  // Adds largest aligned gaps to queue of gaps.  static void AddFieldGap(uint32_t gap_start, uint32_t gap_end, FieldGaps* gaps) { @@ -2301,7 +2301,7 @@ ObjPtr<mirror::Class> ClassLinker::EnsureResolved(Thread* self,    return klass;  } -typedef std::pair<const DexFile*, const DexFile::ClassDef*> ClassPathEntry; +using ClassPathEntry = std::pair<const DexFile*, const DexFile::ClassDef*>;  // Search a collection of DexFiles for a descriptor  ClassPathEntry FindInClassPath(const char* descriptor, @@ -6448,7 +6448,7 @@ static bool NotSubinterfaceOfAny(  // iftable must be large enough to hold all interfaces without changing its size.  
static size_t FillIfTable(ObjPtr<mirror::IfTable> iftable,                            size_t super_ifcount, -                          std::vector<ObjPtr<mirror::Class>> to_process) +                          const std::vector<ObjPtr<mirror::Class>>& to_process)      REQUIRES(Roles::uninterruptible_)      REQUIRES_SHARED(Locks::mutator_lock_) {    // This is the set of all class's already in the iftable. Used to make checking if a class has diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc index e7715c4934..ce742fe47e 100644 --- a/runtime/elf_file.cc +++ b/runtime/elf_file.cc @@ -1417,7 +1417,7 @@ template <typename ElfTypes>  void ElfFileImpl<ElfTypes>::ApplyOatPatches(      const uint8_t* patches, const uint8_t* patches_end, Elf_Addr delta,      uint8_t* to_patch, const uint8_t* to_patch_end) { -  typedef __attribute__((__aligned__(1))) Elf_Addr UnalignedAddress; +  using UnalignedAddress __attribute__((__aligned__(1))) = Elf_Addr;    while (patches < patches_end) {      to_patch += DecodeUnsignedLeb128(&patches);      DCHECK_LE(patches, patches_end) << "Unexpected end of patch list."; diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc index e8a47d1087..9467c4c952 100644 --- a/runtime/hprof/hprof.cc +++ b/runtime/hprof/hprof.cc @@ -148,11 +148,11 @@ enum HprofBasicType {    hprof_basic_long = 11,  }; -typedef uint32_t HprofStringId; -typedef uint32_t HprofClassObjectId; -typedef uint32_t HprofClassSerialNumber; -typedef uint32_t HprofStackTraceSerialNumber; -typedef uint32_t HprofStackFrameId; +using HprofStringId = uint32_t; +using HprofClassObjectId = uint32_t; +using HprofClassSerialNumber = uint32_t; +using HprofStackTraceSerialNumber = uint32_t; +using HprofStackFrameId = uint32_t;  static constexpr HprofStackTraceSerialNumber kHprofNullStackTrace = 0;  class EndianOutput { diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc index 048c6e4d66..df66061d01 100644 --- a/runtime/interpreter/interpreter.cc +++ 
b/runtime/interpreter/interpreter.cc @@ -56,7 +56,7 @@ static void InterpreterJni(Thread* self,    ScopedObjectAccessUnchecked soa(self);    if (method->IsStatic()) {      if (shorty == "L") { -      typedef jobject (fntype)(JNIEnv*, jclass); +      using fntype = jobject(JNIEnv*, jclass);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -67,35 +67,35 @@ static void InterpreterJni(Thread* self,        }        result->SetL(soa.Decode<mirror::Object>(jresult));      } else if (shorty == "V") { -      typedef void (fntype)(JNIEnv*, jclass); +      using fntype = void(JNIEnv*, jclass);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     soa.AddLocalReference<jclass>(method->GetDeclaringClass()));        ScopedThreadStateChange tsc(self, kNative);        fn(soa.Env(), klass.get());      } else if (shorty == "Z") { -      typedef jboolean (fntype)(JNIEnv*, jclass); +      using fntype = jboolean(JNIEnv*, jclass);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     soa.AddLocalReference<jclass>(method->GetDeclaringClass()));        ScopedThreadStateChange tsc(self, kNative);        result->SetZ(fn(soa.Env(), klass.get()));      } else if (shorty == "BI") { -      typedef jbyte (fntype)(JNIEnv*, jclass, jint); +      using fntype = jbyte(JNIEnv*, jclass, jint);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     soa.AddLocalReference<jclass>(method->GetDeclaringClass()));        ScopedThreadStateChange tsc(self, kNative);        
result->SetB(fn(soa.Env(), klass.get(), args[0]));      } else if (shorty == "II") { -      typedef jint (fntype)(JNIEnv*, jclass, jint); +      using fntype = jint(JNIEnv*, jclass, jint);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     soa.AddLocalReference<jclass>(method->GetDeclaringClass()));        ScopedThreadStateChange tsc(self, kNative);        result->SetI(fn(soa.Env(), klass.get(), args[0]));      } else if (shorty == "LL") { -      typedef jobject (fntype)(JNIEnv*, jclass, jobject); +      using fntype = jobject(JNIEnv*, jclass, jobject);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -108,14 +108,14 @@ static void InterpreterJni(Thread* self,        }        result->SetL(soa.Decode<mirror::Object>(jresult));      } else if (shorty == "IIZ") { -      typedef jint (fntype)(JNIEnv*, jclass, jint, jboolean); +      using fntype = jint(JNIEnv*, jclass, jint, jboolean);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     soa.AddLocalReference<jclass>(method->GetDeclaringClass()));        ScopedThreadStateChange tsc(self, kNative);        result->SetI(fn(soa.Env(), klass.get(), args[0], args[1]));      } else if (shorty == "ILI") { -      typedef jint (fntype)(JNIEnv*, jclass, jobject, jint); +      using fntype = jint(JNIEnv*, jclass, jobject, jint);        fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(            method->GetEntryPointFromJni()));        ScopedLocalRef<jclass> klass(soa.Env(), @@ -125,7 +125,7 @@ static void InterpreterJni(Thread* self,        ScopedThreadStateChange tsc(self, kNative);        
result->SetI(fn(soa.Env(), klass.get(), arg0.get(), args[1]));      } else if (shorty == "SIZ") { -      typedef jshort (fntype)(JNIEnv*, jclass, jint, jboolean); +      using fntype = jshort(JNIEnv*, jclass, jint, jboolean);        fntype* const fn =            reinterpret_cast<fntype*>(const_cast<void*>(method->GetEntryPointFromJni()));        ScopedLocalRef<jclass> klass(soa.Env(), @@ -133,14 +133,14 @@ static void InterpreterJni(Thread* self,        ScopedThreadStateChange tsc(self, kNative);        result->SetS(fn(soa.Env(), klass.get(), args[0], args[1]));      } else if (shorty == "VIZ") { -      typedef void (fntype)(JNIEnv*, jclass, jint, jboolean); +      using fntype = void(JNIEnv*, jclass, jint, jboolean);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     soa.AddLocalReference<jclass>(method->GetDeclaringClass()));        ScopedThreadStateChange tsc(self, kNative);        fn(soa.Env(), klass.get(), args[0], args[1]);      } else if (shorty == "ZLL") { -      typedef jboolean (fntype)(JNIEnv*, jclass, jobject, jobject); +      using fntype = jboolean(JNIEnv*, jclass, jobject, jobject);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -151,7 +151,7 @@ static void InterpreterJni(Thread* self,        ScopedThreadStateChange tsc(self, kNative);        result->SetZ(fn(soa.Env(), klass.get(), arg0.get(), arg1.get()));      } else if (shorty == "ZILL") { -      typedef jboolean (fntype)(JNIEnv*, jclass, jint, jobject, jobject); +      using fntype = jboolean(JNIEnv*, jclass, jint, jobject, jobject);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     
soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -162,7 +162,7 @@ static void InterpreterJni(Thread* self,        ScopedThreadStateChange tsc(self, kNative);        result->SetZ(fn(soa.Env(), klass.get(), args[0], arg1.get(), arg2.get()));      } else if (shorty == "VILII") { -      typedef void (fntype)(JNIEnv*, jclass, jint, jobject, jint, jint); +      using fntype = void(JNIEnv*, jclass, jint, jobject, jint, jint);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -171,7 +171,7 @@ static void InterpreterJni(Thread* self,        ScopedThreadStateChange tsc(self, kNative);        fn(soa.Env(), klass.get(), args[0], arg1.get(), args[2], args[3]);      } else if (shorty == "VLILII") { -      typedef void (fntype)(JNIEnv*, jclass, jobject, jint, jobject, jint, jint); +      using fntype = void(JNIEnv*, jclass, jobject, jint, jobject, jint, jint);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jclass> klass(soa.Env(),                                     soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -187,7 +187,7 @@ static void InterpreterJni(Thread* self,      }    } else {      if (shorty == "L") { -      typedef jobject (fntype)(JNIEnv*, jobject); +      using fntype = jobject(JNIEnv*, jobject);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jobject> rcvr(soa.Env(),                                     soa.AddLocalReference<jobject>(receiver)); @@ -198,14 +198,14 @@ static void InterpreterJni(Thread* self,        }        result->SetL(soa.Decode<mirror::Object>(jresult));      } else if (shorty == "V") { -      typedef void (fntype)(JNIEnv*, jobject); +      using fntype = void(JNIEnv*, jobject);        fntype* const fn = 
reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jobject> rcvr(soa.Env(),                                     soa.AddLocalReference<jobject>(receiver));        ScopedThreadStateChange tsc(self, kNative);        fn(soa.Env(), rcvr.get());      } else if (shorty == "LL") { -      typedef jobject (fntype)(JNIEnv*, jobject, jobject); +      using fntype = jobject(JNIEnv*, jobject, jobject);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jobject> rcvr(soa.Env(),                                     soa.AddLocalReference<jobject>(receiver)); @@ -219,7 +219,7 @@ static void InterpreterJni(Thread* self,        result->SetL(soa.Decode<mirror::Object>(jresult));        ScopedThreadStateChange tsc(self, kNative);      } else if (shorty == "III") { -      typedef jint (fntype)(JNIEnv*, jobject, jint, jint); +      using fntype = jint(JNIEnv*, jobject, jint, jint);        fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());        ScopedLocalRef<jobject> rcvr(soa.Env(),                                     soa.AddLocalReference<jobject>(receiver)); diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index b324b4c99d..a607b48786 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -121,56 +121,15 @@ template<bool is_range, bool do_assignability_check>  bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,              const Instruction* inst, uint16_t inst_data, JValue* result); -// Handles streamlined non-range invoke static, direct and virtual instructions originating in -// mterp. Access checks and instrumentation other than jit profiling are not supported, but does -// support interpreter intrinsics if applicable. -// Returns true on success, otherwise throws an exception and returns false. 
-template<InvokeType type> -static inline bool DoFastInvoke(Thread* self, -                                ShadowFrame& shadow_frame, -                                const Instruction* inst, -                                uint16_t inst_data, -                                JValue* result) { -  const uint32_t method_idx = inst->VRegB_35c(); -  const uint32_t vregC = inst->VRegC_35c(); -  ObjPtr<mirror::Object> receiver = (type == kStatic) -      ? nullptr -      : shadow_frame.GetVRegReference(vregC); -  ArtMethod* sf_method = shadow_frame.GetMethod(); -  ArtMethod* const called_method = FindMethodFromCode<type, false>( -      method_idx, &receiver, sf_method, self); -  // The shadow frame should already be pushed, so we don't need to update it. -  if (UNLIKELY(called_method == nullptr)) { -    CHECK(self->IsExceptionPending()); -    result->SetJ(0); -    return false; -  } else if (UNLIKELY(!called_method->IsInvokable())) { -    called_method->ThrowInvocationTimeError(); -    result->SetJ(0); -    return false; -  } else { -    jit::Jit* jit = Runtime::Current()->GetJit(); -    if (jit != nullptr && type == kVirtual) { -      jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method); -    } -    if (called_method->IsIntrinsic()) { -      if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data, -                               shadow_frame.GetResultRegister())) { -        return !self->IsExceptionPending(); -      } -    } -    return DoCall<false, false>(called_method, self, shadow_frame, inst, inst_data, result); -  } -} -  // Handles all invoke-XXX/range instructions except for invoke-polymorphic[/range].  // Returns true on success, otherwise throws an exception and returns false. 
-template<InvokeType type, bool is_range, bool do_access_check> -static inline bool DoInvoke(Thread* self, -                            ShadowFrame& shadow_frame, -                            const Instruction* inst, -                            uint16_t inst_data, -                            JValue* result) { +template<InvokeType type, bool is_range, bool do_access_check, bool fast_invoke = false> +static ALWAYS_INLINE bool DoInvoke(Thread* self, +                                   ShadowFrame& shadow_frame, +                                   const Instruction* inst, +                                   uint16_t inst_data, +                                   JValue* result) +    REQUIRES_SHARED(Locks::mutator_lock_) {    // Make sure to check for async exceptions before anything else.    if (UNLIKELY(self->ObserveAsyncException())) {      return false; @@ -196,12 +155,24 @@ static inline bool DoInvoke(Thread* self,      if (jit != nullptr && (type == kVirtual || type == kInterface)) {        jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method);      } -    // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT. -    if (type == kVirtual || type == kInterface) { -      instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); -      if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) { -        instrumentation->InvokeVirtualOrInterface( -            self, receiver.Ptr(), sf_method, shadow_frame.GetDexPC(), called_method); +    // The fast invoke is used from mterp for some invoke variants. +    // The non-fast version is used from switch interpreter and it might not support intrinsics. +    // TODO: Unify both paths. 
+    if (fast_invoke) { +      if (called_method->IsIntrinsic()) { +        if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data, +                                 shadow_frame.GetResultRegister())) { +          return !self->IsExceptionPending(); +        } +      } +    } else { +      // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT. +      if (type == kVirtual || type == kInterface) { +        instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); +        if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) { +          instrumentation->InvokeVirtualOrInterface( +              self, receiver.Ptr(), sf_method, shadow_frame.GetDexPC(), called_method); +        }        }      }      return DoCall<is_range, do_access_check>(called_method, self, shadow_frame, inst, inst_data, @@ -277,7 +248,8 @@ bool DoInvokeCustom(Thread* self,  template<bool is_range>  static inline bool DoInvokeVirtualQuick(Thread* self, ShadowFrame& shadow_frame,                                          const Instruction* inst, uint16_t inst_data, -                                        JValue* result) { +                                        JValue* result) +    REQUIRES_SHARED(Locks::mutator_lock_) {    const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();    ObjPtr<mirror::Object> const receiver = shadow_frame.GetVRegReference(vregC);    if (UNLIKELY(receiver == nullptr)) { @@ -601,52 +573,6 @@ void SetStringInitValueToAllAliases(ShadowFrame* shadow_frame,                                      uint16_t this_obj_vreg,                                      JValue result); -// Explicitly instantiate all DoInvoke functions. 
-#define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \ -  template REQUIRES_SHARED(Locks::mutator_lock_)                                           \ -  bool DoInvoke<_type, _is_range, _do_check>(Thread* self,                                 \ -                                             ShadowFrame& shadow_frame,                    \ -                                             const Instruction* inst, uint16_t inst_data,  \ -                                             JValue* result) - -#define EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(_type)       \ -  EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, false, false);  \ -  EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, false, true);   \ -  EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, true, false);   \ -  EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, true, true); - -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kStatic)      // invoke-static/range. -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kDirect)      // invoke-direct/range. -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kVirtual)     // invoke-virtual/range. -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kSuper)       // invoke-super/range. -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kInterface)   // invoke-interface/range. -#undef EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL -#undef EXPLICIT_DO_INVOKE_TEMPLATE_DECL - -// Explicitly instantiate all DoFastInvoke functions. 
-#define EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(_type)                     \ -  template REQUIRES_SHARED(Locks::mutator_lock_)                         \ -  bool DoFastInvoke<_type>(Thread* self,                                 \ -                           ShadowFrame& shadow_frame,                    \ -                           const Instruction* inst, uint16_t inst_data,  \ -                           JValue* result) - -EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kStatic);     // invoke-static -EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kDirect);     // invoke-direct -EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kVirtual);    // invoke-virtual -#undef EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL - -// Explicitly instantiate all DoInvokeVirtualQuick functions. -#define EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(_is_range)                    \ -  template REQUIRES_SHARED(Locks::mutator_lock_)                                     \ -  bool DoInvokeVirtualQuick<_is_range>(Thread* self, ShadowFrame& shadow_frame,      \ -                                       const Instruction* inst, uint16_t inst_data,  \ -                                       JValue* result) - -EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(false);  // invoke-virtual-quick. -EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true);   // invoke-virtual-quick-range. 
-#undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK -  }  // namespace interpreter  }  // namespace art diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index 65c1aa8a79..a7423c8bbb 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -172,7 +172,7 @@ extern "C" size_t MterpInvokeVirtual(Thread* self,      REQUIRES_SHARED(Locks::mutator_lock_) {    JValue* result_register = shadow_frame->GetResultRegister();    const Instruction* inst = Instruction::At(dex_pc_ptr); -  return DoFastInvoke<kVirtual>( +  return DoInvoke<kVirtual, /*is_range*/ false, /*access_check*/ false, /*fast_invoke*/ true>(        self, *shadow_frame, inst, inst_data, result_register);  } @@ -183,7 +183,7 @@ extern "C" size_t MterpInvokeSuper(Thread* self,      REQUIRES_SHARED(Locks::mutator_lock_) {    JValue* result_register = shadow_frame->GetResultRegister();    const Instruction* inst = Instruction::At(dex_pc_ptr); -  return DoInvoke<kSuper, false, false>( +  return DoInvoke<kSuper, /*is_range*/ false, /*access_check*/ false>(        self, *shadow_frame, inst, inst_data, result_register);  } @@ -194,7 +194,7 @@ extern "C" size_t MterpInvokeInterface(Thread* self,      REQUIRES_SHARED(Locks::mutator_lock_) {    JValue* result_register = shadow_frame->GetResultRegister();    const Instruction* inst = Instruction::At(dex_pc_ptr); -  return DoInvoke<kInterface, false, false>( +  return DoInvoke<kInterface, /*is_range*/ false, /*access_check*/ false>(        self, *shadow_frame, inst, inst_data, result_register);  } @@ -205,7 +205,7 @@ extern "C" size_t MterpInvokeDirect(Thread* self,      REQUIRES_SHARED(Locks::mutator_lock_) {    JValue* result_register = shadow_frame->GetResultRegister();    const Instruction* inst = Instruction::At(dex_pc_ptr); -  return DoFastInvoke<kDirect>( +  return DoInvoke<kDirect, /*is_range*/ false, /*access_check*/ false, /*fast_invoke*/ true>(        self, *shadow_frame, inst, inst_data, 
result_register);  } @@ -216,7 +216,7 @@ extern "C" size_t MterpInvokeStatic(Thread* self,      REQUIRES_SHARED(Locks::mutator_lock_) {    JValue* result_register = shadow_frame->GetResultRegister();    const Instruction* inst = Instruction::At(dex_pc_ptr); -  return DoFastInvoke<kStatic>( +  return DoInvoke<kStatic, /*is_range*/ false, /*access_check*/ false, /*fast_invoke*/ true>(        self, *shadow_frame, inst, inst_data, result_register);  } @@ -249,7 +249,7 @@ extern "C" size_t MterpInvokeVirtualRange(Thread* self,      REQUIRES_SHARED(Locks::mutator_lock_) {    JValue* result_register = shadow_frame->GetResultRegister();    const Instruction* inst = Instruction::At(dex_pc_ptr); -  return DoInvoke<kVirtual, true, false>( +  return DoInvoke<kVirtual, /*is_range*/ true, /*access_check*/ false>(        self, *shadow_frame, inst, inst_data, result_register);  } @@ -260,7 +260,7 @@ extern "C" size_t MterpInvokeSuperRange(Thread* self,      REQUIRES_SHARED(Locks::mutator_lock_) {    JValue* result_register = shadow_frame->GetResultRegister();    const Instruction* inst = Instruction::At(dex_pc_ptr); -  return DoInvoke<kSuper, true, false>( +  return DoInvoke<kSuper, /*is_range*/ true, /*access_check*/ false>(        self, *shadow_frame, inst, inst_data, result_register);  } @@ -271,7 +271,7 @@ extern "C" size_t MterpInvokeInterfaceRange(Thread* self,      REQUIRES_SHARED(Locks::mutator_lock_) {    JValue* result_register = shadow_frame->GetResultRegister();    const Instruction* inst = Instruction::At(dex_pc_ptr); -  return DoInvoke<kInterface, true, false>( +  return DoInvoke<kInterface, /*is_range*/ true, /*access_check*/ false>(        self, *shadow_frame, inst, inst_data, result_register);  } @@ -282,7 +282,7 @@ extern "C" size_t MterpInvokeDirectRange(Thread* self,      REQUIRES_SHARED(Locks::mutator_lock_) {    JValue* result_register = shadow_frame->GetResultRegister();    const Instruction* inst = Instruction::At(dex_pc_ptr); -  return DoInvoke<kDirect, 
true, false>( +  return DoInvoke<kDirect, /*is_range*/ true, /*access_check*/ false>(        self, *shadow_frame, inst, inst_data, result_register);  } @@ -293,7 +293,7 @@ extern "C" size_t MterpInvokeStaticRange(Thread* self,      REQUIRES_SHARED(Locks::mutator_lock_) {    JValue* result_register = shadow_frame->GetResultRegister();    const Instruction* inst = Instruction::At(dex_pc_ptr); -  return DoInvoke<kStatic, true, false>( +  return DoInvoke<kStatic, /*is_range*/ true, /*access_check*/ false>(        self, *shadow_frame, inst, inst_data, result_register);  } diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc index d4b51af903..4cd378256e 100644 --- a/runtime/interpreter/unstarted_runtime.cc +++ b/runtime/interpreter/unstarted_runtime.cc @@ -1180,19 +1180,19 @@ static void UnstartedMemoryPeek(      }      case Primitive::kPrimShort: { -      typedef int16_t unaligned_short __attribute__ ((aligned (1))); +      using unaligned_short __attribute__((__aligned__(1))) = int16_t;        result->SetS(*reinterpret_cast<unaligned_short*>(static_cast<intptr_t>(address)));        return;      }      case Primitive::kPrimInt: { -      typedef int32_t unaligned_int __attribute__ ((aligned (1))); +      using unaligned_int __attribute__((__aligned__(1))) = int32_t;        result->SetI(*reinterpret_cast<unaligned_int*>(static_cast<intptr_t>(address)));        return;      }      case Primitive::kPrimLong: { -      typedef int64_t unaligned_long __attribute__ ((aligned (1))); +      using unaligned_long __attribute__((__aligned__(1))) = int64_t;        result->SetJ(*reinterpret_cast<unaligned_long*>(static_cast<intptr_t>(address)));        return;      } @@ -1919,11 +1919,16 @@ void UnstartedRuntime::UnstartedJNIUnsafeGetArrayIndexScaleForComponentType(    result->SetI(Primitive::ComponentSize(primitive_type));  } -typedef void (*InvokeHandler)(Thread* self, ShadowFrame* shadow_frame, JValue* result, -    size_t arg_size); 
+using InvokeHandler = void(*)(Thread* self, +                              ShadowFrame* shadow_frame, +                              JValue* result, +                              size_t arg_size); -typedef void (*JNIHandler)(Thread* self, ArtMethod* method, mirror::Object* receiver, -    uint32_t* args, JValue* result); +using JNIHandler = void(*)(Thread* self, +                           ArtMethod* method, +                           mirror::Object* receiver, +                           uint32_t* args, +                           JValue* result);  static bool tables_initialized_ = false;  static std::unordered_map<std::string, InvokeHandler> invoke_handlers_; diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc index 200fc5b334..bd2705d530 100644 --- a/runtime/interpreter/unstarted_runtime_test.cc +++ b/runtime/interpreter/unstarted_runtime_test.cc @@ -261,7 +261,7 @@ TEST_F(UnstartedRuntimeTest, MemoryPeekShort) {      UnstartedMemoryPeekShort(self, tmp.get(), &result, 0); -    typedef int16_t unaligned_short __attribute__ ((aligned (1))); +    using unaligned_short __attribute__((__aligned__(1))) = int16_t;      const unaligned_short* short_ptr = reinterpret_cast<const unaligned_short*>(base_ptr + i);      EXPECT_EQ(result.GetS(), *short_ptr);    } @@ -284,7 +284,7 @@ TEST_F(UnstartedRuntimeTest, MemoryPeekInt) {      UnstartedMemoryPeekInt(self, tmp.get(), &result, 0); -    typedef int32_t unaligned_int __attribute__ ((aligned (1))); +    using unaligned_int __attribute__((__aligned__(1))) = int32_t;      const unaligned_int* int_ptr = reinterpret_cast<const unaligned_int*>(base_ptr + i);      EXPECT_EQ(result.GetI(), *int_ptr);    } @@ -307,7 +307,7 @@ TEST_F(UnstartedRuntimeTest, MemoryPeekLong) {      UnstartedMemoryPeekLong(self, tmp.get(), &result, 0); -    typedef int64_t unaligned_long __attribute__ ((aligned (1))); +    using unaligned_long __attribute__((__aligned__(1))) = int64_t;      const 
unaligned_long* long_ptr = reinterpret_cast<const unaligned_long*>(base_ptr + i);      EXPECT_EQ(result.GetJ(), *long_ptr);    } diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc index 1e61ba0f2d..0a54e38698 100644 --- a/runtime/jdwp/jdwp_handler.cc +++ b/runtime/jdwp/jdwp_handler.cc @@ -1432,7 +1432,7 @@ static JdwpError DDM_Chunk(JdwpState* state, Request* request, ExpandBuf* pReply  /*   * Handler map decl.   */ -typedef JdwpError (*JdwpRequestHandler)(JdwpState* state, Request* request, ExpandBuf* reply); +using JdwpRequestHandler = JdwpError(*)(JdwpState* state, Request* request, ExpandBuf* reply);  struct JdwpHandlerMap {    uint8_t cmdSet; diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc index 63fb22cfce..6cd719a55c 100644 --- a/runtime/jit/debugger_interface.cc +++ b/runtime/jit/debugger_interface.cc @@ -77,11 +77,11 @@  namespace art {  extern "C" { -  typedef enum { +  enum JITAction {      JIT_NOACTION = 0,      JIT_REGISTER_FN,      JIT_UNREGISTER_FN -  } JITAction; +  };    struct JITCodeEntry {      // Atomic to ensure the reader can always iterate over the linked list diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index 33d228f255..1119317867 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -18,12 +18,15 @@  #include <sstream> +#include "android-base/unique_fd.h" +  #include "arch/context.h"  #include "art_method-inl.h"  #include "base/enums.h"  #include "base/histogram-inl.h"  #include "base/logging.h"  // For VLOG.  
#include "base/membarrier.h" +#include "base/memfd.h"  #include "base/mem_map.h"  #include "base/quasi_atomic.h"  #include "base/stl_util.h" @@ -52,16 +55,32 @@  #include "thread-current-inl.h"  #include "thread_list.h" +using android::base::unique_fd; +  namespace art {  namespace jit { -static constexpr int kProtCode = PROT_READ | PROT_EXEC; -static constexpr int kProtData = PROT_READ | PROT_WRITE; -static constexpr int kProtProfile = PROT_READ; -  static constexpr size_t kCodeSizeLogThreshold = 50 * KB;  static constexpr size_t kStackMapSizeLogThreshold = 50 * KB; +static constexpr int kProtR = PROT_READ; +static constexpr int kProtRW = PROT_READ | PROT_WRITE; +static constexpr int kProtRWX = PROT_READ | PROT_WRITE | PROT_EXEC; +static constexpr int kProtRX = PROT_READ | PROT_EXEC; + +namespace { + +// Translate an address belonging to one memory map into an address in a second. This is useful +// when there are two virtual memory ranges for the same physical memory range. +template <typename T> +T* TranslateAddress(T* src_ptr, const MemMap& src, const MemMap& dst) { +  CHECK(src.HasAddress(src_ptr)); +  uint8_t* const raw_src_ptr = reinterpret_cast<uint8_t*>(src_ptr); +  return reinterpret_cast<T*>(raw_src_ptr - src.Begin() + dst.Begin()); +} + +}  // namespace +  class JitCodeCache::JniStubKey {   public:    explicit JniStubKey(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) @@ -190,17 +209,41 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity,    // Register for membarrier expedited sync core if JIT will be generating code.    if (!used_only_for_profile_data) { -    art::membarrier(art::MembarrierCommand::kRegisterPrivateExpeditedSyncCore); +    if (art::membarrier(art::MembarrierCommand::kRegisterPrivateExpeditedSyncCore) != 0) { +      // MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE ensures that CPU instruction pipelines are +      // flushed and it's used when adding code to the JIT. 
The memory used by the new code may +      // have just been released and, in theory, the old code could still be in a pipeline. +      VLOG(jit) << "Kernel does not support membarrier sync-core"; +    } +  } + +  // File descriptor enabling dual-view mapping of code section. +  unique_fd mem_fd; + +  // Bionic supports memfd_create, but the call may fail on older kernels. +  mem_fd = unique_fd(art::memfd_create("/jit-cache", /* flags */ 0)); +  if (mem_fd.get() < 0) { +    VLOG(jit) << "Failed to initialize dual view JIT. memfd_create() error: " +              << strerror(errno); +  } + +  if (mem_fd.get() >= 0 && ftruncate(mem_fd, max_capacity) != 0) { +    std::ostringstream oss; +    oss << "Failed to initialize memory file: " << strerror(errno); +    *error_msg = oss.str(); +    return nullptr;    } -  // Decide how we should map the code and data sections. -  // If we use the code cache just for profiling we do not need to map the code section as -  // executable. -  // NOTE 1: this is yet another workaround to bypass strict SElinux policies in order to be able -  //         to profile system server. -  // NOTE 2: We could just not create the code section at all but we will need to -  //         special case too many cases. -  int memmap_flags_prot_code = used_only_for_profile_data ? kProtProfile : kProtCode; +  // Data cache will be half of the initial allocation. +  // Code cache will be the other half of the initial allocation. +  // TODO: Make this variable? + +  // Align both capacities to page size, as that's the unit mspaces use. +  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize); +  max_capacity = RoundDown(max_capacity, 2 * kPageSize); +  const size_t data_capacity = max_capacity / 2; +  const size_t exec_capacity = used_only_for_profile_data ? 0 : max_capacity - data_capacity; +  DCHECK_LE(data_capacity + exec_capacity, max_capacity);    std::string error_str;    // Map name specific for android_os_Debug.cpp accounting. 
@@ -208,71 +251,149 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity,    // We could do PC-relative addressing to avoid this problem, but that    // would require reserving code and data area before submitting, which    // means more windows for the code memory to be RWX. -  MemMap data_map = MemMap::MapAnonymous( -      "data-code-cache", -      /* addr */ nullptr, -      max_capacity, -      kProtData, -      /* low_4gb */ true, -      /* reuse */ false, -      /* reservation */ nullptr, -      &error_str); -  if (!data_map.IsValid()) { +  int base_flags; +  MemMap data_pages; +  if (mem_fd.get() >= 0) { +    // Dual view of JIT code cache case. Create an initial mapping of data pages large enough +    // for data and non-writable view of JIT code pages. We use the memory file descriptor to +    // enable dual mapping - we'll create a second mapping using the descriptor below. The +    // mappings will look like: +    // +    //       VA                  PA +    // +    //       +---------------+ +    //       | non exec code |\ +    //       +---------------+ \ +    //       :               :\ \ +    //       +---------------+.\.+---------------+ +    //       |  exec code    |  \|     code      | +    //       +---------------+...+---------------+ +    //       |      data     |   |     data      | +    //       +---------------+...+---------------+ +    // +    // In this configuration code updates are written to the non-executable view of the code +    // cache, and the executable view of the code cache has fixed RX memory protections. +    // +    // This memory needs to be mapped shared as the code portions will have two mappings. +    base_flags = MAP_SHARED; +    data_pages = MemMap::MapFile( +        data_capacity + exec_capacity, +        kProtRW, +        base_flags, +        mem_fd, +        /* start */ 0, +        /* low_4gb */ true, +        "data-code-cache", +        &error_str); +  } else { +    // Single view of JIT code cache case. 
Create an initial mapping of data pages large enough +    // for data and JIT code pages. The mappings will look like: +    // +    //       VA                  PA +    // +    //       +---------------+...+---------------+ +    //       |  exec code    |   |     code      | +    //       +---------------+...+---------------+ +    //       |      data     |   |     data      | +    //       +---------------+...+---------------+ +    // +    // In this configuration code updates are written to the executable view of the code cache, +    // and the executable view of the code cache transitions RX to RWX for the update and then +    // back to RX after the update. +    base_flags = MAP_PRIVATE | MAP_ANON; +    data_pages = MemMap::MapAnonymous( +        "data-code-cache", +        /* addr */ nullptr, +        data_capacity + exec_capacity, +        kProtRW, +        /* low_4gb */ true, +        /* reuse */ false, +        /* reservation */ nullptr, +        &error_str); +  } + +  if (!data_pages.IsValid()) {      std::ostringstream oss;      oss << "Failed to create read write cache: " << error_str << " size=" << max_capacity;      *error_msg = oss.str();      return nullptr;    } -  // Align both capacities to page size, as that's the unit mspaces use. -  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize); -  max_capacity = RoundDown(max_capacity, 2 * kPageSize); +  MemMap exec_pages; +  MemMap non_exec_pages; +  if (exec_capacity > 0) { +    uint8_t* const divider = data_pages.Begin() + data_capacity; +    // Set initial permission for executable view to catch any SELinux permission problems early +    // (for processes that cannot map WX pages). Otherwise, this region does not need to be +    // executable as there is no code in the cache yet. 
+    exec_pages = data_pages.RemapAtEnd(divider, +                                       "jit-code-cache", +                                       kProtRX, +                                       base_flags | MAP_FIXED, +                                       mem_fd.get(), +                                       (mem_fd.get() >= 0) ? data_capacity : 0, +                                       &error_str); +    if (!exec_pages.IsValid()) { +      std::ostringstream oss; +      oss << "Failed to create read execute code cache: " << error_str << " size=" << max_capacity; +      *error_msg = oss.str(); +      return nullptr; +    } -  // Data cache is 1 / 2 of the map. -  // TODO: Make this variable? -  size_t data_size = max_capacity / 2; -  size_t code_size = max_capacity - data_size; -  DCHECK_EQ(code_size + data_size, max_capacity); -  uint8_t* divider = data_map.Begin() + data_size; - -  MemMap code_map = data_map.RemapAtEnd( -      divider, "jit-code-cache", memmap_flags_prot_code | PROT_WRITE, &error_str); -  if (!code_map.IsValid()) { -    std::ostringstream oss; -    oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity; -    *error_msg = oss.str(); -    return nullptr; +    if (mem_fd.get() >= 0) { +      // For dual view, create the secondary view of code memory used for updating code. This view +      // is never executable. 
+      non_exec_pages = MemMap::MapFile(exec_capacity, +                                       kProtR, +                                       base_flags, +                                       mem_fd, +                                       /* start */ data_capacity, +                                       /* low_4GB */ false, +                                       "jit-code-cache-rw", +                                       &error_str); +      if (!exec_pages.IsValid()) { +        std::ostringstream oss; +        oss << "Failed to create read write code cache: " << error_str << " size=" << max_capacity; +        *error_msg = oss.str(); +        return nullptr; +      } +    } +  } else { +    // Profiling only. No memory for code required. +    DCHECK(used_only_for_profile_data);    } -  DCHECK_EQ(code_map.Begin(), divider); -  data_size = initial_capacity / 2; -  code_size = initial_capacity - data_size; -  DCHECK_EQ(code_size + data_size, initial_capacity); + +  const size_t initial_data_capacity = initial_capacity / 2; +  const size_t initial_exec_capacity = +      (exec_capacity == 0) ? 
0 : (initial_capacity - initial_data_capacity); +    return new JitCodeCache( -      std::move(code_map), -      std::move(data_map), -      code_size, -      data_size, +      std::move(data_pages), +      std::move(exec_pages), +      std::move(non_exec_pages), +      initial_data_capacity, +      initial_exec_capacity,        max_capacity, -      garbage_collect_code, -      memmap_flags_prot_code); +      garbage_collect_code);  } -JitCodeCache::JitCodeCache(MemMap&& code_map, -                           MemMap&& data_map, -                           size_t initial_code_capacity, +JitCodeCache::JitCodeCache(MemMap&& data_pages, +                           MemMap&& exec_pages, +                           MemMap&& non_exec_pages,                             size_t initial_data_capacity, +                           size_t initial_exec_capacity,                             size_t max_capacity, -                           bool garbage_collect_code, -                           int memmap_flags_prot_code) +                           bool garbage_collect_code)      : lock_("Jit code cache", kJitCodeCacheLock),        lock_cond_("Jit code cache condition variable", lock_),        collection_in_progress_(false), -      code_map_(std::move(code_map)), -      data_map_(std::move(data_map)), +      data_pages_(std::move(data_pages)), +      exec_pages_(std::move(exec_pages)), +      non_exec_pages_(std::move(non_exec_pages)),        max_capacity_(max_capacity), -      current_capacity_(initial_code_capacity + initial_data_capacity), -      code_end_(initial_code_capacity), +      current_capacity_(initial_exec_capacity + initial_data_capacity),        data_end_(initial_data_capacity), +      exec_end_(initial_exec_capacity),        last_collection_increased_code_cache_(false),        garbage_collect_code_(garbage_collect_code),        used_memory_for_data_(0), @@ -284,40 +405,46 @@ JitCodeCache::JitCodeCache(MemMap&& code_map,        histogram_code_memory_use_("Memory used 
for compiled code", 16),        histogram_profiling_info_memory_use_("Memory used for profiling info", 16),        is_weak_access_enabled_(true), -      inline_cache_cond_("Jit inline cache condition variable", lock_), -      memmap_flags_prot_code_(memmap_flags_prot_code) { - -  DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity); -  code_mspace_ = create_mspace_with_base(code_map_.Begin(), code_end_, false /*locked*/); -  data_mspace_ = create_mspace_with_base(data_map_.Begin(), data_end_, false /*locked*/); - -  if (code_mspace_ == nullptr || data_mspace_ == nullptr) { -    PLOG(FATAL) << "create_mspace_with_base failed"; +      inline_cache_cond_("Jit inline cache condition variable", lock_) { + +  DCHECK_GE(max_capacity, initial_exec_capacity + initial_data_capacity); + +  // Initialize the data heap +  data_mspace_ = create_mspace_with_base(data_pages_.Begin(), data_end_, false /*locked*/); +  CHECK(data_mspace_ != nullptr) << "create_mspace_with_base (data) failed"; + +  // Initialize the code heap +  MemMap* code_heap = nullptr; +  if (non_exec_pages_.IsValid()) { +    code_heap = &non_exec_pages_; +  } else if (exec_pages_.IsValid()) { +    code_heap = &exec_pages_; +  } +  if (code_heap != nullptr) { +    // Make all pages reserved for the code heap writable. The mspace allocator, that manages the +    // heap, will take and initialize pages in create_mspace_with_base(). +    CheckedCall(mprotect, "create code heap", code_heap->Begin(), code_heap->Size(), kProtRW); +    exec_mspace_ = create_mspace_with_base(code_heap->Begin(), exec_end_, false /*locked*/); +    CHECK(exec_mspace_ != nullptr) << "create_mspace_with_base (exec) failed"; +    SetFootprintLimit(current_capacity_); +    // Protect pages containing heap metadata. Updates to the code heap toggle write permission to +    // perform the update and there are no other times write access is required. 
+    CheckedCall(mprotect, "protect code heap", code_heap->Begin(), code_heap->Size(), kProtR); +  } else { +    exec_mspace_ = nullptr; +    SetFootprintLimit(current_capacity_);    } -  SetFootprintLimit(current_capacity_); - -  CheckedCall(mprotect, -              "mprotect jit code cache", -              code_map_.Begin(), -              code_map_.Size(), -              memmap_flags_prot_code_); -  CheckedCall(mprotect, -              "mprotect jit data cache", -              data_map_.Begin(), -              data_map_.Size(), -              kProtData); -    VLOG(jit) << "Created jit code cache: initial data size="              << PrettySize(initial_data_capacity)              << ", initial code size=" -            << PrettySize(initial_code_capacity); +            << PrettySize(initial_exec_capacity);  }  JitCodeCache::~JitCodeCache() {}  bool JitCodeCache::ContainsPc(const void* ptr) const { -  return code_map_.Begin() <= ptr && ptr < code_map_.End(); +  return exec_pages_.Begin() <= ptr && ptr < exec_pages_.End();  }  bool JitCodeCache::WillExecuteJitCode(ArtMethod* method) { @@ -385,22 +512,20 @@ class ScopedCodeCacheWrite : ScopedTrace {        : ScopedTrace("ScopedCodeCacheWrite"),          code_cache_(code_cache) {      ScopedTrace trace("mprotect all"); -    CheckedCall( -        mprotect, -        "make code writable", -        code_cache_->code_map_.Begin(), -        code_cache_->code_map_.Size(), -        code_cache_->memmap_flags_prot_code_ | PROT_WRITE); +    const MemMap* const updatable_pages = code_cache_->GetUpdatableCodeMapping(); +    if (updatable_pages != nullptr) { +      int prot = code_cache_->HasDualCodeMapping() ? 
kProtRW : kProtRWX; +      CheckedCall(mprotect, "Cache +W", updatable_pages->Begin(), updatable_pages->Size(), prot); +    }    }    ~ScopedCodeCacheWrite() {      ScopedTrace trace("mprotect code"); -    CheckedCall( -        mprotect, -        "make code protected", -        code_cache_->code_map_.Begin(), -        code_cache_->code_map_.Size(), -        code_cache_->memmap_flags_prot_code_); +    const MemMap* const updatable_pages = code_cache_->GetUpdatableCodeMapping(); +    if (updatable_pages != nullptr) { +      int prot = code_cache_->HasDualCodeMapping() ? kProtR : kProtRX; +      CheckedCall(mprotect, "Cache -W", updatable_pages->Begin(), updatable_pages->Size(), prot); +    }    }   private: @@ -602,7 +727,13 @@ void JitCodeCache::FreeCodeAndData(const void* code_ptr) {    if (OatQuickMethodHeader::FromCodePointer(code_ptr)->IsOptimized()) {      FreeData(GetRootTable(code_ptr));    }  // else this is a JNI stub without any data. -  FreeCode(reinterpret_cast<uint8_t*>(allocation)); + +  uint8_t* code_allocation = reinterpret_cast<uint8_t*>(allocation); +  if (HasDualCodeMapping()) { +    code_allocation = TranslateAddress(code_allocation, exec_pages_, non_exec_pages_); +  } + +  FreeCode(code_allocation);  }  void JitCodeCache::FreeAllMethodHeaders( @@ -753,6 +884,16 @@ void JitCodeCache::WaitForPotentialCollectionToCompleteRunnable(Thread* self) {    }  } +const MemMap* JitCodeCache::GetUpdatableCodeMapping() const { +  if (HasDualCodeMapping()) { +    return &non_exec_pages_; +  } else if (HasCodeMapping()) { +    return &exec_pages_; +  } else { +    return nullptr; +  } +} +  uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,                                            ArtMethod* method,                                            uint8_t* stack_map, @@ -773,31 +914,52 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,      DCheckRootsAreValid(roots);    } -  size_t alignment = GetInstructionSetAlignment(kRuntimeISA); -  // Ensure the 
header ends up at expected instruction alignment. -  size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment); -  size_t total_size = header_size + code_size; -    OatQuickMethodHeader* method_header = nullptr;    uint8_t* code_ptr = nullptr; -  uint8_t* memory = nullptr; +    MutexLock mu(self, lock_);    // We need to make sure that there will be no jit-gcs going on and wait for any ongoing one to    // finish.    WaitForPotentialCollectionToCompleteRunnable(self);    {      ScopedCodeCacheWrite scc(this); -    memory = AllocateCode(total_size); -    if (memory == nullptr) { + +    size_t alignment = GetInstructionSetAlignment(kRuntimeISA); +    // Ensure the header ends up at expected instruction alignment. +    size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment); +    size_t total_size = header_size + code_size; + +    // AllocateCode allocates memory in non-executable region for alignment header and code. The +    // header size may include alignment padding. +    uint8_t* nox_memory = AllocateCode(total_size); +    if (nox_memory == nullptr) {        return nullptr;      } -    code_ptr = memory + header_size; +    // code_ptr points to non-executable code. +    code_ptr = nox_memory + header_size;      std::copy(code, code + code_size, code_ptr);      method_header = OatQuickMethodHeader::FromCodePointer(code_ptr); + +    // From here code_ptr points to executable code. +    if (non_exec_pages_.IsValid()) { +      code_ptr = TranslateAddress(code_ptr, non_exec_pages_, exec_pages_); +    } +      new (method_header) OatQuickMethodHeader(          (stack_map != nullptr) ? code_ptr - stack_map : 0u,          code_size); + +    DCHECK(!Runtime::Current()->IsAotCompiler()); +    if (has_should_deoptimize_flag) { +      method_header->SetHasShouldDeoptimizeFlag(); +    } + +    // Update method_header pointer to executable code region. 
+    if (non_exec_pages_.IsValid()) { +      method_header = TranslateAddress(method_header, non_exec_pages_, exec_pages_); +    } +      // Flush caches before we remove write permission because some ARMv8 Qualcomm kernels may      // trigger a segfault if a page fault occurs when requesting a cache maintenance operation.      // This is a kernel bug that we need to work around until affected devices (e.g. Nexus 5X and @@ -813,16 +975,14 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,      // shootdown (incidentally invalidating the CPU pipelines by sending an IPI to all cores to      // notify them of the TLB invalidation). Some architectures, notably ARM and ARM64, have      // hardware support that broadcasts TLB invalidations and so their kernels have no software -    // based TLB shootdown. +    // based TLB shootdown. The sync-core flavor of membarrier was introduced in Linux 4.16 to +    // address this (see membarrier(2)). The membarrier here will fail on prior kernels and on +    // platforms lacking the appropriate support.      art::membarrier(art::MembarrierCommand::kPrivateExpeditedSyncCore); -    DCHECK(!Runtime::Current()->IsAotCompiler()); -    if (has_should_deoptimize_flag) { -      method_header->SetHasShouldDeoptimizeFlag(); -    } -      number_of_compilations_++;    } +    // We need to update the entry point in the runnable state for the instrumentation.    {      // The following needs to be guarded by cha_lock_ also. 
Otherwise it's possible that the @@ -1167,9 +1327,9 @@ void JitCodeCache::SetFootprintLimit(size_t new_footprint) {    DCHECK(IsAlignedParam(per_space_footprint, kPageSize));    DCHECK_EQ(per_space_footprint * 2, new_footprint);    mspace_set_footprint_limit(data_mspace_, per_space_footprint); -  { +  if (HasCodeMapping()) {      ScopedCodeCacheWrite scc(this); -    mspace_set_footprint_limit(code_mspace_, per_space_footprint); +    mspace_set_footprint_limit(exec_mspace_, per_space_footprint);    }  } @@ -1244,8 +1404,8 @@ void JitCodeCache::GarbageCollectCache(Thread* self) {        number_of_collections_++;        live_bitmap_.reset(CodeCacheBitmap::Create(            "code-cache-bitmap", -          reinterpret_cast<uintptr_t>(code_map_.Begin()), -          reinterpret_cast<uintptr_t>(code_map_.Begin() + current_capacity_ / 2))); +          reinterpret_cast<uintptr_t>(exec_pages_.Begin()), +          reinterpret_cast<uintptr_t>(exec_pages_.Begin() + current_capacity_ / 2)));        collection_in_progress_ = true;      }    } @@ -1614,15 +1774,17 @@ ProfilingInfo* JitCodeCache::AddProfilingInfoInternal(Thread* self ATTRIBUTE_UNU  // NO_THREAD_SAFETY_ANALYSIS as this is called from mspace code, at which point the lock  // is already held.  
void* JitCodeCache::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_SAFETY_ANALYSIS { -  if (code_mspace_ == mspace) { -    size_t result = code_end_; -    code_end_ += increment; -    return reinterpret_cast<void*>(result + code_map_.Begin()); +  if (mspace == exec_mspace_) { +    DCHECK(exec_mspace_ != nullptr); +    const MemMap* const code_pages = GetUpdatableCodeMapping(); +    void* result = code_pages->Begin() + exec_end_; +    exec_end_ += increment; +    return result;    } else {      DCHECK_EQ(data_mspace_, mspace); -    size_t result = data_end_; +    void* result = data_pages_.Begin() + data_end_;      data_end_ += increment; -    return reinterpret_cast<void*>(result + data_map_.Begin()); +    return result;    }  } @@ -1849,7 +2011,7 @@ void JitCodeCache::InvalidateCompiledCodeFor(ArtMethod* method,  uint8_t* JitCodeCache::AllocateCode(size_t code_size) {    size_t alignment = GetInstructionSetAlignment(kRuntimeISA);    uint8_t* result = reinterpret_cast<uint8_t*>( -      mspace_memalign(code_mspace_, alignment, code_size)); +      mspace_memalign(exec_mspace_, alignment, code_size));    size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);    // Ensure the header ends up at expected instruction alignment.    
DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment); @@ -1859,7 +2021,7 @@ uint8_t* JitCodeCache::AllocateCode(size_t code_size) {  void JitCodeCache::FreeCode(uint8_t* code) {    used_memory_for_code_ -= mspace_usable_size(code); -  mspace_free(code_mspace_, code); +  mspace_free(exec_mspace_, code);  }  uint8_t* JitCodeCache::AllocateData(size_t data_size) { diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index e2aa01c121..76ad8db886 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -223,7 +223,7 @@ class JitCodeCache {        REQUIRES_SHARED(Locks::mutator_lock_);    bool OwnsSpace(const void* mspace) const NO_THREAD_SAFETY_ANALYSIS { -    return mspace == code_mspace_ || mspace == data_mspace_; +    return mspace == data_mspace_ || mspace == exec_mspace_;    }    void* MoreCore(const void* mspace, intptr_t increment); @@ -279,13 +279,13 @@ class JitCodeCache {   private:    // Take ownership of maps. -  JitCodeCache(MemMap&& code_map, -               MemMap&& data_map, -               size_t initial_code_capacity, +  JitCodeCache(MemMap&& data_pages, +               MemMap&& exec_pages, +               MemMap&& non_exec_pages,                 size_t initial_data_capacity, +               size_t initial_exec_capacity,                 size_t max_capacity, -               bool garbage_collect_code, -               int memmap_flags_prot_code); +               bool garbage_collect_code);    // Internal version of 'CommitCode' that will not retry if the    // allocation fails. Return null if the allocation fails. 
@@ -381,6 +381,16 @@ class JitCodeCache {    uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);    void FreeData(uint8_t* data) REQUIRES(lock_); +  bool HasDualCodeMapping() const { +    return non_exec_pages_.IsValid(); +  } + +  bool HasCodeMapping() const { +    return exec_pages_.IsValid(); +  } + +  const MemMap* GetUpdatableCodeMapping() const; +    bool IsWeakAccessEnabled(Thread* self) const;    void WaitUntilInlineCacheAccessible(Thread* self)        REQUIRES(!lock_) @@ -395,14 +405,17 @@ class JitCodeCache {    ConditionVariable lock_cond_ GUARDED_BY(lock_);    // Whether there is a code cache collection in progress.    bool collection_in_progress_ GUARDED_BY(lock_); -  // Mem map which holds code. -  MemMap code_map_;    // Mem map which holds data (stack maps and profiling info). -  MemMap data_map_; -  // The opaque mspace for allocating code. -  void* code_mspace_ GUARDED_BY(lock_); +  MemMap data_pages_; +  // Mem map which holds code and has executable permission. +  MemMap exec_pages_; +  // Mem map which holds code with non executable permission. Only valid for dual view JIT when +  // this is the non-executable view of code used to write updates. +  MemMap non_exec_pages_;    // The opaque mspace for allocating data.    void* data_mspace_ GUARDED_BY(lock_); +  // The opaque mspace for allocating code. +  void* exec_mspace_ GUARDED_BY(lock_);    // Bitmap for collecting code and data.    std::unique_ptr<CodeCacheBitmap> live_bitmap_;    // Holds compiled code associated with the shorty for a JNI stub. @@ -420,12 +433,12 @@ class JitCodeCache {    // The current capacity in bytes of the code cache.    size_t current_capacity_ GUARDED_BY(lock_); -  // The current footprint in bytes of the code portion of the code cache. -  size_t code_end_ GUARDED_BY(lock_); -    // The current footprint in bytes of the data portion of the code cache.    
size_t data_end_ GUARDED_BY(lock_); +  // The current footprint in bytes of the code portion of the code cache. +  size_t exec_end_ GUARDED_BY(lock_); +    // Whether the last collection round increased the code cache.    bool last_collection_increased_code_cache_ GUARDED_BY(lock_); @@ -464,9 +477,6 @@ class JitCodeCache {    // Condition to wait on for accessing inline caches.    ConditionVariable inline_cache_cond_ GUARDED_BY(lock_); -  // Mapping flags for the code section. -  const int memmap_flags_prot_code_; -    friend class art::JitJniStubTestHelper;    friend class ScopedCodeCacheWrite; diff --git a/runtime/jni/java_vm_ext.cc b/runtime/jni/java_vm_ext.cc index 42406cf73c..6769368ee4 100644 --- a/runtime/jni/java_vm_ext.cc +++ b/runtime/jni/java_vm_ext.cc @@ -333,7 +333,7 @@ class Libraries {      }      ScopedThreadSuspension sts(self, kNative);      // Do this without holding the jni libraries lock to prevent possible deadlocks. -    typedef void (*JNI_OnUnloadFn)(JavaVM*, void*); +    using JNI_OnUnloadFn = void(*)(JavaVM*, void*);      for (auto library : unload_libraries) {        void* const sym = library->FindSymbol("JNI_OnUnload", nullptr);        if (sym == nullptr) { @@ -1026,7 +1026,7 @@ bool JavaVMExt::LoadNativeLibrary(JNIEnv* env,      self->SetClassLoaderOverride(class_loader);      VLOG(jni) << "[Calling JNI_OnLoad in \"" << path << "\"]"; -    typedef int (*JNI_OnLoadFn)(JavaVM*, void*); +    using JNI_OnLoadFn = int(*)(JavaVM*, void*);      JNI_OnLoadFn jni_on_load = reinterpret_cast<JNI_OnLoadFn>(sym);      int version = (*jni_on_load)(this, nullptr); diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc index 0b615a6b9a..cf6543fa26 100644 --- a/runtime/mirror/object_test.cc +++ b/runtime/mirror/object_test.cc @@ -204,7 +204,7 @@ TEST_F(ObjectTest, AllocArray_FillUsable) {  template<typename ArrayT>  void TestPrimitiveArray(ClassLinker* cl) {    ScopedObjectAccess soa(Thread::Current()); -  typedef typename 
ArrayT::ElementType T; +  using T = typename ArrayT::ElementType;    StackHandleScope<2> hs(soa.Self());    Handle<ArrayT> a = hs.NewHandle(ArrayT::Alloc(soa.Self(), 2)); @@ -252,9 +252,9 @@ TEST_F(ObjectTest, PrimitiveArray_Short_Alloc) {  }  TEST_F(ObjectTest, PrimitiveArray_Double_Alloc) { -  typedef DoubleArray ArrayT; +  using ArrayT = DoubleArray;    ScopedObjectAccess soa(Thread::Current()); -  typedef typename ArrayT::ElementType T; +  using T = typename ArrayT::ElementType;    StackHandleScope<2> hs(soa.Self());    Handle<ArrayT> a = hs.NewHandle(ArrayT::Alloc(soa.Self(), 2)); @@ -283,9 +283,9 @@ TEST_F(ObjectTest, PrimitiveArray_Double_Alloc) {  }  TEST_F(ObjectTest, PrimitiveArray_Float_Alloc) { -  typedef FloatArray ArrayT; +  using ArrayT = FloatArray;    ScopedObjectAccess soa(Thread::Current()); -  typedef typename ArrayT::ElementType T; +  using T = typename ArrayT::ElementType;    StackHandleScope<2> hs(soa.Self());    Handle<ArrayT> a = hs.NewHandle(ArrayT::Alloc(soa.Self(), 2)); diff --git a/runtime/mirror/var_handle.cc b/runtime/mirror/var_handle.cc index 903826a047..ba99a07842 100644 --- a/runtime/mirror/var_handle.cc +++ b/runtime/mirror/var_handle.cc @@ -691,7 +691,7 @@ class TypeAdaptorAccessor : public Object::Accessor<T> {  template <typename T>  class FieldAccessViaAccessor {   public: -  typedef Object::Accessor<T> Accessor; +  using Accessor = Object::Accessor<T>;    // Apply an Accessor to get a field in an object.    
static void Get(ObjPtr<Object> obj, diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index 0e619407e5..b18a048c60 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -325,7 +325,7 @@ static void VMRuntime_runHeapTasks(JNIEnv* env, jobject) {    Runtime::Current()->GetHeap()->GetTaskProcessor()->RunAllTasks(ThreadForEnv(env));  } -typedef std::map<std::string, ObjPtr<mirror::String>> StringTable; +using StringTable = std::map<std::string, ObjPtr<mirror::String>>;  class PreloadDexCachesStringsVisitor : public SingleRootVisitor {   public: diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc index 0579b6e39d..5b965090d2 100644 --- a/runtime/oat_file.cc +++ b/runtime/oat_file.cc @@ -410,7 +410,7 @@ inline static bool ReadOatDexFileData(const OatFile& oat_file,      return false;    }    static_assert(std::is_trivial<T>::value, "T must be a trivial type"); -  typedef __attribute__((__aligned__(1))) T unaligned_type; +  using unaligned_type __attribute__((__aligned__(1))) = T;    *value = *reinterpret_cast<const unaligned_type*>(*oat);    *oat += sizeof(T);    return true; diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc index 1f0b26529a..92d2d44699 100644 --- a/runtime/oat_file_manager.cc +++ b/runtime/oat_file_manager.cc @@ -151,7 +151,7 @@ OatFileManager::~OatFileManager() {  }  std::vector<const OatFile*> OatFileManager::RegisterImageOatFiles( -    std::vector<gc::space::ImageSpace*> spaces) { +    const std::vector<gc::space::ImageSpace*>& spaces) {    std::vector<const OatFile*> oat_files;    for (gc::space::ImageSpace* space : spaces) {      oat_files.push_back(RegisterOatFile(space->ReleaseOatFile())); diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h index 4132b25280..7d96a7a678 100644 --- a/runtime/oat_file_manager.h +++ b/runtime/oat_file_manager.h @@ -73,7 +73,8 @@ class OatFileManager {    // Returns the oat 
files for the images, registers the oat files.    // Takes ownership of the imagespace's underlying oat files. -  std::vector<const OatFile*> RegisterImageOatFiles(std::vector<gc::space::ImageSpace*> spaces) +  std::vector<const OatFile*> RegisterImageOatFiles( +      const std::vector<gc::space::ImageSpace*>& spaces)        REQUIRES(!Locks::oat_file_manager_lock_);    // Finds or creates the oat file holding dex_location. Then loads and returns diff --git a/runtime/subtype_check_info_test.cc b/runtime/subtype_check_info_test.cc index 53230930e6..9bd135e4c2 100644 --- a/runtime/subtype_check_info_test.cc +++ b/runtime/subtype_check_info_test.cc @@ -131,7 +131,7 @@ struct SubtypeCheckInfoTest : public ::testing::Test {    // Create an SubtypeCheckInfo with the same depth, but with everything else reset.    // Returns: SubtypeCheckInfo in the Uninitialized state. -  static SubtypeCheckInfo CopyCleared(SubtypeCheckInfo sc) { +  static SubtypeCheckInfo CopyCleared(const SubtypeCheckInfo& sc) {      SubtypeCheckInfo cleared_copy{};      cleared_copy.depth_ = sc.depth_;      DCHECK_EQ(SubtypeCheckInfo::kUninitialized, cleared_copy.GetState()); diff --git a/runtime/thread.cc b/runtime/thread.cc index 8a8f53743e..ae7a1a74ab 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -3147,8 +3147,10 @@ void Thread::ThrowNewWrappedException(const char* exception_class_descriptor,  }  void Thread::ThrowOutOfMemoryError(const char* msg) { -  LOG(WARNING) << StringPrintf("Throwing OutOfMemoryError \"%s\"%s", -      msg, (tls32_.throwing_OutOfMemoryError ? " (recursive case)" : "")); +  LOG(WARNING) << "Throwing OutOfMemoryError " +               << '"' << msg << '"' +               << " (VmSize " << GetProcessStatus("VmSize") +               << (tls32_.throwing_OutOfMemoryError ? 
", recursive case)" : ")");    if (!tls32_.throwing_OutOfMemoryError) {      tls32_.throwing_OutOfMemoryError = true;      ThrowNewException("Ljava/lang/OutOfMemoryError;", msg); diff --git a/runtime/trace.cc b/runtime/trace.cc index 7e48bae581..0e8d318f22 100644 --- a/runtime/trace.cc +++ b/runtime/trace.cc @@ -1124,7 +1124,7 @@ static void DumpThread(Thread* t, void* arg) {  void Trace::DumpThreadList(std::ostream& os) {    Thread* self = Thread::Current(); -  for (auto it : exited_threads_) { +  for (const auto& it : exited_threads_) {      os << it.first << "\t" << it.second << "\n";    }    Locks::thread_list_lock_->AssertNotHeld(self); diff --git a/runtime/transaction.cc b/runtime/transaction.cc index c9766bc9ca..1e5b2bbd4c 100644 --- a/runtime/transaction.cc +++ b/runtime/transaction.cc @@ -320,7 +320,7 @@ void Transaction::VisitRoots(RootVisitor* visitor) {  void Transaction::VisitObjectLogs(RootVisitor* visitor) {    // List of moving roots. -  typedef std::pair<mirror::Object*, mirror::Object*> ObjectPair; +  using ObjectPair = std::pair<mirror::Object*, mirror::Object*>;    std::list<ObjectPair> moving_roots;    // Visit roots. @@ -348,7 +348,7 @@ void Transaction::VisitObjectLogs(RootVisitor* visitor) {  void Transaction::VisitArrayLogs(RootVisitor* visitor) {    // List of moving roots. 
-  typedef std::pair<mirror::Array*, mirror::Array*> ArrayPair; +  using ArrayPair = std::pair<mirror::Array*, mirror::Array*>;    std::list<ArrayPair> moving_roots;    for (auto& it : array_logs_) { diff --git a/sigchainlib/sigchain_test.cc b/sigchainlib/sigchain_test.cc index 53e1e40454..bb997877a1 100644 --- a/sigchainlib/sigchain_test.cc +++ b/sigchainlib/sigchain_test.cc @@ -38,7 +38,7 @@  #include "sigchain.h"  #if !defined(__BIONIC__) -typedef sigset_t sigset64_t; +using sigset64_t = sigset_t;  static int sigemptyset64(sigset64_t* set) {    return sigemptyset(set); diff --git a/simulator/code_simulator_container.cc b/simulator/code_simulator_container.cc index 3206bc7844..dc553dfe5d 100644 --- a/simulator/code_simulator_container.cc +++ b/simulator/code_simulator_container.cc @@ -34,13 +34,13 @@ CodeSimulatorContainer::CodeSimulatorContainer(InstructionSet target_isa)    if (libart_simulator_handle_ == nullptr) {      VLOG(simulator) << "Could not load " << libart_simulator_so_name << ": " << dlerror();    } else { -    typedef CodeSimulator* (*create_code_simulator_ptr_)(InstructionSet target_isa); -    create_code_simulator_ptr_ create_code_simulator_ = -        reinterpret_cast<create_code_simulator_ptr_>( +    using CreateCodeSimulatorPtr = CodeSimulator*(*)(InstructionSet); +    CreateCodeSimulatorPtr create_code_simulator = +        reinterpret_cast<CreateCodeSimulatorPtr>(              dlsym(libart_simulator_handle_, "CreateCodeSimulator")); -    DCHECK(create_code_simulator_ != nullptr) << "Fail to find symbol of CreateCodeSimulator: " +    DCHECK(create_code_simulator != nullptr) << "Fail to find symbol of CreateCodeSimulator: "          << dlerror(); -    simulator_ = create_code_simulator_(target_isa); +    simulator_ = create_code_simulator(target_isa);    }  } diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc index 33a8f5bba2..540e6ce357 100644 --- a/test/004-JniTest/jni_test.cc +++ b/test/004-JniTest/jni_test.cc @@ -62,7 
+62,7 @@ static void* AttachHelper(void* arg) {    int attach_result = jvm->AttachCurrentThread(&env, &args);    CHECK_EQ(attach_result, 0); -  typedef void (*Fn)(JNIEnv*); +  using Fn = void(*)(JNIEnv*);    Fn fn = reinterpret_cast<Fn>(arg);    fn(env); @@ -704,7 +704,7 @@ class JniCallDefaultMethodsTest {    }   private: -  void TestCalls(const char* declaring_class, std::vector<const char*> methods) { +  void TestCalls(const char* declaring_class, const std::vector<const char*>& methods) {      jmethodID new_method = env_->GetMethodID(concrete_class_, "<init>", "()V");      jobject obj = env_->NewObject(concrete_class_, new_method);      CHECK(!env_->ExceptionCheck()); diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc index a74f7638bd..cc7e806ba5 100644 --- a/test/115-native-bridge/nativebridge.cc +++ b/test/115-native-bridge/nativebridge.cc @@ -45,7 +45,7 @@ static const android::NativeBridgeRuntimeCallbacks* gNativeBridgeArtCallbacks;  static jint trampoline_JNI_OnLoad(JavaVM* vm, void* reserved) {    JNIEnv* env = nullptr; -  typedef jint (*FnPtr_t)(JavaVM*, void*); +  using FnPtr_t = jint(*)(JavaVM*, void*);    FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("JNI_OnLoad")->fnPtr);    vm->GetEnv(reinterpret_cast<void **>(&env), JNI_VERSION_1_6); @@ -91,9 +91,8 @@ static jint trampoline_JNI_OnLoad(JavaVM* vm, void* reserved) {    return fnPtr(vm, reserved);  } -static void trampoline_Java_Main_testFindClassOnAttachedNativeThread(JNIEnv* env, -                                                                     jclass klass) { -  typedef void (*FnPtr_t)(JNIEnv*, jclass); +static void trampoline_Java_Main_testFindClassOnAttachedNativeThread(JNIEnv* env, jclass klass) { +  using FnPtr_t = void(*)(JNIEnv*, jclass);    FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>      (find_native_bridge_method("testFindClassOnAttachedNativeThread")->fnPtr);    printf("%s called!\n", __FUNCTION__); @@ -102,7 +101,7 @@ 
static void trampoline_Java_Main_testFindClassOnAttachedNativeThread(JNIEnv* env  static void trampoline_Java_Main_testFindFieldOnAttachedNativeThreadNative(JNIEnv* env,                                                                             jclass klass) { -  typedef void (*FnPtr_t)(JNIEnv*, jclass); +  using FnPtr_t = void(*)(JNIEnv*, jclass);    FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>      (find_native_bridge_method("testFindFieldOnAttachedNativeThreadNative")->fnPtr);    printf("%s called!\n", __FUNCTION__); @@ -111,7 +110,7 @@ static void trampoline_Java_Main_testFindFieldOnAttachedNativeThreadNative(JNIEn  static void trampoline_Java_Main_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env,                                                                            jclass klass) { -  typedef void (*FnPtr_t)(JNIEnv*, jclass); +  using FnPtr_t = void(*)(JNIEnv*, jclass);    FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>      (find_native_bridge_method("testCallStaticVoidMethodOnSubClassNative")->fnPtr);    printf("%s called!\n", __FUNCTION__); @@ -119,7 +118,7 @@ static void trampoline_Java_Main_testCallStaticVoidMethodOnSubClassNative(JNIEnv  }  static jobject trampoline_Java_Main_testGetMirandaMethodNative(JNIEnv* env, jclass klass) { -  typedef jobject (*FnPtr_t)(JNIEnv*, jclass); +  using FnPtr_t = jobject(*)(JNIEnv*, jclass);    FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>      (find_native_bridge_method("testGetMirandaMethodNative")->fnPtr);    printf("%s called!\n", __FUNCTION__); @@ -127,7 +126,7 @@ static jobject trampoline_Java_Main_testGetMirandaMethodNative(JNIEnv* env, jcla  }  static void trampoline_Java_Main_testNewStringObject(JNIEnv* env, jclass klass) { -  typedef void (*FnPtr_t)(JNIEnv*, jclass); +  using FnPtr_t = void(*)(JNIEnv*, jclass);    FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>      (find_native_bridge_method("testNewStringObject")->fnPtr);    printf("%s called!\n", __FUNCTION__); @@ -135,7 +134,7 @@ static void 
trampoline_Java_Main_testNewStringObject(JNIEnv* env, jclass klass)  }  static void trampoline_Java_Main_testZeroLengthByteBuffers(JNIEnv* env, jclass klass) { -  typedef void (*FnPtr_t)(JNIEnv*, jclass); +  using FnPtr_t = void(*)(JNIEnv*, jclass);    FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>      (find_native_bridge_method("testZeroLengthByteBuffers")->fnPtr);    printf("%s called!\n", __FUNCTION__); @@ -145,8 +144,8 @@ static void trampoline_Java_Main_testZeroLengthByteBuffers(JNIEnv* env, jclass k  static jbyte trampoline_Java_Main_byteMethod(JNIEnv* env, jclass klass, jbyte b1, jbyte b2,                                               jbyte b3, jbyte b4, jbyte b5, jbyte b6,                                               jbyte b7, jbyte b8, jbyte b9, jbyte b10) { -  typedef jbyte (*FnPtr_t)(JNIEnv*, jclass, jbyte, jbyte, jbyte, jbyte, jbyte, -                           jbyte, jbyte, jbyte, jbyte, jbyte); +  using FnPtr_t = jbyte(*)(JNIEnv*, jclass, jbyte, jbyte, jbyte, jbyte, jbyte, jbyte, jbyte, jbyte, +                           jbyte, jbyte);    FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("byteMethod")->fnPtr);    printf("%s called!\n", __FUNCTION__);    return fnPtr(env, klass, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10); @@ -155,8 +154,8 @@ static jbyte trampoline_Java_Main_byteMethod(JNIEnv* env, jclass klass, jbyte b1  static jshort trampoline_Java_Main_shortMethod(JNIEnv* env, jclass klass, jshort s1, jshort s2,                                                 jshort s3, jshort s4, jshort s5, jshort s6,                                                 jshort s7, jshort s8, jshort s9, jshort s10) { -  typedef jshort (*FnPtr_t)(JNIEnv*, jclass, jshort, jshort, jshort, jshort, jshort, -                            jshort, jshort, jshort, jshort, jshort); +  using FnPtr_t = jshort(*)(JNIEnv*, jclass, jshort, jshort, jshort, jshort, jshort, jshort, jshort, +                            jshort, jshort, jshort);    FnPtr_t fnPtr = 
reinterpret_cast<FnPtr_t>(find_native_bridge_method("shortMethod")->fnPtr);    printf("%s called!\n", __FUNCTION__);    return fnPtr(env, klass, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10); @@ -166,7 +165,7 @@ static jboolean trampoline_Java_Main_booleanMethod(JNIEnv* env, jclass klass, jb                                                     jboolean b2, jboolean b3, jboolean b4,                                                     jboolean b5, jboolean b6, jboolean b7,                                                     jboolean b8, jboolean b9, jboolean b10) { -  typedef jboolean (*FnPtr_t)(JNIEnv*, jclass, jboolean, jboolean, jboolean, jboolean, jboolean, +  using FnPtr_t = jboolean(*)(JNIEnv*, jclass, jboolean, jboolean, jboolean, jboolean, jboolean,                                jboolean, jboolean, jboolean, jboolean, jboolean);    FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("booleanMethod")->fnPtr);    printf("%s called!\n", __FUNCTION__); @@ -176,8 +175,8 @@ static jboolean trampoline_Java_Main_booleanMethod(JNIEnv* env, jclass klass, jb  static jchar trampoline_Java_Main_charMethod(JNIEnv* env, jclass klass, jchar c1, jchar c2,                                               jchar c3, jchar c4, jchar c5, jchar c6,                                               jchar c7, jchar c8, jchar c9, jchar c10) { -  typedef jchar (*FnPtr_t)(JNIEnv*, jclass, jchar, jchar, jchar, jchar, jchar, -                           jchar, jchar, jchar, jchar, jchar); +  using FnPtr_t = jchar(*)(JNIEnv*, jclass, jchar, jchar, jchar, jchar, jchar, jchar, jchar, jchar, +                           jchar, jchar);    FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("charMethod")->fnPtr);    printf("%s called!\n", __FUNCTION__);    return fnPtr(env, klass, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10); diff --git a/test/1900-track-alloc/alloc.cc b/test/1900-track-alloc/alloc.cc index db5617c54c..f2096111da 100644 --- a/test/1900-track-alloc/alloc.cc +++ 
b/test/1900-track-alloc/alloc.cc @@ -24,7 +24,7 @@  namespace art {  namespace Test1900TrackAlloc { -typedef jvmtiError (*GetGlobalState)(jvmtiEnv* env, jlong* allocated); +using GetGlobalState = jvmtiError(*)(jvmtiEnv* env, jlong* allocated);  struct AllocTrackingData {    GetGlobalState get_global_state; diff --git a/test/1940-ddms-ext/ddm_ext.cc b/test/1940-ddms-ext/ddm_ext.cc index cc29df9a49..452187bdcb 100644 --- a/test/1940-ddms-ext/ddm_ext.cc +++ b/test/1940-ddms-ext/ddm_ext.cc @@ -25,7 +25,7 @@  namespace art {  namespace Test1940DdmExt { -typedef jvmtiError (*DdmHandleChunk)(jvmtiEnv* env, +using DdmHandleChunk = jvmtiError(*)(jvmtiEnv* env,                                       jint type_in,                                       jint len_in,                                       const jbyte* data_in, diff --git a/test/1946-list-descriptors/descriptors.cc b/test/1946-list-descriptors/descriptors.cc index 01b306dea5..07fee6141b 100644 --- a/test/1946-list-descriptors/descriptors.cc +++ b/test/1946-list-descriptors/descriptors.cc @@ -24,7 +24,7 @@  namespace art {  namespace Test1946Descriptors { -typedef jvmtiError (*GetDescriptorList)(jvmtiEnv* env, jobject loader, jint* cnt, char*** descs); +using GetDescriptorList = jvmtiError(*)(jvmtiEnv* env, jobject loader, jint* cnt, char*** descs);  struct DescriptorData {    GetDescriptorList get_descriptor_list; diff --git a/test/1951-monitor-enter-no-suspend/raw_monitor.cc b/test/1951-monitor-enter-no-suspend/raw_monitor.cc index 0425e350fd..efd02b6ae4 100644 --- a/test/1951-monitor-enter-no-suspend/raw_monitor.cc +++ b/test/1951-monitor-enter-no-suspend/raw_monitor.cc @@ -26,7 +26,7 @@  namespace art {  namespace Test1951MonitorEnterNoSuspend { -typedef jvmtiError (*RawMonitorEnterNoSuspend)(jvmtiEnv* env, jrawMonitorID mon); +using RawMonitorEnterNoSuspend = jvmtiError(*)(jvmtiEnv* env, jrawMonitorID mon);  template <typename T>  static void Dealloc(T* t) { diff --git 
a/test/684-checker-simd-dotprod/expected.txt b/test/684-checker-simd-dotprod/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/684-checker-simd-dotprod/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/684-checker-simd-dotprod/info.txt b/test/684-checker-simd-dotprod/info.txt new file mode 100644 index 0000000000..6c1efb6296 --- /dev/null +++ b/test/684-checker-simd-dotprod/info.txt @@ -0,0 +1 @@ +Functional tests on dot product idiom SIMD vectorization. diff --git a/test/684-checker-simd-dotprod/src/Main.java b/test/684-checker-simd-dotprod/src/Main.java new file mode 100644 index 0000000000..e0c87161dd --- /dev/null +++ b/test/684-checker-simd-dotprod/src/Main.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import other.TestByte; +import other.TestCharShort; +import other.TestVarious; + +/** + * Tests for dot product idiom vectorization. 
+ */ +public class Main { +  public static void main(String[] args) { +     TestByte.run(); +     TestCharShort.run(); +     TestVarious.run(); +     System.out.println("passed"); +  } +} diff --git a/test/684-checker-simd-dotprod/src/other/TestByte.java b/test/684-checker-simd-dotprod/src/other/TestByte.java new file mode 100644 index 0000000000..9acfc59cc7 --- /dev/null +++ b/test/684-checker-simd-dotprod/src/other/TestByte.java @@ -0,0 +1,484 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package other; + +/** + * Tests for dot product idiom vectorization: byte case. 
+ */ +public class TestByte { + +  public static final int ARRAY_SIZE = 1024; + +  /// CHECK-START: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Get2>>]                               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none 
+  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdSimple(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = a[i] * b[i]; +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddC1:i\d+>>   Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC1:b\d+>>  TypeConversion [<<AddC1>>]                            loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddC2:i\d+>>   Add 
[<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC2:b\d+>>  TypeConversion [<<AddC2>>]                            loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd 
[<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdComplex(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((byte)(a[i] + 1)) * ((byte)(b[i] + 1)); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Get2>>]                               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                   
                      loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint8  loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdSimpleUnsigned(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = (a[i] & 0xff) * (b[i] & 0xff); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi 
[<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC1:a\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC2:a\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]     
                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8  loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdComplexUnsigned(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = (((a[i] & 0xff) + 1) & 0xff) * (((b[i] & 0xff) + 1) & 0xff); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddC:i\d+>>  
  Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC1:b\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC2:b\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd 
[<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdComplexUnsignedCastedToSigned(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((byte)((a[i] & 0xff) + 1)) * ((byte)((b[i] & 0xff) + 1)); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC1:a\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none 
+  /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC2:a\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// 
CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8  loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdComplexSignedCastedToUnsigned(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((a[i] + 1) & 0xff) * ((b[i] + 1) & 0xff); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSignedWidening(byte[], byte[]) loop_optimization (after) +  /// CHECK-DAG:                  VecDotProd type:Int8 +  public static final int testDotProdSignedWidening(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((short)(a[i])) * ((short)(b[i])); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START-{ARM64}: int other.TestByte.testDotProdParamSigned(int, byte[]) loop_optimization (after) +  /// CHECK-DAG:                  VecDotProd type:Int8 +  public static final int testDotProdParamSigned(int x, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = (byte)(x) * b[i]; +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START-{ARM64}: int other.TestByte.testDotProdParamUnsigned(int, byte[]) loop_optimization (after) +  /// CHECK-DAG:                  VecDotProd type:Uint8 +  public static final int testDotProdParamUnsigned(int x, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = (x & 0xff) * (b[i] & 0xff); +      s += temp; +    } +    return s - 
1; +  } + +  // No DOTPROD cases. + +  /// CHECK-START: int other.TestByte.testDotProdIntParam(int, byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdIntParam(int x, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = b[i] * (x); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdSignedToChar(byte[], byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSignedToChar(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((char)(a[i])) * ((char)(b[i])); +      s += temp; +    } +    return s - 1; +  } + +  // Cases when result of Mul is type-converted are not supported. + +  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToSignedByte(byte[], byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleCastedToSignedByte(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      byte temp = (byte)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToUnsignedByte(byte[], byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleCastedToUnsignedByte(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      s += (a[i] * b[i]) & 0xff; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToSignedByte(byte[], byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleUnsignedCastedToSignedByte(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      byte temp = (byte)((a[i] & 0xff) * (b[i] & 0xff)); +      
s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToUnsignedByte(byte[], byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleUnsignedCastedToUnsignedByte(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      s += ((a[i] & 0xff) * (b[i] & 0xff)) & 0xff; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToShort(byte[], byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleCastedToShort(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      short temp = (short)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToChar(byte[], byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleCastedToChar(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      char temp = (char)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToShort(byte[], byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleUnsignedCastedToShort(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      short temp = (short)((a[i] & 0xff) * (b[i] & 0xff)); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToChar(byte[], byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleUnsignedCastedToChar(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      char temp = (char)((a[i] & 
0xff) * (b[i] & 0xff)); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToLong(byte[], byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleUnsignedCastedToLong(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      long temp = (long)((a[i] & 0xff) * (b[i] & 0xff)); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestByte.testDotProdUnsignedSigned(byte[], byte[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdUnsignedSigned(byte[] a, byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = (a[i] & 0xff) * b[i]; +      s += temp; +    } +    return s - 1; +  } + +  private static void expectEquals(int expected, int result) { +    if (expected != result) { +      throw new Error("Expected: " + expected + ", found: " + result); +    } +  } + +  private static void testDotProd(byte[] b1, byte[] b2, int[] results) { +    expectEquals(results[0], testDotProdSimple(b1, b2)); +    expectEquals(results[1], testDotProdComplex(b1, b2)); +    expectEquals(results[2], testDotProdSimpleUnsigned(b1, b2)); +    expectEquals(results[3], testDotProdComplexUnsigned(b1, b2)); +    expectEquals(results[4], testDotProdComplexUnsignedCastedToSigned(b1, b2)); +    expectEquals(results[5], testDotProdComplexSignedCastedToUnsigned(b1, b2)); +    expectEquals(results[6], testDotProdSignedWidening(b1, b2)); +    expectEquals(results[7], testDotProdParamSigned(-128, b2)); +    expectEquals(results[8], testDotProdParamUnsigned(-128, b2)); +    expectEquals(results[9], testDotProdIntParam(-128, b2)); +    expectEquals(results[10], testDotProdSignedToChar(b1, b2)); +    expectEquals(results[11], testDotProdSimpleCastedToSignedByte(b1, b2)); +    expectEquals(results[12], 
testDotProdSimpleCastedToUnsignedByte(b1, b2)); +    expectEquals(results[13], testDotProdSimpleUnsignedCastedToSignedByte(b1, b2)); +    expectEquals(results[14], testDotProdSimpleUnsignedCastedToUnsignedByte(b1, b2)); +    expectEquals(results[15], testDotProdSimpleCastedToShort(b1, b2)); +    expectEquals(results[16], testDotProdSimpleCastedToChar(b1, b2)); +    expectEquals(results[17], testDotProdSimpleUnsignedCastedToShort(b1, b2)); +    expectEquals(results[18], testDotProdSimpleUnsignedCastedToChar(b1, b2)); +    expectEquals(results[19], testDotProdSimpleUnsignedCastedToLong(b1, b2)); +    expectEquals(results[20], testDotProdUnsignedSigned(b1, b2)); +  } + +  public static void run() { +    byte[] b1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; +    byte[] b2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; +    int[] results_1 = { 64516, 65548, 64516, 65548, 65548, 65548, 64516, -65024, 65024, -65024, +                        64516, 4, 4, 4, 4, 64516, 64516, 64516, 64516, 64516, 64516 }; +    testDotProd(b1_1, b2_1, results_1); + +    byte[] b1_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; +    byte[] b2_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; +    int[] results_2 = { 80645, 81931, 80645, 81931, 81931, 81931, 80645, -81280, 81280, -81280, +                        80645, 5, 5, 5, 5, 80645, 80645, 80645, 80645, 80645, 80645 }; +    testDotProd(b1_2, b2_2, results_2); + +    byte[] b1_3 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; +    byte[] b2_3 = {  127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  127,  127,  127,  127 }; +    int[] results_3 = { -81280, 81291, 81280, 82571, 81291, 82571, -81280, -81280, 81280, -81280, +                        41534080, -640, 640, -640, 640, -81280, 246400, 81280, 81280, 81280, 81280 }; +    testDotProd(b1_3, b2_3, results_3); + +    byte[] b1_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; +    byte[] 
b2_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; +    int[] results_4 = { 81920, 80656, 81920, 83216, 80656, 83216, 81920, 81920, 81920, 81920, +                       -83804160, 0, 0, 0, 0, 81920, 81920, 81920, 81920, 81920, -81920 }; +    testDotProd(b1_4, b2_4, results_4); +  } + +  public static void main(String[] args) { +    run(); +  } +} diff --git a/test/684-checker-simd-dotprod/src/other/TestCharShort.java b/test/684-checker-simd-dotprod/src/other/TestCharShort.java new file mode 100644 index 0000000000..9cb9db59b3 --- /dev/null +++ b/test/684-checker-simd-dotprod/src/other/TestCharShort.java @@ -0,0 +1,552 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package other; + +/** + * Tests for dot product idiom vectorization: char and short case. 
+ */ +public class TestCharShort { + +  public static final int ARRAY_SIZE = 1024; + +  /// CHECK-START: int other.TestCharShort.testDotProdSimple(short[], short[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Get2>>]                               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSimple(short[], short[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const8:i\d+>>  IntConstant 8                                         loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>    
  outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int16  loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const8>>]                             loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdSimple(short[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = a[i] * b[i]; +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdComplex(short[], short[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddC1:i\d+>>   Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC1:s\d+>>  TypeConversion [<<AddC1>>]                            loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: 
<<AddC2:i\d+>>   Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC2:s\d+>>  TypeConversion [<<AddC2>>]                            loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplex(short[], short[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const8:i\d+>>  IntConstant 8                                         loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  
VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int16  loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const8>>]                             loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdComplex(short[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((short)(a[i] + 1)) * ((short)(b[i] + 1)); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsigned(char[], char[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:c\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:c\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Get2>>]                               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSimpleUnsigned(char[], char[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  
IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const8:i\d+>>  IntConstant 8                                         loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint16 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const8>>]                             loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdSimpleUnsigned(char[] a, char[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = a[i] * b[i]; +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdComplexUnsigned(char[], char[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi 
[<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:c\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC1:c\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:c\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC2:c\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplexUnsigned(char[], char[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const8:i\d+>>  IntConstant 8                                         loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi 
[<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint16 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const8>>]                             loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdComplexUnsigned(char[] a, char[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((char)(a[i] + 1)) * ((char)(b[i] + 1)); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdComplexUnsignedCastedToSigned(char[], char[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:c\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: 
<<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC1:s\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:c\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC2:s\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplexUnsignedCastedToSigned(char[], char[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const8:i\d+>>  IntConstant 8                                         loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: 
<<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int16  loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const8>>]                             loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdComplexUnsignedCastedToSigned(char[] a, char[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((short)(a[i] + 1)) * ((short)(b[i] + 1)); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdComplexSignedCastedToUnsigned(short[], short[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC1:c\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      
outer_loop:none +  /// CHECK-DAG: <<Get2:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC2:c\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplexSignedCastedToUnsigned(short[], short[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const8:i\d+>>  IntConstant 8                                         loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      
outer_loop:none +  /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint16 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const8>>]                             loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdComplexSignedCastedToUnsigned(short[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((char)(a[i] + 1)) * ((char)(b[i] + 1)); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSignedToInt(short[], short[]) loop_optimization (after) +  /// CHECK-DAG:                  VecDotProd type:Int16 +  public static final int testDotProdSignedToInt(short[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((int)(a[i])) * ((int)(b[i])); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdParamSigned(int, short[]) loop_optimization (after) +  /// CHECK-DAG:                  VecDotProd type:Int16 +  public static final int testDotProdParamSigned(int x, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = (short)(x) * b[i]; +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdParamUnsigned(int, char[]) loop_optimization (after) +  /// CHECK-DAG:                  VecDotProd type:Uint16 +  public static final int testDotProdParamUnsigned(int x, char[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = (char)(x) * b[i]; +      s += 
temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdIntParam(int, short[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdIntParam(int x, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = b[i] * (x); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSignedToChar(short[], short[]) loop_optimization (after) +  /// CHECK-DAG:                  VecDotProd type:Uint16 +  public static final int testDotProdSignedToChar(short[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((char)(a[i])) * ((char)(b[i])); +      s += temp; +    } +    return s - 1; +  } + +  // Cases when result of Mul is type-converted are not supported. + +  /// CHECK-START: int other.TestCharShort.testDotProdSimpleMulCastedToSigned(short[], short[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleMulCastedToSigned(short[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      short temp = (short)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + + +  /// CHECK-START: int other.TestCharShort.testDotProdSimpleMulCastedToUnsigned(short[], short[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleMulCastedToUnsigned(short[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      char temp = (char)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedMulCastedToSigned(char[], char[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleUnsignedMulCastedToSigned(char[] a, char[] b) { +    int s = 1; +    for 
(int i = 0; i < b.length; i++) { +      short temp = (short)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedMulCastedToUnsigned(char[], char[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleUnsignedMulCastedToUnsigned(char[] a, char[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      char temp = (char)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdSimpleCastedToShort(short[], short[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleCastedToShort(short[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      short temp = (short)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdSimpleCastedToChar(short[], short[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleCastedToChar(short[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      char temp = (char)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastedToShort(char[], char[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleUnsignedCastedToShort(char[] a, char[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      short temp = (short)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastedToChar(char[], char[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleUnsignedCastedToChar(char[] a, char[] 
b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      char temp = (char)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastedToLong(char[], char[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSimpleUnsignedCastedToLong(char[] a, char[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      long temp = (long)(a[i] * b[i]); +      s += temp; +    } +    return s - 1; +  } + +  // Narrowing conversions. + +  /// CHECK-START: int other.TestCharShort.testDotProdSignedNarrowerSigned(short[], short[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSignedNarrowerSigned(short[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((byte)(a[i])) * ((byte)(b[i])); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdSignedNarrowerUnsigned(short[], short[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdSignedNarrowerUnsigned(short[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = (a[i] & 0xff) * (b[i] & 0xff); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdUnsignedNarrowerSigned(char[], char[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdUnsignedNarrowerSigned(char[] a, char[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((byte)(a[i])) * ((byte)(b[i])); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdUnsignedNarrowerUnsigned(char[], char[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public 
static final int testDotProdUnsignedNarrowerUnsigned(char[] a, char[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = (a[i] & 0xff) * (b[i] & 0xff); +      s += temp; +    } +    return s - 1; +  } + +  /// CHECK-START: int other.TestCharShort.testDotProdUnsignedSigned(char[], short[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdUnsignedSigned(char[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = a[i] * b[i]; +      s += temp; +    } +    return s - 1; +  } + +  private static void expectEquals(int expected, int result) { +    if (expected != result) { +      throw new Error("Expected: " + expected + ", found: " + result); +    } +  } + +  private static void testDotProd(short[] s1, short[] s2, char[] c1, char[] c2, int[] results) { +    expectEquals(results[0], testDotProdSimple(s1, s2)); +    expectEquals(results[1], testDotProdComplex(s1, s2)); +    expectEquals(results[2], testDotProdSimpleUnsigned(c1, c2)); +    expectEquals(results[3], testDotProdComplexUnsigned(c1, c2)); +    expectEquals(results[4], testDotProdComplexUnsignedCastedToSigned(c1, c2)); +    expectEquals(results[5], testDotProdComplexSignedCastedToUnsigned(s1, s2)); +    expectEquals(results[6], testDotProdSignedToInt(s1, s2)); +    expectEquals(results[7], testDotProdParamSigned(-32768, s2)); +    expectEquals(results[8], testDotProdParamUnsigned(-32768, c2)); +    expectEquals(results[9], testDotProdIntParam(-32768, s2)); +    expectEquals(results[10], testDotProdSignedToChar(s1, s2)); +    expectEquals(results[11], testDotProdSimpleMulCastedToSigned(s1, s2)); +    expectEquals(results[12], testDotProdSimpleMulCastedToUnsigned(s1, s2)); +    expectEquals(results[13], testDotProdSimpleUnsignedMulCastedToSigned(c1, c2)); +    expectEquals(results[14], testDotProdSimpleUnsignedMulCastedToUnsigned(c1, c2)); +    expectEquals(results[15], 
testDotProdSimpleCastedToShort(s1, s2)); +    expectEquals(results[16], testDotProdSimpleCastedToChar(s1, s2)); +    expectEquals(results[17], testDotProdSimpleUnsignedCastedToShort(c1, c2)); +    expectEquals(results[18], testDotProdSimpleUnsignedCastedToChar(c1, c2)); +    expectEquals(results[19], testDotProdSimpleUnsignedCastedToLong(c1, c2)); +    expectEquals(results[20], testDotProdSignedNarrowerSigned(s1, s2)); +    expectEquals(results[21], testDotProdSignedNarrowerUnsigned(s1, s2)); +    expectEquals(results[22], testDotProdUnsignedNarrowerSigned(c1, c2)); +    expectEquals(results[23], testDotProdUnsignedNarrowerUnsigned(c1, c2)); +    expectEquals(results[24], testDotProdUnsignedSigned(c1, s2)); +  } + +  public static void run() { /* Feeds four input data sets to testDotProd, each with its own array of 25 expected results. */ +    final short MAX_S = Short.MAX_VALUE; +    final short MIN_S = Short.MAX_VALUE; /* NOTE(review): MIN_S is initialized from Short.MAX_VALUE, so it equals MAX_S and no negative input is exercised; presumably Short.MIN_VALUE was intended - confirm. Changing it means regenerating results_3 and results_4, and the char[] initializers using MIN_S would likely need an explicit (char) cast. */ + +    short[] s1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; +    short[] s2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; +    char[]  c1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; +    char[]  c2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; +    int[] results_1 = { 2147352578, -2147483634, 2147352578, -2147483634, -2147483634, -2147483634, +                        2147352578, -2147418112, 2147418112, -2147418112, 2147352578, +                        2, 2, 2, 2, 2, 2, 2, 2, 2147352578, 2, 130050, 2, 130050, 2147352578 }; +    testDotProd(s1_1, s2_1, c1_1, c2_1, results_1); + +    short[] s1_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; +    short[] s2_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; +    char[]  c1_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; +    char[]  c2_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; +    int[] results_2 = { -262140, 12, -262140, 12, 12, 12, -262140, 131072, -131072, 131072, +                        -262140, 4, 4, 4, 4, 4, 4, 4, 
4, -262140, 4, 260100, 4, 260100, -262140 }; +    testDotProd(s1_2, s2_2, c1_2, c2_2, results_2); + +    short[] s1_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; +    short[] s2_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; +    char[]  c1_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; +    char[]  c2_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; +    int[] results_3 = { 2147352578, -2147483634, 2147352578, -2147483634, -2147483634, +                        -2147483634, 2147352578, -2147418112, 2147418112, -2147418112, +                        2147352578, 2, 2, 2, 2, 2, 2, 2, 2, 2147352578, 2, 130050, 2, +                        130050, 2147352578}; /* NOTE(review): same 25 values as results_1, because MIN_S currently equals MAX_S. */ +    testDotProd(s1_3, s2_3, c1_3, c2_3, results_3); + + +    short[] s1_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; +    short[] s2_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; +    char[]  c1_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; +    char[]  c2_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; +    int[] results_4 = { -1073938429, -1073741811, -1073938429, -1073741811, -1073741811, +                        -1073741811, -1073938429, 1073840128, -1073840128, 1073840128, +                        -1073938429, 3, 3, 3, 3, 3, 3, 3, 3, -1073938429, 3, 195075, 3, +                        195075, -1073938429 }; +    testDotProd(s1_4, s2_4, c1_4, c2_4, results_4); +  } + +  public static void main(String[] args) { +    run(); +  } +} diff --git a/test/684-checker-simd-dotprod/src/other/TestVarious.java b/test/684-checker-simd-dotprod/src/other/TestVarious.java new file mode 100644 index 0000000000..3f460982f2 --- /dev/null +++ b/test/684-checker-simd-dotprod/src/other/TestVarious.java @@ -0,0 +1,422 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package other; + +/** + * Tests for dot product idiom vectorization. + */ +public class TestVarious { + +  /// CHECK-START: int other.TestVarious.testDotProdConstRight(byte[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89                                        loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Const89>>]                            loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdConstRight(byte[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                        
                 loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89                                        loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const89>>]                      loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Repl>>] type:Int8    loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdConstRight(byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp =  b[i] * 89; +      s += temp; +    } +    return s; +  } + +  /// CHECK-START: int other.TestVarious.testDotProdConstLeft(byte[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89                                        loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    
Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Const89>>]                            loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdConstLeft(byte[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89                                        loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const89>>]                      loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Repl>>] type:Uint8   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                 
 loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdConstLeft(byte[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = 89 * (b[i] & 0xff); +      s += temp; +    } +    return s; +  } + +  /// CHECK-START: int other.TestVarious.testDotProdLoopInvariantConvRight(byte[], int) loop_optimization (before) +  /// CHECK-DAG: <<Param:i\d+>>   ParameterValue                                        loop:none +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<ConstL:i\d+>>  IntConstant 129                                       loop:none +  /// CHECK-DAG: <<AddP:i\d+>>    Add [<<Param>>,<<ConstL>>]                            loop:none +  /// CHECK-DAG: <<TypeCnv:b\d+>> TypeConversion [<<AddP>>]                             loop:none +  // +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<TypeCnv>>]                            loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdLoopInvariantConvRight(byte[], int) loop_optimization (after) +  /// CHECK-DAG: <<Param:i\d+>>   ParameterValue                                        loop:none +  
/// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<ConstL:i\d+>>  IntConstant 129                                       loop:none +  /// CHECK-DAG: <<AddP:i\d+>>    Add [<<Param>>,<<ConstL>>]                            loop:none +  /// CHECK-DAG: <<TypeCnv:b\d+>> TypeConversion [<<AddP>>]                             loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<TypeCnv>>]                      loop:none +  // +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Repl>>] type:Int8    loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none +  public static final int testDotProdLoopInvariantConvRight(byte[] b, int param) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = b[i] * ((byte)(param + 129)); +      s += temp; +    } +    return s; +  } + +  /// CHECK-START: int other.TestVarious.testDotProdByteToChar(char[], char[]) loop_optimization (after) +  /// CHECK-NOT:                  
VecDotProd +  public static final int testDotProdByteToChar(char[] a, char[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = ((char)((byte)(a[i] + 129))) * b[i]; +      s += temp; +    } +    return s; +  } + +  /// CHECK-START: int other.TestVarious.testDotProdMixedSize(byte[], short[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdMixedSize(byte[] a, short[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = a[i] * b[i]; +      s += temp; +    } +    return s; +  } + +  /// CHECK-START: int other.TestVarious.testDotProdMixedSizeAndSign(byte[], char[]) loop_optimization (after) +  /// CHECK-NOT:                  VecDotProd +  public static final int testDotProdMixedSizeAndSign(byte[] a, char[] b) { +    int s = 1; +    for (int i = 0; i < b.length; i++) { +      int temp = a[i] * b[i]; +      s += temp; +    } +    return s; +  } + +  /// CHECK-START: int other.TestVarious.testDotProdInt32(int[], int[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                             loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                             loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:i\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:i\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Get2>>]                   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                    loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                 
loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdInt32(int[], int[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                             loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                             loop:none +  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                    loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul:d\d+>>     VecMul [<<Load1>>,<<Load2>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecAdd [<<Phi2>>,<<Mul>>]                 loop:<<Loop>>      outer_loop:none +  // +  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                      loop:none +  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]             loop:none +  public static final int testDotProdInt32(int[] a, int[] b) { +    int s = 1; +    for (int i = 0;  i < b.length; i++) { +      int temp = a[i] * b[i]; +      s += temp; +    } +    return s; +  } + +  /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsigned1(byte[], byte[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                             loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                             loop:none +  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                             loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi 
[<<Const1>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Phi3:i\d+>>    Phi [<<Const2>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul1:i\d+>>    Mul [<<Get1>>,<<Get2>>]                   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul1>>]                   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC1:a\d+>>  TypeConversion [<<Get1>>]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC2:a\d+>>  TypeConversion [<<Get2>>]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul2:i\d+>>    Mul [<<TypeC1>>,<<TypeC2>>]               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi3>>,<<Mul2>>]                   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsigned1(byte[], byte[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                                         loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<Set1:d\d+>>    VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Set2:d\d+>>    VecSetScalars [<<Const2>>]                            loop:none +  // +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                      
       loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set1>>,{{d\d+}}]                               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Phi3:d\d+>>    Phi [<<Set2>>,{{d\d+}}]                               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi3>>,<<Load1>>,<<Load2>>] type:Uint8  loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  public static final int testDotProdBothSignedUnsigned1(byte[] a, byte[] b) { +    int s1 = 1; +    int s2 = 2; +    for (int i = 0; i < b.length; i++) { +      byte a_val = a[i]; +      byte b_val = b[i]; +      s1 += a_val * b_val; +      s2 += (a_val & 0xff) * (b_val & 0xff); +    } +    return s1 + s2; +  } + +  /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsigned2(byte[], byte[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                             loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                             loop:none +  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                             loop:none +  /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42                            loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Phi3:i\d+>>    Phi [<<Const2>>,{{i\d+}}]                 
loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeC1:a\d+>>  TypeConversion [<<Get1>>]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul1:i\d+>>    Mul [<<Get2>>,<<TypeC1>>]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi3>>,<<Mul1>>]                   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul2:i\d+>>    Mul [<<Get1>>,<<Const42>>]                loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul2>>]                   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsigned2(byte[], byte[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                                         loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42                                        loop:none +  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const42>>]                      loop:none +  /// CHECK-DAG: <<Set1:d\d+>>    VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Set2:d\d+>>    VecSetScalars [<<Const2>>]                            loop:none +  // +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: 
<<Phi2:d\d+>>    Phi [<<Set1>>,{{d\d+}}]                               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Phi3:d\d+>>    Phi [<<Set2>>,{{d\d+}}]                               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi3>>,<<Load2>>,<<Load1>>] type:Uint8  loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Repl>>] type:Int8    loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  public static final int testDotProdBothSignedUnsigned2(byte[] a, byte[] b) { +    int s1 = 1; +    int s2 = 2; +    for (int i = 0; i < b.length; i++) { +      byte a_val = a[i]; +      byte b_val = b[i]; +      s2 += (a_val & 0xff) * (b_val & 0xff); +      s1 += a_val * 42; +    } +    return s1 + s2; +  } + +  /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsignedDoubleLoad(byte[], byte[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                             loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                             loop:none +  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                             loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Phi3:i\d+>>    Phi [<<Const2>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<GetB1:b\d+>>   ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none + 
 /// CHECK-DAG: <<GetB2:b\d+>>   ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul1:i\d+>>    Mul [<<GetB1>>,<<GetB2>>]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul1>>]                   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<GetA1:a\d+>>   ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<GetA2:a\d+>>   ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul2:i\d+>>    Mul [<<GetA1>>,<<GetA2>>]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi3>>,<<Mul2>>]                   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsignedDoubleLoad(byte[], byte[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                                         loop:none +  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none +  /// CHECK-DAG: <<Set1:d\d+>>    VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Set2:d\d+>>    VecSetScalars [<<Const2>>]                            loop:none +  // +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set1>>,{{d\d+}}]                               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Phi3:d\d+>>    Phi [<<Set2>>,{{d\d+}}]                               loop:<<Loop>>      outer_loop:none 
+  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load3:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load4:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi3>>,<<Load3>>,<<Load4>>] type:Uint8  loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none +  public static final int testDotProdBothSignedUnsignedDoubleLoad(byte[] a, byte[] b) { +    int s1 = 1; +    int s2 = 2; +    for (int i = 0; i < b.length; i++) { +      s1 += a[i] * b[i]; +      s2 += (a[i] & 0xff) * (b[i] & 0xff); +    } +    return s1 + s2; +  } + +  /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsignedChar(char[], char[]) loop_optimization (before) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                             loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                             loop:none +  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                             loop:none +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Phi3:i\d+>>    Phi [<<Const2>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get1:c\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Get2:c\d+>>   
 ArrayGet [{{l\d+}},<<Phi1>>]              loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeS1:s\d+>>  TypeConversion [<<Get1>>]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<TypeS2:s\d+>>  TypeConversion [<<Get2>>]                 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul1:i\d+>>    Mul [<<TypeS1>>,<<TypeS2>>]               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi3>>,<<Mul1>>]                   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Mul2:i\d+>>    Mul [<<Get1>>,<<Get2>>]                   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul2>>]                   loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none + +  /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsignedChar(char[], char[]) loop_optimization (after) +  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none +  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none +  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                                         loop:none +  /// CHECK-DAG: <<Const8:i\d+>>  IntConstant 8                                         loop:none +  /// CHECK-DAG: <<Set1:d\d+>>    VecSetScalars [<<Const1>>]                            loop:none +  /// CHECK-DAG: <<Set2:d\d+>>    VecSetScalars [<<Const2>>]                            loop:none +  // +  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none +  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set1>>,{{d\d+}}]                               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Phi3:d\d+>>    Phi [<<Set2>>,{{d\d+}}]                               loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load1:d\d+>>   
VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi3>>,<<Load1>>,<<Load2>>] type:Int16  loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint16 loop:<<Loop>>      outer_loop:none +  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const8>>]                             loop:<<Loop>>      outer_loop:none +  public static final int testDotProdBothSignedUnsignedChar(char[] a, char[] b) { +    int s1 = 1; +    int s2 = 2; +    for (int i = 0; i < b.length; i++) { +      char a_val = a[i]; +      char b_val = b[i]; +      s2 += ((short)a_val) * ((short)b_val); +      s1 += a_val * b_val; +    } +    return s1 + s2; +  } + +  private static void expectEquals(int expected, int result) { +    if (expected != result) { +      throw new Error("Expected: " + expected + ", found: " + result); +    } +  } + +  public static void run() { +    final short MAX_S = Short.MAX_VALUE; +    final short MIN_S = Short.MAX_VALUE; + +    byte[] b1 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; +    byte[] b2 = {  127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  127,  127,  127,  127 }; + +    char[] c1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; +    char[] c2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + +    int[] i1 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; +    int[] i2 = {  127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  127,  127,  127,  127 }; + +    short[] s1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + +    expectEquals(56516, testDotProdConstRight(b2)); +    expectEquals(56516, testDotProdConstLeft(b2)); +    expectEquals(1271, testDotProdLoopInvariantConvRight(b2, 129)); +    expectEquals(-8519423, 
testDotProdByteToChar(c1, c2)); +    expectEquals(-8388351, testDotProdMixedSize(b1, s1)); +    expectEquals(-8388351, testDotProdMixedSizeAndSign(b1, c2)); +    expectEquals(-81279, testDotProdInt32(i1, i2)); +    expectEquals(3, testDotProdBothSignedUnsigned1(b1, b2)); +    expectEquals(54403, testDotProdBothSignedUnsigned2(b1, b2)); +    expectEquals(3, testDotProdBothSignedUnsignedDoubleLoad(b1, b2)); +    expectEquals(-262137, testDotProdBothSignedUnsignedChar(c1, c2)); +  } + +  public static void main(String[] args) { +    run(); +  } +} diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt index 4c9fd96e79..a5fa332050 100644 --- a/tools/libcore_failures.txt +++ b/tools/libcore_failures.txt @@ -226,5 +226,12 @@            "libcore.libcore.io.FdsanTest#testParcelFileDescriptor",            "libcore.libcore.io.FdsanTest#testDatagramSocket",            "libcore.libcore.io.FdsanTest#testSocket"] +}, +{ +  description: "Timeout on heap-poisoning target builds", +  result: EXEC_FAILED, +  modes: [device], +  bug: 116446372, +  names: ["libcore.libcore.io.FdsanTest#testSocket"]  }  ] diff --git a/tools/setup-buildbot-device.sh b/tools/setup-buildbot-device.sh index 04e80df50d..ef958d6b1a 100755 --- a/tools/setup-buildbot-device.sh +++ b/tools/setup-buildbot-device.sh @@ -43,7 +43,7 @@ seconds_per_hour=3600  # Kill logd first, so that when we set the adb buffer size later in this file,  # it is brought up again.  echo -e "${green}Killing logd, seen leaking on fugu/N${nc}" -adb shell killall -9 /system/bin/logd +adb shell pkill -9 -U logd logd && echo -e "${green}...logd killed${nc}"  # Update date on device if the difference with host is more than one hour.  if [ $abs_time_difference_in_seconds -gt $seconds_per_hour ]; then  |