100 files changed, 2424 insertions, 338 deletions
diff --git a/build/Android.bp b/build/Android.bp index 5f64c2d9f7..3eb4aaff79 100644 --- a/build/Android.bp +++ b/build/Android.bp @@ -23,6 +23,7 @@ art_clang_tidy_errors = [ "bugprone-virtual-near-miss", "modernize-use-bool-literals", "modernize-use-nullptr", + "modernize-use-using", "performance-faster-string-find", "performance-for-range-copy", "performance-implicit-conversion-in-loop", @@ -37,6 +38,7 @@ art_clang_tidy_errors_str = "bugprone-lambda-function-name" + ",modernize-redundant-void-arg" + ",modernize-use-bool-literals" + ",modernize-use-nullptr" + + ",modernize-use-using" + ",performance-faster-string-find" + ",performance-for-range-copy" + ",performance-implicit-conversion-in-loop" diff --git a/build/art.go b/build/art.go index 6c084867a7..f3cd3cacad 100644 --- a/build/art.go +++ b/build/art.go @@ -66,7 +66,7 @@ func globalFlags(ctx android.BaseContext) ([]string, []string) { "-DART_READ_BARRIER_TYPE_IS_"+barrierType+"=1") } - if envTrue(ctx, "ART_USE_GENERATIONAL_CC") { + if !envFalse(ctx, "ART_USE_GENERATIONAL_CC") { cflags = append(cflags, "-DART_USE_GENERATIONAL_CC=1") } diff --git a/compiler/dex/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc index fe8b766d0f..183173b298 100644 --- a/compiler/dex/inline_method_analyser.cc +++ b/compiler/dex/inline_method_analyser.cc @@ -41,7 +41,7 @@ namespace { // anonymous namespace class Matcher { public: // Match function type. - typedef bool MatchFn(Matcher* matcher); + using MatchFn = bool(Matcher*); template <size_t size> static bool Match(const CodeItemDataAccessor* code_item, MatchFn* const (&pattern)[size]); diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 43169ba7eb..e79a96bc2a 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -1277,6 +1277,74 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins } } +void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DCHECK(instruction->GetPackedType() == DataType::Type::kInt32); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + + // For Int8 and Uint8 we need a temp register. 
+ if (DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) { + locations->AddTemp(Location::RequiresFpuRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + VRegister acc = VRegisterFrom(locations->InAt(0)); + VRegister left = VRegisterFrom(locations->InAt(1)); + VRegister right = VRegisterFrom(locations->InAt(2)); + HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32); + DCHECK_EQ(4u, instruction->GetVectorLength()); + + size_t inputs_data_size = DataType::Size(a->GetPackedType()); + switch (inputs_data_size) { + case 1u: { + DCHECK_EQ(16u, a->GetVectorLength()); + VRegister tmp = VRegisterFrom(locations->GetTemp(0)); + if (instruction->IsZeroExtending()) { + // TODO: Use Armv8.4-A UDOT instruction when it is available. + __ Umull(tmp.V8H(), left.V8B(), right.V8B()); + __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + + __ Umull2(tmp.V8H(), left.V16B(), right.V16B()); + __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + } else { + // TODO: Use Armv8.4-A SDOT instruction when it is available. + __ Smull(tmp.V8H(), left.V8B(), right.V8B()); + __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + + __ Smull2(tmp.V8H(), left.V16B(), right.V16B()); + __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + } + break; + } + case 2u: + DCHECK_EQ(8u, a->GetVectorLength()); + if (instruction->IsZeroExtending()) { + __ Umlal(acc.V4S(), left.V4H(), right.V4H()); + __ Umlal2(acc.V4S(), left.V8H(), right.V8H()); + } else { + __ Smlal(acc.V4S(), left.V4H(), right.V4H()); + __ Smlal2(acc.V4S(), left.V8H(), right.V8H()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size; + } +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 7b66b17983..62b6c4ea01 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -854,6 +854,14 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* i } } +void LocationsBuilderARMVIXL::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Return whether the vector memory access operation is guaranteed to be word-aligned (ARM word // size equals to 4). 
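A brief hedged sketch, not part of the commit: a scalar model of what the ARM64 VisitVecDotProd sequence above (Smull/Smull2 followed by Saddw/Saddw2, or the unsigned Umull/Uaddw forms) accumulates for one 16-byte vector of signed 8-bit operands, as I read the emitted instructions. The function name is invented for illustration.

#include <cstddef>
#include <cstdint>

// Each 32-bit accumulator lane k receives the products at byte positions
// k, k+4, k+8 and k+12, i.e. the "non-overlapping sums" that HVecDotProd permits.
void DotProdAccumulateInt8(int32_t acc[4], const int8_t left[16], const int8_t right[16]) {
  for (size_t i = 0; i < 16; ++i) {
    int16_t product = static_cast<int16_t>(left[i]) * static_cast<int16_t>(right[i]);  // widening multiply (Smull/Smull2)
    acc[i % 4] += product;                                                             // widening accumulate (Saddw/Saddw2)
  }
}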
static bool IsWordAligned(HVecMemoryOperation* instruction) { diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index df0e1485d6..24f4fb2d7b 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -1274,6 +1274,14 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst } } +void LocationsBuilderMIPS::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index de354b63a1..972c49ebb1 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -1272,6 +1272,14 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in } } +void LocationsBuilderMIPS64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 2502275b3a..c52ecc77c5 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -1143,6 +1143,14 @@ void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instr LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 4a67dafd8a..87d0106c3e 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -1116,6 +1116,14 @@ void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* in LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. 
static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 6c77232361..39cbe5e850 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -8301,7 +8301,7 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; uintptr_t address = reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); - typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; + using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = dchecked_integral_cast<uint32_t>(address); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 39d97899ae..e458dfffb4 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -7542,7 +7542,7 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; uintptr_t address = reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); - typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; + using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = dchecked_integral_cast<uint32_t>(address); } diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index 5ac6e46003..3cbcc9e0c3 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -231,6 +231,21 @@ class DataType { } } + static Type ToUnsigned(Type type) { + switch (type) { + case Type::kInt8: + return Type::kUint8; + case Type::kInt16: + return Type::kUint16; + case Type::kInt32: + return Type::kUint32; + case Type::kInt64: + return Type::kUint64; + default: + return type; + } + } + static const char* PrettyDescriptor(Type type); private: diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 31db8c205f..a1af2be9de 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -106,8 +106,7 @@ std::ostream& operator<<(std::ostream& os, const StringList& list) { } } -typedef Disassembler* create_disasm_prototype(InstructionSet instruction_set, - DisassemblerOptions* options); +using create_disasm_prototype = Disassembler*(InstructionSet, DisassemblerOptions*); class HGraphVisualizerDisassembler { public: HGraphVisualizerDisassembler(InstructionSet instruction_set, @@ -564,6 +563,14 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << instruction->GetOpKind(); } + void VisitVecDotProd(HVecDotProd* instruction) override { + VisitVecOperation(instruction); + DataType::Type arg_type = instruction->InputAt(1)->AsVecOperation()->GetPackedType(); + StartAttributeStream("type") << (instruction->IsZeroExtending() ? 
+ DataType::ToUnsigned(arg_type) : + DataType::ToSigned(arg_type)); + } + #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) override { StartAttributeStream("kind") << instruction->GetOpKind(); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 7d66155b39..12b180d5ff 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -351,7 +351,10 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { // Translates vector operation to reduction kind. static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) { - if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) { + if (reduction->IsVecAdd() || + reduction->IsVecSub() || + reduction->IsVecSADAccumulate() || + reduction->IsVecDotProd()) { return HVecReduce::kSum; } LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId(); @@ -431,6 +434,23 @@ static void PeelByCount(HLoopInformation* loop_info, int count) { } } +// Returns the narrower type out of instructions a and b types. +static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) { + DataType::Type type = a->GetType(); + if (DataType::Size(b->GetType()) < DataType::Size(type)) { + type = b->GetType(); + } + if (a->IsTypeConversion() && + DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(type)) { + type = a->InputAt(0)->GetType(); + } + if (b->IsTypeConversion() && + DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(type)) { + type = b->InputAt(0)->GetType(); + } + return type; +} + // // Public methods. // @@ -1289,6 +1309,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, DataType::Type type = instruction->GetType(); // Recognize SAD idiom or direct reduction. 
if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) || + VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) || (TrySetVectorType(type, &restrictions) && VectorizeUse(node, instruction, generate_code, type, restrictions))) { if (generate_code) { @@ -1531,11 +1552,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv | kNoReduction | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; + *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoDotProd; return TrySetVectorLength(4); case DataType::Type::kInt32: *restrictions |= kNoDiv | kNoWideSAD; @@ -1580,12 +1601,23 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= - kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; + *restrictions |= kNoMul | + kNoDiv | + kNoShift | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD | + kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; + *restrictions |= kNoDiv | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD| + kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv | kNoSAD; @@ -1610,11 +1642,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -1639,11 +1671,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -2071,18 +2103,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* r = a; HInstruction* s = b; bool is_unsigned = false; - DataType::Type sub_type = a->GetType(); - if (DataType::Size(b->GetType()) < DataType::Size(sub_type)) { - sub_type = b->GetType(); - } - if (a->IsTypeConversion() && - DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(sub_type)) { - sub_type = a->InputAt(0)->GetType(); - } - if (b->IsTypeConversion() && - DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(sub_type)) { - sub_type = b->InputAt(0)->GetType(); - } + DataType::Type sub_type = GetNarrowerType(a, b); if (reduction_type != sub_type && (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) { return false; @@ -2123,6 +2144,75 @@ bool 
HLoopOptimization::VectorizeSADIdiom(LoopNode* node, return false; } +// Method recognises the following dot product idiom: +// q += a * b for operands a, b whose type is narrower than the reduction one. +// Provided that the operands have the same type or are promoted to a wider form. +// Since this may involve a vector length change, the idiom is handled by going directly +// to a dot product node (rather than relying combining finer grained nodes later). +bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type reduction_type, + uint64_t restrictions) { + if (!instruction->IsAdd() || (reduction_type != DataType::Type::kInt32)) { + return false; + } + + HInstruction* q = instruction->InputAt(0); + HInstruction* v = instruction->InputAt(1); + if (!v->IsMul() || v->GetType() != reduction_type) { + return false; + } + + HInstruction* a = v->InputAt(0); + HInstruction* b = v->InputAt(1); + HInstruction* r = a; + HInstruction* s = b; + DataType::Type op_type = GetNarrowerType(a, b); + bool is_unsigned = false; + + if (!IsNarrowerOperands(a, b, op_type, &r, &s, &is_unsigned)) { + return false; + } + op_type = HVecOperation::ToProperType(op_type, is_unsigned); + + if (!TrySetVectorType(op_type, &restrictions) || + HasVectorRestrictions(restrictions, kNoDotProd)) { + return false; + } + + DCHECK(r != nullptr && s != nullptr); + // Accept dot product idiom for vectorizable operands. Vectorized code uses the shorthand + // idiomatic operation. Sequential code uses the original scalar expressions. + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = a; + s = b; + } + if (VectorizeUse(node, q, generate_code, op_type, restrictions) && + VectorizeUse(node, r, generate_code, op_type, restrictions) && + VectorizeUse(node, s, generate_code, op_type, restrictions)) { + if (generate_code) { + if (vector_mode_ == kVector) { + vector_map_->Put(instruction, new (global_allocator_) HVecDotProd( + global_allocator_, + vector_map_->Get(q), + vector_map_->Get(r), + vector_map_->Get(s), + reduction_type, + is_unsigned, + GetOtherVL(reduction_type, op_type, vector_length_), + kNoDexPc)); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); + } else { + GenerateVecOp(v, vector_map_->Get(r), vector_map_->Get(s), reduction_type); + GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type); + } + } + return true; + } + return false; +} + // // Vectorization heuristics. // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 2b202fda75..1a842c4bf3 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -82,6 +82,7 @@ class HLoopOptimization : public HOptimization { kNoReduction = 1 << 9, // no reduction kNoSAD = 1 << 10, // no sum of absolute differences (SAD) kNoWideSAD = 1 << 11, // no sum of absolute differences (SAD) with operand widening + kNoDotProd = 1 << 12, // no dot product }; /* @@ -217,6 +218,11 @@ class HLoopOptimization : public HOptimization { bool generate_code, DataType::Type type, uint64_t restrictions); + bool VectorizeDotProdIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions); // Vectorization heuristics. 
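A hedged sketch of the source-level shape that VectorizeDotProdIdiom recognizes, not code from the commit: a 32-bit accumulator fed by products of narrower operands (signed 8-bit here); the matching Java loop has the same form. On the ARM64 backend the vectorizer can then replace the loop body with a single HVecDotProd reduction over 16 bytes per iteration.

#include <cstddef>
#include <cstdint>

int32_t DotProduct(const int8_t* a, const int8_t* b, size_t n) {
  int32_t q = 0;
  for (size_t i = 0; i < n; ++i) {
    q += a[i] * b[i];  // operands are promoted before the multiply, giving the "q += a * b" reduction shape
  }
  return q;
}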
Alignment ComputeAlignment(HInstruction* offset, diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 68f1a2406a..76887f9a5b 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1453,6 +1453,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecSetScalars, VecOperation) \ M(VecMultiplyAccumulate, VecOperation) \ M(VecSADAccumulate, VecOperation) \ + M(VecDotProd, VecOperation) \ M(VecLoad, VecMemoryOperation) \ M(VecStore, VecMemoryOperation) \ diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index c7539f2846..597e399dd1 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -1021,6 +1021,66 @@ class HVecSADAccumulate final : public HVecOperation { DEFAULT_COPY_CONSTRUCTOR(VecSADAccumulate); }; +// Performs dot product of two vectors and adds the result to wider precision components in +// the accumulator. +// +// viz. DOT_PRODUCT([ a1, .. , am], [ x1, .. , xn ], [ y1, .. , yn ]) = +// [ a1 + sum(xi * yi), .. , am + sum(xj * yj) ], +// for m <= n, non-overlapping sums, +// for either both signed or both unsigned operands x, y. +// +// Notes: +// - packed type reflects the type of sum reduction, not the type of the operands. +// - IsZeroExtending() is used to determine the kind of signed/zero extension to be +// performed for the operands. +// +// TODO: Support types other than kInt32 for packed type. +class HVecDotProd final : public HVecOperation { + public: + HVecDotProd(ArenaAllocator* allocator, + HInstruction* accumulator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + bool is_zero_extending, + size_t vector_length, + uint32_t dex_pc) + : HVecOperation(kVecDotProd, + allocator, + packed_type, + SideEffects::None(), + /* number_of_inputs */ 3, + vector_length, + dex_pc) { + DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); + DCHECK(DataType::IsIntegralType(packed_type)); + DCHECK(left->IsVecOperation()); + DCHECK(right->IsVecOperation()); + DCHECK_EQ(ToSignedType(left->AsVecOperation()->GetPackedType()), + ToSignedType(right->AsVecOperation()->GetPackedType())); + SetRawInputAt(0, accumulator); + SetRawInputAt(1, left); + SetRawInputAt(2, right); + SetPackedFlag<kFieldHDotProdIsZeroExtending>(is_zero_extending); + } + + bool IsZeroExtending() const { return GetPackedFlag<kFieldHDotProdIsZeroExtending>(); } + + bool CanBeMoved() const override { return true; } + + DECLARE_INSTRUCTION(VecDotProd); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecDotProd); + + private: + // Additional packed bits. + static constexpr size_t kFieldHDotProdIsZeroExtending = + HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kNumberOfHDotProdPackedBits = kFieldHDotProdIsZeroExtending + 1; + static_assert(kNumberOfHDotProdPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); +}; + // Loads a vector from memory, viz. load(mem, 1) // yield the vector [ mem(1), .. , mem(n) ]. 
class HVecLoad final : public HVecMemoryOperation { diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 9ae025b3fe..3a550efeb8 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -399,7 +399,8 @@ class OptimizingCompiler final : public Compiler { PassObserver* pass_observer, VariableSizedHandleScope* handles) const; - void GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo method_debug_info) + void GenerateJitDebugInfo(ArtMethod* method, + const debug::MethodDebugInfo& method_debug_info) REQUIRES_SHARED(Locks::mutator_lock_); std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -1406,7 +1407,8 @@ bool OptimizingCompiler::JitCompile(Thread* self, return true; } -void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo info) { +void OptimizingCompiler::GenerateJitDebugInfo( + ArtMethod* method, const debug::MethodDebugInfo& info) { const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); DCHECK(compiler_options.GenerateAnyDebugInfo()); diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 399a6d8cbd..a8ab6cdd0c 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -174,8 +174,8 @@ class ParallelMoveTest : public ::testing::Test { template<> const bool ParallelMoveTest<TestParallelMoveResolverWithSwap>::has_swap = true; template<> const bool ParallelMoveTest<TestParallelMoveResolverNoSwap>::has_swap = false; -typedef ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap> - ParallelMoveResolverTestTypes; +using ParallelMoveResolverTestTypes = + ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap>; TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index dda29a1b4b..db96e41064 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -440,7 +440,10 @@ static bool HasAliasInEnvironments(HInstruction* instruction) { return false; } -void SsaBuilder::ReplaceUninitializedStringPhis() { +// Returns whether the analysis succeeded. If it did not, we are going to bail +// to interpreter. +// TODO(ngeoffray): Remove this workaround. +bool SsaBuilder::ReplaceUninitializedStringPhis() { ScopedArenaHashSet<HInstruction*> seen_instructions( local_allocator_->Adapter(kArenaAllocGraphBuilder)); ScopedArenaVector<HInstruction*> worklist(local_allocator_->Adapter(kArenaAllocGraphBuilder)); @@ -467,17 +470,23 @@ void SsaBuilder::ReplaceUninitializedStringPhis() { if (found_instance == nullptr) { found_instance = current->AsNewInstance(); } else { - DCHECK(found_instance == current); + if (found_instance != current) { + return false; + } } } else if (current->IsPhi()) { // Push all inputs to the worklist. Those should be Phis or NewInstance. for (HInstruction* input : current->GetInputs()) { - DCHECK(input->IsPhi() || input->IsNewInstance()) << input->DebugName(); + if (!input->IsPhi() && !input->IsNewInstance()) { + return false; + } worklist.push_back(input); } } else { // The verifier prevents any other DEX uses of the uninitialized string. 
- DCHECK(current->IsEqual() || current->IsNotEqual()); + if (!current->IsEqual() && !current->IsNotEqual()) { + return false; + } continue; } current->ReplaceUsesDominatedBy(invoke, invoke); @@ -487,13 +496,18 @@ void SsaBuilder::ReplaceUninitializedStringPhis() { // be Phi, or Equal/NotEqual. for (const HUseListNode<HInstruction*>& use : current->GetUses()) { HInstruction* user = use.GetUser(); - DCHECK(user->IsPhi() || user->IsEqual() || user->IsNotEqual()) << user->DebugName(); + if (!user->IsPhi() && !user->IsEqual() && !user->IsNotEqual()) { + return false; + } worklist.push_back(user); } } while (!worklist.empty()); seen_instructions.clear(); - DCHECK(found_instance != nullptr); + if (found_instance == nullptr) { + return false; + } } + return true; } void SsaBuilder::RemoveRedundantUninitializedStrings() { @@ -547,7 +561,9 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // Replace Phis that feed in a String.<init>, as well as their aliases, with // the actual String allocation invocation. We do this first, as the phis stored in // the data structure might get removed from the graph in later stages during `BuildSsa`. - ReplaceUninitializedStringPhis(); + if (!ReplaceUninitializedStringPhis()) { + return kAnalysisSkipped; + } // Propagate types of phis. At this point, phis are typed void in the general // case, or float/double/reference if we created an equivalent phi. So we need diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 765544508e..bae15acf98 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -123,7 +123,7 @@ class SsaBuilder : public ValueObject { HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget); void RemoveRedundantUninitializedStrings(); - void ReplaceUninitializedStringPhis(); + bool ReplaceUninitializedStringPhis(); HGraph* const graph_; Handle<mirror::ClassLoader> class_loader_; diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 1ba535f4c3..a673e3210c 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -179,7 +179,7 @@ void MipsAssembler::PatchCFI(size_t number_of_delayed_adjust_pcs) { return; } - typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + using DelayedAdvancePC = DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC; const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); const std::vector<uint8_t>& old_stream = data.first; const std::vector<DelayedAdvancePC>& advances = data.second; diff --git a/compiler/utils/mips/assembler_mips32r5_test.cc b/compiler/utils/mips/assembler_mips32r5_test.cc index bd73c12dc5..98fc44ba5d 100644 --- a/compiler/utils/mips/assembler_mips32r5_test.cc +++ b/compiler/utils/mips/assembler_mips32r5_test.cc @@ -38,12 +38,12 @@ class AssemblerMIPS32r5Test : public AssemblerTest<mips::MipsAssembler, uint32_t, mips::VectorRegister> { public: - typedef AssemblerTest<mips::MipsAssembler, - mips::MipsLabel, - mips::Register, - mips::FRegister, - uint32_t, - mips::VectorRegister> Base; + using Base = AssemblerTest<mips::MipsAssembler, + mips::MipsLabel, + mips::Register, + mips::FRegister, + uint32_t, + mips::VectorRegister>; // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<> // and reimplement it without the verification against `assembly_string`. 
b/73903608 diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index 9637c25e7e..723c489f21 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -38,12 +38,12 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, uint32_t, mips::VectorRegister> { public: - typedef AssemblerTest<mips::MipsAssembler, - mips::MipsLabel, - mips::Register, - mips::FRegister, - uint32_t, - mips::VectorRegister> Base; + using Base = AssemblerTest<mips::MipsAssembler, + mips::MipsLabel, + mips::Register, + mips::FRegister, + uint32_t, + mips::VectorRegister>; // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<> // and reimplement it without the verification against `assembly_string`. b/73903608 diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index f137c60eb8..4f8ccee2c2 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -37,11 +37,11 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler, mips::FRegister, uint32_t> { public: - typedef AssemblerTest<mips::MipsAssembler, - mips::MipsLabel, - mips::Register, - mips::FRegister, - uint32_t> Base; + using Base = AssemblerTest<mips::MipsAssembler, + mips::MipsLabel, + mips::Register, + mips::FRegister, + uint32_t>; // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<> // and reimplement it without the verification against `assembly_string`. b/73903608 diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 6df9562fd5..29d2beda96 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -52,7 +52,7 @@ void Mips64Assembler::PatchCFI() { return; } - typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + using DelayedAdvancePC = DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC; const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); const std::vector<uint8_t>& old_stream = data.first; const std::vector<DelayedAdvancePC>& advances = data.second; diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 3218ae3a90..66711c3210 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -41,12 +41,12 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, uint32_t, mips64::VectorRegister> { public: - typedef AssemblerTest<mips64::Mips64Assembler, - mips64::Mips64Label, - mips64::GpuRegister, - mips64::FpuRegister, - uint32_t, - mips64::VectorRegister> Base; + using Base = AssemblerTest<mips64::Mips64Assembler, + mips64::Mips64Label, + mips64::GpuRegister, + mips64::FpuRegister, + uint32_t, + mips64::VectorRegister>; // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<> // and reimplement it without the verification against `assembly_string`. 
b/73903608 diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index b03c40aa3e..ad75174d23 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -44,11 +44,11 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, x86::XmmRegister, x86::Immediate> { public: - typedef AssemblerTest<x86::X86Assembler, - x86::Address, - x86::Register, - x86::XmmRegister, - x86::Immediate> Base; + using Base = AssemblerTest<x86::X86Assembler, + x86::Address, + x86::Register, + x86::XmmRegister, + x86::Immediate>; protected: std::string GetArchitectureString() override { diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 65711e0855..fe42f9b19b 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -137,11 +137,11 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64::XmmRegister, x86_64::Immediate> { public: - typedef AssemblerTest<x86_64::X86_64Assembler, - x86_64::Address, - x86_64::CpuRegister, - x86_64::XmmRegister, - x86_64::Immediate> Base; + using Base = AssemblerTest<x86_64::X86_64Assembler, + x86_64::Address, + x86_64::CpuRegister, + x86_64::XmmRegister, + x86_64::Immediate>; protected: // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... diff --git a/dex2oat/linker/elf_writer_test.cc b/dex2oat/linker/elf_writer_test.cc index ef85fd16ff..1d578ab9d1 100644 --- a/dex2oat/linker/elf_writer_test.cc +++ b/dex2oat/linker/elf_writer_test.cc @@ -164,7 +164,7 @@ TEST_F(ElfWriterTest, EncodeDecodeOatPatches) { // Patch manually. std::vector<uint8_t> expected = initial_data; for (uintptr_t location : patch_locations) { - typedef __attribute__((__aligned__(1))) uint32_t UnalignedAddress; + using UnalignedAddress __attribute__((__aligned__(1))) = uint32_t; *reinterpret_cast<UnalignedAddress*>(expected.data() + location) += delta; } diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc index e89de84739..acd49d5b45 100644 --- a/dex2oat/linker/oat_writer.cc +++ b/dex2oat/linker/oat_writer.cc @@ -92,10 +92,10 @@ static constexpr bool kOatWriterForceOatCodeLayout = false; static constexpr bool kOatWriterDebugOatCodeLayout = false; -typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader; +using UnalignedDexFileHeader __attribute__((__aligned__(1))) = DexFile::Header; const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) { - return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data); + return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data); } class ChecksumUpdatingOutputStream : public OutputStream { diff --git a/dex2oat/linker/x86/relative_patcher_x86_base.cc b/dex2oat/linker/x86/relative_patcher_x86_base.cc index 6a9690d768..07cd724308 100644 --- a/dex2oat/linker/x86/relative_patcher_x86_base.cc +++ b/dex2oat/linker/x86/relative_patcher_x86_base.cc @@ -50,7 +50,7 @@ void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t displacement = target_offset - patch_offset; displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. 
- typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; + using unaligned_int32_t __attribute__((__aligned__(1))) = int32_t; reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement; } diff --git a/dex2oat/linker/x86_64/relative_patcher_x86_64.cc b/dex2oat/linker/x86_64/relative_patcher_x86_64.cc index 9633564999..c80f6a92f2 100644 --- a/dex2oat/linker/x86_64/relative_patcher_x86_64.cc +++ b/dex2oat/linker/x86_64/relative_patcher_x86_64.cc @@ -31,7 +31,7 @@ void X86_64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, uint32_t displacement = target_offset - patch_offset; displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; + using unaligned_int32_t __attribute__((__aligned__(1))) = int32_t; reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement; } diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc index f09d448493..6b2a1b9a70 100644 --- a/dexdump/dexdump.cc +++ b/dexdump/dexdump.cc @@ -69,14 +69,14 @@ FILE* gOutFile = stdout; /* * Data types that match the definitions in the VM specification. */ -typedef uint8_t u1; -typedef uint16_t u2; -typedef uint32_t u4; -typedef uint64_t u8; -typedef int8_t s1; -typedef int16_t s2; -typedef int32_t s4; -typedef int64_t s8; +using u1 = uint8_t; +using u2 = uint16_t; +using u4 = uint32_t; +using u8 = uint64_t; +using s1 = int8_t; +using s2 = int16_t; +using s4 = int32_t; +using s8 = int64_t; /* * Basic information about a field or a method. diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc index 23be19dd2e..067daa7842 100644 --- a/dexlist/dexlist.cc +++ b/dexlist/dexlist.cc @@ -55,9 +55,9 @@ static FILE* gOutFile = stdout; /* * Data types that match the definitions in the VM specification. */ -typedef uint8_t u1; -typedef uint32_t u4; -typedef uint64_t u8; +using u1 = uint8_t; +using u4 = uint32_t; +using u8 = uint64_t; /* * Returns a newly-allocated string for the "dot version" of the class diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index c1a6f59341..94ea0064e6 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -137,12 +137,12 @@ class DisassemblerArm::CustomDisassembler final : public PrintDisassembler { void DisassemblerArm::CustomDisassembler::CustomDisassemblerStream::PrintLiteral(LocationType type, int32_t offset) { // Literal offsets are not required to be aligned, so we may need unaligned access. - typedef const int16_t unaligned_int16_t __attribute__ ((aligned (1))); - typedef const uint16_t unaligned_uint16_t __attribute__ ((aligned (1))); - typedef const int32_t unaligned_int32_t __attribute__ ((aligned (1))); - typedef const int64_t unaligned_int64_t __attribute__ ((aligned (1))); - typedef const float unaligned_float __attribute__ ((aligned (1))); - typedef const double unaligned_double __attribute__ ((aligned (1))); + using unaligned_int16_t __attribute__((__aligned__(1))) = const int16_t; + using unaligned_uint16_t __attribute__((__aligned__(1))) = const uint16_t; + using unaligned_int32_t __attribute__((__aligned__(1))) = const int32_t; + using unaligned_int64_t __attribute__((__aligned__(1))) = const int64_t; + using unaligned_float __attribute__((__aligned__(1))) = const float; + using unaligned_double __attribute__((__aligned__(1))) = const double; // Zeros are used for the LocationType values this function does not care about. 
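A small illustration, not part of the commit, of the modernize-use-using conversions applied throughout: the old and new spellings are equivalent, both for the attribute-qualified unaligned aliases and for function-type aliases. All names below are invented for the example.

#include <cstdint>

typedef __attribute__((__aligned__(1))) uint32_t unaligned_u32_old;   // old spelling
using unaligned_u32_new __attribute__((__aligned__(1))) = uint32_t;   // new spelling

typedef int32_t (patch_fn_old)(uint8_t*, uint32_t);                   // old function-type alias
using patch_fn_new = int32_t(uint8_t*, uint32_t);                     // new function-type alias

uint32_t ReadUnaligned(const uint8_t* p) {
  // Either alias is intended to let the compiler emit an alignment-safe load
  // from an arbitrary byte address.
  return *reinterpret_cast<const unaligned_u32_new*>(p);
}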
const size_t literal_size[kVst4Location + 1] = { diff --git a/libartbase/base/macros.h b/libartbase/base/macros.h index 33866bba08..315f4d265d 100644 --- a/libartbase/base/macros.h +++ b/libartbase/base/macros.h @@ -48,6 +48,7 @@ template<typename T> ART_FRIEND_TEST(test_set_name, individual_test) #define OFFSETOF_MEMBERPTR(t, f) \ (reinterpret_cast<uintptr_t>(&(reinterpret_cast<t*>(16)->*f)) - static_cast<uintptr_t>(16)) // NOLINT +#define ALIGNED(x) __attribute__ ((__aligned__(x))) #define PACKED(x) __attribute__ ((__aligned__(x), __packed__)) // Stringify the argument. diff --git a/libartbase/base/utils.cc b/libartbase/base/utils.cc index 2242fe877e..0f172fdcfb 100644 --- a/libartbase/base/utils.cc +++ b/libartbase/base/utils.cc @@ -24,6 +24,7 @@ #include <sys/wait.h> #include <unistd.h> +#include <fstream> #include <memory> #include "android-base/file.h" @@ -213,4 +214,25 @@ void SleepForever() { } } +std::string GetProcessStatus(const char* key) { + // Build search pattern of key and separator. + std::string pattern(key); + pattern.push_back(':'); + + // Search for status lines starting with pattern. + std::ifstream fs("/proc/self/status"); + std::string line; + while (std::getline(fs, line)) { + if (strncmp(pattern.c_str(), line.c_str(), pattern.size()) == 0) { + // Skip whitespace in matching line (if any). + size_t pos = line.find_first_not_of(" \t", pattern.size()); + if (UNLIKELY(pos == std::string::npos)) { + break; + } + return std::string(line, pos); + } + } + return "<unknown>"; +} + } // namespace art diff --git a/libartbase/base/utils.h b/libartbase/base/utils.h index e6a0459e27..9c7105599c 100644 --- a/libartbase/base/utils.h +++ b/libartbase/base/utils.h @@ -216,6 +216,11 @@ static inline void CheckedCall(const Func& function, const char* what, Args... a } } +// Lookup value for a given key in /proc/self/status. Keys and values are separated by a ':' in +// the status file. Returns value found on success and "<unknown>" if the key is not found or +// there is an I/O error. +std::string GetProcessStatus(const char* key); + } // namespace art #endif // ART_LIBARTBASE_BASE_UTILS_H_ diff --git a/libartbase/base/utils_test.cc b/libartbase/base/utils_test.cc index 892d1fd5bf..9bd50c309a 100644 --- a/libartbase/base/utils_test.cc +++ b/libartbase/base/utils_test.cc @@ -126,4 +126,12 @@ TEST_F(UtilsTest, BoundsCheckedCast) { EXPECT_EQ(BoundsCheckedCast<const uint64_t*>(buffer + 57, buffer, buffer_end), nullptr); } +TEST_F(UtilsTest, GetProcessStatus) { + EXPECT_EQ("utils_test", GetProcessStatus("Name")); + EXPECT_EQ("R (running)", GetProcessStatus("State")); + EXPECT_EQ("<unknown>", GetProcessStatus("tate")); + EXPECT_EQ("<unknown>", GetProcessStatus("e")); + EXPECT_EQ("<unknown>", GetProcessStatus("Dummy")); +} + } // namespace art diff --git a/libdexfile/dex/dex_file_loader.cc b/libdexfile/dex/dex_file_loader.cc index 400c32b519..4aafc665ee 100644 --- a/libdexfile/dex/dex_file_loader.cc +++ b/libdexfile/dex/dex_file_loader.cc @@ -25,10 +25,6 @@ #include "standard_dex_file.h" #include "ziparchive/zip_archive.h" -// system/core/zip_archive definitions. 
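A hedged usage sketch for the new GetProcessStatus() helper, not taken from the commit: "Name" and "State" mirror the utils_test expectations, while "VmRSS" is a standard /proc/self/status key used here purely for illustration.

#include <iostream>

#include "base/utils.h"

void DumpSomeProcessStatus() {
  std::cout << "Name:  " << art::GetProcessStatus("Name") << '\n';
  std::cout << "State: " << art::GetProcessStatus("State") << '\n';
  std::cout << "VmRSS: " << art::GetProcessStatus("VmRSS") << '\n';   // e.g. "1234 kB"
  std::cout << "Bogus: " << art::GetProcessStatus("Bogus") << '\n';   // prints "<unknown>"
}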
-struct ZipEntry; -typedef void* ZipArchiveHandle; - namespace art { namespace { diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 707fc1c9ed..d30ec3157d 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -419,7 +419,7 @@ class OatDumper { return instruction_set_; } - typedef std::vector<std::unique_ptr<const DexFile>> DexFileUniqV; + using DexFileUniqV = std::vector<std::unique_ptr<const DexFile>>; bool Dump(std::ostream& os) { bool success = true; @@ -2480,7 +2480,7 @@ class ImageDumper { size_t bytes; size_t count; }; - typedef SafeMap<std::string, SizeAndCount> SizeAndCountTable; + using SizeAndCountTable = SafeMap<std::string, SizeAndCount>; SizeAndCountTable sizes_and_counts; void Update(const char* descriptor, size_t object_bytes_in) { diff --git a/openjdkjvmti/object_tagging.cc b/openjdkjvmti/object_tagging.cc index ba242ef1e8..1562fb6eb6 100644 --- a/openjdkjvmti/object_tagging.cc +++ b/openjdkjvmti/object_tagging.cc @@ -36,6 +36,7 @@ #include "art_jvmti.h" #include "events-inl.h" #include "jvmti_weak_table-inl.h" +#include "mirror/object-inl.h" namespace openjdkjvmti { diff --git a/openjdkjvmti/ti_monitor.cc b/openjdkjvmti/ti_monitor.cc index df29098ec4..f71328a6b6 100644 --- a/openjdkjvmti/ti_monitor.cc +++ b/openjdkjvmti/ti_monitor.cc @@ -38,6 +38,7 @@ #include "art_jvmti.h" #include "gc_root-inl.h" +#include "mirror/object-inl.h" #include "monitor.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc index 286b6867a3..f9707d3738 100644 --- a/profman/profile_assistant_test.cc +++ b/profman/profile_assistant_test.cc @@ -116,9 +116,9 @@ class ProfileAssistantTest : public CommonRuntimeTest { void SetupBasicProfile(const std::string& id, uint32_t checksum, uint16_t number_of_methods, - const std::vector<uint32_t> hot_methods, - const std::vector<uint32_t> startup_methods, - const std::vector<uint32_t> post_startup_methods, + const std::vector<uint32_t>& hot_methods, + const std::vector<uint32_t>& startup_methods, + const std::vector<uint32_t>& post_startup_methods, const ScratchFile& profile, ProfileCompilationInfo* info) { std::string dex_location = "location1" + id; diff --git a/runtime/Android.bp b/runtime/Android.bp index 15ccb70df0..f4b8697470 100644 --- a/runtime/Android.bp +++ b/runtime/Android.bp @@ -93,6 +93,7 @@ libart_cc_defaults { "instrumentation.cc", "intern_table.cc", "interpreter/interpreter.cc", + "interpreter/interpreter_cache.cc", "interpreter/interpreter_common.cc", "interpreter/interpreter_intrinsics.cc", "interpreter/interpreter_switch_impl.cc", diff --git a/runtime/asm_support.h b/runtime/asm_support.h index e65c19495e..00c9360ba4 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -96,6 +96,10 @@ ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET, #define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 17 * __SIZEOF_POINTER__) ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET, art::Thread::ThreadLocalAllocStackEndOffset<POINTER_SIZE>().Int32Value()) +// Offset of field Thread::interpreter_cache_. +#define THREAD_INTERPRETER_CACHE_OFFSET (144 + 312 * __SIZEOF_POINTER__) +ADD_TEST_EQ(THREAD_INTERPRETER_CACHE_OFFSET, + art::Thread::InterpreterCacheOffset<POINTER_SIZE>().Int32Value()) // Offsets within ShadowFrame. 
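For reference, a sketch of the arithmetic behind the new THREAD_INTERPRETER_CACHE_OFFSET macro; the ADD_TEST_EQ line is what actually validates it against Thread::InterpreterCacheOffset().

// 144 + 312 * __SIZEOF_POINTER__, evaluated for the two pointer widths.
static_assert(144 + 312 * 8 == 2640, "64-bit targets: interpreter cache at byte offset 2640");
static_assert(144 + 312 * 4 == 1392, "32-bit targets: interpreter cache at byte offset 1392");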
#define SHADOWFRAME_LINK_OFFSET 0 diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index 28b29125cd..b2ddff3f6a 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -227,18 +227,15 @@ void BaseMutex::DumpAll(std::ostream& os) { // No mutexes have been created yet during at startup. return; } - typedef std::set<BaseMutex*>::const_iterator It; os << "(Contended)\n"; - for (It it = all_mutexes->begin(); it != all_mutexes->end(); ++it) { - BaseMutex* mutex = *it; + for (const BaseMutex* mutex : *all_mutexes) { if (mutex->HasEverContended()) { mutex->Dump(os); os << "\n"; } } os << "(Never contented)\n"; - for (It it = all_mutexes->begin(); it != all_mutexes->end(); ++it) { - BaseMutex* mutex = *it; + for (const BaseMutex* mutex : *all_mutexes) { if (!mutex->HasEverContended()) { mutex->Dump(os); os << "\n"; diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index befeea463a..d95f71a315 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -307,7 +307,7 @@ struct FieldGapsComparator { return lhs.size < rhs.size || (lhs.size == rhs.size && lhs.start_offset > rhs.start_offset); } }; -typedef std::priority_queue<FieldGap, std::vector<FieldGap>, FieldGapsComparator> FieldGaps; +using FieldGaps = std::priority_queue<FieldGap, std::vector<FieldGap>, FieldGapsComparator>; // Adds largest aligned gaps to queue of gaps. static void AddFieldGap(uint32_t gap_start, uint32_t gap_end, FieldGaps* gaps) { @@ -1251,6 +1251,8 @@ void AppImageClassLoadersAndDexCachesHelper::Update( ClassTable::ClassSet* new_class_set) REQUIRES(!Locks::dex_lock_) REQUIRES_SHARED(Locks::mutator_lock_) { + ScopedTrace app_image_timing("AppImage:Updating"); + Thread* const self = Thread::Current(); gc::Heap* const heap = Runtime::Current()->GetHeap(); const ImageHeader& header = space->GetImageHeader(); @@ -1311,7 +1313,7 @@ void AppImageClassLoadersAndDexCachesHelper::Update( } if (ClassLinker::kAppImageMayContainStrings) { // Fixup all the literal strings happens at app images which are supposed to be interned. - ScopedTrace timing("Fixup String Intern in image and dex_cache"); + ScopedTrace timing("AppImage:InternString"); const auto& image_header = space->GetImageHeader(); const auto bitmap = space->GetMarkBitmap(); // bitmap of objects const uint8_t* target_base = space->GetMemMap()->Begin(); @@ -1324,7 +1326,7 @@ void AppImageClassLoadersAndDexCachesHelper::Update( bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_intern_visitor); } if (kVerifyArtMethodDeclaringClasses) { - ScopedTrace timing("Verify declaring classes"); + ScopedTrace timing("AppImage:VerifyDeclaringClasses"); ReaderMutexLock rmu(self, *Locks::heap_bitmap_lock_); VerifyDeclaringClassVisitor visitor; header.VisitPackedArtMethods(&visitor, space->Begin(), kRuntimePointerSize); @@ -1842,7 +1844,7 @@ bool ClassLinker::AddImageSpace( // Force every app image class's SubtypeCheck to be at least kIninitialized. // // See also ImageWriter::FixupClass. - ScopedTrace trace("Recalculate app image SubtypeCheck bitstrings"); + ScopedTrace trace("AppImage:RecacluateSubtypeCheckBitstrings"); MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); for (const ClassTable::TableSlot& root : temp_set) { SubtypeCheck<ObjPtr<mirror::Class>>::EnsureInitialized(root.Read()); @@ -1862,7 +1864,7 @@ bool ClassLinker::AddImageSpace( if (kIsDebugBuild && app_image) { // This verification needs to happen after the classes have been added to the class loader. // Since it ensures classes are in the class table. 
- ScopedTrace trace("VerifyAppImage"); + ScopedTrace trace("AppImage:Verify"); VerifyAppImage(header, class_loader, dex_caches, class_table, space); } @@ -2301,7 +2303,7 @@ ObjPtr<mirror::Class> ClassLinker::EnsureResolved(Thread* self, return klass; } -typedef std::pair<const DexFile*, const DexFile::ClassDef*> ClassPathEntry; +using ClassPathEntry = std::pair<const DexFile*, const DexFile::ClassDef*>; // Search a collection of DexFiles for a descriptor ClassPathEntry FindInClassPath(const char* descriptor, @@ -6448,7 +6450,7 @@ static bool NotSubinterfaceOfAny( // iftable must be large enough to hold all interfaces without changing its size. static size_t FillIfTable(ObjPtr<mirror::IfTable> iftable, size_t super_ifcount, - std::vector<ObjPtr<mirror::Class>> to_process) + const std::vector<ObjPtr<mirror::Class>>& to_process) REQUIRES(Roles::uninterruptible_) REQUIRES_SHARED(Locks::mutator_lock_) { // This is the set of all class's already in the iftable. Used to make checking if a class has @@ -8474,7 +8476,7 @@ mirror::MethodHandle* ClassLinker::ResolveMethodHandleForMethod( target_method->GetShorty(&shorty_length); int32_t num_params = static_cast<int32_t>(shorty_length + receiver_count - 1); - StackHandleScope<7> hs(self); + StackHandleScope<5> hs(self); ObjPtr<mirror::Class> array_of_class = GetClassRoot<mirror::ObjectArray<mirror::Class>>(this); Handle<mirror::ObjectArray<mirror::Class>> method_params(hs.NewHandle( mirror::ObjectArray<mirror::Class>::Alloc(self, array_of_class, num_params))); @@ -8483,20 +8485,25 @@ mirror::MethodHandle* ClassLinker::ResolveMethodHandleForMethod( return nullptr; } + const DexFile* dex_file = referrer->GetDexFile(); + const DexFile::MethodId& method_id = dex_file->GetMethodId(method_handle.field_or_method_idx_); int32_t index = 0; if (receiver_count != 0) { - // Insert receiver - method_params->Set(index++, target_method->GetDeclaringClass()); + // Insert receiver. Use the class identified in the method handle rather than the declaring + // class of the resolved method which may be super class or default interface method + // (b/115964401). + ObjPtr<mirror::Class> receiver_class = LookupResolvedType(method_id.class_idx_, referrer); + // receiver_class should have been resolved when resolving the target method. 
+ DCHECK(receiver_class != nullptr); + method_params->Set(index++, receiver_class); } - DexFileParameterIterator it(*target_method->GetDexFile(), target_method->GetPrototype()); - Handle<mirror::DexCache> target_method_dex_cache(hs.NewHandle(target_method->GetDexCache())); - Handle<mirror::ClassLoader> target_method_class_loader(hs.NewHandle(target_method->GetClassLoader())); + + const DexFile::ProtoId& proto_id = dex_file->GetProtoId(method_id.proto_idx_); + DexFileParameterIterator it(*dex_file, proto_id); while (it.HasNext()) { DCHECK_LT(index, num_params); const dex::TypeIndex type_idx = it.GetTypeIdx(); - ObjPtr<mirror::Class> klass = ResolveType(type_idx, - target_method_dex_cache, - target_method_class_loader); + ObjPtr<mirror::Class> klass = ResolveType(type_idx, referrer); if (nullptr == klass) { DCHECK(self->IsExceptionPending()); return nullptr; @@ -8505,7 +8512,8 @@ mirror::MethodHandle* ClassLinker::ResolveMethodHandleForMethod( it.Next(); } - Handle<mirror::Class> return_type = hs.NewHandle(target_method->ResolveReturnType()); + Handle<mirror::Class> return_type = + hs.NewHandle(ResolveType(proto_id.return_type_idx_, referrer)); if (UNLIKELY(return_type.IsNull())) { DCHECK(self->IsExceptionPending()); return nullptr; diff --git a/runtime/class_linker.h b/runtime/class_linker.h index efe29d3127..e06a398089 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -111,7 +111,7 @@ class AllocatorVisitor { class ClassLinker { public: - static constexpr bool kAppImageMayContainStrings = false; + static constexpr bool kAppImageMayContainStrings = true; explicit ClassLinker(InternTable* intern_table); virtual ~ClassLinker(); diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc index e7715c4934..ce742fe47e 100644 --- a/runtime/elf_file.cc +++ b/runtime/elf_file.cc @@ -1417,7 +1417,7 @@ template <typename ElfTypes> void ElfFileImpl<ElfTypes>::ApplyOatPatches( const uint8_t* patches, const uint8_t* patches_end, Elf_Addr delta, uint8_t* to_patch, const uint8_t* to_patch_end) { - typedef __attribute__((__aligned__(1))) Elf_Addr UnalignedAddress; + using UnalignedAddress __attribute__((__aligned__(1))) = Elf_Addr; while (patches < patches_end) { to_patch += DecodeUnsignedLeb128(&patches); DCHECK_LE(patches, patches_end) << "Unexpected end of patch list."; diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h index c2007760c6..5af1dd3cf7 100644 --- a/runtime/gc/space/region_space.h +++ b/runtime/gc/space/region_space.h @@ -35,8 +35,10 @@ namespace space { // will not try to allocate a new region from the beginning of the // region space, but from the last allocated region. This allocation // strategy reduces region reuse and should help catch some GC bugs -// earlier. -static constexpr bool kCyclicRegionAllocation = true; +// earlier. However, cyclic region allocation can also create memory +// fragmentation at the region level (see b/33795328); therefore, we +// only enable it in debug mode. +static constexpr bool kCyclicRegionAllocation = kIsDebugBuild; // A space that consists of equal-sized regions. 
class RegionSpace final : public ContinuousMemMapAllocSpace { diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h index 464c2b749f..ae31a542b7 100644 --- a/runtime/generated/asm_support_gen.h +++ b/runtime/generated/asm_support_gen.h @@ -164,6 +164,8 @@ DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CHECKPOINT_REQUEST), (static_cast<in DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_EMPTY_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kEmptyCheckpointRequest)))) #define THREAD_SUSPEND_OR_CHECKPOINT_REQUEST 7 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest)))) +#define THREAD_INTERPRETER_CACHE_SIZE_LOG2 8 +DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_INTERPRETER_CACHE_SIZE_LOG2), (static_cast<int32_t>((art::Thread::InterpreterCacheSizeLog2())))) #define JIT_CHECK_OSR (-1) DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_CHECK_OSR), (static_cast<int16_t>((art::jit::kJitCheckForOSR)))) #define JIT_HOTNESS_DISABLE (-2) diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc index e8a47d1087..9467c4c952 100644 --- a/runtime/hprof/hprof.cc +++ b/runtime/hprof/hprof.cc @@ -148,11 +148,11 @@ enum HprofBasicType { hprof_basic_long = 11, }; -typedef uint32_t HprofStringId; -typedef uint32_t HprofClassObjectId; -typedef uint32_t HprofClassSerialNumber; -typedef uint32_t HprofStackTraceSerialNumber; -typedef uint32_t HprofStackFrameId; +using HprofStringId = uint32_t; +using HprofClassObjectId = uint32_t; +using HprofClassSerialNumber = uint32_t; +using HprofStackTraceSerialNumber = uint32_t; +using HprofStackFrameId = uint32_t; static constexpr HprofStackTraceSerialNumber kHprofNullStackTrace = 0; class EndianOutput { diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc index 8ab4a9b47e..d20522574b 100644 --- a/runtime/indirect_reference_table.cc +++ b/runtime/indirect_reference_table.cc @@ -21,6 +21,7 @@ #include "base/utils.h" #include "jni/java_vm_ext.h" #include "jni/jni_internal.h" +#include "mirror/object-inl.h" #include "nth_caller_visitor.h" #include "reference_table.h" #include "runtime.h" diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc index 048c6e4d66..df66061d01 100644 --- a/runtime/interpreter/interpreter.cc +++ b/runtime/interpreter/interpreter.cc @@ -56,7 +56,7 @@ static void InterpreterJni(Thread* self, ScopedObjectAccessUnchecked soa(self); if (method->IsStatic()) { if (shorty == "L") { - typedef jobject (fntype)(JNIEnv*, jclass); + using fntype = jobject(JNIEnv*, jclass); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -67,35 +67,35 @@ static void InterpreterJni(Thread* self, } result->SetL(soa.Decode<mirror::Object>(jresult)); } else if (shorty == "V") { - typedef void (fntype)(JNIEnv*, jclass); + using fntype = void(JNIEnv*, jclass); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), soa.AddLocalReference<jclass>(method->GetDeclaringClass())); ScopedThreadStateChange tsc(self, kNative); fn(soa.Env(), klass.get()); } else if (shorty == "Z") { - typedef jboolean (fntype)(JNIEnv*, jclass); + using fntype = jboolean(JNIEnv*, jclass); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), 
soa.AddLocalReference<jclass>(method->GetDeclaringClass())); ScopedThreadStateChange tsc(self, kNative); result->SetZ(fn(soa.Env(), klass.get())); } else if (shorty == "BI") { - typedef jbyte (fntype)(JNIEnv*, jclass, jint); + using fntype = jbyte(JNIEnv*, jclass, jint); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), soa.AddLocalReference<jclass>(method->GetDeclaringClass())); ScopedThreadStateChange tsc(self, kNative); result->SetB(fn(soa.Env(), klass.get(), args[0])); } else if (shorty == "II") { - typedef jint (fntype)(JNIEnv*, jclass, jint); + using fntype = jint(JNIEnv*, jclass, jint); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), soa.AddLocalReference<jclass>(method->GetDeclaringClass())); ScopedThreadStateChange tsc(self, kNative); result->SetI(fn(soa.Env(), klass.get(), args[0])); } else if (shorty == "LL") { - typedef jobject (fntype)(JNIEnv*, jclass, jobject); + using fntype = jobject(JNIEnv*, jclass, jobject); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -108,14 +108,14 @@ static void InterpreterJni(Thread* self, } result->SetL(soa.Decode<mirror::Object>(jresult)); } else if (shorty == "IIZ") { - typedef jint (fntype)(JNIEnv*, jclass, jint, jboolean); + using fntype = jint(JNIEnv*, jclass, jint, jboolean); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), soa.AddLocalReference<jclass>(method->GetDeclaringClass())); ScopedThreadStateChange tsc(self, kNative); result->SetI(fn(soa.Env(), klass.get(), args[0], args[1])); } else if (shorty == "ILI") { - typedef jint (fntype)(JNIEnv*, jclass, jobject, jint); + using fntype = jint(JNIEnv*, jclass, jobject, jint); fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>( method->GetEntryPointFromJni())); ScopedLocalRef<jclass> klass(soa.Env(), @@ -125,7 +125,7 @@ static void InterpreterJni(Thread* self, ScopedThreadStateChange tsc(self, kNative); result->SetI(fn(soa.Env(), klass.get(), arg0.get(), args[1])); } else if (shorty == "SIZ") { - typedef jshort (fntype)(JNIEnv*, jclass, jint, jboolean); + using fntype = jshort(JNIEnv*, jclass, jint, jboolean); fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetEntryPointFromJni())); ScopedLocalRef<jclass> klass(soa.Env(), @@ -133,14 +133,14 @@ static void InterpreterJni(Thread* self, ScopedThreadStateChange tsc(self, kNative); result->SetS(fn(soa.Env(), klass.get(), args[0], args[1])); } else if (shorty == "VIZ") { - typedef void (fntype)(JNIEnv*, jclass, jint, jboolean); + using fntype = void(JNIEnv*, jclass, jint, jboolean); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), soa.AddLocalReference<jclass>(method->GetDeclaringClass())); ScopedThreadStateChange tsc(self, kNative); fn(soa.Env(), klass.get(), args[0], args[1]); } else if (shorty == "ZLL") { - typedef jboolean (fntype)(JNIEnv*, jclass, jobject, jobject); + using fntype = jboolean(JNIEnv*, jclass, jobject, jobject); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -151,7 +151,7 @@ static void InterpreterJni(Thread* self, ScopedThreadStateChange tsc(self, kNative); 
result->SetZ(fn(soa.Env(), klass.get(), arg0.get(), arg1.get())); } else if (shorty == "ZILL") { - typedef jboolean (fntype)(JNIEnv*, jclass, jint, jobject, jobject); + using fntype = jboolean(JNIEnv*, jclass, jint, jobject, jobject); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -162,7 +162,7 @@ static void InterpreterJni(Thread* self, ScopedThreadStateChange tsc(self, kNative); result->SetZ(fn(soa.Env(), klass.get(), args[0], arg1.get(), arg2.get())); } else if (shorty == "VILII") { - typedef void (fntype)(JNIEnv*, jclass, jint, jobject, jint, jint); + using fntype = void(JNIEnv*, jclass, jint, jobject, jint, jint); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -171,7 +171,7 @@ static void InterpreterJni(Thread* self, ScopedThreadStateChange tsc(self, kNative); fn(soa.Env(), klass.get(), args[0], arg1.get(), args[2], args[3]); } else if (shorty == "VLILII") { - typedef void (fntype)(JNIEnv*, jclass, jobject, jint, jobject, jint, jint); + using fntype = void(JNIEnv*, jclass, jobject, jint, jobject, jint, jint); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jclass> klass(soa.Env(), soa.AddLocalReference<jclass>(method->GetDeclaringClass())); @@ -187,7 +187,7 @@ static void InterpreterJni(Thread* self, } } else { if (shorty == "L") { - typedef jobject (fntype)(JNIEnv*, jobject); + using fntype = jobject(JNIEnv*, jobject); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jobject> rcvr(soa.Env(), soa.AddLocalReference<jobject>(receiver)); @@ -198,14 +198,14 @@ static void InterpreterJni(Thread* self, } result->SetL(soa.Decode<mirror::Object>(jresult)); } else if (shorty == "V") { - typedef void (fntype)(JNIEnv*, jobject); + using fntype = void(JNIEnv*, jobject); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jobject> rcvr(soa.Env(), soa.AddLocalReference<jobject>(receiver)); ScopedThreadStateChange tsc(self, kNative); fn(soa.Env(), rcvr.get()); } else if (shorty == "LL") { - typedef jobject (fntype)(JNIEnv*, jobject, jobject); + using fntype = jobject(JNIEnv*, jobject, jobject); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jobject> rcvr(soa.Env(), soa.AddLocalReference<jobject>(receiver)); @@ -219,7 +219,7 @@ static void InterpreterJni(Thread* self, result->SetL(soa.Decode<mirror::Object>(jresult)); ScopedThreadStateChange tsc(self, kNative); } else if (shorty == "III") { - typedef jint (fntype)(JNIEnv*, jobject, jint, jint); + using fntype = jint(JNIEnv*, jobject, jint, jint); fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni()); ScopedLocalRef<jobject> rcvr(soa.Env(), soa.AddLocalReference<jobject>(receiver)); diff --git a/runtime/interpreter/interpreter_cache.cc b/runtime/interpreter/interpreter_cache.cc new file mode 100644 index 0000000000..e43fe318cc --- /dev/null +++ b/runtime/interpreter/interpreter_cache.cc @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "interpreter_cache.h" +#include "thread-inl.h" + +namespace art { + +void InterpreterCache::Clear(Thread* owning_thread) { + DCHECK(owning_thread->GetInterpreterCache() == this); + DCHECK(owning_thread == Thread::Current() || owning_thread->IsSuspended()); + data_.fill(Entry{}); +} + +bool InterpreterCache::IsCalledFromOwningThread() { + return Thread::Current()->GetInterpreterCache() == this; +} + +} // namespace art diff --git a/runtime/interpreter/interpreter_cache.h b/runtime/interpreter/interpreter_cache.h new file mode 100644 index 0000000000..b4966fd615 --- /dev/null +++ b/runtime/interpreter/interpreter_cache.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_ +#define ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_ ++#include <array> +#include <atomic> + +#include "base/bit_utils.h" +#include "base/macros.h" + +namespace art { + +class Instruction; +class Thread; + +// Small fast thread-local cache for the interpreter. +// The key for the cache is the dex instruction pointer. +// The interpretation of the value depends on the opcode. +// Presence of an entry might imply some performance pre-conditions. +// All operations must be done from the owning thread, +// or at a point when the owning thread is suspended. +// +// The values stored for opcodes in the cache currently are: +// iget/iput: The field offset. The field must be non-volatile. +// sget/sput: The ArtField* pointer. The field must be non-volatile. +// +// Aligned to 16 bytes to make it easier to get the address of the cache +// from assembly (it ensures that the offset is a valid immediate value). +class ALIGNED(16) InterpreterCache { + // Aligned since we load the whole entry in a single assembly instruction. + typedef std::pair<const Instruction*, size_t> Entry ALIGNED(2 * sizeof(size_t)); + + public: + // 2x size increase/decrease corresponds to ~0.5% interpreter performance change. + // Value of 256 has around 75% cache hit rate. + static constexpr size_t kSize = 256; + + InterpreterCache() { + // We cannot use the Clear() method since the constructor will not + // be called from the owning thread. + data_.fill(Entry{}); + } + + // Clear the whole cache. It requires the owning thread for DCHECKs.
+ void Clear(Thread* owning_thread); + + ALWAYS_INLINE bool Get(const Instruction* key, /* out */ size_t* value) { + DCHECK(IsCalledFromOwningThread()); + Entry& entry = data_[IndexOf(key)]; + if (LIKELY(entry.first == key)) { + *value = entry.second; + return true; + } + return false; + } + + ALWAYS_INLINE void Set(const Instruction* key, size_t value) { + DCHECK(IsCalledFromOwningThread()); + data_[IndexOf(key)] = Entry{key, value}; + } + + private: + bool IsCalledFromOwningThread(); + + static ALWAYS_INLINE size_t IndexOf(const Instruction* key) { + static_assert(IsPowerOfTwo(kSize), "Size must be power of two"); + size_t index = (reinterpret_cast<uintptr_t>(key) >> 2) & (kSize - 1); + DCHECK_LT(index, kSize); + return index; + } + + std::array<Entry, kSize> data_; +}; + +} // namespace art + +#endif // ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_ diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index b324b4c99d..a607b48786 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -121,56 +121,15 @@ template<bool is_range, bool do_assignability_check> bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data, JValue* result); -// Handles streamlined non-range invoke static, direct and virtual instructions originating in -// mterp. Access checks and instrumentation other than jit profiling are not supported, but does -// support interpreter intrinsics if applicable. -// Returns true on success, otherwise throws an exception and returns false. -template<InvokeType type> -static inline bool DoFastInvoke(Thread* self, - ShadowFrame& shadow_frame, - const Instruction* inst, - uint16_t inst_data, - JValue* result) { - const uint32_t method_idx = inst->VRegB_35c(); - const uint32_t vregC = inst->VRegC_35c(); - ObjPtr<mirror::Object> receiver = (type == kStatic) - ? nullptr - : shadow_frame.GetVRegReference(vregC); - ArtMethod* sf_method = shadow_frame.GetMethod(); - ArtMethod* const called_method = FindMethodFromCode<type, false>( - method_idx, &receiver, sf_method, self); - // The shadow frame should already be pushed, so we don't need to update it. - if (UNLIKELY(called_method == nullptr)) { - CHECK(self->IsExceptionPending()); - result->SetJ(0); - return false; - } else if (UNLIKELY(!called_method->IsInvokable())) { - called_method->ThrowInvocationTimeError(); - result->SetJ(0); - return false; - } else { - jit::Jit* jit = Runtime::Current()->GetJit(); - if (jit != nullptr && type == kVirtual) { - jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method); - } - if (called_method->IsIntrinsic()) { - if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data, - shadow_frame.GetResultRegister())) { - return !self->IsExceptionPending(); - } - } - return DoCall<false, false>(called_method, self, shadow_frame, inst, inst_data, result); - } -} - // Handles all invoke-XXX/range instructions except for invoke-polymorphic[/range]. // Returns true on success, otherwise throws an exception and returns false. 
-template<InvokeType type, bool is_range, bool do_access_check> -static inline bool DoInvoke(Thread* self, - ShadowFrame& shadow_frame, - const Instruction* inst, - uint16_t inst_data, - JValue* result) { +template<InvokeType type, bool is_range, bool do_access_check, bool fast_invoke = false> +static ALWAYS_INLINE bool DoInvoke(Thread* self, + ShadowFrame& shadow_frame, + const Instruction* inst, + uint16_t inst_data, + JValue* result) + REQUIRES_SHARED(Locks::mutator_lock_) { // Make sure to check for async exceptions before anything else. if (UNLIKELY(self->ObserveAsyncException())) { return false; @@ -196,12 +155,24 @@ static inline bool DoInvoke(Thread* self, if (jit != nullptr && (type == kVirtual || type == kInterface)) { jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method); } - // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT. - if (type == kVirtual || type == kInterface) { - instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); - if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) { - instrumentation->InvokeVirtualOrInterface( - self, receiver.Ptr(), sf_method, shadow_frame.GetDexPC(), called_method); + // The fast invoke is used from mterp for some invoke variants. + // The non-fast version is used from switch interpreter and it might not support intrinsics. + // TODO: Unify both paths. + if (fast_invoke) { + if (called_method->IsIntrinsic()) { + if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data, + shadow_frame.GetResultRegister())) { + return !self->IsExceptionPending(); + } + } + } else { + // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT. + if (type == kVirtual || type == kInterface) { + instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); + if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) { + instrumentation->InvokeVirtualOrInterface( + self, receiver.Ptr(), sf_method, shadow_frame.GetDexPC(), called_method); + } } } return DoCall<is_range, do_access_check>(called_method, self, shadow_frame, inst, inst_data, @@ -277,7 +248,8 @@ bool DoInvokeCustom(Thread* self, template<bool is_range> static inline bool DoInvokeVirtualQuick(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data, - JValue* result) { + JValue* result) + REQUIRES_SHARED(Locks::mutator_lock_) { const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c(); ObjPtr<mirror::Object> const receiver = shadow_frame.GetVRegReference(vregC); if (UNLIKELY(receiver == nullptr)) { @@ -601,52 +573,6 @@ void SetStringInitValueToAllAliases(ShadowFrame* shadow_frame, uint16_t this_obj_vreg, JValue result); -// Explicitly instantiate all DoInvoke functions. -#define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check) \ - template REQUIRES_SHARED(Locks::mutator_lock_) \ - bool DoInvoke<_type, _is_range, _do_check>(Thread* self, \ - ShadowFrame& shadow_frame, \ - const Instruction* inst, uint16_t inst_data, \ - JValue* result) - -#define EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(_type) \ - EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, false, false); \ - EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, false, true); \ - EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, true, false); \ - EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, true, true); - -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kStatic) // invoke-static/range. 
-EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kDirect) // invoke-direct/range. -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kVirtual) // invoke-virtual/range. -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kSuper) // invoke-super/range. -EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL(kInterface) // invoke-interface/range. -#undef EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL -#undef EXPLICIT_DO_INVOKE_TEMPLATE_DECL - -// Explicitly instantiate all DoFastInvoke functions. -#define EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(_type) \ - template REQUIRES_SHARED(Locks::mutator_lock_) \ - bool DoFastInvoke<_type>(Thread* self, \ - ShadowFrame& shadow_frame, \ - const Instruction* inst, uint16_t inst_data, \ - JValue* result) - -EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kStatic); // invoke-static -EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kDirect); // invoke-direct -EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kVirtual); // invoke-virtual -#undef EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL - -// Explicitly instantiate all DoInvokeVirtualQuick functions. -#define EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(_is_range) \ - template REQUIRES_SHARED(Locks::mutator_lock_) \ - bool DoInvokeVirtualQuick<_is_range>(Thread* self, ShadowFrame& shadow_frame, \ - const Instruction* inst, uint16_t inst_data, \ - JValue* result) - -EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(false); // invoke-virtual-quick. -EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true); // invoke-virtual-quick-range. -#undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK - } // namespace interpreter } // namespace art diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index 65c1aa8a79..fbc96f7e18 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -172,7 +172,7 @@ extern "C" size_t MterpInvokeVirtual(Thread* self, REQUIRES_SHARED(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); - return DoFastInvoke<kVirtual>( + return DoInvoke<kVirtual, /*is_range*/ false, /*access_check*/ false, /*fast_invoke*/ true>( self, *shadow_frame, inst, inst_data, result_register); } @@ -183,7 +183,7 @@ extern "C" size_t MterpInvokeSuper(Thread* self, REQUIRES_SHARED(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); - return DoInvoke<kSuper, false, false>( + return DoInvoke<kSuper, /*is_range*/ false, /*access_check*/ false>( self, *shadow_frame, inst, inst_data, result_register); } @@ -194,7 +194,7 @@ extern "C" size_t MterpInvokeInterface(Thread* self, REQUIRES_SHARED(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); - return DoInvoke<kInterface, false, false>( + return DoInvoke<kInterface, /*is_range*/ false, /*access_check*/ false>( self, *shadow_frame, inst, inst_data, result_register); } @@ -205,7 +205,7 @@ extern "C" size_t MterpInvokeDirect(Thread* self, REQUIRES_SHARED(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); - return DoFastInvoke<kDirect>( + return DoInvoke<kDirect, /*is_range*/ false, /*access_check*/ false, /*fast_invoke*/ true>( self, *shadow_frame, inst, inst_data, result_register); } @@ -216,7 +216,7 @@ extern "C" size_t MterpInvokeStatic(Thread* self, REQUIRES_SHARED(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* 
inst = Instruction::At(dex_pc_ptr); - return DoFastInvoke<kStatic>( + return DoInvoke<kStatic, /*is_range*/ false, /*access_check*/ false, /*fast_invoke*/ true>( self, *shadow_frame, inst, inst_data, result_register); } @@ -249,7 +249,7 @@ extern "C" size_t MterpInvokeVirtualRange(Thread* self, REQUIRES_SHARED(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); - return DoInvoke<kVirtual, true, false>( + return DoInvoke<kVirtual, /*is_range*/ true, /*access_check*/ false>( self, *shadow_frame, inst, inst_data, result_register); } @@ -260,7 +260,7 @@ extern "C" size_t MterpInvokeSuperRange(Thread* self, REQUIRES_SHARED(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); - return DoInvoke<kSuper, true, false>( + return DoInvoke<kSuper, /*is_range*/ true, /*access_check*/ false>( self, *shadow_frame, inst, inst_data, result_register); } @@ -271,7 +271,7 @@ extern "C" size_t MterpInvokeInterfaceRange(Thread* self, REQUIRES_SHARED(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); - return DoInvoke<kInterface, true, false>( + return DoInvoke<kInterface, /*is_range*/ true, /*access_check*/ false>( self, *shadow_frame, inst, inst_data, result_register); } @@ -282,7 +282,7 @@ extern "C" size_t MterpInvokeDirectRange(Thread* self, REQUIRES_SHARED(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); - return DoInvoke<kDirect, true, false>( + return DoInvoke<kDirect, /*is_range*/ true, /*access_check*/ false>( self, *shadow_frame, inst, inst_data, result_register); } @@ -293,7 +293,7 @@ extern "C" size_t MterpInvokeStaticRange(Thread* self, REQUIRES_SHARED(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); - return DoInvoke<kStatic, true, false>( + return DoInvoke<kStatic, /*is_range*/ true, /*access_check*/ false>( self, *shadow_frame, inst, inst_data, result_register); } @@ -748,6 +748,10 @@ NO_INLINE bool MterpFieldAccessSlow(Instruction* inst, return true; } +// This method is called from assembly to handle field access instructions. +// +// This method is fairly hot. It is long, but it has been carefully optimized. +// It contains only fully inlined methods -> no spills -> no prologue/epilogue. template<typename PrimType, FindFieldType kAccessType> ALWAYS_INLINE bool MterpFieldAccessFast(Instruction* inst, uint16_t inst_data, @@ -756,8 +760,32 @@ ALWAYS_INLINE bool MterpFieldAccessFast(Instruction* inst, REQUIRES_SHARED(Locks::mutator_lock_) { constexpr bool kIsStatic = (kAccessType & FindFieldFlags::StaticBit) != 0; + // Try to find the field in the small thread-local cache first. + InterpreterCache* tls_cache = self->GetInterpreterCache(); + size_t tls_value; + if (LIKELY(tls_cache->Get(inst, &tls_value))) { + // The meaning of the cache value is opcode-specific. + // It is ArtField* for static fields and the raw offset for instance fields. + size_t offset = kIsStatic + ? reinterpret_cast<ArtField*>(tls_value)->GetOffset().SizeValue() + : tls_value; + if (kIsDebugBuild) { + uint32_t field_idx = kIsStatic ?
inst->VRegB_21c() : inst->VRegC_22c(); + ArtField* field = FindFieldFromCode<kAccessType, /* access_checks */ false>( + field_idx, shadow_frame->GetMethod(), self, sizeof(PrimType)); + DCHECK_EQ(offset, field->GetOffset().SizeValue()); + } + ObjPtr<mirror::Object> obj = kIsStatic + ? reinterpret_cast<ArtField*>(tls_value)->GetDeclaringClass() + : MakeObjPtr(shadow_frame->GetVRegReference(inst->VRegB_22c(inst_data))); + if (LIKELY(obj != nullptr)) { + MterpFieldAccess<PrimType, kAccessType>( + inst, inst_data, shadow_frame, obj, MemberOffset(offset), /* is_volatile */ false); + return true; + } + } + // This effectively inlines the fast path from ArtMethod::GetDexCache. - // It avoids non-inlined call which in turn allows elimination of the prologue and epilogue. ArtMethod* referrer = shadow_frame->GetMethod(); if (LIKELY(!referrer->IsObsolete())) { // Avoid read barriers, since we need only the pointer to the native (non-movable) @@ -777,6 +805,14 @@ ALWAYS_INLINE bool MterpFieldAccessFast(Instruction* inst, ? field->GetDeclaringClass().Ptr() : shadow_frame->GetVRegReference(inst->VRegB_22c(inst_data)); if (LIKELY(kIsStatic || obj != nullptr)) { + // Only non-volatile fields are allowed in the thread-local cache. + if (LIKELY(!field->IsVolatile())) { + if (kIsStatic) { + tls_cache->Set(inst, reinterpret_cast<uintptr_t>(field)); + } else { + tls_cache->Set(inst, field->GetOffset().SizeValue()); + } + } MterpFieldAccess<PrimType, kAccessType>( inst, inst_data, shadow_frame, obj, field->GetOffset(), field->IsVolatile()); return true; diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc index d4b51af903..4cd378256e 100644 --- a/runtime/interpreter/unstarted_runtime.cc +++ b/runtime/interpreter/unstarted_runtime.cc @@ -1180,19 +1180,19 @@ static void UnstartedMemoryPeek( } case Primitive::kPrimShort: { - typedef int16_t unaligned_short __attribute__ ((aligned (1))); + using unaligned_short __attribute__((__aligned__(1))) = int16_t; result->SetS(*reinterpret_cast<unaligned_short*>(static_cast<intptr_t>(address))); return; } case Primitive::kPrimInt: { - typedef int32_t unaligned_int __attribute__ ((aligned (1))); + using unaligned_int __attribute__((__aligned__(1))) = int32_t; result->SetI(*reinterpret_cast<unaligned_int*>(static_cast<intptr_t>(address))); return; } case Primitive::kPrimLong: { - typedef int64_t unaligned_long __attribute__ ((aligned (1))); + using unaligned_long __attribute__((__aligned__(1))) = int64_t; result->SetJ(*reinterpret_cast<unaligned_long*>(static_cast<intptr_t>(address))); return; } @@ -1919,11 +1919,16 @@ void UnstartedRuntime::UnstartedJNIUnsafeGetArrayIndexScaleForComponentType( result->SetI(Primitive::ComponentSize(primitive_type)); } -typedef void (*InvokeHandler)(Thread* self, ShadowFrame* shadow_frame, JValue* result, - size_t arg_size); +using InvokeHandler = void(*)(Thread* self, + ShadowFrame* shadow_frame, + JValue* result, + size_t arg_size); -typedef void (*JNIHandler)(Thread* self, ArtMethod* method, mirror::Object* receiver, - uint32_t* args, JValue* result); +using JNIHandler = void(*)(Thread* self, + ArtMethod* method, + mirror::Object* receiver, + uint32_t* args, + JValue* result); static bool tables_initialized_ = false; static std::unordered_map<std::string, InvokeHandler> invoke_handlers_; diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc index 200fc5b334..bd2705d530 100644 --- a/runtime/interpreter/unstarted_runtime_test.cc +++ 
b/runtime/interpreter/unstarted_runtime_test.cc @@ -261,7 +261,7 @@ TEST_F(UnstartedRuntimeTest, MemoryPeekShort) { UnstartedMemoryPeekShort(self, tmp.get(), &result, 0); - typedef int16_t unaligned_short __attribute__ ((aligned (1))); + using unaligned_short __attribute__((__aligned__(1))) = int16_t; const unaligned_short* short_ptr = reinterpret_cast<const unaligned_short*>(base_ptr + i); EXPECT_EQ(result.GetS(), *short_ptr); } @@ -284,7 +284,7 @@ TEST_F(UnstartedRuntimeTest, MemoryPeekInt) { UnstartedMemoryPeekInt(self, tmp.get(), &result, 0); - typedef int32_t unaligned_int __attribute__ ((aligned (1))); + using unaligned_int __attribute__((__aligned__(1))) = int32_t; const unaligned_int* int_ptr = reinterpret_cast<const unaligned_int*>(base_ptr + i); EXPECT_EQ(result.GetI(), *int_ptr); } @@ -307,7 +307,7 @@ TEST_F(UnstartedRuntimeTest, MemoryPeekLong) { UnstartedMemoryPeekLong(self, tmp.get(), &result, 0); - typedef int64_t unaligned_long __attribute__ ((aligned (1))); + using unaligned_long __attribute__((__aligned__(1))) = int64_t; const unaligned_long* long_ptr = reinterpret_cast<const unaligned_long*>(base_ptr + i); EXPECT_EQ(result.GetJ(), *long_ptr); } diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc index 1e61ba0f2d..0a54e38698 100644 --- a/runtime/jdwp/jdwp_handler.cc +++ b/runtime/jdwp/jdwp_handler.cc @@ -1432,7 +1432,7 @@ static JdwpError DDM_Chunk(JdwpState* state, Request* request, ExpandBuf* pReply /* * Handler map decl. */ -typedef JdwpError (*JdwpRequestHandler)(JdwpState* state, Request* request, ExpandBuf* reply); +using JdwpRequestHandler = JdwpError(*)(JdwpState* state, Request* request, ExpandBuf* reply); struct JdwpHandlerMap { uint8_t cmdSet; diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc index 63fb22cfce..6cd719a55c 100644 --- a/runtime/jit/debugger_interface.cc +++ b/runtime/jit/debugger_interface.cc @@ -77,11 +77,11 @@ namespace art { extern "C" { - typedef enum { + enum JITAction { JIT_NOACTION = 0, JIT_REGISTER_FN, JIT_UNREGISTER_FN - } JITAction; + }; struct JITCodeEntry { // Atomic to ensure the reader can always iterate over the linked list diff --git a/runtime/jni/java_vm_ext.cc b/runtime/jni/java_vm_ext.cc index 42406cf73c..6769368ee4 100644 --- a/runtime/jni/java_vm_ext.cc +++ b/runtime/jni/java_vm_ext.cc @@ -333,7 +333,7 @@ class Libraries { } ScopedThreadSuspension sts(self, kNative); // Do this without holding the jni libraries lock to prevent possible deadlocks. 
- typedef void (*JNI_OnUnloadFn)(JavaVM*, void*); + using JNI_OnUnloadFn = void(*)(JavaVM*, void*); for (auto library : unload_libraries) { void* const sym = library->FindSymbol("JNI_OnUnload", nullptr); if (sym == nullptr) { @@ -1026,7 +1026,7 @@ bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, self->SetClassLoaderOverride(class_loader); VLOG(jni) << "[Calling JNI_OnLoad in \"" << path << "\"]"; - typedef int (*JNI_OnLoadFn)(JavaVM*, void*); + using JNI_OnLoadFn = int(*)(JavaVM*, void*); JNI_OnLoadFn jni_on_load = reinterpret_cast<JNI_OnLoadFn>(sym); int version = (*jni_on_load)(this, nullptr); diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc index 0b615a6b9a..cf6543fa26 100644 --- a/runtime/mirror/object_test.cc +++ b/runtime/mirror/object_test.cc @@ -204,7 +204,7 @@ TEST_F(ObjectTest, AllocArray_FillUsable) { template<typename ArrayT> void TestPrimitiveArray(ClassLinker* cl) { ScopedObjectAccess soa(Thread::Current()); - typedef typename ArrayT::ElementType T; + using T = typename ArrayT::ElementType; StackHandleScope<2> hs(soa.Self()); Handle<ArrayT> a = hs.NewHandle(ArrayT::Alloc(soa.Self(), 2)); @@ -252,9 +252,9 @@ TEST_F(ObjectTest, PrimitiveArray_Short_Alloc) { } TEST_F(ObjectTest, PrimitiveArray_Double_Alloc) { - typedef DoubleArray ArrayT; + using ArrayT = DoubleArray; ScopedObjectAccess soa(Thread::Current()); - typedef typename ArrayT::ElementType T; + using T = typename ArrayT::ElementType; StackHandleScope<2> hs(soa.Self()); Handle<ArrayT> a = hs.NewHandle(ArrayT::Alloc(soa.Self(), 2)); @@ -283,9 +283,9 @@ TEST_F(ObjectTest, PrimitiveArray_Double_Alloc) { } TEST_F(ObjectTest, PrimitiveArray_Float_Alloc) { - typedef FloatArray ArrayT; + using ArrayT = FloatArray; ScopedObjectAccess soa(Thread::Current()); - typedef typename ArrayT::ElementType T; + using T = typename ArrayT::ElementType; StackHandleScope<2> hs(soa.Self()); Handle<ArrayT> a = hs.NewHandle(ArrayT::Alloc(soa.Self(), 2)); diff --git a/runtime/mirror/var_handle.cc b/runtime/mirror/var_handle.cc index 903826a047..ba99a07842 100644 --- a/runtime/mirror/var_handle.cc +++ b/runtime/mirror/var_handle.cc @@ -691,7 +691,7 @@ class TypeAdaptorAccessor : public Object::Accessor<T> { template <typename T> class FieldAccessViaAccessor { public: - typedef Object::Accessor<T> Accessor; + using Accessor = Object::Accessor<T>; // Apply an Accessor to get a field in an object. static void Get(ObjPtr<Object> obj, diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index 71fabd0250..0d1fe44725 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -323,6 +323,9 @@ static jboolean DexFile_closeDexFile(JNIEnv* env, jclass, jobject cookie) { } Runtime* const runtime = Runtime::Current(); bool all_deleted = true; + // We need to clear the caches since they may contain pointers to the dex instructions. + // A different dex file can be loaded at the same memory location later by chance. 
+ Thread::ClearAllInterpreterCaches(); { ScopedObjectAccess soa(env); ObjPtr<mirror::Object> dex_files_object = soa.Decode<mirror::Object>(cookie); diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index 0e619407e5..b18a048c60 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -325,7 +325,7 @@ static void VMRuntime_runHeapTasks(JNIEnv* env, jobject) { Runtime::Current()->GetHeap()->GetTaskProcessor()->RunAllTasks(ThreadForEnv(env)); } -typedef std::map<std::string, ObjPtr<mirror::String>> StringTable; +using StringTable = std::map<std::string, ObjPtr<mirror::String>>; class PreloadDexCachesStringsVisitor : public SingleRootVisitor { public: diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc index 0579b6e39d..5b965090d2 100644 --- a/runtime/oat_file.cc +++ b/runtime/oat_file.cc @@ -410,7 +410,7 @@ inline static bool ReadOatDexFileData(const OatFile& oat_file, return false; } static_assert(std::is_trivial<T>::value, "T must be a trivial type"); - typedef __attribute__((__aligned__(1))) T unaligned_type; + using unaligned_type __attribute__((__aligned__(1))) = T; *value = *reinterpret_cast<const unaligned_type*>(*oat); *oat += sizeof(T); return true; diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc index 1f0b26529a..a9ef9a3fa9 100644 --- a/runtime/oat_file_manager.cc +++ b/runtime/oat_file_manager.cc @@ -151,7 +151,7 @@ OatFileManager::~OatFileManager() { } std::vector<const OatFile*> OatFileManager::RegisterImageOatFiles( - std::vector<gc::space::ImageSpace*> spaces) { + const std::vector<gc::space::ImageSpace*>& spaces) { std::vector<const OatFile*> oat_files; for (gc::space::ImageSpace* space : spaces) { oat_files.push_back(RegisterOatFile(space->ReleaseOatFile())); @@ -526,6 +526,8 @@ std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat( if (source_oat_file != nullptr) { bool added_image_space = false; if (source_oat_file->IsExecutable()) { + ScopedTrace app_image_timing("AppImage:Loading"); + // We need to throw away the image space if we are debuggable but the oat-file source of the // image is not otherwise we might get classes with inlined methods or other such things. std::unique_ptr<gc::space::ImageSpace> image_space; diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h index 4132b25280..7d96a7a678 100644 --- a/runtime/oat_file_manager.h +++ b/runtime/oat_file_manager.h @@ -73,7 +73,8 @@ class OatFileManager { // Returns the oat files for the images, registers the oat files. // Takes ownership of the imagespace's underlying oat files. - std::vector<const OatFile*> RegisterImageOatFiles(std::vector<gc::space::ImageSpace*> spaces) + std::vector<const OatFile*> RegisterImageOatFiles( + const std::vector<gc::space::ImageSpace*>& spaces) REQUIRES(!Locks::oat_file_manager_lock_); // Finds or creates the oat file holding dex_location. 
Then loads and returns diff --git a/runtime/scoped_thread_state_change.cc b/runtime/scoped_thread_state_change.cc index edbce05325..ae833b48de 100644 --- a/runtime/scoped_thread_state_change.cc +++ b/runtime/scoped_thread_state_change.cc @@ -20,6 +20,7 @@ #include "base/casts.h" #include "jni/java_vm_ext.h" +#include "mirror/object-inl.h" #include "obj_ptr-inl.h" #include "runtime-inl.h" diff --git a/runtime/subtype_check_info_test.cc b/runtime/subtype_check_info_test.cc index 53230930e6..9bd135e4c2 100644 --- a/runtime/subtype_check_info_test.cc +++ b/runtime/subtype_check_info_test.cc @@ -131,7 +131,7 @@ struct SubtypeCheckInfoTest : public ::testing::Test { // Create an SubtypeCheckInfo with the same depth, but with everything else reset. // Returns: SubtypeCheckInfo in the Uninitialized state. - static SubtypeCheckInfo CopyCleared(SubtypeCheckInfo sc) { + static SubtypeCheckInfo CopyCleared(const SubtypeCheckInfo& sc) { SubtypeCheckInfo cleared_copy{}; cleared_copy.depth_ = sc.depth_; DCHECK_EQ(SubtypeCheckInfo::kUninitialized, cleared_copy.GetState()); diff --git a/runtime/thread.cc b/runtime/thread.cc index 8a8f53743e..b6f0965fce 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -3147,8 +3147,10 @@ void Thread::ThrowNewWrappedException(const char* exception_class_descriptor, } void Thread::ThrowOutOfMemoryError(const char* msg) { - LOG(WARNING) << StringPrintf("Throwing OutOfMemoryError \"%s\"%s", - msg, (tls32_.throwing_OutOfMemoryError ? " (recursive case)" : "")); + LOG(WARNING) << "Throwing OutOfMemoryError " + << '"' << msg << '"' + << " (VmSize " << GetProcessStatus("VmSize") + << (tls32_.throwing_OutOfMemoryError ? ", recursive case)" : ")"); if (!tls32_.throwing_OutOfMemoryError) { tls32_.throwing_OutOfMemoryError = true; ThrowNewException("Ljava/lang/OutOfMemoryError;", msg); @@ -4076,4 +4078,13 @@ void Thread::SetReadBarrierEntrypoints() { UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, /* is_active*/ true); } +void Thread::ClearAllInterpreterCaches() { + static struct ClearInterpreterCacheClosure : Closure { + virtual void Run(Thread* thread) { + thread->GetInterpreterCache()->Clear(thread); + } + } closure; + Runtime::Current()->GetThreadList()->RunCheckpoint(&closure); +} + } // namespace art diff --git a/runtime/thread.h b/runtime/thread.h index d169a62198..3c85b80976 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -38,6 +38,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "handle_scope.h" #include "instrumentation.h" +#include "interpreter/interpreter_cache.h" #include "jvalue.h" #include "managed_stack.h" #include "offsets.h" @@ -1299,6 +1300,29 @@ class Thread { jobject thread_group) REQUIRES_SHARED(Locks::mutator_lock_); + ALWAYS_INLINE InterpreterCache* GetInterpreterCache() { + return &interpreter_cache_; + } + + // Clear all thread-local interpreter caches. + // + // Since the caches are keyed by memory pointer to dex instructions, this must be + // called when any dex code is unloaded (before different code gets loaded at the + // same memory location). + // + // If presence of cache entry implies some pre-conditions, this must also be + // called if the pre-conditions might no longer hold true. 
+ static void ClearAllInterpreterCaches(); + + template<PointerSize pointer_size> + static ThreadOffset<pointer_size> InterpreterCacheOffset() { + return ThreadOffset<pointer_size>(OFFSETOF_MEMBER(Thread, interpreter_cache_)); + } + + static int InterpreterCacheSizeLog2() { + return WhichPowerOf2(InterpreterCache::kSize); + } + private: explicit Thread(bool daemon); ~Thread() REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_); @@ -1788,6 +1812,11 @@ class Thread { // be false for threads where '!can_call_into_java_'. bool can_be_suspended_by_user_code_; + // Small thread-local cache to be used from the interpreter. + // It is keyed by the dex instruction pointer. + // The value is opcode-dependent (e.g. field offset). + InterpreterCache interpreter_cache_; + friend class Dbg; // For SetStateUnsafe. friend class gc::collector::SemiSpace; // For getting stack traces. friend class Runtime; // For CreatePeer. diff --git a/runtime/trace.cc b/runtime/trace.cc index 7e48bae581..0e8d318f22 100644 --- a/runtime/trace.cc +++ b/runtime/trace.cc @@ -1124,7 +1124,7 @@ static void DumpThread(Thread* t, void* arg) { void Trace::DumpThreadList(std::ostream& os) { Thread* self = Thread::Current(); - for (auto it : exited_threads_) { + for (const auto& it : exited_threads_) { os << it.first << "\t" << it.second << "\n"; } Locks::thread_list_lock_->AssertNotHeld(self); diff --git a/runtime/transaction.cc b/runtime/transaction.cc index c9766bc9ca..1e5b2bbd4c 100644 --- a/runtime/transaction.cc +++ b/runtime/transaction.cc @@ -320,7 +320,7 @@ void Transaction::VisitRoots(RootVisitor* visitor) { void Transaction::VisitObjectLogs(RootVisitor* visitor) { // List of moving roots. - typedef std::pair<mirror::Object*, mirror::Object*> ObjectPair; + using ObjectPair = std::pair<mirror::Object*, mirror::Object*>; std::list<ObjectPair> moving_roots; // Visit roots. @@ -348,7 +348,7 @@ void Transaction::VisitObjectLogs(RootVisitor* visitor) { void Transaction::VisitArrayLogs(RootVisitor* visitor) { // List of moving roots. 
- typedef std::pair<mirror::Array*, mirror::Array*> ArrayPair; + using ArrayPair = std::pair<mirror::Array*, mirror::Array*>; std::list<ArrayPair> moving_roots; for (auto& it : array_logs_) { diff --git a/sigchainlib/sigchain_test.cc b/sigchainlib/sigchain_test.cc index 53e1e40454..bb997877a1 100644 --- a/sigchainlib/sigchain_test.cc +++ b/sigchainlib/sigchain_test.cc @@ -38,7 +38,7 @@ #include "sigchain.h" #if !defined(__BIONIC__) -typedef sigset_t sigset64_t; +using sigset64_t = sigset_t; static int sigemptyset64(sigset64_t* set) { return sigemptyset(set); diff --git a/simulator/code_simulator_container.cc b/simulator/code_simulator_container.cc index 3206bc7844..dc553dfe5d 100644 --- a/simulator/code_simulator_container.cc +++ b/simulator/code_simulator_container.cc @@ -34,13 +34,13 @@ CodeSimulatorContainer::CodeSimulatorContainer(InstructionSet target_isa) if (libart_simulator_handle_ == nullptr) { VLOG(simulator) << "Could not load " << libart_simulator_so_name << ": " << dlerror(); } else { - typedef CodeSimulator* (*create_code_simulator_ptr_)(InstructionSet target_isa); - create_code_simulator_ptr_ create_code_simulator_ = - reinterpret_cast<create_code_simulator_ptr_>( + using CreateCodeSimulatorPtr = CodeSimulator*(*)(InstructionSet); + CreateCodeSimulatorPtr create_code_simulator = + reinterpret_cast<CreateCodeSimulatorPtr>( dlsym(libart_simulator_handle_, "CreateCodeSimulator")); - DCHECK(create_code_simulator_ != nullptr) << "Fail to find symbol of CreateCodeSimulator: " + DCHECK(create_code_simulator != nullptr) << "Fail to find symbol of CreateCodeSimulator: " << dlerror(); - simulator_ = create_code_simulator_(target_isa); + simulator_ = create_code_simulator(target_isa); } } diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc index 33a8f5bba2..540e6ce357 100644 --- a/test/004-JniTest/jni_test.cc +++ b/test/004-JniTest/jni_test.cc @@ -62,7 +62,7 @@ static void* AttachHelper(void* arg) { int attach_result = jvm->AttachCurrentThread(&env, &args); CHECK_EQ(attach_result, 0); - typedef void (*Fn)(JNIEnv*); + using Fn = void(*)(JNIEnv*); Fn fn = reinterpret_cast<Fn>(arg); fn(env); @@ -704,7 +704,7 @@ class JniCallDefaultMethodsTest { } private: - void TestCalls(const char* declaring_class, std::vector<const char*> methods) { + void TestCalls(const char* declaring_class, const std::vector<const char*>& methods) { jmethodID new_method = env_->GetMethodID(concrete_class_, "<init>", "()V"); jobject obj = env_->NewObject(concrete_class_, new_method); CHECK(!env_->ExceptionCheck()); diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc index a74f7638bd..cc7e806ba5 100644 --- a/test/115-native-bridge/nativebridge.cc +++ b/test/115-native-bridge/nativebridge.cc @@ -45,7 +45,7 @@ static const android::NativeBridgeRuntimeCallbacks* gNativeBridgeArtCallbacks; static jint trampoline_JNI_OnLoad(JavaVM* vm, void* reserved) { JNIEnv* env = nullptr; - typedef jint (*FnPtr_t)(JavaVM*, void*); + using FnPtr_t = jint(*)(JavaVM*, void*); FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("JNI_OnLoad")->fnPtr); vm->GetEnv(reinterpret_cast<void **>(&env), JNI_VERSION_1_6); @@ -91,9 +91,8 @@ static jint trampoline_JNI_OnLoad(JavaVM* vm, void* reserved) { return fnPtr(vm, reserved); } -static void trampoline_Java_Main_testFindClassOnAttachedNativeThread(JNIEnv* env, - jclass klass) { - typedef void (*FnPtr_t)(JNIEnv*, jclass); +static void trampoline_Java_Main_testFindClassOnAttachedNativeThread(JNIEnv* env, jclass klass) { + 
using FnPtr_t = void(*)(JNIEnv*, jclass); FnPtr_t fnPtr = reinterpret_cast<FnPtr_t> (find_native_bridge_method("testFindClassOnAttachedNativeThread")->fnPtr); printf("%s called!\n", __FUNCTION__); @@ -102,7 +101,7 @@ static void trampoline_Java_Main_testFindClassOnAttachedNativeThread(JNIEnv* env static void trampoline_Java_Main_testFindFieldOnAttachedNativeThreadNative(JNIEnv* env, jclass klass) { - typedef void (*FnPtr_t)(JNIEnv*, jclass); + using FnPtr_t = void(*)(JNIEnv*, jclass); FnPtr_t fnPtr = reinterpret_cast<FnPtr_t> (find_native_bridge_method("testFindFieldOnAttachedNativeThreadNative")->fnPtr); printf("%s called!\n", __FUNCTION__); @@ -111,7 +110,7 @@ static void trampoline_Java_Main_testFindFieldOnAttachedNativeThreadNative(JNIEn static void trampoline_Java_Main_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env, jclass klass) { - typedef void (*FnPtr_t)(JNIEnv*, jclass); + using FnPtr_t = void(*)(JNIEnv*, jclass); FnPtr_t fnPtr = reinterpret_cast<FnPtr_t> (find_native_bridge_method("testCallStaticVoidMethodOnSubClassNative")->fnPtr); printf("%s called!\n", __FUNCTION__); @@ -119,7 +118,7 @@ static void trampoline_Java_Main_testCallStaticVoidMethodOnSubClassNative(JNIEnv } static jobject trampoline_Java_Main_testGetMirandaMethodNative(JNIEnv* env, jclass klass) { - typedef jobject (*FnPtr_t)(JNIEnv*, jclass); + using FnPtr_t = jobject(*)(JNIEnv*, jclass); FnPtr_t fnPtr = reinterpret_cast<FnPtr_t> (find_native_bridge_method("testGetMirandaMethodNative")->fnPtr); printf("%s called!\n", __FUNCTION__); @@ -127,7 +126,7 @@ static jobject trampoline_Java_Main_testGetMirandaMethodNative(JNIEnv* env, jcla } static void trampoline_Java_Main_testNewStringObject(JNIEnv* env, jclass klass) { - typedef void (*FnPtr_t)(JNIEnv*, jclass); + using FnPtr_t = void(*)(JNIEnv*, jclass); FnPtr_t fnPtr = reinterpret_cast<FnPtr_t> (find_native_bridge_method("testNewStringObject")->fnPtr); printf("%s called!\n", __FUNCTION__); @@ -135,7 +134,7 @@ static void trampoline_Java_Main_testNewStringObject(JNIEnv* env, jclass klass) } static void trampoline_Java_Main_testZeroLengthByteBuffers(JNIEnv* env, jclass klass) { - typedef void (*FnPtr_t)(JNIEnv*, jclass); + using FnPtr_t = void(*)(JNIEnv*, jclass); FnPtr_t fnPtr = reinterpret_cast<FnPtr_t> (find_native_bridge_method("testZeroLengthByteBuffers")->fnPtr); printf("%s called!\n", __FUNCTION__); @@ -145,8 +144,8 @@ static void trampoline_Java_Main_testZeroLengthByteBuffers(JNIEnv* env, jclass k static jbyte trampoline_Java_Main_byteMethod(JNIEnv* env, jclass klass, jbyte b1, jbyte b2, jbyte b3, jbyte b4, jbyte b5, jbyte b6, jbyte b7, jbyte b8, jbyte b9, jbyte b10) { - typedef jbyte (*FnPtr_t)(JNIEnv*, jclass, jbyte, jbyte, jbyte, jbyte, jbyte, - jbyte, jbyte, jbyte, jbyte, jbyte); + using FnPtr_t = jbyte(*)(JNIEnv*, jclass, jbyte, jbyte, jbyte, jbyte, jbyte, jbyte, jbyte, jbyte, + jbyte, jbyte); FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("byteMethod")->fnPtr); printf("%s called!\n", __FUNCTION__); return fnPtr(env, klass, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10); @@ -155,8 +154,8 @@ static jbyte trampoline_Java_Main_byteMethod(JNIEnv* env, jclass klass, jbyte b1 static jshort trampoline_Java_Main_shortMethod(JNIEnv* env, jclass klass, jshort s1, jshort s2, jshort s3, jshort s4, jshort s5, jshort s6, jshort s7, jshort s8, jshort s9, jshort s10) { - typedef jshort (*FnPtr_t)(JNIEnv*, jclass, jshort, jshort, jshort, jshort, jshort, - jshort, jshort, jshort, jshort, jshort); + using FnPtr_t = jshort(*)(JNIEnv*, jclass, jshort, 
jshort, jshort, jshort, jshort, jshort, jshort, + jshort, jshort, jshort); FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("shortMethod")->fnPtr); printf("%s called!\n", __FUNCTION__); return fnPtr(env, klass, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10); @@ -166,7 +165,7 @@ static jboolean trampoline_Java_Main_booleanMethod(JNIEnv* env, jclass klass, jb jboolean b2, jboolean b3, jboolean b4, jboolean b5, jboolean b6, jboolean b7, jboolean b8, jboolean b9, jboolean b10) { - typedef jboolean (*FnPtr_t)(JNIEnv*, jclass, jboolean, jboolean, jboolean, jboolean, jboolean, + using FnPtr_t = jboolean(*)(JNIEnv*, jclass, jboolean, jboolean, jboolean, jboolean, jboolean, jboolean, jboolean, jboolean, jboolean, jboolean); FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("booleanMethod")->fnPtr); printf("%s called!\n", __FUNCTION__); @@ -176,8 +175,8 @@ static jboolean trampoline_Java_Main_booleanMethod(JNIEnv* env, jclass klass, jb static jchar trampoline_Java_Main_charMethod(JNIEnv* env, jclass klass, jchar c1, jchar c2, jchar c3, jchar c4, jchar c5, jchar c6, jchar c7, jchar c8, jchar c9, jchar c10) { - typedef jchar (*FnPtr_t)(JNIEnv*, jclass, jchar, jchar, jchar, jchar, jchar, - jchar, jchar, jchar, jchar, jchar); + using FnPtr_t = jchar(*)(JNIEnv*, jclass, jchar, jchar, jchar, jchar, jchar, jchar, jchar, jchar, + jchar, jchar); FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("charMethod")->fnPtr); printf("%s called!\n", __FUNCTION__); return fnPtr(env, klass, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10); diff --git a/test/1900-track-alloc/alloc.cc b/test/1900-track-alloc/alloc.cc index db5617c54c..f2096111da 100644 --- a/test/1900-track-alloc/alloc.cc +++ b/test/1900-track-alloc/alloc.cc @@ -24,7 +24,7 @@ namespace art { namespace Test1900TrackAlloc { -typedef jvmtiError (*GetGlobalState)(jvmtiEnv* env, jlong* allocated); +using GetGlobalState = jvmtiError(*)(jvmtiEnv* env, jlong* allocated); struct AllocTrackingData { GetGlobalState get_global_state; diff --git a/test/1940-ddms-ext/ddm_ext.cc b/test/1940-ddms-ext/ddm_ext.cc index cc29df9a49..452187bdcb 100644 --- a/test/1940-ddms-ext/ddm_ext.cc +++ b/test/1940-ddms-ext/ddm_ext.cc @@ -25,7 +25,7 @@ namespace art { namespace Test1940DdmExt { -typedef jvmtiError (*DdmHandleChunk)(jvmtiEnv* env, +using DdmHandleChunk = jvmtiError(*)(jvmtiEnv* env, jint type_in, jint len_in, const jbyte* data_in, diff --git a/test/1946-list-descriptors/descriptors.cc b/test/1946-list-descriptors/descriptors.cc index 01b306dea5..07fee6141b 100644 --- a/test/1946-list-descriptors/descriptors.cc +++ b/test/1946-list-descriptors/descriptors.cc @@ -24,7 +24,7 @@ namespace art { namespace Test1946Descriptors { -typedef jvmtiError (*GetDescriptorList)(jvmtiEnv* env, jobject loader, jint* cnt, char*** descs); +using GetDescriptorList = jvmtiError(*)(jvmtiEnv* env, jobject loader, jint* cnt, char*** descs); struct DescriptorData { GetDescriptorList get_descriptor_list; diff --git a/test/1951-monitor-enter-no-suspend/raw_monitor.cc b/test/1951-monitor-enter-no-suspend/raw_monitor.cc index 0425e350fd..efd02b6ae4 100644 --- a/test/1951-monitor-enter-no-suspend/raw_monitor.cc +++ b/test/1951-monitor-enter-no-suspend/raw_monitor.cc @@ -26,7 +26,7 @@ namespace art { namespace Test1951MonitorEnterNoSuspend { -typedef jvmtiError (*RawMonitorEnterNoSuspend)(jvmtiEnv* env, jrawMonitorID mon); +using RawMonitorEnterNoSuspend = jvmtiError(*)(jvmtiEnv* env, jrawMonitorID mon); template <typename T> static void Dealloc(T* t) { diff --git 
a/test/684-checker-simd-dotprod/expected.txt b/test/684-checker-simd-dotprod/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/684-checker-simd-dotprod/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/684-checker-simd-dotprod/info.txt b/test/684-checker-simd-dotprod/info.txt new file mode 100644 index 0000000000..6c1efb6296 --- /dev/null +++ b/test/684-checker-simd-dotprod/info.txt @@ -0,0 +1 @@ +Functional tests on dot product idiom SIMD vectorization. diff --git a/test/684-checker-simd-dotprod/src/Main.java b/test/684-checker-simd-dotprod/src/Main.java new file mode 100644 index 0000000000..e0c87161dd --- /dev/null +++ b/test/684-checker-simd-dotprod/src/Main.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import other.TestByte; +import other.TestCharShort; +import other.TestVarious; + +/** + * Tests for dot product idiom vectorization. + */ +public class Main { + public static void main(String[] args) { + TestByte.run(); + TestCharShort.run(); + TestVarious.run(); + System.out.println("passed"); + } +} diff --git a/test/684-checker-simd-dotprod/src/other/TestByte.java b/test/684-checker-simd-dotprod/src/other/TestByte.java new file mode 100644 index 0000000000..9acfc59cc7 --- /dev/null +++ b/test/684-checker-simd-dotprod/src/other/TestByte.java @@ -0,0 +1,484 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package other; + +/** + * Tests for dot product idiom vectorization: byte case. 
+ */ +public class TestByte { + + public static final int ARRAY_SIZE = 1024; + + /// CHECK-START: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdSimple(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC1:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:b\d+>> TypeConversion [<<AddC1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC2:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:b\d+>> TypeConversion [<<AddC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Repl:d\d+>> 
VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplex(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((byte)(a[i] + 1)) * ((byte)(b[i] + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdSimpleUnsigned(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (a[i] & 0xff) * (b[i] & 0xff); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] 
loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:a\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:a\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexUnsigned(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (((a[i] & 0xff) + 1) & 0xff) * (((b[i] & 0xff) + 1) & 0xff); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:b\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:b\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int 
other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexUnsignedCastedToSigned(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((byte)((a[i] & 0xff) + 1)) * ((byte)((b[i] & 0xff) + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:a\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:a\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] 
loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexSignedCastedToUnsigned(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((a[i] + 1) & 0xff) * ((b[i] + 1) & 0xff); + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSignedWidening(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Int8 + public static final int testDotProdSignedWidening(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((short)(a[i])) * ((short)(b[i])); + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdParamSigned(int, byte[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Int8 + public static final int testDotProdParamSigned(int x, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (byte)(x) * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdParamUnsigned(int, byte[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Uint8 + public static final int testDotProdParamUnsigned(int x, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (x & 0xff) * (b[i] & 0xff); + s += temp; + } + return s - 1; + } + + // No DOTPROD cases. + + /// CHECK-START: int other.TestByte.testDotProdIntParam(int, byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdIntParam(int x, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = b[i] * (x); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdSignedToChar(byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSignedToChar(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((char)(a[i])) * ((char)(b[i])); + s += temp; + } + return s - 1; + } + + // Cases when result of Mul is type-converted are not supported. 
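+ // (Likely reason, inferred from the cases below: the idiom needs the int-typed product to reach
+ // the accumulator unchanged, so once the product itself is narrowed or widened before the add,
+ // the Int32 dot product accumulation no longer applies.)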
+ + /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToSignedByte(byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleCastedToSignedByte(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + byte temp = (byte)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToUnsignedByte(byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleCastedToUnsignedByte(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + s += (a[i] * b[i]) & 0xff; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToSignedByte(byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleUnsignedCastedToSignedByte(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + byte temp = (byte)((a[i] & 0xff) * (b[i] & 0xff)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToUnsignedByte(byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleUnsignedCastedToUnsignedByte(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + s += ((a[i] & 0xff) * (b[i] & 0xff)) & 0xff; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToShort(byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleCastedToShort(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + short temp = (short)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToChar(byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleCastedToChar(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + char temp = (char)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToShort(byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleUnsignedCastedToShort(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + short temp = (short)((a[i] & 0xff) * (b[i] & 0xff)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToChar(byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleUnsignedCastedToChar(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + char temp = (char)((a[i] & 0xff) * (b[i] & 0xff)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToLong(byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleUnsignedCastedToLong(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + long temp = (long)((a[i] & 0xff) * (b[i] & 0xff)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdUnsignedSigned(byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdUnsignedSigned(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (a[i] 
& 0xff) * b[i]; + s += temp; + } + return s - 1; + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + private static void testDotProd(byte[] b1, byte[] b2, int[] results) { + expectEquals(results[0], testDotProdSimple(b1, b2)); + expectEquals(results[1], testDotProdComplex(b1, b2)); + expectEquals(results[2], testDotProdSimpleUnsigned(b1, b2)); + expectEquals(results[3], testDotProdComplexUnsigned(b1, b2)); + expectEquals(results[4], testDotProdComplexUnsignedCastedToSigned(b1, b2)); + expectEquals(results[5], testDotProdComplexSignedCastedToUnsigned(b1, b2)); + expectEquals(results[6], testDotProdSignedWidening(b1, b2)); + expectEquals(results[7], testDotProdParamSigned(-128, b2)); + expectEquals(results[8], testDotProdParamUnsigned(-128, b2)); + expectEquals(results[9], testDotProdIntParam(-128, b2)); + expectEquals(results[10], testDotProdSignedToChar(b1, b2)); + expectEquals(results[11], testDotProdSimpleCastedToSignedByte(b1, b2)); + expectEquals(results[12], testDotProdSimpleCastedToUnsignedByte(b1, b2)); + expectEquals(results[13], testDotProdSimpleUnsignedCastedToSignedByte(b1, b2)); + expectEquals(results[14], testDotProdSimpleUnsignedCastedToUnsignedByte(b1, b2)); + expectEquals(results[15], testDotProdSimpleCastedToShort(b1, b2)); + expectEquals(results[16], testDotProdSimpleCastedToChar(b1, b2)); + expectEquals(results[17], testDotProdSimpleUnsignedCastedToShort(b1, b2)); + expectEquals(results[18], testDotProdSimpleUnsignedCastedToChar(b1, b2)); + expectEquals(results[19], testDotProdSimpleUnsignedCastedToLong(b1, b2)); + expectEquals(results[20], testDotProdUnsignedSigned(b1, b2)); + } + + public static void run() { + byte[] b1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; + byte[] b2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; + int[] results_1 = { 64516, 65548, 64516, 65548, 65548, 65548, 64516, -65024, 65024, -65024, + 64516, 4, 4, 4, 4, 64516, 64516, 64516, 64516, 64516, 64516 }; + testDotProd(b1_1, b2_1, results_1); + + byte[] b1_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; + byte[] b2_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; + int[] results_2 = { 80645, 81931, 80645, 81931, 81931, 81931, 80645, -81280, 81280, -81280, + 80645, 5, 5, 5, 5, 80645, 80645, 80645, 80645, 80645, 80645 }; + testDotProd(b1_2, b2_2, results_2); + + byte[] b1_3 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; + byte[] b2_3 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; + int[] results_3 = { -81280, 81291, 81280, 82571, 81291, 82571, -81280, -81280, 81280, -81280, + 41534080, -640, 640, -640, 640, -81280, 246400, 81280, 81280, 81280, 81280 }; + testDotProd(b1_3, b2_3, results_3); + + byte[] b1_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; + byte[] b2_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; + int[] results_4 = { 81920, 80656, 81920, 83216, 80656, 83216, 81920, 81920, 81920, 81920, + -83804160, 0, 0, 0, 0, 81920, 81920, 81920, 81920, 81920, -81920 }; + testDotProd(b1_4, b2_4, results_4); + } + + public static void main(String[] args) { + run(); + } +} diff --git a/test/684-checker-simd-dotprod/src/other/TestCharShort.java b/test/684-checker-simd-dotprod/src/other/TestCharShort.java new file mode 100644 index 0000000000..9cb9db59b3 --- /dev/null +++ 
b/test/684-checker-simd-dotprod/src/other/TestCharShort.java @@ -0,0 +1,552 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package other; + +/** + * Tests for dot product idiom vectorization: char and short case. + */ +public class TestCharShort { + + public static final int ARRAY_SIZE = 1024; + + /// CHECK-START: int other.TestCharShort.testDotProdSimple(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSimple(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdSimple(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdComplex(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC1:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:s\d+>> TypeConversion [<<AddC1>>] loop:<<Loop>> outer_loop:none + 
/// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC2:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:s\d+>> TypeConversion [<<AddC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplex(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplex(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((short)(a[i] + 1)) * ((short)(b[i] + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsigned(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSimpleUnsigned(char[], char[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd 
[<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdSimpleUnsigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdComplexUnsigned(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:c\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:c\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplexUnsigned(char[], char[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexUnsigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((char)(a[i] + 1)) * ((char)(b[i] + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdComplexUnsignedCastedToSigned(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// 
CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:s\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:s\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplexUnsignedCastedToSigned(char[], char[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexUnsignedCastedToSigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((short)(a[i] + 1)) * ((short)(b[i] + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdComplexSignedCastedToUnsigned(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:c\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:c\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int 
other.TestCharShort.testDotProdComplexSignedCastedToUnsigned(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexSignedCastedToUnsigned(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((char)(a[i] + 1)) * ((char)(b[i] + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSignedToInt(short[], short[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Int16 + public static final int testDotProdSignedToInt(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((int)(a[i])) * ((int)(b[i])); + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdParamSigned(int, short[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Int16 + public static final int testDotProdParamSigned(int x, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (short)(x) * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdParamUnsigned(int, char[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Uint16 + public static final int testDotProdParamUnsigned(int x, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (char)(x) * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdIntParam(int, short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdIntParam(int x, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = b[i] * (x); + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSignedToChar(short[], short[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Uint16 + public static final int testDotProdSignedToChar(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((char)(a[i])) * ((char)(b[i])); + s += temp; + } + return s - 1; + } + + // Cases when result of Mul is type-converted are not supported. 
+ + /// CHECK-START: int other.TestCharShort.testDotProdSimpleMulCastedToSigned(short[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd type:Uint16 + public static final int testDotProdSimpleMulCastedToSigned(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + short temp = (short)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + + /// CHECK-START: int other.TestCharShort.testDotProdSimpleMulCastedToUnsigned(short[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleMulCastedToUnsigned(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + char temp = (char)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedMulCastedToSigned(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleUnsignedMulCastedToSigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + short temp = (short)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedMulCastedToUnsigned(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleUnsignedMulCastedToUnsigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + char temp = (char)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdSimpleCastedToShort(short[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleCastedToShort(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + short temp = (short)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdSimpleCastedToChar(short[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleCastedToChar(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + char temp = (char)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastedToShort(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleUnsignedCastedToShort(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + short temp = (short)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastedToChar(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleUnsignedCastedToChar(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + char temp = (char)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastedToLong(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSimpleUnsignedCastedToLong(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + long temp = (long)(a[i] * b[i]); + s += temp; + } + return s - 1; + } + + // Narrowing conversions. 
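+ // (Presumably not recognized because the 16-bit array loads are narrowed to 8-bit operands
+ // before the multiply, so the loop mixes operand sizes.)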
+ + /// CHECK-START: int other.TestCharShort.testDotProdSignedNarrowerSigned(short[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSignedNarrowerSigned(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((byte)(a[i])) * ((byte)(b[i])); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdSignedNarrowerUnsigned(short[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSignedNarrowerUnsigned(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (a[i] & 0xff) * (b[i] & 0xff); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdUnsignedNarrowerSigned(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdUnsignedNarrowerSigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((byte)(a[i])) * ((byte)(b[i])); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdUnsignedNarrowerUnsigned(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdUnsignedNarrowerUnsigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (a[i] & 0xff) * (b[i] & 0xff); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdUnsignedSigned(char[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdUnsignedSigned(char[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s - 1; + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + private static void testDotProd(short[] s1, short[] s2, char[] c1, char[] c2, int[] results) { + expectEquals(results[0], testDotProdSimple(s1, s2)); + expectEquals(results[1], testDotProdComplex(s1, s2)); + expectEquals(results[2], testDotProdSimpleUnsigned(c1, c2)); + expectEquals(results[3], testDotProdComplexUnsigned(c1, c2)); + expectEquals(results[4], testDotProdComplexUnsignedCastedToSigned(c1, c2)); + expectEquals(results[5], testDotProdComplexSignedCastedToUnsigned(s1, s2)); + expectEquals(results[6], testDotProdSignedToInt(s1, s2)); + expectEquals(results[7], testDotProdParamSigned(-32768, s2)); + expectEquals(results[8], testDotProdParamUnsigned(-32768, c2)); + expectEquals(results[9], testDotProdIntParam(-32768, s2)); + expectEquals(results[10], testDotProdSignedToChar(s1, s2)); + expectEquals(results[11], testDotProdSimpleMulCastedToSigned(s1, s2)); + expectEquals(results[12], testDotProdSimpleMulCastedToUnsigned(s1, s2)); + expectEquals(results[13], testDotProdSimpleUnsignedMulCastedToSigned(c1, c2)); + expectEquals(results[14], testDotProdSimpleUnsignedMulCastedToUnsigned(c1, c2)); + expectEquals(results[15], testDotProdSimpleCastedToShort(s1, s2)); + expectEquals(results[16], testDotProdSimpleCastedToChar(s1, s2)); + expectEquals(results[17], testDotProdSimpleUnsignedCastedToShort(c1, c2)); + expectEquals(results[18], testDotProdSimpleUnsignedCastedToChar(c1, c2)); + expectEquals(results[19], testDotProdSimpleUnsignedCastedToLong(c1, c2)); + expectEquals(results[20], testDotProdSignedNarrowerSigned(s1, s2)); + expectEquals(results[21], 
testDotProdSignedNarrowerUnsigned(s1, s2)); + expectEquals(results[22], testDotProdUnsignedNarrowerSigned(c1, c2)); + expectEquals(results[23], testDotProdUnsignedNarrowerUnsigned(c1, c2)); + expectEquals(results[24], testDotProdUnsignedSigned(c1, s2)); + } + + public static void run() { + final short MAX_S = Short.MAX_VALUE; + final short MIN_S = Short.MAX_VALUE; + + short[] s1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; + short[] s2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; + char[] c1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; + char[] c2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; + int[] results_1 = { 2147352578, -2147483634, 2147352578, -2147483634, -2147483634, -2147483634, + 2147352578, -2147418112, 2147418112, -2147418112, 2147352578, + 2, 2, 2, 2, 2, 2, 2, 2, 2147352578, 2, 130050, 2, 130050, 2147352578 }; + testDotProd(s1_1, s2_1, c1_1, c2_1, results_1); + + short[] s1_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; + short[] s2_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; + char[] c1_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; + char[] c2_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; + int[] results_2 = { -262140, 12, -262140, 12, 12, 12, -262140, 131072, -131072, 131072, + -262140, 4, 4, 4, 4, 4, 4, 4, 4, -262140, 4, 260100, 4, 260100, -262140 }; + testDotProd(s1_2, s2_2, c1_2, c2_2, results_2); + + short[] s1_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + short[] s2_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; + char[] c1_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + char[] c2_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; + int[] results_3 = { 2147352578, -2147483634, 2147352578, -2147483634, -2147483634, + -2147483634, 2147352578, -2147418112, 2147418112, -2147418112, + 2147352578, 2, 2, 2, 2, 2, 2, 2, 2, 2147352578, 2, 130050, 2, + 130050, 2147352578}; + testDotProd(s1_3, s2_3, c1_3, c2_3, results_3); + + + short[] s1_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + short[] s2_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + char[] c1_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + char[] c2_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + int[] results_4 = { -1073938429, -1073741811, -1073938429, -1073741811, -1073741811, + -1073741811, -1073938429, 1073840128, -1073840128, 1073840128, + -1073938429, 3, 3, 3, 3, 3, 3, 3, 3, -1073938429, 3, 195075, 3, + 195075, -1073938429 }; + testDotProd(s1_4, s2_4, c1_4, c2_4, results_4); + } + + public static void main(String[] args) { + run(); + } +} diff --git a/test/684-checker-simd-dotprod/src/other/TestVarious.java b/test/684-checker-simd-dotprod/src/other/TestVarious.java new file mode 100644 index 0000000000..3f460982f2 --- /dev/null +++ b/test/684-checker-simd-dotprod/src/other/TestVarious.java @@ -0,0 +1,422 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package other; + +/** + * Tests for dot product idiom vectorization. + */ +public class TestVarious { + + /// CHECK-START: int other.TestVarious.testDotProdConstRight(byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Const89>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdConstRight(byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const89>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Repl>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdConstRight(byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = b[i] * 89; + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdConstLeft(byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Const89>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdConstLeft(byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> 
IntConstant 16 loop:none + /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const89>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Repl>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdConstLeft(byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = 89 * (b[i] & 0xff); + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdLoopInvariantConvRight(byte[], int) loop_optimization (before) + /// CHECK-DAG: <<Param:i\d+>> ParameterValue loop:none + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConstL:i\d+>> IntConstant 129 loop:none + /// CHECK-DAG: <<AddP:i\d+>> Add [<<Param>>,<<ConstL>>] loop:none + /// CHECK-DAG: <<TypeCnv:b\d+>> TypeConversion [<<AddP>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<TypeCnv>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdLoopInvariantConvRight(byte[], int) loop_optimization (after) + /// CHECK-DAG: <<Param:i\d+>> ParameterValue loop:none + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<ConstL:i\d+>> IntConstant 129 loop:none + /// CHECK-DAG: <<AddP:i\d+>> Add [<<Param>>,<<ConstL>>] loop:none + /// CHECK-DAG: <<TypeCnv:b\d+>> TypeConversion [<<AddP>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<TypeCnv>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Repl>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdLoopInvariantConvRight(byte[] b, int param) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = b[i] * ((byte)(param + 129)); + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdByteToChar(char[], char[]) loop_optimization (after) + /// CHECK-NOT: 
VecDotProd + public static final int testDotProdByteToChar(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((char)((byte)(a[i] + 129))) * b[i]; + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdMixedSize(byte[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdMixedSize(byte[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdMixedSizeAndSign(byte[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdMixedSizeAndSign(byte[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdInt32(int[], int[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdInt32(int[], int[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:d\d+>> VecMul [<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdInt32(int[] a, int[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsigned1(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Const2>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul1:i\d+>> Mul 
[<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:a\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:a\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul2:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi3>>,<<Mul2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsigned1(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [<<Const2>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi3>>,<<Load1>>,<<Load2>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + public static final int testDotProdBothSignedUnsigned1(byte[] a, byte[] b) { + int s1 = 1; + int s2 = 2; + for (int i = 0; i < b.length; i++) { + byte a_val = a[i]; + byte b_val = b[i]; + s1 += a_val * b_val; + s2 += (a_val & 0xff) * (b_val & 0xff); + } + return s1 + s2; + } + + /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsigned2(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Const2>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:a\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul1:i\d+>> Mul [<<Get2>>,<<TypeC1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi3>>,<<Mul1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul2:i\d+>> Mul [<<Get1>>,<<Const42>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsigned2(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: 
<<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const42>>] loop:none + /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [<<Const2>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi3>>,<<Load2>>,<<Load1>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Repl>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + public static final int testDotProdBothSignedUnsigned2(byte[] a, byte[] b) { + int s1 = 1; + int s2 = 2; + for (int i = 0; i < b.length; i++) { + byte a_val = a[i]; + byte b_val = b[i]; + s2 += (a_val & 0xff) * (b_val & 0xff); + s1 += a_val * 42; + } + return s1 + s2; + } + + /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsignedDoubleLoad(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Const2>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<GetB1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<GetB2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul1:i\d+>> Mul [<<GetB1>>,<<GetB2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<GetA1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<GetA2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul2:i\d+>> Mul [<<GetA1>>,<<GetA2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi3>>,<<Mul2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsignedDoubleLoad(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [<<Const2>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: 
<<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load3:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load4:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi3>>,<<Load3>>,<<Load4>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + public static final int testDotProdBothSignedUnsignedDoubleLoad(byte[] a, byte[] b) { + int s1 = 1; + int s2 = 2; + for (int i = 0; i < b.length; i++) { + s1 += a[i] * b[i]; + s2 += (a[i] & 0xff) * (b[i] & 0xff); + } + return s1 + s2; + } + + /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsignedChar(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Const2>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeS1:s\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeS2:s\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul1:i\d+>> Mul [<<TypeS1>>,<<TypeS2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi3>>,<<Mul1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul2:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsignedChar(char[], char[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [<<Const2>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi3>>,<<Load1>>,<<Load2>>] type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + public static final int testDotProdBothSignedUnsignedChar(char[] a, char[] b) { + int s1 = 1; + int s2 = 2; + for (int i = 0; i < b.length; i++) { + char a_val = a[i]; + char b_val = b[i]; + s2 += ((short)a_val) * ((short)b_val); + s1 += a_val * b_val; + } + return s1 
+ s2; + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void run() { + final short MAX_S = Short.MAX_VALUE; + final short MIN_S = Short.MAX_VALUE; + + byte[] b1 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; + byte[] b2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; + + char[] c1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + char[] c2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + + int[] i1 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; + int[] i2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; + + short[] s1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + + expectEquals(56516, testDotProdConstRight(b2)); + expectEquals(56516, testDotProdConstLeft(b2)); + expectEquals(1271, testDotProdLoopInvariantConvRight(b2, 129)); + expectEquals(-8519423, testDotProdByteToChar(c1, c2)); + expectEquals(-8388351, testDotProdMixedSize(b1, s1)); + expectEquals(-8388351, testDotProdMixedSizeAndSign(b1, c2)); + expectEquals(-81279, testDotProdInt32(i1, i2)); + expectEquals(3, testDotProdBothSignedUnsigned1(b1, b2)); + expectEquals(54403, testDotProdBothSignedUnsigned2(b1, b2)); + expectEquals(3, testDotProdBothSignedUnsignedDoubleLoad(b1, b2)); + expectEquals(-262137, testDotProdBothSignedUnsignedChar(c1, c2)); + } + + public static void main(String[] args) { + run(); + } +} diff --git a/test/979-const-method-handle/expected.txt b/test/979-const-method-handle/expected.txt index bbaaedb0af..85317092ff 100644 --- a/test/979-const-method-handle/expected.txt +++ b/test/979-const-method-handle/expected.txt @@ -7,3 +7,11 @@ name is HoverFly 2.718281828459045 repeatConstMethodHandle() Attempting to set Math.E raised IAE +Quack +Moo +Woof +Test +Getting field in TestTokenizer raised WMTE (woohoo!) +Stack: tos was 7 +Stack: capacity was 10 +Stack: capacity is 2 diff --git a/test/979-const-method-handle/src/Main.java b/test/979-const-method-handle/src/Main.java index 427ca7a306..5368a22b21 100644 --- a/test/979-const-method-handle/src/Main.java +++ b/test/979-const-method-handle/src/Main.java @@ -18,6 +18,11 @@ import annotations.ConstantMethodHandle; import annotations.ConstantMethodType; import java.lang.invoke.MethodHandle; import java.lang.invoke.MethodType; +import java.lang.invoke.WrongMethodTypeException; + +import java.io.StreamTokenizer; +import java.io.StringReader; +import java.util.Stack; class Main { /** @@ -45,6 +50,12 @@ class Main { private int field; } + private static class TestTokenizer extends StreamTokenizer { + public TestTokenizer(String message) { + super(new StringReader(message)); + } + } + @ConstantMethodType( returnType = String.class, parameterTypes = {int.class, Integer.class, System.class}) @@ -136,6 +147,48 @@ class Main { return null; } + @ConstantMethodHandle( + kind = ConstantMethodHandle.INSTANCE_GET, + owner = "java/io/StreamTokenizer", + fieldOrMethodName = "sval", + descriptor = "Ljava/lang/String;") + private static MethodHandle getSval() { + unreachable(); + return null; + } + + // This constant-method-handle references a private instance field. If + // referenced in bytecode it raises IAE at load time. 
+ @ConstantMethodHandle( + kind = ConstantMethodHandle.INSTANCE_PUT, + owner = "java/io/StreamTokenizer", + fieldOrMethodName = "peekc", + descriptor = "I") + private static MethodHandle putPeekc() { + unreachable(); + return null; + } + + @ConstantMethodHandle( + kind = ConstantMethodHandle.INVOKE_VIRTUAL, + owner = "java/util/Stack", + fieldOrMethodName = "pop", + descriptor = "()Ljava/lang/Object;") + private static MethodHandle stackPop() { + unreachable(); + return null; + } + + @ConstantMethodHandle( + kind = ConstantMethodHandle.INVOKE_VIRTUAL, + owner = "java/util/Stack", + fieldOrMethodName = "trimToSize", + descriptor = "()V") + private static MethodHandle stackTrim() { + unreachable(); + return null; + } + private static void repeatConstMethodHandle() throws Throwable { System.out.println("repeatConstMethodHandle()"); String[] values = {"A", "B", "C"}; @@ -166,5 +219,29 @@ class Main { } catch (IllegalAccessError expected) { System.out.println("Attempting to set Math.E raised IAE"); } + + StreamTokenizer st = new StreamTokenizer(new StringReader("Quack Moo Woof")); + while (st.nextToken() != StreamTokenizer.TT_EOF) { + System.out.println((String) getSval().invokeExact(st)); + } + + TestTokenizer tt = new TestTokenizer("Test message 123"); + tt.nextToken(); + System.out.println((String) getSval().invoke(tt)); + try { + System.out.println((String) getSval().invokeExact(tt)); + } catch (WrongMethodTypeException wmte) { + System.out.println("Getting field in TestTokenizer raised WMTE (woohoo!)"); + } + + Stack stack = new Stack(); + stack.push(Integer.valueOf(3)); + stack.push(Integer.valueOf(5)); + stack.push(Integer.valueOf(7)); + Object tos = stackPop().invokeExact(stack); + System.out.println("Stack: tos was " + tos); + System.out.println("Stack: capacity was " + stack.capacity()); + stackTrim().invokeExact(stack); + System.out.println("Stack: capacity is " + stack.capacity()); } } diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar index de55440184..d5db76a82a 100755 --- a/test/etc/run-test-jar +++ b/test/etc/run-test-jar @@ -988,6 +988,7 @@ else if [ "$USE_GDB" = "y" ]; then # When running under gdb, we cannot do piping and grepping... + echo "Run 'gdbclient.py -p <pid printed below>' to debug." 
$cmdline "$@" else if [ "$TIME_OUT" != "gdb" ]; then diff --git a/test/knownfailures.json b/test/knownfailures.json index f0b88e9a32..8ca0012282 100644 --- a/test/knownfailures.json +++ b/test/knownfailures.json @@ -13,7 +13,17 @@ }, { "tests": "080-oom-fragmentation", - "description": "Disable 080-oom-fragmentation due to flakes.", + "description": ["Disable 080-oom-fragmentation for GSS GC due to lack of", + "support for allocations larger than 32MB."], + "env_vars": {"ART_DEFAULT_GC_TYPE": "GSS"}, + "bug": "http://b/33795328" + }, + { + "tests": "080-oom-fragmentation", + "description": ["Disable 080-oom-fragmentation for CC collector in debug mode", + "because of potential fragmentation caused by the region space's", + "cyclic region allocation (which is enabled in debug mode)."], + "variant": "debug", "bug": "http://b/33795328" }, { diff --git a/tools/cpp-define-generator/constant_thread.def b/tools/cpp-define-generator/constant_thread.def index 1364b558ec..7e1df6b267 100644 --- a/tools/cpp-define-generator/constant_thread.def +++ b/tools/cpp-define-generator/constant_thread.def @@ -27,5 +27,4 @@ DEFINE_THREAD_CONSTANT(SUSPEND_REQUEST, int32_t, art::kSuspendRequest) DEFINE_THREAD_CONSTANT(CHECKPOINT_REQUEST, int32_t, art::kCheckpointRequest) DEFINE_THREAD_CONSTANT(EMPTY_CHECKPOINT_REQUEST, int32_t, art::kEmptyCheckpointRequest) DEFINE_THREAD_CONSTANT(SUSPEND_OR_CHECKPOINT_REQUEST, int32_t, art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest) - -#undef DEFINE_THREAD_CONSTANT +DEFINE_THREAD_CONSTANT(INTERPRETER_CACHE_SIZE_LOG2, int32_t, art::Thread::InterpreterCacheSizeLog2()) diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt index 4c9fd96e79..a5fa332050 100644 --- a/tools/libcore_failures.txt +++ b/tools/libcore_failures.txt @@ -226,5 +226,12 @@ "libcore.libcore.io.FdsanTest#testParcelFileDescriptor", "libcore.libcore.io.FdsanTest#testDatagramSocket", "libcore.libcore.io.FdsanTest#testSocket"] +}, +{ + description: "Timeout on heap-poisoning target builds", + result: EXEC_FAILED, + modes: [device], + bug: 116446372, + names: ["libcore.libcore.io.FdsanTest#testSocket"] } ] diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh index 13f756c246..20e5c64eaa 100755 --- a/tools/run-jdwp-tests.sh +++ b/tools/run-jdwp-tests.sh @@ -311,16 +311,16 @@ if [[ "$plugin" != "" ]]; then vm_args="$vm_args --vm-arg $plugin" fi -# Because we're running debuggable, we discard any AOT code. -# Therefore we run de2oat with 'quicken' to avoid spending time compiling. -vm_args="$vm_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=quicken" -debuggee_args="$debuggee_args -Xcompiler-option --compiler-filter=quicken" +if [[ $mode != "ri" ]]; then + # Because we're running debuggable, we discard any AOT code. + # Therefore we run de2oat with 'quicken' to avoid spending time compiling. 
+ vm_args="$vm_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=quicken" + debuggee_args="$debuggee_args -Xcompiler-option --compiler-filter=quicken" -if $instant_jit; then - debuggee_args="$debuggee_args -Xjitthreshold:0" -fi + if $instant_jit; then + debuggee_args="$debuggee_args -Xjitthreshold:0" + fi -if [[ $mode != "ri" ]]; then vm_args="$vm_args --vm-arg -Xusejit:$use_jit" debuggee_args="$debuggee_args -Xusejit:$use_jit" fi diff --git a/tools/setup-buildbot-device.sh b/tools/setup-buildbot-device.sh index 04e80df50d..ef958d6b1a 100755 --- a/tools/setup-buildbot-device.sh +++ b/tools/setup-buildbot-device.sh @@ -43,7 +43,7 @@ seconds_per_hour=3600 # Kill logd first, so that when we set the adb buffer size later in this file, # it is brought up again. echo -e "${green}Killing logd, seen leaking on fugu/N${nc}" -adb shell killall -9 /system/bin/logd +adb shell pkill -9 -U logd logd && echo -e "${green}...logd killed${nc}" # Update date on device if the difference with host is more than one hour. if [ $abs_time_difference_in_seconds -gt $seconds_per_hour ]; then |
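A reading aid for the vector graph expected above in testDotProdConstLeft (and the other positive dot-product cases): VecSetScalars [<<Const1>>] seeds one lane of the accumulator with the scalar initial value 1, leaving the other lanes zero; each vector iteration consumes 16 bytes, which is why the induction step is Add [<<Phi1>>,<<Const16>>]; and VecReduce plus VecExtractScalar fold the int lanes back into the scalar result. Below is a plain-Java sketch of that shape, not the compiler's actual lowering: the lane each element lands in is an implementation detail, and because int addition wraps, the reduced value is the same either way; all names are illustrative.

public class DotProdLaneModel {
    // Scalar model of the vectorized graph checked for testDotProdConstLeft:
    // 4 int accumulator lanes, 16 bytes consumed per vector iteration.
    static int dotProdConst89(byte[] b) {
        int[] acc = { 1, 0, 0, 0 };               // VecSetScalars [Const1]
        int i = 0;
        for (; i + 16 <= b.length; i += 16) {     // VecLoad + VecDotProd, step 16
            for (int j = 0; j < 16; j++) {
                acc[j % 4] += 89 * (b[i + j] & 0xff);
            }
        }
        int s = acc[0] + acc[1] + acc[2] + acc[3]; // VecReduce + VecExtractScalar
        for (; i < b.length; i++) {                // scalar cleanup loop
            s += 89 * (b[i] & 0xff);
        }
        return s;
    }

    public static void main(String[] args) {
        byte[] b = new byte[20];
        java.util.Arrays.fill(b, (byte) 127);
        // 1 + 20 * 89 * 127 = 226061, the same value the plain scalar loop produces.
        System.out.println(dotProdConst89(b));
    }
}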
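A note on the Int8/Uint8 VecDotProd pairs matched in testDotProdBothSignedUnsigned1 and testDotProdBothSignedUnsignedDoubleLoad above: masking a byte with 0xff zero-extends it, so the signed and the unsigned dot product of the same data generally differ, which is why the vectorizer keeps two separate accumulators instead of merging them. A scalar sketch of the two idioms, plain Java and independent of the checker test (class and method names are illustrative only):

public class DotProdSignednessDemo {
    // Signed dot product: bytes are sign-extended, e.g. (byte) -128 contributes -128.
    static int dotSigned(byte[] a, byte[] b) {
        int s = 0;
        for (int i = 0; i < a.length; i++) {
            s += a[i] * b[i];
        }
        return s;
    }

    // Unsigned dot product: '& 0xff' zero-extends, e.g. (byte) -128 contributes 128.
    static int dotUnsigned(byte[] a, byte[] b) {
        int s = 0;
        for (int i = 0; i < a.length; i++) {
            s += (a[i] & 0xff) * (b[i] & 0xff);
        }
        return s;
    }

    public static void main(String[] args) {
        byte[] a = { -128, 2 };
        byte[] b = {  127, 2 };
        System.out.println(dotSigned(a, b));    // (-128 * 127) + (2 * 2) = -16252
        System.out.println(dotUnsigned(a, b));  // ( 128 * 127) + (2 * 2) =  16260
    }
}

It is also why testDotProdMixedSizeAndSign above keeps its CHECK-NOT: VecDotProd: a byte operand and a char operand would need different widths and different extensions, so that loop is not recognized as a dot-product idiom.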
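A note on the new 979-const-method-handle checks above (the "raised WMTE (woohoo!)" line in expected.txt): MethodHandle.invokeExact() requires the call-site descriptor, derived from the static types of the arguments and of the return cast, to match the handle's type exactly, whereas invoke() inserts an asType() adaptation. A minimal standalone sketch of the same behaviour using only the plain java.lang.invoke API rather than the test's @ConstantMethodHandle constant pool entries (the field lookup mirrors the test's getSval(); everything else is illustrative):

import java.io.StreamTokenizer;
import java.io.StringReader;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.WrongMethodTypeException;

public class InvokeExactDemo {
    // Plain-JDK equivalent of the test's getSval() constant method handle:
    // a getter for the public field StreamTokenizer.sval.
    static final MethodHandle GET_SVAL;
    static {
        try {
            GET_SVAL = MethodHandles.lookup()
                    .findGetter(StreamTokenizer.class, "sval", String.class);
        } catch (ReflectiveOperationException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    static class TestTokenizer extends StreamTokenizer {
        TestTokenizer(String s) { super(new StringReader(s)); }
    }

    public static void main(String[] args) throws Throwable {
        TestTokenizer tt = new TestTokenizer("Test message 123");
        tt.nextToken();

        // invoke() adapts (TestTokenizer)String to the handle's (StreamTokenizer)String.
        System.out.println((String) GET_SVAL.invoke(tt));

        try {
            // invokeExact() uses the static receiver type as-is, so the call-site
            // descriptor (TestTokenizer)String does not match and WMTE is thrown.
            String unused = (String) GET_SVAL.invokeExact(tt);
        } catch (WrongMethodTypeException expected) {
            System.out.println("invokeExact with a subclass receiver raised WMTE");
        }

        // An explicit upcast restores the exact descriptor, so this succeeds.
        System.out.println((String) GET_SVAL.invokeExact((StreamTokenizer) tt));
    }
}

The upcast in the last call is the same reason the test's token loop over a plain StreamTokenizer can use invokeExact(), while the TestTokenizer case has to fall back to invoke().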