Diffstat (limited to 'compiler/optimizing')
 compiler/optimizing/inliner.cc             |  17
 compiler/optimizing/inliner.h              |   3
 compiler/optimizing/intrinsics_arm64.cc    | 145
 compiler/optimizing/intrinsics_arm_vixl.cc |   1
 compiler/optimizing/intrinsics_mips.cc     |   1
 compiler/optimizing/intrinsics_mips64.cc   |   1
 compiler/optimizing/intrinsics_x86.cc      |   1
 compiler/optimizing/intrinsics_x86_64.cc   |   1
 compiler/optimizing/optimization.cc        |   2
 compiler/optimizing/optimization.h         |   1
 compiler/optimizing/optimizing_compiler.cc |  20
 11 files changed, 168 insertions(+), 25 deletions(-)
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index ec9322270a..417d794264 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -28,7 +28,6 @@
 #include "dex/inline_method_analyser.h"
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
-#include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "driver/dex_compilation_unit.h"
 #include "instruction_simplifier.h"
@@ -408,7 +407,7 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
   return single_impl;
 }
 
-static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod* method)
+static bool IsMethodUnverified(const CompilerOptions& compiler_options, ArtMethod* method)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (!method->GetDeclaringClass()->IsVerified()) {
     if (Runtime::Current()->UseJitCompilation()) {
@@ -417,8 +416,9 @@ static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod*
       return true;
     }
     uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
-    if (!compiler_driver->IsMethodVerifiedWithoutFailures(
-        method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
+    if (!compiler_options.IsMethodVerifiedWithoutFailures(method->GetDexMethodIndex(),
+                                                          class_def_idx,
+                                                          *method->GetDexFile())) {
       // Method has soft or hard failures, don't analyze.
       return true;
     }
@@ -426,11 +426,11 @@ static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod*
   return false;
 }
 
-static bool AlwaysThrows(CompilerDriver* const compiler_driver, ArtMethod* method)
+static bool AlwaysThrows(const CompilerOptions& compiler_options, ArtMethod* method)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(method != nullptr);
   // Skip non-compilable and unverified methods.
-  if (!method->IsCompilable() || IsMethodUnverified(compiler_driver, method)) {
+  if (!method->IsCompilable() || IsMethodUnverified(compiler_options, method)) {
     return false;
   }
   // Skip native methods, methods with try blocks, and methods that are too large.
@@ -518,7 +518,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
         MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface);
       }
     }
-  } else if (!cha_devirtualize && AlwaysThrows(compiler_driver_, actual_method)) {
+  } else if (!cha_devirtualize && AlwaysThrows(codegen_->GetCompilerOptions(), actual_method)) {
     // Set always throws property for non-inlined method call with single target
     // (unless it was obtained through CHA, because that would imply we have
     // to add the CHA dependency, which seems not worth it).
@@ -1500,7 +1500,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
     return false;
   }
 
-  if (IsMethodUnverified(compiler_driver_, method)) {
+  if (IsMethodUnverified(codegen_->GetCompilerOptions(), method)) {
     LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified)
         << "Method " << method->PrettyMethod()
         << " couldn't be verified, so it cannot be inlined";
@@ -2069,7 +2069,6 @@ void HInliner::RunOptimizations(HGraph* callee_graph,
                    codegen_,
                    outer_compilation_unit_,
                    dex_compilation_unit,
-                   compiler_driver_,
                    handles_,
                    inline_stats_,
                    total_number_of_dex_registers_ + accessor.RegistersSize(),
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 6fd0c204b2..8ac2163a94 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -38,7 +38,6 @@ class HInliner : public HOptimization {
            CodeGenerator* codegen,
            const DexCompilationUnit& outer_compilation_unit,
            const DexCompilationUnit& caller_compilation_unit,
-           CompilerDriver* compiler_driver,
            VariableSizedHandleScope* handles,
            OptimizingCompilerStats* stats,
            size_t total_number_of_dex_registers,
@@ -51,7 +50,6 @@ class HInliner : public HOptimization {
         outer_compilation_unit_(outer_compilation_unit),
         caller_compilation_unit_(caller_compilation_unit),
         codegen_(codegen),
-        compiler_driver_(compiler_driver),
         total_number_of_dex_registers_(total_number_of_dex_registers),
         total_number_of_instructions_(total_number_of_instructions),
         parent_(parent),
@@ -280,7 +278,6 @@ class HInliner : public HOptimization {
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
   CodeGenerator* const codegen_;
-  CompilerDriver* const compiler_driver_;
   const size_t total_number_of_dex_registers_;
   size_t total_number_of_instructions_;
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 6d04b0e9d9..1688ea7811 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2950,6 +2950,151 @@ void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
   __ Mvn(out, out);
 }
 
+// The threshold for sizes of arrays to use the library provided implementation
+// of CRC32.updateBytes instead of the intrinsic.
+static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
+
+void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
+  if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
+    return;
+  }
+
+  LocationSummary* locations
+      = new (allocator_) LocationSummary(invoke,
+                                         LocationSummary::kCallOnSlowPath,
+                                         kIntrinsified);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
+//
+// Note: The intrinsic is not used if len exceeds a threshold.
+void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
+
+  auto masm = GetVIXLAssembler();
+  auto locations = invoke->GetLocations();
+
+  auto slow_path =
+      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  Register length = WRegisterFrom(locations->InAt(3));
+  __ Cmp(length, kCRC32UpdateBytesThreshold);
+  __ B(slow_path->GetEntryLabel(), hi);
+
+  const uint32_t array_data_offset =
+      mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
+  Register ptr = XRegisterFrom(locations->GetTemp(0));
+  Register array = XRegisterFrom(locations->InAt(1));
+  auto offset = locations->InAt(2);
+  if (offset.IsConstant()) {
+    int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
+    __ Add(ptr, array, array_data_offset + offset_value);
+  } else {
+    __ Add(ptr, array, array_data_offset);
+    __ Add(ptr, ptr, XRegisterFrom(offset));
+  }
+
+  // The algorithm of CRC32 of bytes is:
+  //   crc = ~crc
+  //   process a few first bytes to make the array 8-byte aligned
+  //   while array has 8 bytes do:
+  //     crc = crc32_of_8bytes(crc, 8_bytes(array))
+  //   if array has 4 bytes:
+  //     crc = crc32_of_4bytes(crc, 4_bytes(array))
+  //   if array has 2 bytes:
+  //     crc = crc32_of_2bytes(crc, 2_bytes(array))
+  //   if array has a byte:
+  //     crc = crc32_of_byte(crc, 1_byte(array))
+  //   crc = ~crc
+
+  vixl::aarch64::Label loop, done;
+  vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
+  vixl::aarch64::Label aligned2, aligned4, aligned8;
+
+  // Use VIXL scratch registers as the VIXL macro assembler won't use them in
+  // instructions below.
+  UseScratchRegisterScope temps(masm);
+  Register len = temps.AcquireW();
+  Register array_elem = temps.AcquireW();
+
+  Register out = WRegisterFrom(locations->Out());
+  __ Mvn(out, WRegisterFrom(locations->InAt(0)));
+  __ Mov(len, length);
+
+  __ Tbz(ptr, 0, &aligned2);
+  __ Subs(len, len, 1);
+  __ B(&done, lo);
+  __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
+  __ Crc32b(out, out, array_elem);
+
+  __ Bind(&aligned2);
+  __ Tbz(ptr, 1, &aligned4);
+  __ Subs(len, len, 2);
+  __ B(&process_1byte, lo);
+  __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
+  __ Crc32h(out, out, array_elem);
+
+  __ Bind(&aligned4);
+  __ Tbz(ptr, 2, &aligned8);
+  __ Subs(len, len, 4);
+  __ B(&process_2bytes, lo);
+  __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
+  __ Crc32w(out, out, array_elem);
+
+  __ Bind(&aligned8);
+  __ Subs(len, len, 8);
+  // If len < 8 go to process data by 4 bytes, 2 bytes and a byte.
+  __ B(&process_4bytes, lo);
+
+  // The main loop processing data by 8 bytes.
+  __ Bind(&loop);
+  __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
+  __ Subs(len, len, 8);
+  __ Crc32x(out, out, array_elem.X());
+  // if len >= 8, process the next 8 bytes.
+  __ B(&loop, hs);
+
+  // Process the data which is less than 8 bytes.
+  // The code generated below works with values of len
+  // which come in the range [-8, 0].
+  // The first three bits are used to detect whether 4 bytes or 2 bytes or
+  // a byte can be processed.
+  // The checking order is from bit 2 to bit 0:
+  //   bit 2 is set: at least 4 bytes available
+  //   bit 1 is set: at least 2 bytes available
+  //   bit 0 is set: at least a byte available
+  __ Bind(&process_4bytes);
+  // Goto process_2bytes if less than four bytes available
+  __ Tbz(len, 2, &process_2bytes);
+  __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
+  __ Crc32w(out, out, array_elem);
+
+  __ Bind(&process_2bytes);
+  // Goto process_1bytes if less than two bytes available
+  __ Tbz(len, 1, &process_1byte);
+  __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
+  __ Crc32h(out, out, array_elem);
+
+  __ Bind(&process_1byte);
+  // Goto done if no bytes available
+  __ Tbz(len, 0, &done);
+  __ Ldrb(array_elem, MemOperand(ptr));
+  __ Crc32b(out, out, array_elem);
+
+  __ Bind(&done);
+  __ Mvn(out, out);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
 
 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 4d45a9991c..88f1457c20 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -3060,6 +3060,7 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
 
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 21fb7d7f1c..08ba0a0adf 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2697,6 +2697,7 @@ UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateBytes)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 4b86f5d423..59d3ba2488 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2347,6 +2347,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED)
 UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateBytes)
 
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index a73f4e8b94..1d94950e4d 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3071,6 +3071,7 @@ UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
 
 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 88c766fabc..4f0b61d88e 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2738,6 +2738,7 @@ UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes)
 
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 0f971e100e..b75afad4c8 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -181,7 +181,6 @@ ArenaVector<HOptimization*> ConstructOptimizations(
     HGraph* graph,
     OptimizingCompilerStats* stats,
     CodeGenerator* codegen,
-    CompilerDriver* driver,
     const DexCompilationUnit& dex_compilation_unit,
     VariableSizedHandleScope* handles) {
   ArenaVector<HOptimization*> optimizations(allocator->Adapter());
@@ -258,7 +257,6 @@ ArenaVector<HOptimization*> ConstructOptimizations(
             codegen,
             dex_compilation_unit,    // outer_compilation_unit
             dex_compilation_unit,    // outermost_compilation_unit
-            driver,
             handles,
             stats,
             accessor.RegistersSize(),
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 490007d9d9..ce44b5f81a 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -147,7 +147,6 @@ ArenaVector<HOptimization*> ConstructOptimizations(
     HGraph* graph,
     OptimizingCompilerStats* stats,
     CodeGenerator* codegen,
-    CompilerDriver* driver,
     const DexCompilationUnit& dex_compilation_unit,
     VariableSizedHandleScope* handles);
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 641368b87a..92aaa19121 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -321,7 +321,6 @@ class OptimizingCompiler final : public Compiler {
         graph,
         compilation_stats_.get(),
         codegen,
-        GetCompilerDriver(),
        dex_compilation_unit,
        handles);
     DCHECK_EQ(length, optimizations.size());
@@ -962,9 +961,9 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
       arena_stack,
       dex_file,
       method_idx,
-      compiler_driver->GetCompilerOptions().GetInstructionSet(),
+      compiler_options.GetInstructionSet(),
       kInvalidInvokeType,
-      compiler_driver->GetCompilerOptions().GetDebuggable(),
+      compiler_options.GetDebuggable(),
       /* osr */ false);
 
   DCHECK(Runtime::Current()->IsAotCompiler());
@@ -1043,12 +1042,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                             const DexFile& dex_file,
                                             Handle<mirror::DexCache> dex_cache) const {
   CompilerDriver* compiler_driver = GetCompilerDriver();
+  const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
   CompiledMethod* compiled_method = nullptr;
   Runtime* runtime = Runtime::Current();
   DCHECK(runtime->IsAotCompiler());
-  const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
+  const VerifiedMethod* verified_method = compiler_options.GetVerifiedMethod(&dex_file, method_idx);
   DCHECK(!verified_method->HasRuntimeThrow());
-  if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) ||
+  if (compiler_options.IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) ||
       verifier::CanCompilerHandleVerificationFailure(
           verified_method->GetEncounteredVerificationFailures())) {
     ArenaAllocator allocator(runtime->GetArenaPool());
@@ -1080,7 +1080,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
       // Go to native so that we don't block GC during compilation.
       ScopedThreadSuspension sts(soa.Self(), kNative);
       if (method != nullptr && UNLIKELY(method->IsIntrinsic())) {
-        DCHECK(compiler_driver->GetCompilerOptions().IsBootImage());
+        DCHECK(compiler_options.IsBootImage());
         codegen.reset(
             TryCompileIntrinsic(&allocator,
                                 &arena_stack,
@@ -1099,7 +1099,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                          &code_allocator,
                          dex_compilation_unit,
                          method,
-                         compiler_driver->GetCompilerOptions().IsBaseline(),
+                         compiler_options.IsBaseline(),
                          /* osr= */ false,
                          &handles));
       }
@@ -1128,7 +1128,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
     }
   } else {
     MethodCompilationStat method_stat;
-    if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
+    if (compiler_options.VerifyAtRuntime()) {
      method_stat = MethodCompilationStat::kNotCompiledVerifyAtRuntime;
    } else {
      method_stat = MethodCompilationStat::kNotCompiledVerificationError;
@@ -1137,8 +1137,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
   }
 
   if (kIsDebugBuild &&
-      compiler_driver->GetCompilerOptions().CompilingWithCoreImage() &&
-      IsInstructionSetSupported(compiler_driver->GetCompilerOptions().GetInstructionSet())) {
+      compiler_options.CompilingWithCoreImage() &&
+      IsInstructionSetSupported(compiler_options.GetInstructionSet())) {
     // For testing purposes, we put a special marker on method names
     // that should be compiled with this compiler (when the
     // instruction set is supported). This makes sure we're not
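
For readers who want to follow the generated assembly in the ARM64 hunk above, here is a minimal standalone C++ sketch of the same byte-processing scheme (pre-invert, consume 1/2/4 bytes to reach 8-byte alignment, 8-byte main loop, 4/2/1-byte tail, post-invert), written with the ACLE CRC32 intrinsics from <arm_acle.h>. It is an illustration only, not ART code: the function name crc32_update_bytes is made up, and it assumes a little-endian AArch64 target built with CRC support (e.g. -march=armv8-a+crc).

// Sketch of the algorithm the intrinsic emits, using ACLE CRC32 intrinsics.
#include <arm_acle.h>
#include <cstddef>
#include <cstdint>
#include <cstring>

uint32_t crc32_update_bytes(uint32_t crc, const uint8_t* p, size_t len) {
  crc = ~crc;  // Pre-inversion, mirroring the leading Mvn.
  // Consume 1/2/4 bytes until p is 8-byte aligned (the Tbz(ptr, n) checks).
  if ((reinterpret_cast<uintptr_t>(p) & 1) != 0 && len >= 1) {
    crc = __crc32b(crc, *p);
    p += 1; len -= 1;
  }
  if ((reinterpret_cast<uintptr_t>(p) & 2) != 0 && len >= 2) {
    uint16_t v; std::memcpy(&v, p, sizeof(v));
    crc = __crc32h(crc, v);
    p += 2; len -= 2;
  }
  if ((reinterpret_cast<uintptr_t>(p) & 4) != 0 && len >= 4) {
    uint32_t v; std::memcpy(&v, p, sizeof(v));
    crc = __crc32w(crc, v);
    p += 4; len -= 4;
  }
  // Main loop: 8 bytes per iteration (Crc32x in the generated code).
  while (len >= 8) {
    uint64_t v; std::memcpy(&v, p, sizeof(v));
    crc = __crc32d(crc, v);
    p += 8; len -= 8;
  }
  // Tail: at most 7 bytes remain; check 4, then 2, then 1, like the
  // process_4bytes/process_2bytes/process_1byte labels.
  if (len & 4) {
    uint32_t v; std::memcpy(&v, p, sizeof(v));
    crc = __crc32w(crc, v);
    p += 4;
  }
  if (len & 2) {
    uint16_t v; std::memcpy(&v, p, sizeof(v));
    crc = __crc32h(crc, v);
    p += 2;
  }
  if (len & 1) {
    crc = __crc32b(crc, *p);
  }
  return ~crc;  // Post-inversion, mirroring the trailing Mvn.
}

The generated code takes the same shape but keeps len in a register and relies on the sign of len after the Subs instructions instead of explicit size checks; arrays longer than kCRC32UpdateBytesThreshold are instead handed to the slow path, i.e. the library implementation of CRC32.updateBytes.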