diff options
author | 2023-04-03 12:34:45 +0100 | |
---|---|---|
committer | 2023-04-04 12:31:27 +0000 | |
commit | 76d519b039fcdebf58e05dd42df4dc6cc08251e5 (patch) | |
tree | 9ab4e5e9c6cd0be1c90e6aef425f84e7a90dd960 /compiler/optimizing | |
parent | edf865bdfba5cd48726b8b668baf2be19a285025 (diff) |
Inline unimplemented intrinsics
There are intrinsics that are unimplemented, i.e. ones for which we didn't
hand-craft code. Allow the inliner to inline those.
Since our optimizations expect InvokeVirtual, I stopped the
de-virtualization of intrinsics. That could be re-added
if we modify optimizations like TryReplaceStringBuilderAppend.
Test: art/test/testrunner/testrunner.py --host --64 --optimizing -b
Test: art/test/testrunner/testrunner.py --target --64 --optimizing
Test: compiling the APK in the bug and seeing the inline
Bug: 262585898
Fixes: 262585898
Change-Id: I501b69c4ffd9082ca8ffacb1cd1cd5d1ab3668a8
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_generator.cc | 6 | ||||
-rw-r--r-- | compiler/optimizing/code_generator.h | 12 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 30 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 35 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 30 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.h | 59 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 30 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 55 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 30 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 47 | ||||
-rw-r--r-- | compiler/optimizing/inliner.cc | 11 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 39 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 66 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 61 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 54 |
15 files changed, 348 insertions, 217 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index c1bc32aa1e..c9f42b52f5 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -1055,7 +1055,8 @@ CodeGenerator::CodeGenerator(HGraph* graph, uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) + OptimizingCompilerStats* stats, + const art::ArrayRef<const bool>& unimplemented_intrinsics) : frame_size_(0), core_spill_mask_(0), fpu_spill_mask_(0), @@ -1080,7 +1081,8 @@ CodeGenerator::CodeGenerator(HGraph* graph, is_leaf_(true), needs_suspend_check_entry_(false), requires_current_method_(false), - code_generation_data_() { + code_generation_data_(), + unimplemented_intrinsics_(unimplemented_intrinsics) { if (GetGraph()->IsCompilingOsr()) { // Make OSR methods have all registers spilled, this simplifies the logic of // jumping to the compiled code directly. diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index ee80357791..9872efaa4a 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -304,6 +304,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // Returns whether we should split long moves in parallel moves. virtual bool ShouldSplitLongMoves() const { return false; } + // Returns true if `invoke` is an implemented intrinsic in this codegen's arch. 
+ bool IsImplementedIntrinsic(HInvoke* invoke) const { + return invoke->IsIntrinsic() && + !unimplemented_intrinsics_[static_cast<size_t>(invoke->GetIntrinsic())]; + } + size_t GetNumberOfCoreCalleeSaveRegisters() const { return POPCOUNT(core_callee_save_mask_); } @@ -749,7 +755,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats); + OptimizingCompilerStats* stats, + const art::ArrayRef<const bool>& unimplemented_intrinsics); virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; @@ -893,6 +900,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed. std::unique_ptr<CodeGenerationData> code_generation_data_; + // Which intrinsics we don't have handcrafted code for. + art::ArrayRef<const bool> unimplemented_intrinsics_; + friend class OptimizingCFITest; ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD); ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 079c440033..03b2f9eff2 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -936,6 +936,33 @@ Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const return Location::RegisterLocation(x15.GetCode()); } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. 
+template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) @@ -946,7 +973,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, callee_saved_core_registers.GetList(), callee_saved_fp_registers.GetList(), compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), location_builder_neon_(graph, this), diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index deba88b860..6190364d1d 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -123,6 +123,41 @@ const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegi vixl::aarch64::d15.GetCode()); Location ARM64ReturnLocation(DataType::Type return_type); +#define UNIMPLEMENTED_INTRINSIC_LIST_ARM64(V) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + 
V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + V(SystemArrayCopyByte) \ + V(SystemArrayCopyInt) \ + /* 1.8 */ \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) + class SlowPathCodeARM64 : public SlowPathCode { public: explicit SlowPathCodeARM64(HInstruction* instruction) diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 51d6a46ddb..cc34e76861 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -1907,6 +1907,33 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, return final_label; } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. +template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) 
\ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) @@ -1917,7 +1944,8 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, kCoreCalleeSaves.GetList(), ComputeSRegisterListMask(kFpuCalleeSaves), compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), location_builder_(graph, this), diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 872a17b285..f5abe6951a 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -119,6 +119,65 @@ class CodeGeneratorARMVIXL; using VIXLInt32Literal = vixl::aarch32::Literal<int32_t>; using VIXLUInt32Literal = vixl::aarch32::Literal<uint32_t>; +#define UNIMPLEMENTED_INTRINSIC_LIST_ARM(V) \ + V(MathRoundDouble) /* Could be done by changing rounding mode, maybe? 
*/ \ + V(UnsafeCASLong) /* High register pressure */ \ + V(SystemArrayCopyChar) \ + V(LongDivideUnsigned) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Floor) \ + V(FP16Ceil) \ + V(FP16Rint) \ + V(FP16Greater) \ + V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Compare) \ + V(FP16Min) \ + V(FP16Max) \ + V(MathMultiplyHigh) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + V(SystemArrayCopyByte) \ + V(SystemArrayCopyInt) \ + /* 1.8 */ \ + V(MathFmaDouble) \ + V(MathFmaFloat) \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeCASLong) /* High register pressure */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) \ + V(JdkUnsafeCompareAndSetLong) + class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> { public: explicit JumpTableARMVIXL(HPackedSwitch* switch_instr) diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 58cb56d243..0cbdbe391d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1106,6 +1106,33 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) { __ 
fs()->call(Address::Absolute(entry_point_offset)); } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. +template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) @@ -1118,7 +1145,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, | (1 << kFakeReturnRegister), 0, compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 9f09e171fe..d27155f31d 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -48,6 +48,61 @@ static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +#define UNIMPLEMENTED_INTRINSIC_LIST_X86(V) \ + V(MathRoundDouble) \ + V(FloatIsInfinite) \ + V(DoubleIsInfinite) \ + V(IntegerHighestOneBit) \ + V(LongHighestOneBit) \ + V(LongDivideUnsigned) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Floor) \ + V(FP16Ceil) \ + V(FP16Rint) \ + V(FP16Greater) \ + 
V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Compare) \ + V(FP16Min) \ + V(FP16Max) \ + V(MathMultiplyHigh) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + /* 1.8 */ \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) + class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> { public: InvokeRuntimeCallingConvention() diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2d7dc441ea..47de888f32 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1494,6 +1494,33 @@ void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) { __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true)); } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. 
+template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); @@ -1510,7 +1537,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), arraysize(kFpuCalleeSaves)), compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 1fac62fa34..dff2e799e0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -53,6 +53,53 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength = // these are not clobbered by any direct call to native code (such as math intrinsics). 
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 }; +#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Floor) \ + V(FP16Ceil) \ + V(FP16Rint) \ + V(FP16Greater) \ + V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Compare) \ + V(FP16Min) \ + V(FP16Max) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + /* 1.8 */ \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { public: diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index c24ac84d37..41dc5eb206 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -182,7 +182,7 @@ bool HInliner::Run() { HInstruction* next = instruction->GetNext(); HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. 
- if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { + if (call != nullptr && !codegen_->IsImplementedIntrinsic(call)) { if (honor_noinline_directives) { // Debugging case: directives in method names control or assert on inlining. std::string callee_name = @@ -1272,6 +1272,13 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction, return false; } + // Don't try to devirtualize intrinsics as it breaks pattern matching from later phases. + // TODO(solanes): This `if` could be removed if we update optimizations like + // TryReplaceStringBuilderAppend. + if (invoke_instruction->IsIntrinsic()) { + return false; + } + // Don't bother trying to call directly a default conflict method. It // doesn't have a proper MethodReference, but also `GetCanonicalMethod` // will return an actual default implementation. @@ -1344,7 +1351,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ReferenceTypeInfo receiver_type, bool do_rtp, bool is_speculative) { - DCHECK(!invoke_instruction->IsIntrinsic()); + DCHECK(!codegen_->IsImplementedIntrinsic(invoke_instruction)); HInstruction* return_replacement = nullptr; if (!TryBuildAndInline( diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 946d4348af..a1f79ed70b 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -5761,42 +5761,9 @@ void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) { __ B(GetExitLabel()); } -UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence); 
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString); -UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyByte); -UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyInt); - -// 1.8. -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetObject) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARM64, Name) +UNIMPLEMENTED_INTRINSIC_LIST_ARM64(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(ARM64) diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 55d51fe6ef..4df1088e20 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -5549,69 +5549,9 @@ void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) __ B(GetExitLabel()); } -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? 
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. -UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongDivideUnsigned) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Compare) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Min) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Max) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMultiplyHigh) - -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharSequence); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString); - 
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyByte); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyInt); - -// 1.8. -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaDouble) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaFloat) - -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeCASLong) // High register pressure. -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetObject) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeCompareAndSetLong) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARMVIXL, Name) +UNIMPLEMENTED_INTRINSIC_LIST_ARM(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(ARMVIXL) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index a76b773a3c..868fd4a120 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -4832,64 +4832,9 @@ void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) { } } -UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) -UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) -UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) -UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit) -UNIMPLEMENTED_INTRINSIC(X86, LongDivideUnsigned) -UNIMPLEMENTED_INTRINSIC(X86, CRC32Update) -UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes) -UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer) 
-UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat) -UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf) -UNIMPLEMENTED_INTRINSIC(X86, FP16Floor) -UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil) -UNIMPLEMENTED_INTRINSIC(X86, FP16Rint) -UNIMPLEMENTED_INTRINSIC(X86, FP16Greater) -UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals) -UNIMPLEMENTED_INTRINSIC(X86, FP16Less) -UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals) -UNIMPLEMENTED_INTRINSIC(X86, FP16Compare) -UNIMPLEMENTED_INTRINSIC(X86, FP16Min) -UNIMPLEMENTED_INTRINSIC(X86, FP16Max) -UNIMPLEMENTED_INTRINSIC(X86, MathMultiplyHigh) - -UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString); - -// 1.8. 
- -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetObject) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86, Name) +UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(X86) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index cf3d63b4f4..9d0d5f155e 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -4990,57 +4990,9 @@ void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen __ jmp(GetExitLabel()); } -UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update) -UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes) -UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToFloat) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToHalf) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Floor) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Ceil) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Rint) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Greater) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16GreaterEquals) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Less) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16LessEquals) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Compare) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Min) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Max) - -UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter); 
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharSequence); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString); - -// 1.8. - -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetObject) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name) +UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(X86_64) |