diff options
author | 2018-01-19 14:50:10 +0000 | |
---|---|---|
committer | 2018-01-22 18:35:16 +0000 | |
commit | 4d17987da58d9411adbed1a18203d76d6119612d (patch) | |
tree | f2953a0eb3ebc3f8533d22c14f4a09d7f0d4168d | |
parent | e57043081e6b091a9fd23a84043373148ae72f1f (diff) |
ART: Add entrypoint and intrinsic for Math.pow().
MathBenchmarks.java#timePow results on taimen's little cores
fixed at frequency 1401600 with forced JIT compilation:
- before:
  - X32: 356.33 (@FastNative), 315.39 (@CriticalNative)
  - X64: 357.31 (@FastNative), 315.37 (@CriticalNative)
- after (LICM defeats the benchmark):
  - X32: 2.88
  - X64: 2.87
- after but with kAllSideEffects to prevent LICM:
  - X32: 275.42
  - X64: 275.67
Test: Rely on TreeHugger.
Bug: 70727450
Change-Id: Iaa31f70acabbd57c163cfeafe02eed67c1348861
21 files changed, 68 insertions, 7 deletions
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index ca1b451e6b..2f8e33f941 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2011,6 +2011,14 @@ void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 99b8b5df74..830d0403e4 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -2811,6 +2811,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 113c9de5a2..cafa5228d9 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2835,6 +2835,15 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); } +// static double java.lang.Math.pow(double y, double x) +void IntrinsicLocationsBuilderMIPS::VisitMathPow(HInvoke* invoke) { + 
CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, codegen_, kQuickPow); +} + // static double java.lang.Math.cbrt(double a) void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) { CreateFPToFPCallLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 521bad27e2..89f1818be2 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -2416,6 +2416,15 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); } +// static double java.lang.Math.pow(double y, double x) +void IntrinsicLocationsBuilderMIPS64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, codegen_, kQuickPow); +} + // static double java.lang.Math.cbrt(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) { CreateFPToFPCallLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index baa410b884..46b7f3f1ce 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -1105,6 +1105,14 @@ void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 
6dd8b8e1f5..6483b7cb2a 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -897,6 +897,14 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 0a094352e4..674dc9a78b 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults[] = { " 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n", " 220: 4770 bx lr\n", " 222: 4660 mov r0, ip\n", - " 224: f8d9 c2c0 ldr.w ip, [r9, #704] ; 0x2c0\n", + " 224: f8d9 c2c4 ldr.w ip, [r9, #708] ; 0x2c4\n", " 228: 47e0 blx ip\n", nullptr }; diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc index c45166b17e..d5a87837f6 100644 --- a/dex2oat/linker/oat_writer_test.cc +++ b/dex2oat/linker/oat_writer_test.cc @@ -485,7 +485,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(76U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(24U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(161 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), + EXPECT_EQ(162 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), sizeof(QuickEntryPoints)); } diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index 78b9e46d77..80080e9832 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ 
b/runtime/arch/arm/entrypoints_init_arm.cc @@ -144,6 +144,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pAsin = asin; qpoints->pAtan = atan; qpoints->pAtan2 = atan2; + qpoints->pPow = pow; qpoints->pCbrt = cbrt; qpoints->pCosh = cosh; qpoints->pExp = exp; diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index 80bf3abc6f..4c43b7ed3d 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -168,6 +168,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pAsin = asin; qpoints->pAtan = atan; qpoints->pAtan2 = atan2; + qpoints->pPow = pow; qpoints->pCbrt = cbrt; qpoints->pCosh = cosh; qpoints->pExp = exp; diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc index 209f36705a..badee59568 100644 --- a/runtime/arch/mips/entrypoints_init_mips.cc +++ b/runtime/arch/mips/entrypoints_init_mips.cc @@ -348,6 +348,8 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { static_assert(IsDirectEntrypoint(kQuickAtan), "Direct C stub marked non-direct."); qpoints->pAtan2 = atan2; static_assert(IsDirectEntrypoint(kQuickAtan2), "Direct C stub marked non-direct."); + qpoints->pPow = pow; + static_assert(IsDirectEntrypoint(kQuickPow), "Direct C stub marked non-direct."); qpoints->pCbrt = cbrt; static_assert(IsDirectEntrypoint(kQuickCbrt), "Direct C stub marked non-direct."); qpoints->pCosh = cosh; diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc index 35cbd1dcc0..bdfb9421df 100644 --- a/runtime/arch/mips64/entrypoints_init_mips64.cc +++ b/runtime/arch/mips64/entrypoints_init_mips64.cc @@ -165,6 +165,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pAsin = asin; qpoints->pAtan = atan; qpoints->pAtan2 = atan2; + qpoints->pPow = pow; 
qpoints->pCbrt = cbrt; qpoints->pCosh = cosh; qpoints->pExp = exp; diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index 24bf9cc07c..ffb0c94cc7 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -68,6 +68,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pAsin = asin; qpoints->pAtan = atan; qpoints->pAtan2 = atan2; + qpoints->pPow = pow; qpoints->pCbrt = cbrt; qpoints->pCosh = cosh; qpoints->pExp = exp; diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index 3656f83b58..6bae69c495 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -91,6 +91,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pAsin = asin; qpoints->pAtan = atan; qpoints->pAtan2 = atan2; + qpoints->pPow = pow; qpoints->pCbrt = cbrt; qpoints->pCosh = cosh; qpoints->pExp = exp; diff --git a/runtime/asm_support.h b/runtime/asm_support.h index 3cf2b93690..2f7d6ab98f 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -73,7 +73,7 @@ ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET, // Offset of field Thread::tlsPtr_.mterp_current_ibase. #define THREAD_CURRENT_IBASE_OFFSET \ - (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__ + (1 + 161) * __SIZEOF_POINTER__) + (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__ + (1 + 162) * __SIZEOF_POINTER__) ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET, art::Thread::MterpCurrentIBaseOffset<POINTER_SIZE>().Int32Value()) // Offset of field Thread::tlsPtr_.mterp_default_ibase. 
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index 74e7c180b8..48a56f2fbf 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -91,6 +91,7 @@ V(Asin, double, double) \ V(Atan, double, double) \ V(Atan2, double, double, double) \ + V(Pow, double, double, double) \ V(Cbrt, double, double) \ V(Cosh, double, double) \ V(Exp, double, double) \ diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 7c912d0a4a..1fdf439d3f 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -238,7 +238,8 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAcos, pAsin, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAsin, pAtan, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan, pAtan2, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan2, pCbrt, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan2, pPow, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pPow, pCbrt, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCbrt, pCosh, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCosh, pExp, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pExp, pExpm1, sizeof(void*)); diff --git a/runtime/image.cc b/runtime/image.cc index dd0c1487c0..8e3615ffcf 100644 --- a/runtime/image.cc +++ b/runtime/image.cc @@ -26,7 +26,7 @@ namespace art { const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' }; -const uint8_t ImageHeader::kImageVersion[] = { '0', '5', '3', '\0' }; // ClassStatus in high bits. +const uint8_t ImageHeader::kImageVersion[] = { '0', '5', '4', '\0' }; // Math.pow() intrinsic. 
ImageHeader::ImageHeader(uint32_t image_begin, uint32_t image_size, diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc index 99a4f763c9..681a582b5d 100644 --- a/runtime/interpreter/interpreter_intrinsics.cc +++ b/runtime/interpreter/interpreter_intrinsics.cc @@ -478,6 +478,7 @@ bool MterpHandleIntrinsic(ShadowFrame* shadow_frame, UNIMPLEMENTED_CASE(MathLog /* (D)D */) UNIMPLEMENTED_CASE(MathLog10 /* (D)D */) UNIMPLEMENTED_CASE(MathNextAfter /* (DD)D */) + UNIMPLEMENTED_CASE(MathPow /* (DD)D */) UNIMPLEMENTED_CASE(MathSinh /* (D)D */) INTRINSIC_CASE(MathTan) UNIMPLEMENTED_CASE(MathTanh /* (D)D */) diff --git a/runtime/intrinsics_list.h b/runtime/intrinsics_list.h index d007728750..da08793f59 100644 --- a/runtime/intrinsics_list.h +++ b/runtime/intrinsics_list.h @@ -136,6 +136,7 @@ V(MathAsin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "asin", "(D)D") \ V(MathAtan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "atan", "(D)D") \ V(MathAtan2, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "atan2", "(DD)D") \ + V(MathPow, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "pow", "(DD)D") \ V(MathCbrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "cbrt", "(D)D") \ V(MathCosh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "cosh", "(D)D") \ V(MathExp, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "exp", "(D)D") \ diff --git a/runtime/oat.h b/runtime/oat.h index 36099b93dc..8f81010a06 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,8 +32,8 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - // Last oat version changed reason: ClassStatus in high bits. 
- static constexpr uint8_t kOatVersion[] = { '1', '3', '7', '\0' }; + // Last oat version changed reason: Math.pow() intrinsic. + static constexpr uint8_t kOatVersion[] = { '1', '3', '8', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; |