author     2021-11-02 20:01:06 +0530
committer  2021-11-11 11:32:25 +0000
commit     6545ee31288b72e117450ea77b07f3593bff6180 (patch)
tree       60085d1b253758ba32d8b585e493f634d95e422a
parent     a0232ad060c65d0ac3e022c9f721b41c02fd0b71 (diff)
Support Math.fma Intrinsic for x86 and x86_64
The intrinsic implementation is 2287x faster than the base implementation:
Time for base: 188.25s
Time for patch: 0.0822s
Test: ./run-test --host --64 --optimizing 082-inline-execute
Test: ./run-test --host --64 --optimizing --instruction-set-features sse4.1 082-inline-execute
Test: ./run-test --host --64 --optimizing --instruction-set-features sse4.1,avx2 082-inline-execute
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
Change-Id: I68b96a35f41f3bb23d7e26d87fee1dd2a5ebf6a6
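
For reference, Math.fma(a, b, c) computes a * b + c as one fused operation with a single rounding step; the base time above goes through the library implementation of Math.fma, while the patched path compiles the call down to a single VFMADD213SS/SD instruction on AVX2-capable CPUs. A minimal usage sketch in Java, with values taken from the new 082-inline-execute checks below:

    // Math.fma rounds once, after the full-precision multiply-add.
    double d = Math.fma(3.0, 4.0, 5.0);     // 3.0 * 4.0 + 5.0 == 17.0
    float  f = Math.fma(3.0f, 4.0f, 5.0f);  // 17.0f
    // Boundary values keep the unrounded product MAX_VALUE * MIN_VALUE:
    double t = Math.fma(Double.MAX_VALUE, Double.MIN_VALUE, 0.0);
    // t == 8.881784197001251E-16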
-rw-r--r--  compiler/optimizing/code_generator_x86.cc       |  3
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc    |  3
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc         |  3
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc      |  3
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc           | 50
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc        | 50
-rw-r--r--  compiler/utils/x86/assembler_x86.cc             | 37
-rw-r--r--  compiler/utils/x86/assembler_x86.h              |  3
-rw-r--r--  compiler/utils/x86/assembler_x86_test.cc        | 12
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc       | 37
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h        |  3
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc  | 10
-rw-r--r--  runtime/image.cc                                |  4
-rw-r--r--  runtime/interpreter/interpreter_intrinsics.cc   |  2
-rw-r--r--  runtime/intrinsics_list.h                       |  2
-rw-r--r--  test/082-inline-execute/src/Main.java           | 20
16 files changed, 238 insertions(+), 4 deletions(-)
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7924e56b59..b5677e5735 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1087,7 +1087,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetAllocator(), this),
-      assembler_(graph->GetAllocator()),
+      assembler_(graph->GetAllocator(),
+                 compiler_options.GetInstructionSetFeatures()->AsX86InstructionSetFeatures()),
       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index f737d0628c..a2faa43e28 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1481,7 +1481,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetAllocator(), this),
-      assembler_(graph->GetAllocator()),
+      assembler_(graph->GetAllocator(),
+                 compiler_options.GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures()),
       constant_area_start_(0),
       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index a7c5639921..a1ba873542 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -5597,6 +5597,9 @@ UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
 
 // 1.8.
+UNIMPLEMENTED_INTRINSIC(ARM64, MathFmaDouble)
+UNIMPLEMENTED_INTRINSIC(ARM64, MathFmaFloat)
+
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 2b3d48b1a6..fe4c27e993 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -5526,6 +5526,9 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
 
 // 1.8.
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaDouble)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaFloat)
+
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 5e55a1b9da..6015a6d4d9 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -728,6 +728,17 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invo
   locations->SetOut(Location::FpuRegisterLocation(XMM0));
 }
 
+static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
+  LocationSummary* locations =
+      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetInAt(2, Location::RequiresFpuRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
   CreateFPFPToFPCallLocations(allocator_, invoke);
 }
@@ -4733,6 +4744,44 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* i
   GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
 }
 
+static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86* codegen) {
+  DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  X86Assembler* assembler = codegen->GetAssembler();
+  XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
+  if (invoke->GetType() == DataType::Type::kFloat32) {
+    __ vfmadd213ss(left, right, accumulator);
+  } else {
+    DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
+    __ vfmadd213sd(left, right, accumulator);
+  }
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathFmaDouble(HInvoke* invoke) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+  GenerateMathFma(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathFmaDouble(HInvoke* invoke) {
+  if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
+    CreateFPFPFPToFPCallLocations(allocator_, invoke);
+  }
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathFmaFloat(HInvoke* invoke) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+  GenerateMathFma(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) {
+  if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
+    CreateFPFPFPToFPCallLocations(allocator_, invoke);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
@@ -4775,6 +4824,7 @@ UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
 
 // 1.8.
+
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 4d617f7350..c536492917 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -551,6 +551,17 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invo
   CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
 }
 
+static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
+  LocationSummary* locations =
+      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetInAt(2, Location::RequiresFpuRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
 void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
   CreateFPFPToFPCallLocations(allocator_, invoke);
 }
@@ -3262,6 +3273,44 @@ class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
   bool is_atomic_;
 };
 
+static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+  DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
+  X86_64Assembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
+  if (invoke->GetType() == DataType::Type::kFloat32) {
+    __ vfmadd213ss(left, right, accumulator);
+  } else {
+    DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
+    __ vfmadd213sd(left, right, accumulator);
+  }
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathFmaDouble(HInvoke* invoke) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+  GenerateMathFma(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathFmaDouble(HInvoke* invoke) {
+  if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
+    CreateFPFPFPToFPCallLocations(allocator_, invoke);
+  }
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathFmaFloat(HInvoke* invoke) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+  GenerateMathFma(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathFmaFloat(HInvoke* invoke) {
+  if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
+    CreateFPFPFPToFPCallLocations(allocator_, invoke);
+  }
+}
+
 // Generate subtype check without read barriers.
 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
                                                     VarHandleSlowPathX86_64* slow_path,
@@ -4713,6 +4762,7 @@ UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength);
 UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);
 
 // 1.8.
+
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index b6708de81c..4b64e92136 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -783,6 +783,43 @@ void X86Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
   EmitXmmRegisterOperand(dst, src2);
 }
 
+void X86Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
+  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(left);
+  ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+                                 /*X=*/ false,
+                                 /*B=*/ false,
+                                 SET_VEX_M_0F_38);
+  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  EmitUint8(ByteTwo);
+  EmitUint8(0xA9);
+  EmitXmmRegisterOperand(acc, right);
+}
+
+void X86Assembler::vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
+  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(left);
+  ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+                                 /*X=*/ false,
+                                 /*B=*/ false,
+                                 SET_VEX_M_0F_38);
+  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ true, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  EmitUint8(ByteTwo);
+  EmitUint8(0xA9);
+  EmitXmmRegisterOperand(acc, right);
+}
 
 void X86Assembler::movapd(XmmRegister dst, XmmRegister src) {
   if (CpuHasAVXorAVX2FeatureFlag()) {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index f6e7fbc8cd..dc2427d9d9 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -492,6 +492,9 @@ class X86Assembler final : public Assembler {
   void vsubpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
   void vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
 
+  void vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right);
+  void vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right);
+
   void movapd(XmmRegister dst, XmmRegister src);     // move
   void movapd(XmmRegister dst, const Address& src);  // load aligned
   void movupd(XmmRegister dst, const Address& src);  // load unaligned
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 92ce788f11..5db2b65179 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -1073,6 +1073,18 @@ TEST_F(AssemblerX86AVXTest, VPMAddWD) {
       RepeatFFF(&x86::X86Assembler::vpmaddwd, "vpmaddwd %{reg3}, %{reg2}, %{reg1}"), "vpmaddwd");
 }
 
+TEST_F(AssemblerX86AVXTest, VFMadd213SS) {
+  DriverStr(
+      RepeatFFF(&x86::X86Assembler::vfmadd213ss,
+                "vfmadd213ss %{reg3}, %{reg2}, %{reg1}"), "vfmadd213ss");
+}
+
+TEST_F(AssemblerX86AVXTest, VFMadd213SD) {
+  DriverStr(
+      RepeatFFF(&x86::X86Assembler::vfmadd213sd,
+                "vfmadd213sd %{reg3}, %{reg2}, %{reg1}"), "vfmadd213sd");
+}
+
 TEST_F(AssemblerX86Test, PHAddW) {
   DriverStr(RepeatFF(&x86::X86Assembler::phaddw,
                      "phaddw %{reg2}, %{reg1}"), "phaddw");
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 0f90ec894e..5edc3097fb 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -984,6 +984,43 @@ void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2
   EmitXmmRegisterOperand(dst.LowBits(), src2);
 }
 
+void X86_64Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
+  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
+  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
+                                 /*X=*/ false,
+                                 right.NeedsRex(),
+                                 SET_VEX_M_0F_38);
+  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  EmitUint8(ByteTwo);
+  EmitUint8(0xA9);
+  EmitXmmRegisterOperand(acc.LowBits(), right);
+}
+
+void X86_64Assembler::vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
+  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
+  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
+                                 /*X=*/ false,
+                                 right.NeedsRex(),
+                                 SET_VEX_M_0F_38);
+  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ true, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  EmitUint8(ByteTwo);
+  EmitUint8(0xA9);
+  EmitXmmRegisterOperand(acc.LowBits(), right);
+}
 
 void X86_64Assembler::flds(const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xD9);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 2eb018db55..2cbc9d9e7c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -528,6 +528,9 @@ class X86_64Assembler final : public Assembler {
   void vsubpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
   void vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
 
+  void vfmadd213ss(XmmRegister accumulator, XmmRegister left, XmmRegister right);
+  void vfmadd213sd(XmmRegister accumulator, XmmRegister left, XmmRegister right);
+
   void movapd(XmmRegister dst, XmmRegister src);     // move
   void movapd(XmmRegister dst, const Address& src);  // load aligned
   void movupd(XmmRegister dst, const Address& src);  // load unaligned
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 38fc568e1e..4c07354ace 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1866,6 +1866,16 @@ TEST_F(AssemblerX86_64AVXTest, VPmaddwd) {
                       "vpmaddwd %{reg3}, %{reg2}, %{reg1}"), "vpmaddwd");
 }
 
+TEST_F(AssemblerX86_64AVXTest, VFmadd213ss) {
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vfmadd213ss,
+                      "vfmadd213ss %{reg3}, %{reg2}, %{reg1}"), "vfmadd213ss");
+}
+
+TEST_F(AssemblerX86_64AVXTest, VFmadd213sd) {
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vfmadd213sd,
+                      "vfmadd213sd %{reg3}, %{reg2}, %{reg1}"), "vfmadd213sd");
+}
+
 TEST_F(AssemblerX86_64Test, Phaddw) {
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::phaddw,
                      "phaddw %{reg2}, %{reg1}"), "phaddw");
 }
diff --git a/runtime/image.cc b/runtime/image.cc
index 7e258d15b2..e62380a89d 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -29,8 +29,8 @@ namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
 
-// Last change: added multiple kJdkUnsafe intrinsics
-const uint8_t ImageHeader::kImageVersion[] = { '1', '0', '4', '\0' };
+// Last change: Math.fma(double, double, double) intrinsic.
+const uint8_t ImageHeader::kImageVersion[] = { '1', '0', '5', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_reservation_size,
                          uint32_t component_count,
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
index 2e18d5f01c..9c6c614a9a 100644
--- a/runtime/interpreter/interpreter_intrinsics.cc
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -468,6 +468,8 @@ bool MterpHandleIntrinsic(ShadowFrame* shadow_frame,
     INTRINSIC_CASE(MathAbsFloat)
     INTRINSIC_CASE(MathAbsLong)
     INTRINSIC_CASE(MathAbsInt)
+    UNIMPLEMENTED_CASE(MathFmaDouble /* (DDD)D */)
+    UNIMPLEMENTED_CASE(MathFmaFloat /* (FFF)F */)
     UNIMPLEMENTED_CASE(MathMinDoubleDouble /* (DD)D */)
     UNIMPLEMENTED_CASE(MathMinFloatFloat /* (FF)F */)
     INTRINSIC_CASE(MathMinLongLong)
diff --git a/runtime/intrinsics_list.h b/runtime/intrinsics_list.h
index 67cb124871..568daffefe 100644
--- a/runtime/intrinsics_list.h
+++ b/runtime/intrinsics_list.h
@@ -124,6 +124,8 @@
   V(MathAbsFloat, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "abs", "(F)F") \
   V(MathAbsLong, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "abs", "(J)J") \
   V(MathAbsInt, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "abs", "(I)I") \
+  V(MathFmaDouble, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "fma", "(DDD)D") \
+  V(MathFmaFloat, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "fma", "(FFF)F") \
   V(MathMinDoubleDouble, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "min", "(DD)D") \
   V(MathMinFloatFloat, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "min", "(FF)F") \
   V(MathMinLongLong, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "min", "(JJ)J") \
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index f4a234b437..97ecf47876 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -45,6 +45,8 @@ public class Main {
     test_Math_isInfinite_D();
     test_Math_isInfinite_F();
     test_Math_multiplyHigh();
+    test_Math_fma_D();
+    test_Math_fma_F();
     test_Short_reverseBytes();
     test_Integer_reverseBytes();
     test_Long_reverseBytes();
@@ -985,6 +987,24 @@ public class Main {
     Assert.assertEquals(Math.multiplyHigh(Long.MAX_VALUE, Long.MAX_VALUE), 4611686018427387903L);
   }
 
+  public static void test_Math_fma_D() {
+    final double MAX_D = Double.MAX_VALUE;
+    final double MIN_D = Double.MIN_VALUE;
+    Math.fma(3.0, 4.0, 5.0);
+    Assert.assertEquals(Double.compare(Math.fma(3.0, 4.0, 5.0), 17.0), 0);
+    Assert.assertEquals(Double.compare(Math.fma(MAX_D, MIN_D, 1.0), 1.0000000000000009), 0);
+    Assert.assertEquals(Double.compare(Math.fma(MAX_D, MIN_D, 0.0), 8.881784197001251E-16), 0);
+  }
+
+  public static void test_Math_fma_F() {
+    final float MAX_F = Float.MAX_VALUE;
+    final float MIN_F = Float.MIN_VALUE;
+    Math.fma(3.0f, 4.0f, 5.0f);
+    Assert.assertEquals(Float.compare(Math.fma(3.0f, 4.0f, 5.0f), 17.0f), 0);
+    Assert.assertEquals(Float.compare(Math.fma(MAX_F, MIN_F, 1.0f), 1.0000005f), 0);
+    Assert.assertEquals(Float.compare(Math.fma(MAX_F, MIN_F, 0.0f), 4.7683712978141557E-7f), 0);
+  }
+
   public static void test_StrictMath_abs_I() {
     StrictMath.abs(-1);
     Assert.assertEquals(StrictMath.abs(0), 0);
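
For reference, both new assembler entry points emit the same byte pattern: a three-byte VEX prefix (escape 0xC4, map 0F38), the VFMADD213 opcode 0xA9, and a register-register ModRM byte, with VEX.W selecting the double (W=1, sd) or single (W=0, ss) form and VEX.vvvv carrying the second register operand. A standalone Java sketch of that layout for the low (non-REX) registers; the helper below is illustrative, not part of the patch:

    // Bytes for vfmadd213sd %xmm<right>, %xmm<left>, %xmm<acc>
    // (acc, left, right all below xmm8, so the R/X/B extension bits stay clear).
    static byte[] vfmadd213sd(int acc, int left, int right) {
      int byteZero = 0xC4;                                // three-byte VEX escape
      int byteOne  = 0xE0 | 0x02;                         // ~R=~X=~B=1, mmmmm=2 (0F38 map)
      int byteTwo  = 0x80 | ((~left & 0xF) << 3) | 0x01;  // W=1 (sd), vvvv=~left, L=0, pp=1 (66)
      int modrm    = 0xC0 | (acc << 3) | right;           // mod=11: register-register form
      return new byte[] {
          (byte) byteZero, (byte) byteOne, (byte) byteTwo, (byte) 0xA9, (byte) modrm};
    }

    // vfmadd213sd(0, 1, 2) -> C4 E2 F1 A9 C2, i.e. vfmadd213sd %xmm2, %xmm1, %xmm0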