Support Math.fma Intrinsic for x86 and x86_64

The intrinsic implementation is about 2287x faster than the baseline
(the non-intrinsified Math.fma call):
Time for baseline: 188.25s
Time for patch:    0.0822s
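
A minimal sketch of the kind of microbenchmark loop that produces timings
like the above, assuming a simple accumulator loop; the class name, constants,
and iteration count below are hypothetical and are not part of this patch or
of the 082-inline-execute test:

    // Hypothetical microbenchmark sketch: keep the fma result live in an
    // accumulator so the compiler cannot hoist or eliminate the call.
    public class FmaBench {
      public static void main(String[] args) {
        double acc = 0.0;
        for (long i = 0; i < 1_000_000_000L; i++) {
          // With this patch, on AVX2-capable x86/x86_64 the optimizing compiler
          // emits vfmadd213sd here instead of the out-of-line Math.fma call.
          acc = Math.fma(1.0000001, acc, 0.5);
        }
        System.out.println(acc);  // print so the loop is not dead code
      }
    }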
Test: ./run-test --host --64 --optimizing 082-inline-execute
Test: ./run-test --host --64 --optimizing --instruction-set-features sse4.1 082-inline-execute
Test: ./run-test --host --64 --optimizing --instruction-set-features sse4.1,avx2 082-inline-execute
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
Change-Id: I68b96a35f41f3bb23d7e26d87fee1dd2a5ebf6a6
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7924e56..b5677e5 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1087,7 +1087,8 @@
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetAllocator(), this),
- assembler_(graph->GetAllocator()),
+ assembler_(graph->GetAllocator(),
+ compiler_options.GetInstructionSetFeatures()->AsX86InstructionSetFeatures()),
boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index f737d06..a2faa43 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1481,7 +1481,8 @@
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetAllocator(), this),
- assembler_(graph->GetAllocator()),
+ assembler_(graph->GetAllocator(),
+ compiler_options.GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures()),
constant_area_start_(0),
boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index a7c5639..a1ba873 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -5597,6 +5597,9 @@
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
// 1.8.
+UNIMPLEMENTED_INTRINSIC(ARM64, MathFmaDouble)
+UNIMPLEMENTED_INTRINSIC(ARM64, MathFmaFloat)
+
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 2b3d48b..fe4c27e 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -5526,6 +5526,9 @@
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
// 1.8.
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaDouble)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaFloat)
+
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 5e55a1b..6015a6d 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -728,6 +728,17 @@
locations->SetOut(Location::FpuRegisterLocation(XMM0));
}
+static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
@@ -4733,6 +4744,44 @@
GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}
+static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86* codegen) {
+ DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
+ LocationSummary* locations = invoke->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ X86Assembler* assembler = codegen->GetAssembler();
+ XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
+ if (invoke->GetType() == DataType::Type::kFloat32) {
+ __ vfmadd213ss(left, right, accumulator);
+ } else {
+ DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
+ __ vfmadd213sd(left, right, accumulator);
+ }
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathFmaDouble(HInvoke* invoke) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+ GenerateMathFma(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathFmaDouble(HInvoke* invoke) {
+ if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
+ CreateFPFPFPToFPCallLocations(allocator_, invoke);
+ }
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathFmaFloat(HInvoke* invoke) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+ GenerateMathFma(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) {
+ if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
+ CreateFPFPFPToFPCallLocations(allocator_, invoke);
+ }
+}
+
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
@@ -4775,6 +4824,7 @@
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
// 1.8.
+
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 4d617f7..c536492 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -551,6 +551,17 @@
CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}
+static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
@@ -3262,6 +3273,44 @@
bool is_atomic_;
};
+static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
+ X86_64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
+ if (invoke->GetType() == DataType::Type::kFloat32) {
+ __ vfmadd213ss(left, right, accumulator);
+ } else {
+ DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
+ __ vfmadd213sd(left, right, accumulator);
+ }
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathFmaDouble(HInvoke* invoke) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+ GenerateMathFma(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathFmaDouble(HInvoke* invoke) {
+ if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
+ CreateFPFPFPToFPCallLocations(allocator_, invoke);
+ }
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathFmaFloat(HInvoke* invoke) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+ GenerateMathFma(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathFmaFloat(HInvoke* invoke) {
+ if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
+ CreateFPFPFPToFPCallLocations(allocator_, invoke);
+ }
+}
+
// Generate subtype check without read barriers.
static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
VarHandleSlowPathX86_64* slow_path,
@@ -4713,6 +4762,7 @@
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);
// 1.8.
+
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index b6708de..4b64e92 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -783,6 +783,43 @@
EmitXmmRegisterOperand(dst, src2);
}
+void X86Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
+ X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(left);
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ /*X=*/ false,
+ /*B=*/ false,
+ SET_VEX_M_0F_38);
+ ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ EmitUint8(ByteTwo);
+ EmitUint8(0xA9);
+ EmitXmmRegisterOperand(acc, right);
+}
+
+void X86Assembler::vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
+ X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(left);
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ /*X=*/ false,
+ /*B=*/ false,
+ SET_VEX_M_0F_38);
+ ByteTwo = EmitVexPrefixByteTwo(/*W=*/ true, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ EmitUint8(ByteTwo);
+ EmitUint8(0xA9);
+ EmitXmmRegisterOperand(acc, right);
+}
void X86Assembler::movapd(XmmRegister dst, XmmRegister src) {
if (CpuHasAVXorAVX2FeatureFlag()) {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index f6e7fbc..dc2427d 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -492,6 +492,9 @@
void vsubpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
void vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
+ void vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right);
+ void vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right);
+
void movapd(XmmRegister dst, XmmRegister src); // move
void movapd(XmmRegister dst, const Address& src); // load aligned
void movupd(XmmRegister dst, const Address& src); // load unaligned
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 92ce788..5db2b65 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -1073,6 +1073,18 @@
RepeatFFF(&x86::X86Assembler::vpmaddwd, "vpmaddwd %{reg3}, %{reg2}, %{reg1}"), "vpmaddwd");
}
+TEST_F(AssemblerX86AVXTest, VFMadd213SS) {
+ DriverStr(
+ RepeatFFF(&x86::X86Assembler::vfmadd213ss,
+ "vfmadd213ss %{reg3}, %{reg2}, %{reg1}"), "vfmadd213ss");
+}
+
+TEST_F(AssemblerX86AVXTest, VFMadd213SD) {
+ DriverStr(
+ RepeatFFF(&x86::X86Assembler::vfmadd213sd,
+ "vfmadd213sd %{reg3}, %{reg2}, %{reg1}"), "vfmadd213sd");
+}
+
TEST_F(AssemblerX86Test, PHAddW) {
DriverStr(RepeatFF(&x86::X86Assembler::phaddw, "phaddw %{reg2}, %{reg1}"), "phaddw");
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 0f90ec8..5edc309 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -984,6 +984,43 @@
EmitXmmRegisterOperand(dst.LowBits(), src2);
}
+void X86_64Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
+ X86_64ManagedRegister vvvv_reg =
+ X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
+ ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
+ /*X=*/ false,
+ right.NeedsRex(),
+ SET_VEX_M_0F_38);
+ ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ EmitUint8(ByteTwo);
+ EmitUint8(0xA9);
+ EmitXmmRegisterOperand(acc.LowBits(), right);
+}
+
+void X86_64Assembler::vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
+ X86_64ManagedRegister vvvv_reg =
+ X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
+ ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
+ /*X=*/ false,
+ right.NeedsRex(),
+ SET_VEX_M_0F_38);
+ ByteTwo = EmitVexPrefixByteTwo(/*W=*/ true, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ EmitUint8(ByteTwo);
+ EmitUint8(0xA9);
+ EmitXmmRegisterOperand(acc.LowBits(), right);
+}
void X86_64Assembler::flds(const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xD9);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 2eb018d..2cbc9d9 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -528,6 +528,9 @@
void vsubpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
void vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
+ void vfmadd213ss(XmmRegister accumulator, XmmRegister left, XmmRegister right);
+ void vfmadd213sd(XmmRegister accumulator, XmmRegister left, XmmRegister right);
+
void movapd(XmmRegister dst, XmmRegister src); // move
void movapd(XmmRegister dst, const Address& src); // load aligned
void movupd(XmmRegister dst, const Address& src); // load unaligned
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 38fc568..4c07354 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1866,6 +1866,16 @@
"vpmaddwd %{reg3}, %{reg2}, %{reg1}"), "vpmaddwd");
}
+TEST_F(AssemblerX86_64AVXTest, VFmadd213ss) {
+ DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vfmadd213ss,
+ "vfmadd213ss %{reg3}, %{reg2}, %{reg1}"), "vfmadd213ss");
+}
+
+TEST_F(AssemblerX86_64AVXTest, VFmadd213sd) {
+ DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vfmadd213sd,
+ "vfmadd213sd %{reg3}, %{reg2}, %{reg1}"), "vfmadd213sd");
+}
+
TEST_F(AssemblerX86_64Test, Phaddw) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::phaddw, "phaddw %{reg2}, %{reg1}"), "phaddw");
}
diff --git a/runtime/image.cc b/runtime/image.cc
index 7e258d1..e62380a 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -29,8 +29,8 @@
namespace art {
const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-// Last change: added multiple kJdkUnsafe intrinsics
-const uint8_t ImageHeader::kImageVersion[] = { '1', '0', '4', '\0' };
+// Last change: Math.fma(double, double, double) and Math.fma(float, float, float) intrinsics.
+const uint8_t ImageHeader::kImageVersion[] = { '1', '0', '5', '\0' };
ImageHeader::ImageHeader(uint32_t image_reservation_size,
uint32_t component_count,
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
index 2e18d5f..9c6c614 100644
--- a/runtime/interpreter/interpreter_intrinsics.cc
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -468,6 +468,8 @@
INTRINSIC_CASE(MathAbsFloat)
INTRINSIC_CASE(MathAbsLong)
INTRINSIC_CASE(MathAbsInt)
+ UNIMPLEMENTED_CASE(MathFmaDouble /* (DDD)D */)
+ UNIMPLEMENTED_CASE(MathFmaFloat /* (FFF)F */)
UNIMPLEMENTED_CASE(MathMinDoubleDouble /* (DD)D */)
UNIMPLEMENTED_CASE(MathMinFloatFloat /* (FF)F */)
INTRINSIC_CASE(MathMinLongLong)
diff --git a/runtime/intrinsics_list.h b/runtime/intrinsics_list.h
index 67cb124..568daff 100644
--- a/runtime/intrinsics_list.h
+++ b/runtime/intrinsics_list.h
@@ -124,6 +124,8 @@
V(MathAbsFloat, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "abs", "(F)F") \
V(MathAbsLong, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "abs", "(J)J") \
V(MathAbsInt, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "abs", "(I)I") \
+ V(MathFmaDouble, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "fma", "(DDD)D") \
+ V(MathFmaFloat, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "fma", "(FFF)F") \
V(MathMinDoubleDouble, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "min", "(DD)D") \
V(MathMinFloatFloat, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "min", "(FF)F") \
V(MathMinLongLong, kStatic, kNeedsEnvironment, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "min", "(JJ)J") \
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index f4a234b..97ecf47 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -45,6 +45,8 @@
test_Math_isInfinite_D();
test_Math_isInfinite_F();
test_Math_multiplyHigh();
+ test_Math_fma_D();
+ test_Math_fma_F();
test_Short_reverseBytes();
test_Integer_reverseBytes();
test_Long_reverseBytes();
@@ -985,6 +987,24 @@
Assert.assertEquals(Math.multiplyHigh(Long.MAX_VALUE, Long.MAX_VALUE), 4611686018427387903L);
}
+ public static void test_Math_fma_D() {
+ final double MAX_D = Double.MAX_VALUE;
+ final double MIN_D = Double.MIN_VALUE;
+ Math.fma(3.0, 4.0, 5.0);
+ Assert.assertEquals(Double.compare(Math.fma(3.0, 4.0, 5.0), 17.0), 0);
+ Assert.assertEquals(Double.compare(Math.fma(MAX_D, MIN_D, 1.0), 1.0000000000000009), 0);
+ Assert.assertEquals(Double.compare(Math.fma(MAX_D, MIN_D, 0.0), 8.881784197001251E-16), 0);
+ }
+
+ public static void test_Math_fma_F() {
+ final float MAX_F = Float.MAX_VALUE;
+ final float MIN_F = Float.MIN_VALUE;
+ Math.fma(3.0f, 4.0f, 5.0f);
+ Assert.assertEquals(Float.compare(Math.fma(3.0f, 4.0f, 5.0f), 17.0f), 0);
+ Assert.assertEquals(Float.compare(Math.fma(MAX_F, MIN_F, 1.0f), 1.0000005f), 0);
+ Assert.assertEquals(Float.compare(Math.fma(MAX_F, MIN_F, 0.0f), 4.7683712978141557E-7f), 0);
+ }
+
public static void test_StrictMath_abs_I() {
StrictMath.abs(-1);
Assert.assertEquals(StrictMath.abs(0), 0);