diff options
-rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 24 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_mips.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_mips64.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 1 | ||||
-rw-r--r-- | runtime/hidden_api.h | 1 | ||||
-rw-r--r-- | runtime/image.cc | 2 | ||||
-rw-r--r-- | runtime/interpreter/interpreter_intrinsics.cc | 1 | ||||
-rw-r--r-- | runtime/intrinsics_list.h | 1 | ||||
-rw-r--r-- | test/580-fp16/src-art/Main.java | 55 |
11 files changed, 80 insertions, 9 deletions
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 185d487dff..6a666c9eef 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -3216,6 +3216,30 @@ void IntrinsicCodeGeneratorARM64::VisitFP16ToFloat(HInvoke* invoke) { __ Fcvt(out, half); } +void IntrinsicLocationsBuilderARM64::VisitFP16ToHalf(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + LocationSummary* locations = new (allocator_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16ToHalf(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasFP16()); + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope scratch_scope(masm); + FPRegister in = SRegisterFrom(invoke->GetLocations()->InAt(0)); + FPRegister half = scratch_scope.AcquireH(); + Register out = WRegisterFrom(invoke->GetLocations()->Out()); + __ Fcvt(half, in); + __ Fmov(out, half); + __ Sxth(out, out); // sign extend due to returning a short type. 
+} + UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 65f388837d..74e861fa8e 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -3071,6 +3071,7 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update) UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf) UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index f71d281d5a..b18bbdde2d 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2708,6 +2708,7 @@ UNIMPLEMENTED_INTRINSIC(MIPS, CRC32Update) UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(MIPS, FP16ToFloat) +UNIMPLEMENTED_INTRINSIC(MIPS, FP16ToHalf) UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 7b87b03b50..e4627db33f 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -2358,6 +2358,7 @@ UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32Update) UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(MIPS64, FP16ToFloat) +UNIMPLEMENTED_INTRINSIC(MIPS64, FP16ToHalf) UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86.cc 
b/compiler/optimizing/intrinsics_x86.cc index 5a622ca6d1..95aa4c0eaa 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -3082,6 +3082,7 @@ UNIMPLEMENTED_INTRINSIC(X86, CRC32Update) UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat) +UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf) UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index cbf66069fe..8dbc0d3062 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2749,6 +2749,7 @@ UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update) UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToFloat) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToHalf) UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter); diff --git a/runtime/hidden_api.h b/runtime/hidden_api.h index a21225b376..2ef3522eee 100644 --- a/runtime/hidden_api.h +++ b/runtime/hidden_api.h @@ -358,6 +358,7 @@ ALWAYS_INLINE inline uint32_t GetRuntimeFlags(ArtMethod* method) return 0u; case Intrinsics::kUnsafeGetLong: case Intrinsics::kFP16ToFloat: + case Intrinsics::kFP16ToHalf: return kAccCorePlatformApi; default: // Remaining intrinsics are public API. We DCHECK that in SetIntrinsic(). 
diff --git a/runtime/image.cc b/runtime/image.cc index 11fac590b0..256b957c60 100644 --- a/runtime/image.cc +++ b/runtime/image.cc @@ -29,7 +29,7 @@ namespace art { const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' }; -const uint8_t ImageHeader::kImageVersion[] = { '0', '7', '8', '\0' }; // FP16ToFloat intrinsic +const uint8_t ImageHeader::kImageVersion[] = { '0', '7', '9', '\0' }; // FP16ToHalf intrinsic ImageHeader::ImageHeader(uint32_t image_reservation_size, uint32_t component_count, diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc index 6b2d989cd3..3759225b91 100644 --- a/runtime/interpreter/interpreter_intrinsics.cc +++ b/runtime/interpreter/interpreter_intrinsics.cc @@ -574,6 +574,7 @@ bool MterpHandleIntrinsic(ShadowFrame* shadow_frame, UNIMPLEMENTED_CASE(CRC32UpdateBytes /* (I[BII)I */) UNIMPLEMENTED_CASE(CRC32UpdateByteBuffer /* (IJII)I */) UNIMPLEMENTED_CASE(FP16ToFloat /* (S)F */) + UNIMPLEMENTED_CASE(FP16ToHalf /* (F)S */) INTRINSIC_CASE(VarHandleFullFence) INTRINSIC_CASE(VarHandleAcquireFence) INTRINSIC_CASE(VarHandleReleaseFence) diff --git a/runtime/intrinsics_list.h b/runtime/intrinsics_list.h index 15ae309624..bb41ca732d 100644 --- a/runtime/intrinsics_list.h +++ b/runtime/intrinsics_list.h @@ -166,6 +166,7 @@ V(MemoryPokeLongNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow, "Llibcore/io/Memory;", "pokeLongNative", "(JJ)V") \ V(MemoryPokeShortNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow, "Llibcore/io/Memory;", "pokeShortNative", "(JS)V") \ V(FP16ToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Llibcore/util/FP16;", "toFloat", "(S)F") \ + V(FP16ToHalf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Llibcore/util/FP16;", "toHalf", "(F)S") \ V(StringCharAt, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "charAt", "(I)C") \ V(StringCompareTo, kVirtual, 
kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "compareTo", "(Ljava/lang/String;)I") \ V(StringEquals, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "equals", "(Ljava/lang/Object;)Z") \ diff --git a/test/580-fp16/src-art/Main.java b/test/580-fp16/src-art/Main.java index 798b52dd34..a89e1000d0 100644 --- a/test/580-fp16/src-art/Main.java +++ b/test/580-fp16/src-art/Main.java @@ -28,15 +28,14 @@ public class Main { return Float.floatToRawIntBits(f); } - public static void assertEquals(int expected, int actual) { - if (expected != actual) { - throw new Error("Expected: " + expected + ", found: " + actual); + public static void assertEquals(short expected, short calculated) { + if (expected != calculated) { + throw new Error("Expected: " + expected + ", Calculated: " + calculated); } } - - public static void assertEquals(float expected, float actual) { - if (expected != actual) { - throw new Error("Expected: " + expected + ", found: " + actual); + public static void assertEquals(float expected, float calculated) { + if (expected != calculated) { + throw new Error("Expected: " + expected + ", Calculated: " + calculated); } } @@ -47,8 +46,48 @@ public class Main { // NaN inputs are tested below. continue; } - assertEquals(FP16.toHalf(FP16.toFloat(h)), h); + assertEquals(h, FP16.toHalf(FP16.toFloat(h))); } + + // These asserts check some known values and edge cases for FP16.toHalf + // and have been inspired by the cts HalfTest. 
+ // Zeroes, NaN and infinities + assertEquals(FP16.POSITIVE_ZERO, FP16.toHalf(0.0f)); + assertEquals(FP16.NEGATIVE_ZERO, FP16.toHalf(-0.0f)); + assertEquals(FP16.NaN, FP16.toHalf(Float.NaN)); + assertEquals(FP16.POSITIVE_INFINITY, FP16.toHalf(Float.POSITIVE_INFINITY)); + assertEquals(FP16.NEGATIVE_INFINITY, FP16.toHalf(Float.NEGATIVE_INFINITY)); + // Known values + assertEquals((short) 0x3c01, FP16.toHalf(1.0009765625f)); + assertEquals((short) 0xc000, FP16.toHalf(-2.0f)); + assertEquals((short) 0x0400, FP16.toHalf(6.10352e-5f)); + assertEquals((short) 0x7bff, FP16.toHalf(65504.0f)); + assertEquals((short) 0x3555, FP16.toHalf(1.0f / 3.0f)); + // Subnormals + assertEquals((short) 0x03ff, FP16.toHalf(6.09756e-5f)); + assertEquals(FP16.MIN_VALUE, FP16.toHalf(5.96046e-8f)); + assertEquals((short) 0x83ff, FP16.toHalf(-6.09756e-5f)); + assertEquals((short) 0x8001, FP16.toHalf(-5.96046e-8f)); + // Subnormals (flushed to +/-0) + assertEquals(FP16.POSITIVE_ZERO, FP16.toHalf(5.96046e-9f)); + assertEquals(FP16.NEGATIVE_ZERO, FP16.toHalf(-5.96046e-9f)); + // Test for values that overflow the mantissa bits into exp bits + assertEquals(0x1000, FP16.toHalf(Float.intBitsToFloat(0x39fff000))); + assertEquals(0x0400, FP16.toHalf(Float.intBitsToFloat(0x387fe000))); + // Floats with absolute value of 65520 or above are rounded to +/-inf + // when using round-to-nearest-even + assertEquals(0x7bff, FP16.toHalf(65519.0f)); + assertEquals(0x7bff, FP16.toHalf(65519.9f)); + assertEquals(FP16.POSITIVE_INFINITY, FP16.toHalf(65520.0f)); + assertEquals(FP16.NEGATIVE_INFINITY, FP16.toHalf(-65520.0f)); + // Check if numbers are rounded to nearest even when they + // cannot be accurately represented by Half + assertEquals(0x6800, FP16.toHalf(2049.0f)); + assertEquals(0x6c00, FP16.toHalf(4098.0f)); + assertEquals(0x7000, FP16.toHalf(8196.0f)); + assertEquals(0x7400, FP16.toHalf(16392.0f)); + assertEquals(0x7800, FP16.toHalf(32784.0f)); + // FP16 SNaN/QNaN inputs to float // The most significant bit of 
mantissa: // V |