diff options
author | 2019-11-11 15:29:59 +0000 | |
---|---|---|
committer | 2019-11-28 09:51:05 +0000 | |
commit | 457e9fa3833ef11530056d010f247ad087fd2184 (patch) | |
tree | 54b8a9dcf44646c3e43a9085d581660c5d9a0132 | |
parent | 17a39babb7f42cbe108d6fab2760cbdc68b821a2 (diff) |
ARM64: FP16 greater/less/greaterEquals/lessEquals intrinsics for ARMv8
This CL implements intrinsics for greater, greaterEquals, less,
lessEquals methods with ARMv8.2 FP16 instructions. This requires the
ARMv8.2 AArch64 asimd half precision extension.
The time in milliseconds required to execute the code below on Pixel3 is
listed here for each of the four intrinsics (the code shown is for FP16.less;
the benchmarks for the other intrinsics are analogous):
- Java implementation libcore.util.FP16.less():
- big cluster only: 19876
- little cluster only: 47525
- arm64 Intrinsic implementation for less:
- big cluster only: 14526 (~27% faster)
- little cluster only: 45815 (~4% faster)
- Java implementation libcore.util.FP16.lessEquals():
- big cluster only: 19856
- little cluster only: 47419
- arm64 Intrinsic implementation for lessEquals:
- big cluster only: 14469 (~27% faster)
- little cluster only: 45762 (~4% faster)
- Java implementation libcore.util.FP16.greater():
- big cluster only: 19854
- little cluster only: 47623
- arm64 Intrinsic implementation for greater:
- big cluster only: 14519 (~27% faster)
- little cluster only: 45722 (~4% faster)
- Java implementation libcore.util.FP16.greaterEquals():
- big cluster only: 19865
- little cluster only: 47216
- arm64 Intrinsic implementation for greaterEquals:
- big cluster only: 14485 (~27% faster)
- little cluster only: 45729 (~4% faster)
// Micro-benchmark quoted in the commit message. Runs a loop of 1e9 iterations,
// each making four FP16.less calls on fixed half-precision bit patterns, then
// prints the elapsed wall-clock time in milliseconds followed by the accumulated
// result. Returns the OR of every comparison result.
public static boolean benchmarkComparison(){
boolean ret = false;
long before = 0;
long after = 0;
before = System.currentTimeMillis();
// The bound 1e9 is a double literal, so the long counter is compared as a double.
for(long i = 0; i < 1e9; i++){
// FP16.toHalf(12.3) = 0x4a26, FP16.toHalf(12.4) = 0x4a33
// FP16.toHalf(-12.3) = 0xca26, FP16.toHalf(-12.4) = 0xca33
// Both operand orders are exercised for the positive pair and for the negative pair.
// Results are folded into ret, which is printed and returned below; presumably
// this keeps the calls from being optimized away (NOTE(review): confirm).
ret |= FP16.less((short) 0x4a26,(short) 0x4a33);
ret |= FP16.less((short) 0x4a33,(short) 0x4a26);
ret |= FP16.less((short) 0xca26,(short) 0xca33);
ret |= FP16.less((short) 0xca33,(short) 0xca26);
}
after = System.currentTimeMillis();
System.out.println("Time of FP16.less (ms): " + (after - before));
System.out.println(ret);
return ret;
}
Test: 580-fp16
Test: art/test/testrunner/run_build_test_target.py -j80 art-test-javac
Change-Id: Id1a2c3e7328c82c798fcaf1fa74f5908a822cd0b
-rw-r--r-- | compiler/optimizing/common_arm64.h | 5 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 95 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 4 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_mips.cc | 4 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_mips64.cc | 4 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 4 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 4 | ||||
-rw-r--r-- | runtime/hidden_api.h | 4 | ||||
-rw-r--r-- | runtime/image.cc | 2 | ||||
-rw-r--r-- | runtime/interpreter/interpreter_intrinsics.cc | 4 | ||||
-rw-r--r-- | runtime/intrinsics_list.h | 4 | ||||
-rw-r--r-- | test/580-fp16/src-art/Main.java | 159 |
12 files changed, 292 insertions, 1 deletions
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 9c80f323ba..36e68e49d0 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -107,6 +107,11 @@ inline vixl::aarch64::VRegister SRegisterFrom(Location location) { return vixl::aarch64::VRegister::GetSRegFromCode(location.reg()); } +inline vixl::aarch64::FPRegister HRegisterFrom(Location location) { + DCHECK(location.IsFpuRegister()) << location; + return vixl::aarch64::FPRegister::GetHRegFromCode(location.reg()); +} + inline vixl::aarch64::VRegister FPRegisterFrom(Location location, DataType::Type type) { DCHECK(DataType::IsFloatingPointType(type)) << type; return type == DataType::Type::kFloat64 ? DRegisterFrom(location) : SRegisterFrom(location); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 542bd176e1..bdeb6a462d 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -54,6 +54,7 @@ using helpers::RegisterFrom; using helpers::SRegisterFrom; using helpers::WRegisterFrom; using helpers::XRegisterFrom; +using helpers::HRegisterFrom; using helpers::InputRegisterAt; using helpers::OutputRegister; @@ -299,6 +300,14 @@ static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } +static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + static void GenReverseBytes(LocationSummary* locations, DataType::Type type, MacroAssembler* masm) { @@ -3304,6 +3313,92 @@ void IntrinsicCodeGeneratorARM64::VisitFP16Rint(HInvoke* 
invoke) { GenerateFP16Round(invoke, codegen_, masm, roundOp); } +template<typename OP> +void GenerateFP16Compare(HInvoke* invoke, + CodeGeneratorARM64* codegen, + MacroAssembler* masm, + const OP compareOp) { + DCHECK(codegen->GetInstructionSetFeatures().HasFP16()); + LocationSummary* locations = invoke->GetLocations(); + Register out = WRegisterFrom(locations->Out()); + VRegister half0 = HRegisterFrom(locations->GetTemp(0)); + VRegister half1 = HRegisterFrom(locations->GetTemp(1)); + __ Fmov(half0, WRegisterFrom(locations->InAt(0))); + __ Fmov(half1, WRegisterFrom(locations->InAt(1))); + compareOp(out, half0, half1); +} + +static inline void GenerateFP16Compare(HInvoke* invoke, + CodeGeneratorARM64* codegen, + MacroAssembler* masm, + vixl::aarch64::Condition cond) { + auto compareOp = [masm, cond](const Register out, const VRegister& in0, const VRegister& in1) { + __ Fcmp(in0, in1); + __ Cset(out, cond); + }; + GenerateFP16Compare(invoke, codegen, masm, compareOp); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16Greater(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + CreateIntIntToIntLocations(allocator_, invoke); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16Greater(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + GenerateFP16Compare(invoke, codegen_, masm, gt); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16GreaterEquals(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + CreateIntIntToIntLocations(allocator_, invoke); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16GreaterEquals(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + GenerateFP16Compare(invoke, 
codegen_, masm, ge); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16Less(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + CreateIntIntToIntLocations(allocator_, invoke); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16Less(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + GenerateFP16Compare(invoke, codegen_, masm, mi); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16LessEquals(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + CreateIntIntToIntLocations(allocator_, invoke); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16LessEquals(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + GenerateFP16Compare(invoke, codegen_, masm, ls); +} + UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 1dfebddf1e..89e5203461 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -3075,6 +3075,10 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf) UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor) UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil) UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals) UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 
ea9c591a20..537255f476 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2712,6 +2712,10 @@ UNIMPLEMENTED_INTRINSIC(MIPS, FP16ToHalf) UNIMPLEMENTED_INTRINSIC(MIPS, FP16Floor) UNIMPLEMENTED_INTRINSIC(MIPS, FP16Ceil) UNIMPLEMENTED_INTRINSIC(MIPS, FP16Rint) +UNIMPLEMENTED_INTRINSIC(MIPS, FP16Greater) +UNIMPLEMENTED_INTRINSIC(MIPS, FP16GreaterEquals) +UNIMPLEMENTED_INTRINSIC(MIPS, FP16Less) +UNIMPLEMENTED_INTRINSIC(MIPS, FP16LessEquals) UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index fd939026a8..59203945bb 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -2362,6 +2362,10 @@ UNIMPLEMENTED_INTRINSIC(MIPS64, FP16ToHalf) UNIMPLEMENTED_INTRINSIC(MIPS64, FP16Floor) UNIMPLEMENTED_INTRINSIC(MIPS64, FP16Ceil) UNIMPLEMENTED_INTRINSIC(MIPS64, FP16Rint) +UNIMPLEMENTED_INTRINSIC(MIPS64, FP16Greater) +UNIMPLEMENTED_INTRINSIC(MIPS64, FP16GreaterEquals) +UNIMPLEMENTED_INTRINSIC(MIPS64, FP16Less) +UNIMPLEMENTED_INTRINSIC(MIPS64, FP16LessEquals) UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 9d3cecbbed..6d7462e3c1 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -3086,6 +3086,10 @@ UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf) UNIMPLEMENTED_INTRINSIC(X86, FP16Floor) UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil) UNIMPLEMENTED_INTRINSIC(X86, FP16Rint) +UNIMPLEMENTED_INTRINSIC(X86, FP16Greater) +UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals) +UNIMPLEMENTED_INTRINSIC(X86, FP16Less) +UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals) UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86, 
StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 1111a59955..0f6b00653d 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2753,6 +2753,10 @@ UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToHalf) UNIMPLEMENTED_INTRINSIC(X86_64, FP16Floor) UNIMPLEMENTED_INTRINSIC(X86_64, FP16Ceil) UNIMPLEMENTED_INTRINSIC(X86_64, FP16Rint) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16Greater) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16GreaterEquals) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16Less) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16LessEquals) UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter); diff --git a/runtime/hidden_api.h b/runtime/hidden_api.h index 0f6eab08cf..c6f08653d7 100644 --- a/runtime/hidden_api.h +++ b/runtime/hidden_api.h @@ -359,6 +359,10 @@ ALWAYS_INLINE inline uint32_t GetRuntimeFlags(ArtMethod* method) case Intrinsics::kUnsafeGetLong: case Intrinsics::kFP16Ceil: case Intrinsics::kFP16Floor: + case Intrinsics::kFP16Greater: + case Intrinsics::kFP16GreaterEquals: + case Intrinsics::kFP16Less: + case Intrinsics::kFP16LessEquals: case Intrinsics::kFP16ToFloat: case Intrinsics::kFP16ToHalf: case Intrinsics::kFP16Rint: diff --git a/runtime/image.cc b/runtime/image.cc index 2566f80cc3..07fcc8b9b6 100644 --- a/runtime/image.cc +++ b/runtime/image.cc @@ -29,7 +29,7 @@ namespace art { const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' }; -const uint8_t ImageHeader::kImageVersion[] = { '0', '8', '3', '\0' }; // FP16Rint intrinsic +const uint8_t ImageHeader::kImageVersion[] = { '0', '8', '4', '\0' }; // FP16 gt/ge/lt/le intrinsic ImageHeader::ImageHeader(uint32_t image_reservation_size, uint32_t component_count, diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc index 5d23e350d8..63bd9673e6 100644 --- 
a/runtime/interpreter/interpreter_intrinsics.cc +++ b/runtime/interpreter/interpreter_intrinsics.cc @@ -578,6 +578,10 @@ bool MterpHandleIntrinsic(ShadowFrame* shadow_frame, UNIMPLEMENTED_CASE(FP16Floor /* (S)S */) UNIMPLEMENTED_CASE(FP16Ceil /* (S)S */) UNIMPLEMENTED_CASE(FP16Rint /* (S)S */) + UNIMPLEMENTED_CASE(FP16Greater /* (SS)Z */) + UNIMPLEMENTED_CASE(FP16GreaterEquals /* (SS)Z */) + UNIMPLEMENTED_CASE(FP16Less /* (SS)Z */) + UNIMPLEMENTED_CASE(FP16LessEquals /* (SS)Z */) INTRINSIC_CASE(VarHandleFullFence) INTRINSIC_CASE(VarHandleAcquireFence) INTRINSIC_CASE(VarHandleReleaseFence) diff --git a/runtime/intrinsics_list.h b/runtime/intrinsics_list.h index 2bd738c075..fc4734ef74 100644 --- a/runtime/intrinsics_list.h +++ b/runtime/intrinsics_list.h @@ -170,6 +170,10 @@ V(FP16Rint, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Llibcore/util/FP16;", "rint", "(S)S") \ V(FP16ToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Llibcore/util/FP16;", "toFloat", "(S)F") \ V(FP16ToHalf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Llibcore/util/FP16;", "toHalf", "(F)S") \ + V(FP16Greater, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Llibcore/util/FP16;", "greater", "(SS)Z") \ + V(FP16GreaterEquals, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Llibcore/util/FP16;", "greaterEquals", "(SS)Z") \ + V(FP16Less, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Llibcore/util/FP16;", "less", "(SS)Z") \ + V(FP16LessEquals, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Llibcore/util/FP16;", "lessEquals", "(SS)Z") \ V(StringCharAt, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "charAt", "(I)C") \ V(StringCompareTo, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "compareTo", "(Ljava/lang/String;)I") \ V(StringEquals, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, 
"Ljava/lang/String;", "equals", "(Ljava/lang/Object;)Z") \ diff --git a/test/580-fp16/src-art/Main.java b/test/580-fp16/src-art/Main.java index 2dbec8be4b..14b15f8358 100644 --- a/test/580-fp16/src-art/Main.java +++ b/test/580-fp16/src-art/Main.java @@ -38,6 +38,17 @@ public class Main { throw new Error("Expected: " + expected + ", Calculated: " + calculated); } } + static public void assertTrue(boolean condition) { + if (!condition) { + throw new Error("condition not true"); + } + } + + static public void assertFalse(boolean condition) { + if (condition) { + throw new Error("condition not false"); + } + } public static void testHalfToFloatToHalfConversions(){ // Test FP16 to float and back to Half for all possible Short values @@ -190,6 +201,150 @@ public class Main { } + public static void testGreater() { + assertTrue(FP16.greater(FP16.POSITIVE_INFINITY, FP16.NEGATIVE_INFINITY)); + assertTrue(FP16.greater(FP16.POSITIVE_INFINITY, FP16.MAX_VALUE)); + assertFalse(FP16.greater(FP16.MAX_VALUE, FP16.POSITIVE_INFINITY)); + assertFalse(FP16.greater(FP16.NEGATIVE_INFINITY, FP16.LOWEST_VALUE)); + assertTrue(FP16.greater(FP16.LOWEST_VALUE, FP16.NEGATIVE_INFINITY)); + assertFalse(FP16.greater(FP16.NEGATIVE_ZERO, FP16.POSITIVE_ZERO)); + assertFalse(FP16.greater(FP16.POSITIVE_ZERO, FP16.NEGATIVE_ZERO)); + assertFalse(FP16.greater(FP16.toHalf(12.3f), FP16.NaN)); + assertFalse(FP16.greater(FP16.NaN, FP16.toHalf(12.3f))); + assertTrue(FP16.greater(FP16.MIN_NORMAL, FP16.MIN_VALUE)); + assertFalse(FP16.greater(FP16.MIN_VALUE, FP16.MIN_NORMAL)); + assertTrue(FP16.greater(FP16.toHalf(12.4f), FP16.toHalf(12.3f))); + assertFalse(FP16.greater(FP16.toHalf(12.3f), FP16.toHalf(12.4f))); + assertFalse(FP16.greater(FP16.toHalf(-12.4f), FP16.toHalf(-12.3f))); + assertTrue(FP16.greater(FP16.toHalf(-12.3f), FP16.toHalf(-12.4f))); + assertTrue(FP16.greater((short) 0x3ff, FP16.MIN_VALUE)); + + assertFalse(FP16.greater(FP16.toHalf(-1.0f), FP16.toHalf(0.0f))); + 
assertTrue(FP16.greater(FP16.toHalf(0.0f), FP16.toHalf(-1.0f))); + assertFalse(FP16.greater(FP16.toHalf(-1.0f), FP16.toHalf(-1.0f))); + assertFalse(FP16.greater(FP16.toHalf(-1.3f), FP16.toHalf(-1.3f))); + assertTrue(FP16.greater(FP16.toHalf(1.0f), FP16.toHalf(0.0f))); + assertFalse(FP16.greater(FP16.toHalf(0.0f), FP16.toHalf(1.0f))); + assertFalse(FP16.greater(FP16.toHalf(1.0f), FP16.toHalf(1.0f))); + assertFalse(FP16.greater(FP16.toHalf(1.3f), FP16.toHalf(1.3f))); + assertFalse(FP16.greater(FP16.toHalf(-0.1f), FP16.toHalf(0.0f))); + assertTrue(FP16.greater(FP16.toHalf(0.0f), FP16.toHalf(-0.1f))); + assertFalse(FP16.greater(FP16.toHalf(-0.1f), FP16.toHalf(-0.1f))); + assertTrue(FP16.greater(FP16.toHalf(0.1f), FP16.toHalf(0.0f))); + assertFalse(FP16.greater(FP16.toHalf(0.0f), FP16.toHalf(0.1f))); + assertFalse(FP16.greater(FP16.toHalf(0.1f), FP16.toHalf(0.1f))); + } + + public static void testGreaterEquals() { + assertTrue(FP16.greaterEquals(FP16.POSITIVE_INFINITY, FP16.NEGATIVE_INFINITY)); + assertTrue(FP16.greaterEquals(FP16.POSITIVE_INFINITY, FP16.MAX_VALUE)); + assertFalse(FP16.greaterEquals(FP16.MAX_VALUE, FP16.POSITIVE_INFINITY)); + assertFalse(FP16.greaterEquals(FP16.NEGATIVE_INFINITY, FP16.LOWEST_VALUE)); + assertTrue(FP16.greaterEquals(FP16.LOWEST_VALUE, FP16.NEGATIVE_INFINITY)); + assertTrue(FP16.greaterEquals(FP16.NEGATIVE_ZERO, FP16.POSITIVE_ZERO)); + assertTrue(FP16.greaterEquals(FP16.POSITIVE_ZERO, FP16.NEGATIVE_ZERO)); + assertFalse(FP16.greaterEquals(FP16.toHalf(12.3f), FP16.NaN)); + assertFalse(FP16.greaterEquals(FP16.NaN, FP16.toHalf(12.3f))); + assertTrue(FP16.greaterEquals(FP16.MIN_NORMAL, FP16.MIN_VALUE)); + assertFalse(FP16.greaterEquals(FP16.MIN_VALUE, FP16.MIN_NORMAL)); + assertTrue(FP16.greaterEquals(FP16.toHalf(12.4f), FP16.toHalf(12.3f))); + assertFalse(FP16.greaterEquals(FP16.toHalf(12.3f), FP16.toHalf(12.4f))); + assertFalse(FP16.greaterEquals(FP16.toHalf(-12.4f), FP16.toHalf(-12.3f))); + 
assertTrue(FP16.greaterEquals(FP16.toHalf(-12.3f), FP16.toHalf(-12.4f))); + assertTrue(FP16.greaterEquals((short) 0x3ff, FP16.MIN_VALUE)); + assertTrue(FP16.greaterEquals(FP16.NEGATIVE_INFINITY, FP16.NEGATIVE_INFINITY)); + assertTrue(FP16.greaterEquals(FP16.POSITIVE_INFINITY, FP16.POSITIVE_INFINITY)); + assertTrue(FP16.greaterEquals(FP16.toHalf(12.12356f), FP16.toHalf(12.12356f))); + assertTrue(FP16.greaterEquals(FP16.toHalf(-12.12356f), FP16.toHalf(-12.12356f))); + + assertFalse(FP16.greaterEquals(FP16.toHalf(-1.0f), FP16.toHalf(0.0f))); + assertTrue(FP16.greaterEquals(FP16.toHalf(0.0f), FP16.toHalf(-1.0f))); + assertTrue(FP16.greaterEquals(FP16.toHalf(-1.0f), FP16.toHalf(-1.0f))); + assertTrue(FP16.greaterEquals(FP16.toHalf(-1.3f), FP16.toHalf(-1.3f))); + assertTrue(FP16.greaterEquals(FP16.toHalf(1.0f), FP16.toHalf(0.0f))); + assertFalse(FP16.greaterEquals(FP16.toHalf(0.0f), FP16.toHalf(1.0f))); + assertTrue(FP16.greaterEquals(FP16.toHalf(1.0f), FP16.toHalf(1.0f))); + assertTrue(FP16.greaterEquals(FP16.toHalf(1.3f), FP16.toHalf(1.3f))); + assertFalse(FP16.greaterEquals(FP16.toHalf(-0.1f), FP16.toHalf(0.0f))); + assertTrue(FP16.greaterEquals(FP16.toHalf(0.0f), FP16.toHalf(-0.1f))); + assertTrue(FP16.greaterEquals(FP16.toHalf(-0.1f), FP16.toHalf(-0.1f))); + assertTrue(FP16.greaterEquals(FP16.toHalf(0.1f), FP16.toHalf(0.0f))); + assertFalse(FP16.greaterEquals(FP16.toHalf(0.0f), FP16.toHalf(0.1f))); + assertTrue(FP16.greaterEquals(FP16.toHalf(0.1f), FP16.toHalf(0.1f))); + } + + public static void testLess() { + assertTrue(FP16.less(FP16.NEGATIVE_INFINITY, FP16.POSITIVE_INFINITY)); + assertTrue(FP16.less(FP16.MAX_VALUE, FP16.POSITIVE_INFINITY)); + assertFalse(FP16.less(FP16.POSITIVE_INFINITY, FP16.MAX_VALUE)); + assertFalse(FP16.less(FP16.LOWEST_VALUE, FP16.NEGATIVE_INFINITY)); + assertTrue(FP16.less(FP16.NEGATIVE_INFINITY, FP16.LOWEST_VALUE)); + assertFalse(FP16.less(FP16.POSITIVE_ZERO, FP16.NEGATIVE_ZERO)); + assertFalse(FP16.less(FP16.NEGATIVE_ZERO, 
FP16.POSITIVE_ZERO)); + assertFalse(FP16.less(FP16.NaN, FP16.toHalf(12.3f))); + assertFalse(FP16.less(FP16.toHalf(12.3f), FP16.NaN)); + assertTrue(FP16.less(FP16.MIN_VALUE, FP16.MIN_NORMAL)); + assertFalse(FP16.less(FP16.MIN_NORMAL, FP16.MIN_VALUE)); + assertTrue(FP16.less(FP16.toHalf(12.3f), FP16.toHalf(12.4f))); + assertFalse(FP16.less(FP16.toHalf(12.4f), FP16.toHalf(12.3f))); + assertFalse(FP16.less(FP16.toHalf(-12.3f), FP16.toHalf(-12.4f))); + assertTrue(FP16.less(FP16.toHalf(-12.4f), FP16.toHalf(-12.3f))); + assertTrue(FP16.less(FP16.MIN_VALUE, (short) 0x3ff)); + + assertTrue(FP16.less(FP16.toHalf(-1.0f), FP16.toHalf(0.0f))); + assertFalse(FP16.less(FP16.toHalf(0.0f), FP16.toHalf(-1.0f))); + assertFalse(FP16.less(FP16.toHalf(-1.0f), FP16.toHalf(-1.0f))); + assertFalse(FP16.less(FP16.toHalf(-1.3f), FP16.toHalf(-1.3f))); + assertFalse(FP16.less(FP16.toHalf(1.0f), FP16.toHalf(0.0f))); + assertTrue(FP16.less(FP16.toHalf(0.0f), FP16.toHalf(1.0f))); + assertFalse(FP16.less(FP16.toHalf(1.0f), FP16.toHalf(1.0f))); + assertFalse(FP16.less(FP16.toHalf(1.3f), FP16.toHalf(1.3f))); + assertTrue(FP16.less(FP16.toHalf(-0.1f), FP16.toHalf(0.0f))); + assertFalse(FP16.less(FP16.toHalf(0.0f), FP16.toHalf(-0.1f))); + assertFalse(FP16.less(FP16.toHalf(-0.1f), FP16.toHalf(-0.1f))); + assertFalse(FP16.less(FP16.toHalf(0.1f), FP16.toHalf(0.0f))); + assertTrue(FP16.less(FP16.toHalf(0.0f), FP16.toHalf(0.1f))); + assertFalse(FP16.less(FP16.toHalf(0.1f), FP16.toHalf(0.1f))); + } + + public static void testLessEquals() { + assertTrue(FP16.lessEquals(FP16.NEGATIVE_INFINITY, FP16.POSITIVE_INFINITY)); + assertTrue(FP16.lessEquals(FP16.MAX_VALUE, FP16.POSITIVE_INFINITY)); + assertFalse(FP16.lessEquals(FP16.POSITIVE_INFINITY, FP16.MAX_VALUE)); + assertFalse(FP16.lessEquals(FP16.LOWEST_VALUE, FP16.NEGATIVE_INFINITY)); + assertTrue(FP16.lessEquals(FP16.NEGATIVE_INFINITY, FP16.LOWEST_VALUE)); + assertTrue(FP16.lessEquals(FP16.POSITIVE_ZERO, FP16.NEGATIVE_ZERO)); + 
assertTrue(FP16.lessEquals(FP16.NEGATIVE_ZERO, FP16.POSITIVE_ZERO)); + assertFalse(FP16.lessEquals(FP16.NaN, FP16.toHalf(12.3f))); + assertFalse(FP16.lessEquals(FP16.toHalf(12.3f), FP16.NaN)); + assertTrue(FP16.lessEquals(FP16.MIN_VALUE, FP16.MIN_NORMAL)); + assertFalse(FP16.lessEquals(FP16.MIN_NORMAL, FP16.MIN_VALUE)); + assertTrue(FP16.lessEquals(FP16.toHalf(12.3f), FP16.toHalf(12.4f))); + assertFalse(FP16.lessEquals(FP16.toHalf(12.4f), FP16.toHalf(12.3f))); + assertFalse(FP16.lessEquals(FP16.toHalf(-12.3f), FP16.toHalf(-12.4f))); + assertTrue(FP16.lessEquals(FP16.toHalf(-12.4f), FP16.toHalf(-12.3f))); + assertTrue(FP16.lessEquals(FP16.MIN_VALUE, (short) 0x3ff)); + assertTrue(FP16.lessEquals(FP16.NEGATIVE_INFINITY, FP16.NEGATIVE_INFINITY)); + assertTrue(FP16.lessEquals(FP16.POSITIVE_INFINITY, FP16.POSITIVE_INFINITY)); + assertTrue(FP16.lessEquals(FP16.toHalf(12.12356f), FP16.toHalf(12.12356f))); + assertTrue(FP16.lessEquals(FP16.toHalf(-12.12356f), FP16.toHalf(-12.12356f))); + + assertTrue(FP16.lessEquals(FP16.toHalf(-1.0f), FP16.toHalf(0.0f))); + assertFalse(FP16.lessEquals(FP16.toHalf(0.0f), FP16.toHalf(-1.0f))); + assertTrue(FP16.lessEquals(FP16.toHalf(-1.0f), FP16.toHalf(-1.0f))); + assertTrue(FP16.lessEquals(FP16.toHalf(-1.3f), FP16.toHalf(-1.3f))); + assertFalse(FP16.lessEquals(FP16.toHalf(1.0f), FP16.toHalf(0.0f))); + assertTrue(FP16.lessEquals(FP16.toHalf(0.0f), FP16.toHalf(1.0f))); + assertTrue(FP16.lessEquals(FP16.toHalf(1.0f), FP16.toHalf(1.0f))); + assertTrue(FP16.lessEquals(FP16.toHalf(1.3f), FP16.toHalf(1.3f))); + assertTrue(FP16.lessEquals(FP16.toHalf(-0.1f), FP16.toHalf(0.0f))); + assertFalse(FP16.lessEquals(FP16.toHalf(0.0f), FP16.toHalf(-0.1f))); + assertTrue(FP16.lessEquals(FP16.toHalf(-0.1f), FP16.toHalf(-0.1f))); + assertFalse(FP16.lessEquals(FP16.toHalf(0.1f), FP16.toHalf(0.0f))); + assertTrue(FP16.lessEquals(FP16.toHalf(0.0f), FP16.toHalf(0.1f))); + assertTrue(FP16.lessEquals(FP16.toHalf(0.1f), FP16.toHalf(0.1f))); + } + public static void 
main(String args[]) { testHalfToFloatToHalfConversions(); testToHalf(); @@ -197,5 +352,9 @@ public class Main { testFloor(); testCeil(); testRint(); + testGreater(); + testGreaterEquals(); + testLessEquals(); + testLess(); } } |