Blame - arch/mips/math-emu/dp_maddf.c - LeafOS-Devices/android_kernel_realme_mt6785

blob: e0d9be5fbf4cd541406b93c5d2add6814a82a9eb [file] [log] [blame]

Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	1	/*
				2	* IEEE754 floating point arithmetic
				3	* double precision: MADDF.f (Fused Multiply Add)
				4	* MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
				5	*
				6	* MIPS floating point support
				7	* Copyright (C) 2015 Imagination Technologies, Ltd.
				8	* Author: Markos Chandras <markos.chandras@imgtec.com>
				9	*
				10	* This program is free software; you can distribute it and/or modify it
				11	* under the terms of the GNU General Public License as published by the
				12	* Free Software Foundation; version 2 of the License.
				13	*/
				14
				15	#include "ieee754dp.h"
				16
Paul Burton	d728f67	2016-04-21 14:04:50 +0100	[diff] [blame]	17
/*
 * 128-bit logical shift right with a sticky least significant bit.
 *
 * *hptr:*lptr is treated as one unsigned 128-bit quantity, with *hptr
 * holding the upper 64 bits.  The quantity is shifted right by count bit
 * positions in place.  Whenever a non-zero bit is shifted out, bit 0 of the
 * result is set, so the caller can still tell that the discarded part was
 * not exactly zero.
 *
 * @hptr:  upper 64 bits of the operand, updated in place
 * @lptr:  lower 64 bits of the operand, updated in place
 * @count: shift distance in bits; a value <= 0 leaves the operand untouched
 *         and a value >= 128 reduces it to the sticky bit alone
 */
void srl128(u64 *hptr, u64 *lptr, int count)
{
	u64 low;

	/*
	 * Nothing to shift.  Returning here also keeps the last branch below
	 * from evaluating "<< (64 - count)" with count == 0, which would be a
	 * shift by the full width of the type and therefore undefined.
	 */
	if (count <= 0)
		return;

	if (count >= 128) {
		/* Every bit is shifted out; only the sticky bit survives. */
		*lptr = *hptr != 0 || *lptr != 0;
		*hptr = 0;
	} else if (count >= 64) {
		if (count == 64) {
			/* The high word moves down; the old low word is sticky. */
			*lptr = *hptr | (*lptr != 0);
		} else {
			low = *lptr;
			*lptr = *hptr >> (count - 64);
			/* Sticky: bits dropped from the high word, or any low bit. */
			*lptr |= (*hptr << (128 - count)) != 0 || low != 0;
		}
		*hptr = 0;
	} else {
		low = *lptr;
		*lptr = low >> count | *hptr << (64 - count);
		/* Sticky: the bits of the low word that fell off the end. */
		*lptr |= (low << (64 - count)) != 0;
		*hptr = *hptr >> count;
	}
}
				42
Paul Burton	d728f67	2016-04-21 14:04:50 +0100	[diff] [blame]	43	static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
				44	union ieee754dp y, enum maddf_flags flags)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	45	{
				46	int re;
				47	int rs;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	48	unsigned lxm;
				49	unsigned hxm;
				50	unsigned lym;
				51	unsigned hym;
				52	u64 lrm;
				53	u64 hrm;
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	54	u64 lzm;
				55	u64 hzm;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	56	u64 t;
				57	u64 at;
				58	int s;
				59
				60	COMPXDP;
				61	COMPYDP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	62	COMPZDP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	63
				64	EXPLODEXDP;
				65	EXPLODEYDP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	66	EXPLODEZDP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	67
				68	FLUSHXDP;
				69	FLUSHYDP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	70	FLUSHZDP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	71
				72	ieee754_clearcx();
				73
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	74	/*
				75	* Handle the cases when at least one of x, y or z is a NaN.
				76	* Order of precedence is sNaN, qNaN and z, x, y.
				77	*/
				78	if (zc == IEEE754_CLASS_SNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	79	return ieee754dp_nanxcpt(z);
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	80	if (xc == IEEE754_CLASS_SNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	81	return ieee754dp_nanxcpt(x);
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	82	if (yc == IEEE754_CLASS_SNAN)
				83	return ieee754dp_nanxcpt(y);
				84	if (zc == IEEE754_CLASS_QNAN)
				85	return z;
				86	if (xc == IEEE754_CLASS_QNAN)
				87	return x;
				88	if (yc == IEEE754_CLASS_QNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	89	return y;
				90
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	91	if (zc == IEEE754_CLASS_DNORM)
				92	DPDNORMZ;
				93	/* ZERO z cases are handled separately below */
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	94
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	95	switch (CLPAIR(xc, yc)) {
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	96
				97	/*
				98	* Infinity handling
				99	*/
				100	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
				101	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	102	ieee754_setcx(IEEE754_INVALID_OPERATION);
				103	return ieee754dp_indef();
				104
				105	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
				106	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
				107	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
				108	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
				109	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
Aleksandar Markovic	0c64fe6	2017-07-27 18:08:55 +0200	[diff] [blame]	110	if ((zc == IEEE754_CLASS_INF) &&
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	111	((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) \|\|
				112	((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
Aleksandar Markovic	0c64fe6	2017-07-27 18:08:55 +0200	[diff] [blame]	113	/*
				114	* Cases of addition of infinities with opposite signs
				115	* or subtraction of infinities with same signs.
				116	*/
				117	ieee754_setcx(IEEE754_INVALID_OPERATION);
				118	return ieee754dp_indef();
				119	}
				120	/*
				121	* z is here either not an infinity, or an infinity having the
				122	* same sign as product (x*y) (in case of MADDF.D instruction)
				123	* or product -(x*y) (in MSUBF.D case). The result must be an
				124	* infinity, and its sign is determined only by the value of
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	125	* (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
Aleksandar Markovic	0c64fe6	2017-07-27 18:08:55 +0200	[diff] [blame]	126	*/
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	127	if (flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic	0c64fe6	2017-07-27 18:08:55 +0200	[diff] [blame]	128	return ieee754dp_inf(1 ^ (xs ^ ys));
				129	else
				130	return ieee754dp_inf(xs ^ ys);
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	131
				132	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
				133	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
				134	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
				135	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
				136	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
				137	if (zc == IEEE754_CLASS_INF)
				138	return ieee754dp_inf(zs);
Aleksandar Markovic	7cf64ce	2017-07-27 18:08:56 +0200	[diff] [blame]	139	if (zc == IEEE754_CLASS_ZERO) {
				140	/* Handle cases +0 + (-0) and similar ones. */
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	141	if ((!(flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic	7cf64ce	2017-07-27 18:08:56 +0200	[diff] [blame]	142	&& (zs == (xs ^ ys))) \|\|
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	143	((flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic	7cf64ce	2017-07-27 18:08:56 +0200	[diff] [blame]	144	&& (zs != (xs ^ ys))))
				145	/*
				146	* Cases of addition of zeros of equal signs
				147	* or subtraction of zeroes of opposite signs.
				148	* The sign of the resulting zero is in any
				149	* such case determined only by the sign of z.
				150	*/
				151	return z;
				152
				153	return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
				154	}
				155	/* xy is here 0, and z is not 0, so just return z /
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	156	return z;
				157
				158	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
				159	DPDNORMX;
				160
				161	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	162	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	163	return ieee754dp_inf(zs);
				164	DPDNORMY;
				165	break;
				166
				167	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	168	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	169	return ieee754dp_inf(zs);
				170	DPDNORMX;
				171	break;
				172
				173	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	174	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	175	return ieee754dp_inf(zs);
				176	/* fall through to real computations */
				177	}
				178
				179	/* Finally get to do some computation */
				180
				181	/*
				182	* Do the multiplication bit first
				183	*
				184	* rm = xm * ym, re = xe + ye basically
				185	*
				186	* At this point xm and ym should have been normalized.
				187	*/
				188	assert(xm & DP_HIDDEN_BIT);
				189	assert(ym & DP_HIDDEN_BIT);
				190
				191	re = xe + ye;
				192	rs = xs ^ ys;
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	193	if (flags & MADDF_NEGATE_PRODUCT)
Paul Burton	d728f67	2016-04-21 14:04:50 +0100	[diff] [blame]	194	rs ^= 1;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	195
				196	/* shunt to top of word */
				197	xm <<= 64 - (DP_FBITS + 1);
				198	ym <<= 64 - (DP_FBITS + 1);
				199
				200	/*
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	201	* Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	202	*/
				203
				204	/* 32 * 32 => 64 */
				205	#define DPXMULT(x, y) ((u64)(x) * (u64)y)
				206
				207	lxm = xm;
				208	hxm = xm >> 32;
				209	lym = ym;
				210	hym = ym >> 32;
				211
				212	lrm = DPXMULT(lxm, lym);
				213	hrm = DPXMULT(hxm, hym);
				214
				215	t = DPXMULT(lxm, hym);
				216
				217	at = lrm + (t << 32);
				218	hrm += at < lrm;
				219	lrm = at;
				220
				221	hrm = hrm + (t >> 32);
				222
				223	t = DPXMULT(hxm, lym);
				224
				225	at = lrm + (t << 32);
				226	hrm += at < lrm;
				227	lrm = at;
				228
				229	hrm = hrm + (t >> 32);
				230
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	231	/* Put explicit bit at bit 126 if necessary */
				232	if ((int64_t)hrm < 0) {
				233	lrm = (hrm << 63) \| (lrm >> 1);
				234	hrm = hrm >> 1;
Paul Burton	5c18c936	2016-04-21 14:04:53 +0100	[diff] [blame]	235	re++;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	236	}
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	237
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	238	assert(hrm & (1 << 62));
Aleksandar Markovic	ddbfff7	2017-06-19 17:50:12 +0200	[diff] [blame]	239
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	240	if (zc == IEEE754_CLASS_ZERO) {
				241	/*
				242	* Move explicit bit from bit 126 to bit 55 since the
				243	* ieee754dp_format code expects the mantissa to be
				244	* 56 bits wide (53 + 3 rounding bits).
				245	*/
				246	srl128(&hrm, &lrm, (126 - 55));
				247	return ieee754dp_format(rs, re, lrm);
				248	}
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	249
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	250	/* Move explicit bit from bit 52 to bit 126 */
				251	lzm = 0;
				252	hzm = zm << 10;
				253	assert(hzm & (1 << 62));
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	254
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	255	/* Make the exponents the same */
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	256	if (ze > re) {
				257	/*
				258	* Have to shift y fraction right to align.
				259	*/
				260	s = ze - re;
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	261	srl128(&hrm, &lrm, s);
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	262	re += s;
				263	} else if (re > ze) {
				264	/*
				265	* Have to shift x fraction right to align.
				266	*/
				267	s = re - ze;
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	268	srl128(&hzm, &lzm, s);
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	269	ze += s;
				270	}
				271	assert(ze == re);
				272	assert(ze <= DP_EMAX);
				273
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	274	/* Do the addition */
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	275	if (zs == rs) {
				276	/*
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	277	* Generate 128 bit result by adding two 127 bit numbers
				278	* leaving result in hzm:lzm, zs and ze.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	279	*/
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	280	hzm = hzm + hrm + (lzm > (lzm + lrm));
				281	lzm = lzm + lrm;
				282	if ((int64_t)hzm < 0) { /* carry out */
				283	srl128(&hzm, &lzm, 1);
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	284	ze++;
				285	}
				286	} else {
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	287	if (hzm > hrm \|\| (hzm == hrm && lzm >= lrm)) {
				288	hzm = hzm - hrm - (lzm < lrm);
				289	lzm = lzm - lrm;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	290	} else {
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	291	hzm = hrm - hzm - (lrm < lzm);
				292	lzm = lrm - lzm;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	293	zs = rs;
				294	}
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	295	if (lzm == 0 && hzm == 0)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	296	return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
				297
				298	/*
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	299	* Put explicit bit at bit 126 if necessary.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	300	*/
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	301	if (hzm == 0) {
				302	/* left shift by 63 or 64 bits */
				303	if ((int64_t)lzm < 0) {
				304	/* MSB of lzm is the explicit bit */
				305	hzm = lzm >> 1;
				306	lzm = lzm << 63;
				307	ze -= 63;
				308	} else {
				309	hzm = lzm;
				310	lzm = 0;
				311	ze -= 64;
				312	}
				313	}
				314
				315	t = 0;
				316	while ((hzm >> (62 - t)) == 0)
				317	t++;
				318
				319	assert(t <= 62);
				320	if (t) {
				321	hzm = hzm << t \| lzm >> (64 - t);
				322	lzm = lzm << t;
				323	ze -= t;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	324	}
				325	}
				326
Douglas Leung	2cfa582	2017-07-27 18:08:59 +0200	[diff] [blame]	327	/*
				328	* Move explicit bit from bit 126 to bit 55 since the
				329	* ieee754dp_format code expects the mantissa to be
				330	* 56 bits wide (53 + 3 rounding bits).
				331	*/
				332	srl128(&hzm, &lzm, (126 - 55));
				333
				334	return ieee754dp_format(zs, ze, lzm);
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	335	}
Paul Burton	d728f67	2016-04-21 14:04:50 +0100	[diff] [blame]	336
				337	union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
				338	union ieee754dp y)
				339	{
				340	return _dp_maddf(z, x, y, 0);
				341	}
				342
				343	union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
				344	union ieee754dp y)
				345	{
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	346	return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
Paul Burton	d728f67	2016-04-21 14:04:50 +0100	[diff] [blame]	347	}