blob: e0d9be5fbf4cd541406b93c5d2add6814a82a9eb [file] [log] [blame]
Markos Chandrase24c3be2015-08-13 09:56:31 +02001/*
2 * IEEE754 floating point arithmetic
3 * double precision: MADDF.f (Fused Multiply Add)
4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
5 *
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
9 *
10 * This program is free software; you can distribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; version 2 of the License.
13 */
14
15#include "ieee754dp.h"
16
Paul Burtond728f672016-04-21 14:04:50 +010017
Douglas Leung2cfa5822017-07-27 18:08:59 +020018/* 128 bits shift right logical with rounding. */
19void srl128(u64 *hptr, u64 *lptr, int count)
20{
21 u64 low;
22
23 if (count >= 128) {
24 *lptr = *hptr != 0 || *lptr != 0;
25 *hptr = 0;
26 } else if (count >= 64) {
27 if (count == 64) {
28 *lptr = *hptr | (*lptr != 0);
29 } else {
30 low = *lptr;
31 *lptr = *hptr >> (count - 64);
32 *lptr |= (*hptr << (128 - count)) != 0 || low != 0;
33 }
34 *hptr = 0;
35 } else {
36 low = *lptr;
37 *lptr = low >> count | *hptr << (64 - count);
38 *lptr |= (low << (64 - count)) != 0;
39 *hptr = *hptr >> count;
40 }
41}
42
Paul Burtond728f672016-04-21 14:04:50 +010043static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
44 union ieee754dp y, enum maddf_flags flags)
Markos Chandrase24c3be2015-08-13 09:56:31 +020045{
46 int re;
47 int rs;
Markos Chandrase24c3be2015-08-13 09:56:31 +020048 unsigned lxm;
49 unsigned hxm;
50 unsigned lym;
51 unsigned hym;
52 u64 lrm;
53 u64 hrm;
Douglas Leung2cfa5822017-07-27 18:08:59 +020054 u64 lzm;
55 u64 hzm;
Markos Chandrase24c3be2015-08-13 09:56:31 +020056 u64 t;
57 u64 at;
58 int s;
59
60 COMPXDP;
61 COMPYDP;
Paul Burtone2d11e12016-04-21 14:04:51 +010062 COMPZDP;
Markos Chandrase24c3be2015-08-13 09:56:31 +020063
64 EXPLODEXDP;
65 EXPLODEYDP;
Paul Burtone2d11e12016-04-21 14:04:51 +010066 EXPLODEZDP;
Markos Chandrase24c3be2015-08-13 09:56:31 +020067
68 FLUSHXDP;
69 FLUSHYDP;
Paul Burtone2d11e12016-04-21 14:04:51 +010070 FLUSHZDP;
Markos Chandrase24c3be2015-08-13 09:56:31 +020071
72 ieee754_clearcx();
73
Aleksandar Markovice840be62017-07-27 18:08:54 +020074 /*
75 * Handle the cases when at least one of x, y or z is a NaN.
76 * Order of precedence is sNaN, qNaN and z, x, y.
77 */
78 if (zc == IEEE754_CLASS_SNAN)
Markos Chandrase24c3be2015-08-13 09:56:31 +020079 return ieee754dp_nanxcpt(z);
Aleksandar Markovice840be62017-07-27 18:08:54 +020080 if (xc == IEEE754_CLASS_SNAN)
Markos Chandrase24c3be2015-08-13 09:56:31 +020081 return ieee754dp_nanxcpt(x);
Aleksandar Markovice840be62017-07-27 18:08:54 +020082 if (yc == IEEE754_CLASS_SNAN)
83 return ieee754dp_nanxcpt(y);
84 if (zc == IEEE754_CLASS_QNAN)
85 return z;
86 if (xc == IEEE754_CLASS_QNAN)
87 return x;
88 if (yc == IEEE754_CLASS_QNAN)
Markos Chandrase24c3be2015-08-13 09:56:31 +020089 return y;
90
Aleksandar Markovice840be62017-07-27 18:08:54 +020091 if (zc == IEEE754_CLASS_DNORM)
92 DPDNORMZ;
93 /* ZERO z cases are handled separately below */
Markos Chandrase24c3be2015-08-13 09:56:31 +020094
Aleksandar Markovice840be62017-07-27 18:08:54 +020095 switch (CLPAIR(xc, yc)) {
Markos Chandrase24c3be2015-08-13 09:56:31 +020096
97 /*
98 * Infinity handling
99 */
100 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
101 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
Markos Chandrase24c3be2015-08-13 09:56:31 +0200102 ieee754_setcx(IEEE754_INVALID_OPERATION);
103 return ieee754dp_indef();
104
105 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
106 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
107 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
108 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
109 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
Aleksandar Markovic0c64fe62017-07-27 18:08:55 +0200110 if ((zc == IEEE754_CLASS_INF) &&
Aleksandar Markovicae11c062017-07-27 18:08:57 +0200111 ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
112 ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
Aleksandar Markovic0c64fe62017-07-27 18:08:55 +0200113 /*
114 * Cases of addition of infinities with opposite signs
115 * or subtraction of infinities with same signs.
116 */
117 ieee754_setcx(IEEE754_INVALID_OPERATION);
118 return ieee754dp_indef();
119 }
120 /*
121 * z is here either not an infinity, or an infinity having the
122 * same sign as product (x*y) (in case of MADDF.D instruction)
123 * or product -(x*y) (in MSUBF.D case). The result must be an
124 * infinity, and its sign is determined only by the value of
Aleksandar Markovicae11c062017-07-27 18:08:57 +0200125 * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
Aleksandar Markovic0c64fe62017-07-27 18:08:55 +0200126 */
Aleksandar Markovicae11c062017-07-27 18:08:57 +0200127 if (flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic0c64fe62017-07-27 18:08:55 +0200128 return ieee754dp_inf(1 ^ (xs ^ ys));
129 else
130 return ieee754dp_inf(xs ^ ys);
Markos Chandrase24c3be2015-08-13 09:56:31 +0200131
132 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
133 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
134 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
135 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
136 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
137 if (zc == IEEE754_CLASS_INF)
138 return ieee754dp_inf(zs);
Aleksandar Markovic7cf64ce2017-07-27 18:08:56 +0200139 if (zc == IEEE754_CLASS_ZERO) {
140 /* Handle cases +0 + (-0) and similar ones. */
Aleksandar Markovicae11c062017-07-27 18:08:57 +0200141 if ((!(flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic7cf64ce2017-07-27 18:08:56 +0200142 && (zs == (xs ^ ys))) ||
Aleksandar Markovicae11c062017-07-27 18:08:57 +0200143 ((flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic7cf64ce2017-07-27 18:08:56 +0200144 && (zs != (xs ^ ys))))
145 /*
146 * Cases of addition of zeros of equal signs
147 * or subtraction of zeroes of opposite signs.
148 * The sign of the resulting zero is in any
149 * such case determined only by the sign of z.
150 */
151 return z;
152
153 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
154 }
155 /* x*y is here 0, and z is not 0, so just return z */
Markos Chandrase24c3be2015-08-13 09:56:31 +0200156 return z;
157
158 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
159 DPDNORMX;
160
161 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
Aleksandar Markovice840be62017-07-27 18:08:54 +0200162 if (zc == IEEE754_CLASS_INF)
Markos Chandrase24c3be2015-08-13 09:56:31 +0200163 return ieee754dp_inf(zs);
164 DPDNORMY;
165 break;
166
167 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
Aleksandar Markovice840be62017-07-27 18:08:54 +0200168 if (zc == IEEE754_CLASS_INF)
Markos Chandrase24c3be2015-08-13 09:56:31 +0200169 return ieee754dp_inf(zs);
170 DPDNORMX;
171 break;
172
173 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
Aleksandar Markovice840be62017-07-27 18:08:54 +0200174 if (zc == IEEE754_CLASS_INF)
Markos Chandrase24c3be2015-08-13 09:56:31 +0200175 return ieee754dp_inf(zs);
176 /* fall through to real computations */
177 }
178
179 /* Finally get to do some computation */
180
181 /*
182 * Do the multiplication bit first
183 *
184 * rm = xm * ym, re = xe + ye basically
185 *
186 * At this point xm and ym should have been normalized.
187 */
188 assert(xm & DP_HIDDEN_BIT);
189 assert(ym & DP_HIDDEN_BIT);
190
191 re = xe + ye;
192 rs = xs ^ ys;
Aleksandar Markovicae11c062017-07-27 18:08:57 +0200193 if (flags & MADDF_NEGATE_PRODUCT)
Paul Burtond728f672016-04-21 14:04:50 +0100194 rs ^= 1;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200195
196 /* shunt to top of word */
197 xm <<= 64 - (DP_FBITS + 1);
198 ym <<= 64 - (DP_FBITS + 1);
199
200 /*
Douglas Leung2cfa5822017-07-27 18:08:59 +0200201 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
Markos Chandrase24c3be2015-08-13 09:56:31 +0200202 */
203
204 /* 32 * 32 => 64 */
205#define DPXMULT(x, y) ((u64)(x) * (u64)y)
206
207 lxm = xm;
208 hxm = xm >> 32;
209 lym = ym;
210 hym = ym >> 32;
211
212 lrm = DPXMULT(lxm, lym);
213 hrm = DPXMULT(hxm, hym);
214
215 t = DPXMULT(lxm, hym);
216
217 at = lrm + (t << 32);
218 hrm += at < lrm;
219 lrm = at;
220
221 hrm = hrm + (t >> 32);
222
223 t = DPXMULT(hxm, lym);
224
225 at = lrm + (t << 32);
226 hrm += at < lrm;
227 lrm = at;
228
229 hrm = hrm + (t >> 32);
230
Douglas Leung2cfa5822017-07-27 18:08:59 +0200231 /* Put explicit bit at bit 126 if necessary */
232 if ((int64_t)hrm < 0) {
233 lrm = (hrm << 63) | (lrm >> 1);
234 hrm = hrm >> 1;
Paul Burton5c18c9362016-04-21 14:04:53 +0100235 re++;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200236 }
Markos Chandrase24c3be2015-08-13 09:56:31 +0200237
Douglas Leung2cfa5822017-07-27 18:08:59 +0200238 assert(hrm & (1 << 62));
Aleksandar Markovicddbfff72017-06-19 17:50:12 +0200239
Douglas Leung2cfa5822017-07-27 18:08:59 +0200240 if (zc == IEEE754_CLASS_ZERO) {
241 /*
242 * Move explicit bit from bit 126 to bit 55 since the
243 * ieee754dp_format code expects the mantissa to be
244 * 56 bits wide (53 + 3 rounding bits).
245 */
246 srl128(&hrm, &lrm, (126 - 55));
247 return ieee754dp_format(rs, re, lrm);
248 }
Markos Chandrase24c3be2015-08-13 09:56:31 +0200249
Douglas Leung2cfa5822017-07-27 18:08:59 +0200250 /* Move explicit bit from bit 52 to bit 126 */
251 lzm = 0;
252 hzm = zm << 10;
253 assert(hzm & (1 << 62));
Markos Chandrase24c3be2015-08-13 09:56:31 +0200254
Douglas Leung2cfa5822017-07-27 18:08:59 +0200255 /* Make the exponents the same */
Markos Chandrase24c3be2015-08-13 09:56:31 +0200256 if (ze > re) {
257 /*
258 * Have to shift y fraction right to align.
259 */
260 s = ze - re;
Douglas Leung2cfa5822017-07-27 18:08:59 +0200261 srl128(&hrm, &lrm, s);
Markos Chandrase24c3be2015-08-13 09:56:31 +0200262 re += s;
263 } else if (re > ze) {
264 /*
265 * Have to shift x fraction right to align.
266 */
267 s = re - ze;
Douglas Leung2cfa5822017-07-27 18:08:59 +0200268 srl128(&hzm, &lzm, s);
Markos Chandrase24c3be2015-08-13 09:56:31 +0200269 ze += s;
270 }
271 assert(ze == re);
272 assert(ze <= DP_EMAX);
273
Douglas Leung2cfa5822017-07-27 18:08:59 +0200274 /* Do the addition */
Markos Chandrase24c3be2015-08-13 09:56:31 +0200275 if (zs == rs) {
276 /*
Douglas Leung2cfa5822017-07-27 18:08:59 +0200277 * Generate 128 bit result by adding two 127 bit numbers
278 * leaving result in hzm:lzm, zs and ze.
Markos Chandrase24c3be2015-08-13 09:56:31 +0200279 */
Douglas Leung2cfa5822017-07-27 18:08:59 +0200280 hzm = hzm + hrm + (lzm > (lzm + lrm));
281 lzm = lzm + lrm;
282 if ((int64_t)hzm < 0) { /* carry out */
283 srl128(&hzm, &lzm, 1);
Markos Chandrase24c3be2015-08-13 09:56:31 +0200284 ze++;
285 }
286 } else {
Douglas Leung2cfa5822017-07-27 18:08:59 +0200287 if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
288 hzm = hzm - hrm - (lzm < lrm);
289 lzm = lzm - lrm;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200290 } else {
Douglas Leung2cfa5822017-07-27 18:08:59 +0200291 hzm = hrm - hzm - (lrm < lzm);
292 lzm = lrm - lzm;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200293 zs = rs;
294 }
Douglas Leung2cfa5822017-07-27 18:08:59 +0200295 if (lzm == 0 && hzm == 0)
Markos Chandrase24c3be2015-08-13 09:56:31 +0200296 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
297
298 /*
Douglas Leung2cfa5822017-07-27 18:08:59 +0200299 * Put explicit bit at bit 126 if necessary.
Markos Chandrase24c3be2015-08-13 09:56:31 +0200300 */
Douglas Leung2cfa5822017-07-27 18:08:59 +0200301 if (hzm == 0) {
302 /* left shift by 63 or 64 bits */
303 if ((int64_t)lzm < 0) {
304 /* MSB of lzm is the explicit bit */
305 hzm = lzm >> 1;
306 lzm = lzm << 63;
307 ze -= 63;
308 } else {
309 hzm = lzm;
310 lzm = 0;
311 ze -= 64;
312 }
313 }
314
315 t = 0;
316 while ((hzm >> (62 - t)) == 0)
317 t++;
318
319 assert(t <= 62);
320 if (t) {
321 hzm = hzm << t | lzm >> (64 - t);
322 lzm = lzm << t;
323 ze -= t;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200324 }
325 }
326
Douglas Leung2cfa5822017-07-27 18:08:59 +0200327 /*
328 * Move explicit bit from bit 126 to bit 55 since the
329 * ieee754dp_format code expects the mantissa to be
330 * 56 bits wide (53 + 3 rounding bits).
331 */
332 srl128(&hzm, &lzm, (126 - 55));
333
334 return ieee754dp_format(zs, ze, lzm);
Markos Chandrase24c3be2015-08-13 09:56:31 +0200335}
Paul Burtond728f672016-04-21 14:04:50 +0100336
337union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
338 union ieee754dp y)
339{
340 return _dp_maddf(z, x, y, 0);
341}
342
343union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
344 union ieee754dp y)
345{
Aleksandar Markovicae11c062017-07-27 18:08:57 +0200346 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
Paul Burtond728f672016-04-21 14:04:50 +0100347}