1 //! @file rts-real32.c
2 //! @author (see below)
3 //
4 //! @section Copyright
5 //
6 // This file is part of VIF - vintage FORTRAN compiler.
7 // Copyright 2020-2024 J. Marcel van der Veer <algol68g@xs4all.nl>.
8 //
9 //! @section License
10 //
11 // This program is free software; you can redistribute it and/or modify it
12 // under the terms of the GNU General Public License as published by the
13 // Free Software Foundation; either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful, but
17 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 // more details. You should have received a copy of the GNU General Public
20 // License along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22 //! @section Synopsis
23 //!
24 //! Runtime support for REAL*32 and COMPLEX*64.
25
26 // The code is based on the HPA Library, available from:
27 // <http://download-mirror.savannah.gnu.org/releases/hpalib/>
28 //
29 // Copyright (C) 2000 Daniel A. Atkinson <DanAtk@aol.com>
30 // Copyright (C) 2004 Ivano Primi <ivprimi@libero.it>
31 // Copyright (C) 2022 Marcel van der Veer <algol68g@xs4all.nl> - VIF version.
32 //
33 // HPA code is adapted to work with VIF to implement REAL*32 and COMPLEX*64.
34 // HPA was chosen since it stores a 256-bit float as a 256-bit struct, which
35 // is convenient for FORTRAN.
36 //
37 // The HPA Library is free software; you can redistribute it and/or
38 // modify it under the terms of the GNU Lesser General Public
39 // License as published by the Free Software Foundation; either
40 // version 2.1 of the License, or (at your option) any later version.
41 //
42 // The HPA Library is distributed in the hope that it will be useful,
43 // but WITHOUT ANY WARRANTY; without even the implied warranty of
44 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
45 // Lesser General Public License for more details.
46 //
47 // You should have received a copy of the GNU Lesser General Public
48 // License along with the HPA Library; if not, write to the Free
49 // Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
50 // 02110-1301 USA.
51
52 // The functions forming the HPA library are all implemented in a portable
53 // fashion in the C language. The IEEE 754 standard for floating point
54 // hardware and software is assumed in the PC/Unix version of this library.
55 // A REAL*32 number is represented as a combination of the following elements:
56 //
57 // sign bit(s): 0 -> positive, 1 -> negative ;
58 // exponent(e): 15-bit biased integer (bias=16383) ;
59 // mantissa(m): 15 words of 16 bit length *including* leading 1.
60 //
61 // The range of representable numbers is then given by
62 //
63 // 2^16384 > x > 2^[-16383] => 1.19*10^4932 > x > 1.68*10^-[4932]
64 //
65 // Special values of the exponent are:
66 //
67 // all ones -> infinity (floating point overflow)
68 // all zeros -> number = zero.
69 //
70 // Underflow in operations is handled by a flush to zero. Thus, a number
71 // with the exponent zero and nonzero mantissa is invalid (not-a-number).
72 // From the point of view of the HPA library, a complex number is a
73 // structure formed by two REAL*32 numbers.
74 //
75 // HPA cannot extend precision beyond the preset, hardcoded precision.
76 // Hence some math routines will not achieve full precision.
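//
// Worked example (added for illustration, not part of the original HPA text):
// with the layout above, the value 1.0 = (-1)^0 * 2^0 * 1.0 is stored as
//
//    value[0] = 0x3fff                  sign 0, biased exponent 16383
//    value[1] = 0x8000                  mantissa with the explicit leading 1
//    value[2] ... value[15] = 0x0000
//
// and -2.0 = (-1)^1 * 2^1 * 1.0 as value[0] = 0xc000 (sign bit | 16384),
// value[1] = 0x8000 and the remaining mantissa words zero.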
77
78 #include <vif.h>
79 #include <rts-real32.h>
80
81 static const char *errmsg[] = {
82 "No error",
83 "Division by zero",
84 "Out of domain",
85 "Bad exponent",
86 "Floating point overflow",
87 "Invalid error code"
88 };
89
90 #define SEXP(z) ((unt_2 *) &(z.value))
91
92 void _pi32 (real_32 *x)
93 {
94 *x = X_PI;
95 }
96
97 int_4 xsigerr (int_4 errcond, int_4 errcode, const char *where)
98 {
99 // errcond must come from the evaluation of an error condition.
100 // errcode, which should describe the type of the error,
101 // should always be one of XEDIV, XEDOM, XEBADEXP or XFPOFLOW.
102 if (errcond == 0) {
103 errcode = 0;
104 }
105 if (errcode < 0 || errcode > XNERR) {
106 errcode = XEINV;
107 }
108 if (errcode != 0) {
109 RTE (where, errmsg[errcode]);
110 }
111 return errcode;
112 }
113
114 // Elementary stuff.
115
116 inline real_32 xneg (real_32 s)
117 {
118 unt_2 *p = SEXP (s);
119 *p ^= X_SIGN_MASK;
120 return s;
121 }
122
123 inline real_32 xabs (real_32 s)
124 {
125 unt_2 *p = SEXP (s);
126 *p &= X_EXPO_MASK;
127 return s;
128 }
129
130 inline int_4 xgetexp (const real_32 * ps)
131 {
132 unt_2 *q = (unt_2 *) &(ps->value);
133 return (*q & X_EXPO_MASK) - X_EXPO_BIAS;
134 }
135
136 inline int_4 xgetsgn (const real_32 * ps)
137 {
138 unt_2 *q = (unt_2 *) &(ps->value);
139 return (*q & X_SIGN_MASK);
140 }
141
142 int_4 xreal_cmp (const real_32 * pa, const real_32 * pb)
143 {
144 unt_2 e, k, *p, *q, p0, q0;
145 int_4 m;
146 p = (unt_2 *) &(pa->value);
147 q = (unt_2 *) &(pb->value);
148 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
149 if (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK) {
150 // *pa is actually zero
151 p0 = 0;
152 } else {
153 p0 = *p;
154 }
155 for (m = 1; m <= FLT256_LEN && q[m] == 0; m++);
156 if (m > FLT256_LEN && (*q & X_EXPO_MASK) < X_EXPO_MASK) {
157 // *pb is actually zero
158 q0 = 0;
159 } else {
160 q0 = *q;
161 }
162 e = p0 & X_SIGN_MASK;
163 k = q0 & X_SIGN_MASK;
164 if (e && !k) {
165 return -1;
166 } else if (!e && k) {
167 return 1;
168 } else { // *pa and *pb have the same sign
169 m = (e) ? -1 : 1;
170 e = p0 & X_EXPO_MASK;
171 k = q0 & X_EXPO_MASK;
172 if (e > k) {
173 return m;
174 } else if (e < k) {
175 return -m;
176 } else {
177 for (e = 0; *(++p) == *(++q) && e < FLT256_LEN; ++e);
178 if (e < FLT256_LEN) {
179 return (*p > *q ? m : -m);
180 } else {
181 return 0;
182 }
183 }
184 }
185 }
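
// Usage sketch (illustrative, not part of the library): xreal_cmp is a
// three-way compare like strcmp, returning -1, 0 or +1. The xeq/xlt/xgt
// wrappers further below are built directly on it.
//
//    real_32 a = X_0, b = X_1;
//    int_4 r = xreal_cmp (&a, &b);     // r == -1, since 0 < 1
//    int_4 s = xreal_cmp (&b, &b);     // s == 0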
186
187 real_32 xreal_2 (real_32 s, int_4 m)
188 {
189 unt_2 *p = SEXP (s);
190 int_4 e;
191 for (e = 1; e <= FLT256_LEN && p[e] == 0; e++);
192 if (e <= FLT256_LEN) {
193 e = *p & X_EXPO_MASK; // biased exponent
194 if (e + m < 0) {
195 return X_0;
196 } else if ((xsigerr (e + m >= X_EXPO_MASK, XFPOFLOW, NULL))) {
197 return ((s.value[0] & X_SIGN_MASK) ? X_MINUS_INF : X_PLUS_INF);
198 } else {
199 *p += m;
200 return s;
201 }
202 } else { // s is zero or +-Inf
203 return s;
204 }
205 }
206
207 real_32 xsfmod (real_32 s, int_4 *p)
208 {
209 unt_2 *pa = SEXP (s);
210 unt_2 *pb = pa + 1;
211 int_2 e, k;
212 e = (*pa & X_EXPO_MASK) - X_EXPO_BIAS;
213 if ((xsigerr (e >= 15, XFPOFLOW, NULL))) {
214 *p = -1;
215 return s;
216 } else if (e < 0) {
217 *p = 0;
218 return s;
219 }
220 *p = *pb >> (15 - e);
221 xlshift (++e, pb, FLT256_LEN);
222 *pa -= e;
223 for (e = 0; *pb == 0 && e < xMax_p; ++pb, e += 16);
224 if (e == xMax_p) {
225 return X_0;
226 }
227 for (k = 0; !((*pb << k) & X_SIGN_MASK); ++k);
228 if ((k += e)) {
229 xlshift (k, pa + 1, FLT256_LEN);
230 *pa -= k;
231 }
232 return s;
233 }
234
235 void xlshift (int_4 n, unt_2 *pm, int_4 m)
236 {
237 unt_2 *pc = pm + m - 1;
238 if (n < 16 * m) {
239 unt_2 *pa = pm + n / 16;
240 m = n % 16;
241 n = 16 - m;
242 while (pa < pc) {
243 *pm = (*pa++) << m;
244 *pm++ |= *pa >> n;
245 }
246 *pm++ = *pa << m;
247 }
248 while (pm <= pc) {
249 *pm++ = 0;
250 }
251 }
252
253 void xrshift (int_4 n, unt_2 *pm, int_4 m)
254 {
255 unt_2 *pc = pm + m - 1;
256 if (n < 16 * m) {
257 unt_2 *pa = pc - n / 16;
258 m = n % 16;
259 n = 16 - m;
260 while (pa > pm) {
261 *pc = (*pa--) >> m;
262 *pc-- |= *pa << n;
263 }
264 *pc-- = *pa >> m;
265 }
266 while (pc >= pm) {
267 *pc-- = 0;
268 }
269 }
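
// Worked example (illustrative): xlshift and xrshift shift a mantissa stored
// as an array of 16-bit words, carrying bits across word boundaries and
// zero-filling the vacated words. For instance
//
//    unt_2 w[2] = {0x0001, 0x8000};
//    xlshift (1, w, 2);                // w becomes {0x0003, 0x0000}
//
// since the leading bit of w[1] is carried into the low bit of w[0].
// A shift count of 16 * m or more clears the whole array.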
270
271 real_32 _xI (real_32 x)
272 {
273 // Intrinsic function so arguments are not pointers.
274 return x;
275 }
276
277 real_32 _xnint (real_32 x)
278 {
279 // Intrinsic function so arguments are not pointers.
280 if (xgetsgn (&x) == 0) {
281 return xfloor (xadd (x, X_1_OVER_2, FALSE));
282 } else {
283 return xneg (xfloor (xadd (X_1_OVER_2, x, TRUE)));
284 }
285 }
286
287 real_32 _xint (real_32 x)
288 {
289 // Intrinsic function so arguments are not pointers.
290 if (xgetsgn (&x) == 0) {
291 return xtrunc (x);
292 } else {
293 return xneg (xtrunc (x));
294 }
295 }
296
297 real_32 _xmax (real_32 a, real_32 b)
298 {
299 // Intrinsic function so arguments are not pointers.
300 if (xge (a, b)) {
301 return a;
302 } else {
303 return b;
304 }
305 }
306
307 real_32 _xmin (real_32 a, real_32 b)
308 {
309 // Intrinsic function so arguments are not pointers.
310 if (xle (a, b)) {
311 return a;
312 } else {
313 return b;
314 }
315 }
316
317 real_32 _xmod (real_32 a, real_32 b)
318 {
319 // Intrinsic function so arguments are not pointers.
320 real_32 q = xdiv (a, b);
321 if (xsgn (&q) >= 0) {
322 q = xfloor (q);
323 } else {
324 q = xneg (xfloor (xneg (q)));
325 }
326 return xadd (a, xmul (b, q), 1);
327 }
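
// Worked example (illustrative): _xmod follows the FORTRAN MOD convention,
// a - b * int(a/b), so the result carries the sign of a:
//
//    _xmod (inttox (7), inttox (3));   // yields  1
//    _xmod (inttox (-7), inttox (3));  // yields -1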
328
329 real_32 xadd (real_32 s, real_32 t, int_4 f)
330 {
331 REAL32 pe;
332 unt_2 h, u;
333 unt_2 *pc, *pf = pe;
334 unt_4 n = 0;
335 unt_2 *pa = SEXP (s);
336 unt_2 *pb = SEXP (t);
337 int_2 e = *pa & X_EXPO_MASK;
338 int_2 k = *pb & X_EXPO_MASK;
339 if (f != 0) {
340 *pb ^= X_SIGN_MASK;
341 }
342 u = (*pb ^ *pa) & X_SIGN_MASK;
343 f = 0;
344 if (e > k) {
345 if ((k = e - k) >= xMax_p) {
346 return s;
347 }
348 xrshift (k, pb + 1, FLT256_LEN);
349 } else if (e < k) {
350 if ((e = k - e) >= xMax_p) {
351 return t;
352 }
353 xrshift (e, pa + 1, FLT256_LEN);
354 e = k;
355 pc = pa;
356 pa = pb;
357 pb = pc;
358 } else if (u != 0) {
359 for (pc = pa, pf = pb; *(++pc) == *(++pf) && f < FLT256_LEN; ++f);
360 if (f >= FLT256_LEN) {
361 return X_0;
362 }
363 if (*pc < *pf) {
364 pc = pa;
365 pa = pb;
366 pb = pc;
367 }
368 pf = pe + f;
369 }
370 h = *pa & X_SIGN_MASK;
371 if (u != 0) {
372 for (pc = pb + FLT256_LEN; pc > pb; --pc) {
373 *pc = ~(*pc);
374 }
375 n = 1L;
376 }
377 for (pc = pe + FLT256_LEN, pa += FLT256_LEN, pb += FLT256_LEN; pc > pf;) {
378 n += *pa;
379 pa--;
380 n += *pb;
381 pb--;
382 *pc = n;
383 pc--;
384 n >>= 16;
385 }
386 if (u != 0) {
387 for (; *(++pc) == 0; ++f);
388 for (k = 0; !((*pc << k) & X_SIGN_MASK); ++k);
389 if ((k += 16 * f)) {
390 if ((e -= k) <= 0) {
391 return X_0;
392 }
393 xlshift (k, pe + 1, FLT256_LEN);
394 }
395 } else {
396 if (n != 0) {
397 ++e;
398 if ((xsigerr (e == (short) X_EXPO_MASK, XFPOFLOW, NULL))) {
399 return (!h ? X_PLUS_INF : X_MINUS_INF);
400 }
401 ++pf;
402 xrshift (1, pf, FLT256_LEN);
403 *pf |= X_SIGN_MASK;
404 }
405 }
406 *pe = e;
407 *pe |= h;
408 return *(real_32 *) pe;
409 }
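
// Usage note (illustrative): the third argument of xadd is a subtract flag;
// a nonzero value flips the sign of t before the addition, so
//
//    xadd (s, t, 0);                   // s + t
//    xadd (s, t, 1);                   // s - t
//
// which is how callers throughout this file form sums and differences.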
410
411 real_32 xmul (real_32 s, real_32 t)
412 {
413 unt_2 pe[FLT256_LEN + 2], *q0, *q1, h;
414 unt_2 *pa, *pb, *pc;
415 unt_4 m, n, p;
416 int_2 e;
417 int_2 k;
418 q0 = SEXP (s);
419 q1 = SEXP (t);
420 e = (*q0 & X_EXPO_MASK) - X_EXPO_BIAS;
421 k = (*q1 & X_EXPO_MASK) + 1;
422 if ((xsigerr (e > (short) X_EXPO_MASK - k, XFPOFLOW, NULL))) {
423 return (((s.value[0] & X_SIGN_MASK) ^ (t.value[0] & X_SIGN_MASK)) ? X_MINUS_INF : X_PLUS_INF);
424 }
425 if ((e += k) <= 0) {
426 return X_0;
427 }
428 h = (*q0 ^ *q1) & X_SIGN_MASK;
429 for (++q1, k = FLT256_LEN, p = n = 0L, pc = pe + FLT256_LEN + 1; k > 0; --k) {
430 for (pa = q0 + k, pb = q1; pa > q0;) {
431 m = *pa--;
432 m *= *pb++;
433 n += (m & 0xffffL);
434 p += (m >> 16);
435 }
436 *pc-- = n;
437 n = p + (n >> 16);
438 p = 0L;
439 }
440 *pc = n;
441 if (!(*pc & X_SIGN_MASK)) {
442 --e;
443 if (e <= 0) {
444 return X_0;
445 }
446 xlshift (1, pc, FLT256_LEN + 1);
447 }
448 if ((xsigerr (e == (short) X_EXPO_MASK, XFPOFLOW, NULL))) {
449 return (!h ? X_PLUS_INF : X_MINUS_INF);
450 }
451 *pe = e;
452 *pe |= h;
453 return *(real_32 *) pe;
454 }
455
456 real_32 xdiv (real_32 s, real_32 t)
457 {
458 unt_2 *pd = SEXP (s), *pc = SEXP (t);
459 // Next makes xdiv robust at extreme exponents - MvdV.
460 if ((*pd & X_EXPO_MASK) == (*pc & X_EXPO_MASK)) {
461 *pd &= ~X_EXPO_MASK;
462 *pc &= ~X_EXPO_MASK;
463 }
464 // HPA implementation.
465 unt_2 e = *pc;
466 *pc = X_EXPO_BIAS;
467 if ((xsigerr (xreal_cmp (&t, &X_0) == 0, XEDIV, "xdiv()"))) {
468 return X_0;
469 } else {
470 real_32 a = dbltox (1 / xtodbl (t));
471 *pc = e;
472 pc = SEXP (a);
473 *pc += X_EXPO_BIAS - (e & X_EXPO_MASK);
474 *pc |= e & X_SIGN_MASK;
475 for (unt_2 i = 0; i < xItt_div; ++i) {
476 a = xmul (a, xadd (X_2, xmul (a, t), 1));
477 }
478 return xmul (s, a);
479 }
480 }
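
// Note (added for clarity): the loop above is the Newton-Raphson iteration
// for the reciprocal of t. Starting from the double precision seed
// 1 / xtodbl (t), each pass computes
//
//    a <- a * (2 - a * t)
//
// which squares the relative error of the approximation to 1/t; the quotient
// is then obtained as s * (1/t). The exponent manipulation before and after
// keeps the seed within the range of a real_8.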
481
482
483 real_32 xevtch (real_32 z, real_32 * a, int_4 m)
484 {
485 real_32 *p, f, t, tp, w;
486 w = xreal_2 (z, 1);
487 t = X_0;
488 tp = X_0;
489 for (p = a + m; p > a;) {
490 f = xadd (*p--, xadd (xmul (w, t), tp, 1), 0);
491 tp = t;
492 t = f;
493 }
494 return xadd (*p, xadd (xmul (z, t), tp, 1), 0);
495 }
496
497 real_32 xexp2 (real_32 x)
498 {
499 real_32 s, d, f;
500 unt_2 *pf = SEXP (x);
501 int_4 m, k;
502 if (xreal_cmp (&x, &xE2min) < 0) {
503 return X_0;
504 } else if ((xsigerr (xreal_cmp (&x, &xE2max) > 0, XFPOFLOW, NULL))) {
505 return X_PLUS_INF;
506 } else {
507 m = (*pf & X_SIGN_MASK) ? 1 : 0;
508 x = xsfmod (x, &k);
509 if (m != 0) {
510 k *= -1;
511 }
512 // -X_EXPO_BIAS <= k <= +X_EXPO_BIAS
513 x = xmul (x, X_LN_2);
514 if (xgetexp (&x) > -X_EXPO_BIAS) {
515 x = xreal_2 (x, -1);
516 s = xmul (x, x);
517 f = X_0;
518 for (d = inttox (m = xMS_exp); m > 1; m -= 2, d = inttox (m)) {
519 f = xdiv (s, xadd (d, f, 0));
520 }
521 f = xdiv (x, xadd (d, f, 0));
522 f = xdiv (xadd (d, f, 0), xadd (d, f, 1));
523 } else {
524 f = X_1;
525 }
526 pf = SEXP (f);
527 if (-k > *pf) {
528 return X_0;
529 } else {
530 *pf += k;
531 if ((xsigerr (*pf >= X_EXPO_MASK, XFPOFLOW, NULL))) {
532 return X_PLUS_INF;
533 } else {
534 return f;
535 }
536 }
537 }
538 }
539
540 real_32 xexp (real_32 z)
541 {
542 return xexp2 (xmul (z, X_LOG2_E));
543 }
544
545 real_32 xexp10 (real_32 z)
546 {
547 return xexp2 (xmul (z, X_LOG2_10));
548 }
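
// Note (added for clarity): xexp and xexp10 reduce to xexp2 through the
// identities e^z = 2^(z * log2(e)) and 10^z = 2^(z * log2(10)), using the
// constants X_LOG2_E and X_LOG2_10.
//
//    real_32 e1 = xexp (X_1);          // approximately 2.71828...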
549
550 real_32 xfmod (real_32 s, real_32 t, real_32 * q)
551 {
552 if ((xsigerr (xreal_cmp (&t, &X_0) == 0, XEDIV, "xfmod()"))) {
553 return X_0;
554 } else {
555 unt_2 *p, mask = 0xffff;
556 int_2 e, i;
557 int_4 u;
558 *q = xdiv (s, t);
559 p = (unt_2 *) &(q->value);
560 u = (*p & X_SIGN_MASK) ? 0 : 1;
561 e = (*p &= X_EXPO_MASK); // biased exponent of *q
562 e = e < X_EXPO_BIAS ? 0 : e - X_EXPO_BIAS + 1;
563 for (i = 1; e / 16 > 0; i++, e -= 16);
564 if (i <= FLT256_LEN) {
565 // e = 0, ..., 15
566 mask <<= 16 - e;
567 p[i] &= mask;
568 for (i++; i <= FLT256_LEN; p[i] = 0, i++);
569 }
570 // Now *q == abs(quotient of (s/t))
571 return xadd (s, xmul (t, *q), u);
572 }
573 }
574
575 real_32 xfrexp (real_32 s, int_4 *p)
576 {
577 unt_2 *ps = SEXP (s), u;
578 *p = (*ps & X_EXPO_MASK) - X_EXPO_BIAS + 1;
579 u = *ps & X_SIGN_MASK;
580 *ps = X_EXPO_BIAS - 1;
581 *ps |= u;
582 return s;
583 }
584
585 static void nullify (int_4 skip, unt_2 *p, int_4 k)
586 {
587 // After skipping the first 'skip' bits of the vector 'p',
588 // this function nullifies all the remaining ones. 'k' is
589 // the number of words forming the vector p.
590 // Warning: 'skip' must be positive !
591 int_4 i;
592 unt_2 mask = 0xffff;
593 for (i = 0; skip / 16 > 0; i++, skip -= 16);
594 if (i < k) {
595 // skip = 0, ..., 15
596 mask <<= 16 - skip;
597 p[i] &= mask;
598 for (i++; i < k; p[i] = 0, i++);
599 }
600 }
601
602 static void canonic_form (real_32 * px)
603 {
604 unt_2 *p, u;
605 int_2 e, i, j, skip;
606 p = (unt_2 *) &(px->value);
607 e = (*p & X_EXPO_MASK); // biased exponent of x
608 u = (*p & X_SIGN_MASK); // sign of x
609 if (e < X_EXPO_BIAS - 1) {
610 return;
611 } else {
612 unt_2 mask = 0xffff;
613 // e >= X_EXPO_BIAS - 1
614 for (i = 1, skip = e + 1 - X_EXPO_BIAS; skip / 16 > 0; i++, skip -= 16);
615 if (i <= FLT256_LEN) {
616 // skip = 0, ..., 15
617 mask >>= skip;
618 if ((p[i] & mask) != mask) {
619 return;
620 } else {
621 for (j = i + 1; j <= FLT256_LEN && p[j] == 0xffff; j++);
622 if (j > FLT256_LEN) {
623 p[i] -= mask;
624 for (j = i + 1; j <= FLT256_LEN; p[j] = 0, j++);
625 if (!(p[1] & 0x8000)) {
626 p[1] = 0x8000;
627 *p = ++e;
628 *p |= u;
629 } else if (u != 0) {
630 *px = xadd (*px, X_1, 1);
631 } else {
632 *px = xadd (*px, X_1, 0);
633 }
634 }
635 }
636 }
637 }
638 }
639
640 real_32 xfrac (real_32 x)
641 {
642 // xfrac(x) returns the fractional part of the number x.
643 // xfrac(x) has the same sign as x.
644 unt_2 u, *p;
645 int_2 e;
646 int_4 n;
647 canonic_form (&x);
648 p = SEXP (x);
649 e = (*p & X_EXPO_MASK); // biased exponent of x
650 if (e < X_EXPO_BIAS) {
651 return x; // The integer part of x is zero
652 } else {
653 u = *p & X_SIGN_MASK; // sign of x
654 n = e - X_EXPO_BIAS + 1;
655 xlshift (n, p + 1, FLT256_LEN);
656 e = X_EXPO_BIAS - 1;
657 // Now I have to take into account the rule
658 // of the leading one.
659 while (e > 0 && !(p[1] & X_SIGN_MASK)) {
660 xlshift (1, p + 1, FLT256_LEN);
661 e -= 1;
662 }
663 // Now p+1 points to the fractional part of x,
664 // u is its sign, e is its biased exponent.
665 p[0] = e;
666 p[0] |= u;
667 return *(real_32 *) p;
668 }
669 }
670
671 real_32 xtrunc (real_32 x)
672 {
673 // xtrunc(x) returns the integer part of the number x.
674 // xtrunc(x) has the same sign as x.
675 unt_2 *p;
676 int_2 e;
677 canonic_form (&x);
678 p = SEXP (x);
679 e = (*p & X_EXPO_MASK); // biased exponent of x
680 if (e < X_EXPO_BIAS) {
681 return X_0; // The integer part of x is zero
682 } else {
683 nullify (e - X_EXPO_BIAS + 1, p + 1, FLT256_LEN);
684 return *(real_32 *) p;
685 }
686 }
687
688 real_32 xround (real_32 x)
689 {
690 return xtrunc (xadd (x, X_RND_CORR, x.value[0] & X_SIGN_MASK));
691 }
692
693 real_32 xceil (real_32 x)
694 {
695 unt_2 *ps = SEXP (x);
696 if ((*ps & X_SIGN_MASK)) {
697 return xtrunc (x);
698 } else {
699 real_32 y = xfrac (x);
700 // y has the same sign as x (see above).
701 return (xreal_cmp (&y, &X_0) > 0 ? xadd (xtrunc (x), X_1, 0) : x);
702 }
703 }
704
705 real_32 xfloor (real_32 x)
706 {
707 unt_2 *ps = SEXP (x);
708 if ((*ps & X_SIGN_MASK)) {
709 real_32 y = xfrac (x);
710 // y has the same sign as x (see above).
711 return (xreal_cmp (&y, &X_0) < 0 ? xadd (xtrunc (x), X_1, 1) : x);
712 } else {
713 return xtrunc (x);
714 }
715 }
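
// Worked example (illustrative): xtrunc chops towards zero, xfloor rounds
// towards minus infinity and xceil towards plus infinity:
//
//    xfloor (dbltox (-2.5));           // -3
//    xceil  (dbltox (-2.5));           // -2
//    xfloor (dbltox (2.5));            //  2
//    xceil  (dbltox (2.5));            //  3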
716
717 static void xadd_correction (real_32 * px, int_4 k)
718 {
719 int_2 e = (px->value[0] & X_EXPO_MASK) - X_EXPO_BIAS;
720 // e = (e > 0 ? e : 0);
721 *px = xadd (*px, xreal_2 (X_FIX_CORR, e), k);
722 }
723
724 real_32 xfix (real_32 x)
725 {
726 unt_2 *p;
727 int_2 e;
728 xadd_correction (&x, x.value[0] & X_SIGN_MASK);
729 p = SEXP (x);
730 e = (*p & X_EXPO_MASK); // biased exponent of x
731 if (e < X_EXPO_BIAS) {
732 return X_0; // The integer part of x is zero
733 } else {
734 nullify (e - X_EXPO_BIAS + 1, p + 1, FLT256_LEN);
735 return *(real_32 *) p;
736 }
737 }
738
739 real_32 xtanh (real_32 z)
740 {
741 real_32 s, d, f;
742 int_4 m, k;
743 if ((k = xgetexp (&z)) > xK_tanh) {
744 if (xgetsgn (&z)) {
745 return xneg (X_1);
746 } else {
747 return X_1;
748 }
749 }
750 if (k < xK_lin) {
751 return z;
752 }
753 ++k;
754 if (k > 0) {
755 z = xreal_2 (z, -k);
756 }
757 s = xmul (z, z);
758 f = X_0;
759 for (d = inttox (m = xMS_hyp); m > 1;) {
760 f = xdiv (s, xadd (d, f, 0));
761 d = inttox (m -= 2);
762 }
763 f = xdiv (z, xadd (d, f, 0));
764 for (; k > 0; --k) {
765 f = xdiv (xreal_2 (f, 1), xadd (d, xmul (f, f), 0));
766 }
767 return f;
768 }
769
770 real_32 xsinh (real_32 z)
771 {
772 int_4 k;
773 if ((k = xgetexp (&z)) < xK_lin) {
774 return z;
775 } else if (k < 0) {
776 z = xtanh (xreal_2 (z, -1));
777 return xdiv (xreal_2 (z, 1), xadd (X_1, xmul (z, z), 1));
778 } else {
779 z = xexp (z);
780 return xreal_2 (xadd (z, xdiv (X_1, z), 1), -1);
781 }
782 }
783
784 real_32 xcosh (real_32 z)
785 {
786 if (xgetexp (&z) < xK_lin) {
787 return X_1;
788 }
789 z = xexp (z);
790 return xreal_2 (xadd (z, xdiv (X_1, z), 0), -1);
791 }
792
793 real_32 xatanh (real_32 x)
794 {
795 real_32 y = x;
796 y.value[0] &= X_EXPO_MASK; // Now y == abs(x)
797 if ((xsigerr (xreal_cmp (&y, &X_1) >= 0, XEDOM, "xatanh"))) {
798 return ((x.value[0] & X_SIGN_MASK) ? X_MINUS_INF : X_PLUS_INF);
799 } else {
800 y = xdiv (xadd (X_1, x, 0), xadd (X_1, x, 1));
801 return xreal_2 (xlog (y), -1);
802 }
803 }
804
805 real_32 xasinh (real_32 x)
806 {
807 real_32 y = xmul (x, x);
808 y = xsqrt (xadd (X_1, y, 0));
809 if ((x.value[0] & X_SIGN_MASK)) {
810 return xneg (xlog (xadd (y, x, 1)));
811 } else {
812 return xlog (xadd (x, y, 0));
813 }
814 }
815
816 real_32 xacosh (real_32 x)
817 {
818 if ((xsigerr (xreal_cmp (&x, &X_1) < 0, XEDOM, "xacosh()"))) {
819 return X_0;
820 } else {
821 real_32 y = xmul (x, x);
822 y = xsqrt (xadd (y, X_1, 1));
823 return xlog (xadd (x, y, 0));
824 }
825 }
826
827 real_32 xatan (real_32 z)
828 {
829 real_32 s, f;
830 int_4 k, m;
831 if ((k = xgetexp (&z)) < xK_lin) {
832 return z;
833 }
834 if (k >= 0) {
835 // k>=0 is equivalent to abs(z) >= 1.0
836 z = xdiv (X_1, z);
837 m = 1;
838 } else {
839 m = 0;
840 }
841 f = dbltox (atan (xtodbl (z)));
842 s = xadd (X_1, xmul (z, z), 0);
843 for (k = 0; k < xItt_div; ++k) {
844 f = xadd (f, xdiv (xadd (z, xtan (f), 1), s), 0);
845 }
846 if (m != 0) {
847 if (xgetsgn (&f)) {
848 return xadd (xneg (X_PI_OVER_2), f, 1);
849 } else {
850 return xadd (X_PI_OVER_2, f, 1);
851 }
852 } else {
853 return f;
854 }
855 }
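
// Note (added for clarity): xatan seeds the result with the double precision
// atan() and then refines it with Newton-type corrections
//
//    f <- f + (z - tan f) / (1 + z*z)
//
// where 1 + z*z equals sec^2(atan z), the derivative of tan at the root.
// For |z| >= 1 the argument is first inverted and the identity
// atan(z) = +-pi/2 - atan(1/z) is applied at the end.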
856
857 real_32 xasin (real_32 z)
858 {
859 real_32 u = z;
860 u.value[0] &= X_EXPO_MASK;
861 if ((xsigerr (xreal_cmp (&u, &X_1) > 0, XEDOM, "xasin()"))) {
862 return ((xgetsgn (&z)) ? xneg (X_PI_OVER_2) : X_PI_OVER_2);
863 } else {
864 if (xgetexp (&z) < xK_lin) {
865 return z;
866 }
867 u = xsqrt (xadd (X_1, xmul (z, z), 1));
868 if (xgetexp (&u) == -X_EXPO_BIAS) {
869 return ((xgetsgn (&z)) ? xneg (X_PI_OVER_2) : X_PI_OVER_2);
870 }
871 return xatan (xdiv (z, u));
872 }
873 }
874
875 real_32 xacos (real_32 z)
876 {
877 real_32 u = z;
878 u.value[0] &= X_EXPO_MASK;
879 if ((xsigerr (xreal_cmp (&u, &X_1) > 0, XEDOM, "xacos()"))) {
880 return ((xgetsgn (&z)) ? X_PI : X_0);
881 } else {
882 if (xgetexp (&z) == -X_EXPO_BIAS) {
883 return X_PI_OVER_2;
884 }
885 u = xsqrt (xadd (X_1, xmul (z, z), 1));
886 u = xatan (xdiv (u, z));
887 if (xgetsgn (&z)) {
888 return xadd (X_PI, u, 0);
889 } else {
890 return u;
891 }
892 }
893 }
894 // Kindly added by A.Haumer 2010-04-09
895
896 real_32 xatan2 (real_32 y, real_32 x)
897 {
898 int_4 rs, is;
899 rs = xsgn (&x);
900 is = xsgn (&y);
901 if (rs > 0) {
902 return xatan (xdiv (y, x));
903 } else if (rs < 0) {
904 x.value[0] ^= X_SIGN_MASK;
905 y.value[0] ^= X_SIGN_MASK;
906 if (is >= 0) {
907 return xadd (X_PI, xatan (xdiv (y, x)), 0);
908 } else {
909 return xadd (xatan (xdiv (y, x)), X_PI, 1);
910 }
911 } else { // x is zero !
912 if (!xsigerr (is == 0, XEDOM, "xatan2()")) {
913 return (is > 0 ? X_PI_OVER_2 : xneg (X_PI_OVER_2));
914 } else {
915 return X_0; // Dummy value :)
916 }
917 }
918 }
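
// Worked example (illustrative): xatan2 resolves the quadrant from the signs
// of both arguments, like the FORTRAN ATAN2 intrinsic:
//
//    xatan2 (X_1, X_1);                //  pi/4
//    xatan2 (X_1, xneg (X_1));         //  3*pi/4
//    xatan2 (xneg (X_1), xneg (X_1));  // -3*pi/4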
919
920 real_32 xlog (real_32 z)
921 {
922 real_32 f, h;
923 int_4 k, m;
924 if ((xsigerr ((xgetsgn (&z)) || xgetexp (&z) == -X_EXPO_BIAS, XEDOM, "xlog()"))) {
925 return X_MINUS_INF;
926 } else if (xreal_cmp (&z, &X_1) == 0) {
927 return X_0;
928 } else {
929 z = xfrexp (z, &m);
930 z = xmul (z, X_SQRT_2);
931 z = xdiv (xadd (z, X_1, 1), xadd (z, X_1, 0));
932 h = xreal_2 (z, 1);
933 z = xmul (z, z);
934 for (f = h, k = 1; xgetexp (&h) > -xMax_p;) {
935 h = xmul (h, z);
936 f = xadd (f, xdiv (h, inttox (k += 2)), 0);
937 }
938 return xadd (f, xmul (X_LN_2, dbltox (m - .5)), 0);
939 }
940 }
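
// Note (added for clarity): with x = m' * 2^m and m' in [0.5, 1), the loop
// above evaluates the artanh series for the scaled mantissa. Writing
//
//    u = (m' * sqrt(2) - 1) / (m' * sqrt(2) + 1)
//
// the sum f = 2 * (u + u^3/3 + u^5/5 + ...) equals log(m' * sqrt(2)), and the
// final result is assembled as log(x) = f + (m - 1/2) * log(2).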
941
942 real_32 xlog2 (real_32 z)
943 {
944 real_32 f, h;
945 int_4 k, m;
946 if ((xsigerr ((xgetsgn (&z)) || xgetexp (&z) == -X_EXPO_BIAS, XEDOM, "xlog2()"))) {
947 return X_MINUS_INF;
948 } else if (xreal_cmp (&z, &X_1) == 0) {
949 return X_0;
950 } else {
951 z = xfrexp (z, &m);
952 z = xmul (z, X_SQRT_2);
953 z = xdiv (xadd (z, X_1, 1), xadd (z, X_1, 0));
954 h = xreal_2 (z, 1);
955 z = xmul (z, z);
956 for (f = h, k = 1; xgetexp (&h) > -xMax_p;) {
957 h = xmul (h, z);
958 f = xadd (f, xdiv (h, inttox (k += 2)), 0);
959 }
960 return xadd (xmul (f, X_LOG2_E), dbltox (m - .5), 0);
961 }
962 }
963
964 real_32 xlog10 (real_32 z)
965 {
966 real_32 w = xlog (z);
967 if (xreal_cmp (&w, &X_MINUS_INF) <= 0) {
968 return X_MINUS_INF;
969 } else {
970 return xmul (w, X_LOG10_E);
971 }
972 }
973
974 int_4 xeq (real_32 x1, real_32 x2)
975 {
976 return (xreal_cmp (&x1, &x2) == 0);
977 }
978
979 int_4 xneq (real_32 x1, real_32 x2)
980 {
981 return (xreal_cmp (&x1, &x2) != 0);
982 }
983
984 int_4 xgt (real_32 x1, real_32 x2)
985 {
986 return (xreal_cmp (&x1, &x2) > 0);
987 }
988
989 int_4 xge (real_32 x1, real_32 x2)
990 {
991 return (xreal_cmp (&x1, &x2) >= 0);
992 }
993
994 int_4 xlt (real_32 x1, real_32 x2)
995 {
996 return (xreal_cmp (&x1, &x2) < 0);
997 }
998
999 int_4 xle (real_32 x1, real_32 x2)
1000 {
1001 return (xreal_cmp (&x1, &x2) <= 0);
1002 }
1003 // xisNaN (&x) returns 1 if and only if x is not a valid number
1004
1005 int_4 xisNaN (const real_32 * u)
1006 {
1007 unt_2 *p = (unt_2 *) &(u->value);
1008 if (*p != 0) {
1009 return 0;
1010 } else {
1011 int_4 i;
1012 for (i = 1; i <= FLT256_LEN && p[i] == 0x0; i++);
1013 return (i <= FLT256_LEN ? 1 : 0);
1014 }
1015 }
1016
1017 int_4 xis0 (const real_32 * u)
1018 {
1019 unt_2 *p = (unt_2 *) &(u->value);
1020 int_4 m;
1021 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1022 return (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK ? 1 : 0);
1023 }
1024
1025 int_4 xnot0 (const real_32 * u)
1026 {
1027 unt_2 *p = (unt_2 *) &(u->value);
1028 int_4 m;
1029 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1030 return (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK ? 0 : 1);
1031 }
1032
1033 int_4 xlt1 (const real_32 u)
1034 {
1035 return (xlt (xabs (u), X_1));
1036 }
1037
1038 int_4 xsgn (const real_32 * u)
1039 {
1040 unt_2 *p = (unt_2 *) &(u->value);
1041 int_4 m;
1042 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1043 if ((m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK) || !*p) {
1044 return 0;
1045 } else {
1046 return ((*p & X_SIGN_MASK) ? -1 : 1);
1047 }
1048 }
1049
1050 int_4 xisPinf (const real_32 * u)
1051 {
1052 return (*u->value == X_EXPO_MASK ? 1 : 0);
1053 }
1054
1055 int_4 xisMinf (const real_32 * u)
1056 {
1057 return (*u->value == (X_EXPO_MASK | X_SIGN_MASK) ? 1 : 0);
1058 }
1059
1060 int_4 xisordnumb (const real_32 * u)
1061 {
1062 int_4 isNaN, isfinite;
1063 unt_2 *p = (unt_2 *) &(u->value);
1064 if (*p != 0) {
1065 isNaN = 0;
1066 } else {
1067 int_4 i;
1068 for (i = 1; i <= FLT256_LEN && p[i] == 0x0; i++);
1069 isNaN = i <= FLT256_LEN;
1070 }
1071 isfinite = (*p & X_EXPO_MASK) < X_EXPO_MASK;
1072 return (!isNaN && (isfinite) ? 1 : 0);
1073 }
1074
1075 real_32 xpwr (real_32 s, int_4 n)
1076 {
1077 real_32 t;
1078 unsigned k, m;
1079 t = X_1;
1080 if (n < 0) {
1081 m = -n;
1082 if ((xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()"))) {
1083 return X_0;
1084 }
1085 s = xdiv (X_1, s);
1086 } else {
1087 m = n;
1088 }
1089 if (m != 0) {
1090 k = 1;
1091 while (1) {
1092 if (k & m) {
1093 t = xmul (s, t);
1094 }
1095 if ((k <<= 1) <= m) {
1096 s = xmul (s, s);
1097 } else {
1098 break;
1099 }
1100 }
1101 } else {
1102 xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()");
1103 }
1104 return t;
1105 }
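
// Note (added for clarity): xpwr raises s to an integer power by binary
// exponentiation: s is squared repeatedly and multiplied into t for every
// bit set in |n|; a negative exponent inverts s first. For example
// xpwr (s, 5) multiplies t by s and by s^4, following the bit pattern 101
// of the exponent 5.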
1106
1107 real_32 xpow (real_32 x, real_32 y)
1108 {
1109 if (xsigerr ((xgetsgn (&x)) || xgetexp (&x) == -X_EXPO_BIAS, XEDOM, "xpow()")) {
1110 return X_0;
1111 } else {
1112 return xexp2 (xmul (xlog2 (x), y));
1113 }
1114 }
1115
1116 real_32 xsqrt (real_32 z)
1117 {
1118 real_32 s, h;
1119 int_2 m, e;
1120 unt_2 *pc;
1121 if ((xsigerr ((xgetsgn (&z)), XEDOM, "xsqrt()"))) {
1122 return X_0;
1123 } else {
1124 pc = SEXP (z);
1125 if (*pc == 0) {
1126 return X_0;
1127 }
1128 e = *pc - X_EXPO_BIAS;
1129 *pc = X_EXPO_BIAS + (e % 2);
1130 e /= 2;
1131 s = dbltox (sqrt (xtodbl (z)));
1132 for (m = 0; m < xItt_div; ++m) {
1133 h = xdiv (xadd (z, xmul (s, s), 1), xreal_2 (s, 1));
1134 s = xadd (s, h, 0);
1135 }
1136 pc = SEXP (s);
1137 *pc += e;
1138 return s;
1139 }
1140 }
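
// Note (added for clarity): after halving the exponent, xsqrt seeds s with
// the double precision sqrt() and applies the Heron / Newton iteration
//
//    s <- s + (z - s*s) / (2*s)        equivalent to  s <- (s + z/s) / 2
//
// each pass roughly doubling the number of correct digits.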
1141
1142 static int_4 xodd (real_32 x)
1143 {
1144 unt_2 *p = SEXP (x);
1145 int_2 e, i;
1146 e = (*p & X_EXPO_MASK) - X_EXPO_BIAS; // exponent of x
1147 if (e < 0) {
1148 return 0;
1149 } else {
1150 for (i = 1; e / 16 > 0; i++, e -= 16);
1151 // Now e = 0, ..., 15
1152 return (i <= FLT256_LEN ? p[i] & 0x8000 >> e : 0);
1153 }
1154 }
1155
1156 real_32 xtan (real_32 z)
1157 {
1158 int_4 k, m;
1159 z = rred (z, 't', &k);
1160 if ((xsigerr (xreal_cmp (&z, &X_PI_OVER_2) >= 0, XEDOM, "xtan()"))) {
1161 return (!k ? X_PLUS_INF : X_MINUS_INF);
1162 } else {
1163 if (xreal_cmp (&z, &X_PI_OVER_4) == 1) {
1164 m = 1;
1165 z = xadd (X_PI_OVER_2, z, 1);
1166 } else {
1167 m = 0;
1168 }
1169 if (k != 0) {
1170 z = xneg (c_tan (z));
1171 } else {
1172 z = c_tan (z);
1173 }
1174 if (m != 0) {
1175 return xdiv (X_1, z);
1176 } else {
1177 return z;
1178 }
1179 }
1180 }
1181
1182 real_32 xcos (real_32 z)
1183 {
1184 int_4 k;
1185 z = rred (z, 'c', &k);
1186 if (xgetexp (&z) < xK_lin) {
1187 if (k != 0) {
1188 return xneg (X_1);
1189 } else {
1190 return X_1;
1191 }
1192 }
1193 z = c_tan (xreal_2 (z, -1));
1194 z = xmul (z, z);
1195 z = xdiv (xadd (X_1, z, 1), xadd (X_1, z, 0));
1196 if (k != 0) {
1197 return xneg (z);
1198 } else {
1199 return z;
1200 }
1201 }
1202
1203 real_32 xsin (real_32 z)
1204 {
1205 int_4 k;
1206 z = rred (z, 's', &k);
1207 if (xgetexp (&z) >= xK_lin) {
1208 z = c_tan (xreal_2 (z, -1));
1209 z = xdiv (xreal_2 (z, 1), xadd (X_1, xmul (z, z), 0));
1210 }
1211 if (k != 0) {
1212 return xneg (z);
1213 } else {
1214 return z;
1215 }
1216 }
1217
1218 static real_32 c_tan (real_32 z)
1219 {
1220 real_32 s, f, d;
1221 int_4 m;
1222 unt_2 k;
1223 if (xgetexp (&z) < xK_lin)
1224 return z;
1225 s = xneg (xmul (z, z));
1226 for (k = 1; k <= FLT256_LEN && s.value[k] == 0; k++);
1227 if ((xsigerr (s.value[0] == 0xffff && k > FLT256_LEN, XFPOFLOW, NULL))) {
1228 return X_0;
1229 } else {
1230 f = X_0;
1231 for (d = inttox (m = xMS_trg); m > 1;) {
1232 f = xdiv (s, xadd (d, f, 0));
1233 d = inttox (m -= 2);
1234 }
1235 return xdiv (z, xadd (d, f, 0));
1236 }
1237 }
1238
1239 static real_32 rred (real_32 z, int_4 kf, int_4 *ps)
1240 {
1241 real_32 is, q;
1242 if (xgetsgn (&z)) {
1243 z = xneg (z);
1244 is = X_1;
1245 } else {
1246 is = X_0;
1247 }
1248 z = xfmod (z, X_PI, &q);
1249 if (kf == 't') {
1250 q = is;
1251 } else if (kf == 's') {
1252 q = xadd (q, is, 0);
1253 }
1254 if (xreal_cmp (&z, &X_PI_OVER_2) == 1) {
1255 z = xadd (X_PI, z, 1);
1256 if (kf == 'c' || kf == 't') {
1257 q = xadd (q, X_1, 0);
1258 }
1259 }
1260 *ps = (xodd (q)) ? 1 : 0;
1261 return z;
1262 }
1263
1264 // COMPLEX*64
1265
1266 complex_64 cxadd (complex_64 z1, complex_64 z2, int_4 k)
1267 {
1268 complex_64 w;
1269 w.re = xadd (z1.re, z2.re, k);
1270 w.im = xadd (z1.im, z2.im, k);
1271 return w;
1272 }
1273
1274 complex_64 cxsum (complex_64 z1, complex_64 z2)
1275 {
1276 complex_64 w;
1277 w.re = xadd (z1.re, z2.re, 0);
1278 w.im = xadd (z1.im, z2.im, 0);
1279 return w;
1280 }
1281
1282 complex_64 cxsub (complex_64 z1, complex_64 z2)
1283 {
1284 complex_64 w;
1285 w.re = xadd (z1.re, z2.re, 1);
1286 w.im = xadd (z1.im, z2.im, 1);
1287 return w;
1288 }
1289
1290 complex_64 cxmul (complex_64 z1, complex_64 z2)
1291 {
1292 complex_64 w;
1293 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 1);
1294 w.im = xadd (xmul (z1.im, z2.re), xmul (z1.re, z2.im), 0);
1295 return w;
1296 }
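
// Worked example (illustrative): cxmul implements
// (a + b*i) * (c + d*i) = (a*c - b*d) + (a*d + b*c)*i, for instance
//
//    complex_64 z1 = cxreset (inttox (1), inttox (2));   // 1 + 2i
//    complex_64 z2 = cxreset (inttox (3), inttox (4));   // 3 + 4i
//    complex_64 w  = cxmul (z1, z2);                     // -5 + 10i
//
// cxreset, defined further below, builds a complex_64 from two real_32 parts.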
1297
1298 complex_64 cxrmul (real_32 c, complex_64 z)
1299 {
1300 complex_64 w;
1301 w.re = xmul (c, z.re);
1302 w.im = xmul (c, z.im);
1303 return w;
1304 }
1305
1306 complex_64 cxdrot (complex_64 z)
1307 {
1308 complex_64 y;
1309 y.re = z.im;
1310 y.im = z.re;
1311 // Multiplication by +i
1312 y.re.value[0] ^= X_SIGN_MASK;
1313 return y;
1314 }
1315
1316 complex_64 cxrrot (complex_64 z)
1317 {
1318 complex_64 y;
1319 y.re = z.im;
1320 y.im = z.re;
1321 // Multiplication by -i
1322 y.im.value[0] ^= X_SIGN_MASK;
1323 return y;
1324 }
1325
1326 complex_64 cxdiv (complex_64 z1, complex_64 z2)
1327 {
1328 int_4 tv = cxrec (z2, &z2);
1329 if (!xsigerr (!tv, XEDIV, "cxdiv()")) {
1330 complex_64 w;
1331 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 1);
1332 w.im = xadd (xmul (z1.im, z2.re), xmul (z1.re, z2.im), 0);
1333 return w;
1334 } else {
1335 return X_0_0I;
1336 }
1337 }
1338
1339 complex_64 cxinv (complex_64 z)
1340 {
1341 int_4 tv = cxrec (z, &z);
1342 if (!xsigerr (!tv, XEDOM, "cxinv()")) {
1343 return z;
1344 } else {
1345 return X_0_0I;
1346 }
1347 }
1348
1349 complex_64 cxsqr (complex_64 z)
1350 {
1351 complex_64 w;
1352 w.re = xadd (xmul (z.re, z.re), xmul (z.im, z.im), 1);
1353 w.im = xmul (X_2, xmul (z.im, z.re));
1354 return w;
1355 }
1356
1357 complex_64 cxsqrt (complex_64 z)
1358 {
1359 complex_64 w;
1360 if (xsgn (&(z.re)) == 0 && xsgn (&(z.im)) == 0) {
1361 w = X_0_0I;
1362 } else if (xis0 (&(z.im))) {
1363 real_32 s = xsqrt (xabs (z.re));
1364 if (xsgn (&(z.re)) == -1) {
1365 w = (complex_64) {X_0, s};
1366 } else {
1367 w = (complex_64) {s, X_0};
1368 }
1369 } else {
1370 real_32 mod = xsqrt (cxabs (z)), arg = xreal_2 (cxarg (z), -1);
1371 w.re = xmul (mod, xcos (arg));
1372 w.im = xmul (mod, xsin (arg));
1373 }
1374 return w;
1375 }
1376
1377 complex_64 cxreset (real_32 re, real_32 im)
1378 {
1379 complex_64 y;
1380 y.re = re;
1381 y.im = im;
1382 return y;
1383 }
1384
1385 complex_64 cxconv (real_32 x)
1386 {
1387 complex_64 y;
1388 y.re = x;
1389 y.im = X_0;
1390 return y;
1391 }
1392
1393 real_32 cxre (complex_64 z)
1394 {
1395 return z.re;
1396 }
1397
1398 real_32 cxim (complex_64 z)
1399 {
1400 return z.im;
1401 }
1402
1403 complex_64 cxswap (complex_64 z)
1404 {
1405 complex_64 y;
1406 y.re = z.im;
1407 y.im = z.re;
1408 return y;
1409 }
1410
1411 complex_64 cxneg (complex_64 z)
1412 {
1413 z.re.value[0] ^= X_SIGN_MASK;
1414 z.im.value[0] ^= X_SIGN_MASK;
1415 return z;
1416 }
1417
1418 complex_64 cxconj (complex_64 z)
1419 {
1420 return (z.im.value[0] ^= X_SIGN_MASK, z);
1421 }
1422
1423 #define XBOUND FLT256_LEN * 16 + 8
1424
1425 real_32 cxabs (complex_64 z)
1426 {
1427 if (xreal_cmp (&(z.re), &X_0) == 0 && xreal_cmp (&(z.im), &X_0) == 0) {
1428 return X_0;
1429 } else {
1430 int_4 ea = (z.re.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1431 int_4 eb = (z.im.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1432 if (ea > eb + XBOUND) {
1433 return z.re;
1434 } else if (eb > ea + XBOUND) {
1435 return z.im;
1436 } else {
1437 z.re.value[0] -= eb;
1438 z.im.value[0] = X_EXPO_BIAS;
1439 real_32 x = xsqrt (xadd (xmul (z.re, z.re), xmul (z.im, z.im), 0));
1440 x.value[0] += eb;
1441 return x;
1442 }
1443 }
1444 }
1445
1446 real_32 cxarg (complex_64 z)
1447 {
1448 int_4 rs = xsgn (&(z.re)), is = xsgn (&(z.im));
1449 if (rs > 0) {
1450 return xatan (xdiv (z.im, z.re));
1451 } else if (rs < 0) {
1452 z.re.value[0] ^= X_SIGN_MASK;
1453 z.im.value[0] ^= X_SIGN_MASK;
1454 if (is >= 0) {
1455 return xadd (X_PI, xatan (xdiv (z.im, z.re)), 0);
1456 } else {
1457 return xadd (xatan (xdiv (z.im, z.re)), X_PI, 1);
1458 }
1459 } else { // z.re is zero !
1460 if (!xsigerr (is == 0, XEDOM, "cxarg()")) {
1461 return (is > 0 ? X_PI_OVER_2 : xneg (X_PI_OVER_2));
1462 } else {
1463 return xneg (X_PI_OVER_2); // Dummy value :)
1464 }
1465 }
1466 }
1467
1468 int_4 cxrec (complex_64 z, complex_64 * w)
1469 {
1470 if (xreal_cmp (&(z.re), &X_0) == 0 && xreal_cmp (&(z.im), &X_0) == 0) {
1471 return 0;
1472 } else {
1473 int_4 sa = z.re.value[0] & X_SIGN_MASK;
1474 int_4 sb = z.im.value[0] & X_SIGN_MASK;
1475 int_4 ea = (z.re.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1476 int_4 eb = (z.im.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1477 real_32 x;
1478 if (ea > eb + XBOUND) {
1479 x = z.re;
1480 } else if (eb > ea + XBOUND) {
1481 x = z.im;
1482 } else {
1483 z.re.value[0] -= eb;
1484 z.im.value[0] = X_EXPO_BIAS;
1485 x = xsqrt (xadd (xmul (z.re, z.re), xmul (z.im, z.im), 0));
1486 x.value[0] += eb;
1487 z.re.value[0] += eb;
1488 z.im.value[0] += eb;
1489 }
1490 w->re = xdiv (xdiv (z.re, x), x);
1491 w->im = xdiv (xdiv (z.im, x), x);
1492 w->re.value[0] |= sa;
1493 w->im.value[0] |= X_SIGN_MASK ^ sb;
1494 return 1;
1495 }
1496 }
1497
1498 complex_64 cxexp (complex_64 z)
1499 {
1500 complex_64 w;
1501 w.re = xmul (xexp (z.re), xcos (z.im));
1502 w.im = xmul (xexp (z.re), xsin (z.im));
1503 return w;
1504 }
1505
1506 complex_64 cxlog (complex_64 z)
1507 {
1508 real_32 mod;
1509 complex_64 w;
1510 mod = cxabs (z);
1511 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog()")) {
1512 return X_0_0I;
1513 } else {
1514 w.re = xlog (mod);
1515 w.im = cxarg (z);
1516 return w;
1517 }
1518 }
1519
1520 complex_64 cxlog10 (complex_64 z)
1521 {
1522 real_32 mod;
1523 complex_64 w;
1524 mod = cxabs (z);
1525 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog10()")) {
1526 return X_0_0I;
1527 } else {
1528 w.re = xlog10 (mod);
1529 w.im = xmul (cxarg (z), X_LOG10_E);
1530 return w;
1531 }
1532 }
1533
1534 complex_64 cxlog2 (complex_64 z)
1535 {
1536 real_32 mod;
1537 complex_64 w;
1538 mod = cxabs (z);
1539 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog2()")) {
1540 return X_0_0I;
1541 } else {
1542 w.re = xlog2 (mod);
1543 w.im = xmul (cxarg (z), X_LOG2_E);
1544 return w;
1545 }
1546 }
1547
1548 complex_64 cxlog_sqrt (complex_64 z)
1549 {
1550 real_32 mod = cxabs (z);
1551 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog_sqrt()")) {
1552 return X_0_0I;
1553 } else {
1554 complex_64 w;
1555 w.re = xreal_2 (xlog (mod), -1);
1556 w.im = xreal_2 (cxarg (z), -1);
1557 return w;
1558 }
1559 }
1560
1561 complex_64 cxsinh (complex_64 z)
1562 {
1563 complex_64 w = cxsub (cxexp (z), cxexp (cxneg (z)));
1564 w.re = xreal_2 (w.re, -1);
1565 w.im = xreal_2 (w.im, -1);
1566 return w;
1567 }
1568
1569 complex_64 cxcosh (complex_64 z)
1570 {
1571 complex_64 w = cxsum (cxexp (z), cxexp (cxneg (z)));
1572 w.re = xreal_2 (w.re, -1);
1573 w.im = xreal_2 (w.im, -1);
1574 return w;
1575 }
1576
1577 complex_64 cxtanh (complex_64 z)
1578 {
1579 if (xsigerr (xreal_cmp (&(z.re), &xEmax) > 0, XFPOFLOW, NULL)) {
1580 return X_1_0I;
1581 } else if (xsigerr (xreal_cmp (&(z.re), &xEmin) < 0, XFPOFLOW, NULL)) {
1582 return cxneg (X_1_0I);
1583 } else {
1584 complex_64 w;
1585 if (xsigerr (!cxrec (cxcosh (z), &w), XEDOM, "cxtanh()")) {
1586 return X_0_0I;
1587 } else {
1588 return cxmul (cxsinh (z), w);
1589 }
1590 }
1591 }
1592
1593 complex_64 cxasinh (complex_64 z)
1594 {
1595 // This way cxasinh() works fine also with real numbers very near to -oo.
1596 complex_64 w = cxsqrt (cxsum (X_1_0I, cxsqr (z)));
1597 real_32 ls = cxabs (cxsum (z, w));
1598 real_32 rs = xmul (X_VSV, cxabs (z));
1599 if (xreal_cmp (&ls, &rs) < 0) {
1600 return cxneg (cxlog (cxsub (w, z)));
1601 } else {
1602 return cxlog (cxsum (z, w));
1603 }
1604 }
1605
1606 complex_64 cxacosh (complex_64 z)
1607 {
1608 complex_64 w = cxsqrt (cxsub (cxsqr (z), X_1_0I));
1609 real_32 ls = cxabs (cxsum (z, w));
1610 real_32 rs = xmul (X_VSV, cxabs (z));
1611 if (xreal_cmp (&ls, &rs) < 0) {
1612 return cxneg (cxlog (cxsub (z, w)));
1613 } else {
1614 return cxlog (cxsum (z, w));
1615 }
1616 }
1617
1618 complex_64 cxatanh (complex_64 z)
1619 {
1620 real_32 t = xadd (xabs (z.re), X_1, 1);
1621 int_4 errcond = xsgn (&(z.im)) == 0 && xsgn (&t) == 0;
1622 if (xsigerr (errcond, XEDOM, "cxatanh()")) {
1623 return X_0_0I;
1624 } else {
1625 complex_64 w = cxdiv (cxsum (X_1_0I, z), cxsub (X_1_0I, z));
1626 w = cxlog_sqrt (w);
1627 return w;
1628 }
1629 }
1630
1631 complex_64 cxgdiv (complex_64 z1, complex_64 z2)
1632 {
1633 z1.re = xround (z1.re);
1634 z1.im = xround (z1.im);
1635 z2.re = xround (z2.re);
1636 z2.im = xround (z2.im);
1637 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1638 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NULL) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxgdiv()")) {
1639 return X_0_0I;
1640 } else {
1641 complex_64 w;
1642 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1643 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1644 w.re = xround (xdiv (w.re, mod2));
1645 w.im = xround (xdiv (w.im, mod2));
1646 return w;
1647 }
1648 }
1649
1650 complex_64 cxidiv (complex_64 z1, complex_64 z2)
1651 {
1652 z1.re = xround (z1.re);
1653 z1.im = xround (z1.im);
1654 z2.re = xround (z2.re);
1655 z2.im = xround (z2.im);
1656 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1657 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NULL) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxidiv()")) {
1658 return X_0_0I;
1659 } else {
1660 complex_64 w;
1661 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1662 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1663 w.re = xfix (xdiv (w.re, mod2));
1664 w.im = xfix (xdiv (w.im, mod2));
1665 return w;
1666 }
1667 }
1668
1669 complex_64 cxgmod (complex_64 z1, complex_64 z2)
1670 {
1671 z1.re = xround (z1.re);
1672 z1.im = xround (z1.im);
1673 z2.re = xround (z2.re);
1674 z2.im = xround (z2.im);
1675 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1676 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NULL) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxgmod()")) {
1677 return X_0_0I;
1678 } else {
1679 complex_64 w, z;
1680 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1681 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1682 w.re = xround (xdiv (w.re, mod2));
1683 w.im = xround (xdiv (w.im, mod2));
1684 z.re = xadd (xmul (w.re, z2.re), xmul (w.im, z2.im), 1);
1685 z.im = xadd (xmul (w.im, z2.re), xmul (w.re, z2.im), 0);
1686 w.re = xround (xadd (z1.re, z.re, 1));
1687 w.im = xround (xadd (z1.im, z.im, 1));
1688 return w;
1689 }
1690 }
1691
1692 complex_64 cxmod (complex_64 z1, complex_64 z2)
1693 {
1694 z1.re = xround (z1.re);
1695 z1.im = xround (z1.im);
1696 z2.re = xround (z2.re);
1697 z2.im = xround (z2.im);
1698 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1699 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NULL) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxmod()")) {
1700 return X_0_0I;
1701 } else {
1702 complex_64 w, z;
1703 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1704 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1705 w.re = xfix (xdiv (w.re, mod2));
1706 w.im = xfix (xdiv (w.im, mod2));
1707 z.re = xadd (xmul (w.re, z2.re), xmul (w.im, z2.im), 1);
1708 z.im = xadd (xmul (w.im, z2.re), xmul (w.re, z2.im), 0);
1709 w.re = xround (xadd (z1.re, z.re, 1));
1710 w.im = xround (xadd (z1.im, z.im, 1));
1711 return w;
1712 }
1713 }
1714
1715 static void unit_root (int_4 i, int_4 n, real_32 * a, real_32 * b)
1716 {
1717 // We assume n != 0
1718 i %= n;
1719 *a = xdiv (xmul (xreal_2 (inttox (i), 1), X_PI), inttox (n));
1720 *b = xsin (*a);
1721 *a = xcos (*a);
1722 if (xgetexp (b) < -80) {
1723 *b = X_0;
1724 }
1725 if (xgetexp (a) < -80) {
1726 *a = X_0;
1727 }
1728 }
1729
1730 complex_64 cxpwr (complex_64 z, int_4 n)
1731 {
1732 real_32 mod = cxabs (z);
1733 if (xsgn (&mod) <= 0) {
1734 (void) xsigerr (n <= 0, XEBADEXP, "cxpwr()");
1735 return X_0_0I;
1736 } else {
1737 real_32 arg = xmul (inttox (n), cxarg (z));
1738 mod = xpwr (mod, n);
1739 complex_64 w;
1740 w.re = xmul (mod, xcos (arg));
1741 w.im = xmul (mod, xsin (arg));
1742 return w;
1743 }
1744 }
1745
1746 complex_64 cxroot (complex_64 z, int_4 i, int_4 n)
1747 {
1748 if (xsigerr (n == 0, XEBADEXP, "cxroot()")) {
1749 return X_0_0I;
1750 } else {
1751 real_32 mod = cxabs (z);
1752 if (xsgn (&mod) <= 0) {
1753 (void) xsigerr (n < 0, XEBADEXP, "cxroot()");
1754 return X_0_0I;
1755 } else { // mod > 0
1756 real_32 arg = xdiv (cxarg (z), inttox (n));
1757 real_32 e = xdiv (X_1, inttox (n)); // 1/n
1758 // x^e = exp(e*log(x)) for any x > 0
1759 mod = xexp (xmul (e, xlog (mod)));
1760 complex_64 w, zz;
1761 w.re = xmul (mod, xcos (arg));
1762 w.im = xmul (mod, xsin (arg));
1763 real_32 a, b;
1764 unit_root (i, n, &a, &b);
1765 zz.re = xadd (xmul (w.re, a), xmul (w.im, b), 1);
1766 zz.im = xadd (xmul (w.im, a), xmul (w.re, b), 0);
1767 return zz;
1768 }
1769 }
1770 }
1771
1772 complex_64 cxpow (complex_64 z1, complex_64 z2)
1773 {
1774 real_32 mod = cxabs (z1);
1775 if (xsgn (&mod) <= 0) {
1776 (void) xsigerr (xsgn (&z2.re) <= 0, XEBADEXP, "cxpow()");
1777 return X_0_0I;
1778 } else {
1779 real_32 arg = cxarg (z1);
1780 real_32 a = xadd (xmul (z2.re, xlog (mod)), xmul (z2.im, arg), 1);
1781 real_32 b = xadd (xmul (z2.re, arg), xmul (z2.im, xlog (mod)), 0);
1782 complex_64 w;
1783 w.re = xmul (xexp (a), xcos (b));
1784 w.im = xmul (xexp (a), xsin (b));
1785 return w;
1786 }
1787 }
1788
1789 int_4 cxis0 (const complex_64 * z)
1790 {
1791 return (xis0 (&z->re) && xis0 (&z->im));
1792 }
1793
1794 int_4 cxnot0 (const complex_64 * z)
1795 {
1796 return (xnot0 (&z->re) || xnot0 (&z->im));
1797 }
1798
1799 int_4 cxeq (complex_64 z1, complex_64 z2)
1800 {
1801 return (xreal_cmp (&z1.re, &z2.re) == 0 && xreal_cmp (&z1.im, &z2.im) == 0);
1802 }
1803
1804 int_4 cxneq (complex_64 z1, complex_64 z2)
1805 {
1806 return (xreal_cmp (&z1.re, &z2.re) != 0 || xreal_cmp (&z1.im, &z2.im) != 0);
1807 }
1808
1809 complex_64 cxsin (complex_64 z)
1810 {
1811 complex_64 w1, w2;
1812 w1 = cxdrot (z); // Now w1= i*z, where i={0,1}
1813 w2 = cxrrot (z); // Now w2= -i*z, where i={0,1}
1814 w1 = cxsub (cxexp (w1), cxexp (w2));
1815 w2.re = xreal_2 (w1.im, -1);
1816 w1.re.value[0] ^= X_SIGN_MASK;
1817 w2.im = xreal_2 (w1.re, -1); // Now w2= (exp(i*z)-exp(-i*z))/2i
1818 return w2;
1819 }
1820
1821 complex_64 cxcos (complex_64 z)
1822 {
1823 complex_64 w1, w2;
1824 w1 = cxdrot (z); // Now w1= i*z, where i={0,1}
1825 w2 = cxrrot (z); // Now w2= -i*z, where i={0,1}
1826 w1 = cxsum (cxexp (w1), cxexp (w2));
1827 w2.re = xreal_2 (w1.re, -1);
1828 w2.im = xreal_2 (w1.im, -1);
1829 return w2;
1830 }
1831
1832 complex_64 cxtan (complex_64 z)
1833 {
1834 if (xsigerr (xreal_cmp (&(z.im), &xEmax) > 0, XFPOFLOW, NULL)) {
1835 return X_0_1I;
1836 } else if (xsigerr (xreal_cmp (&(z.im), &xEmin) < 0, XFPOFLOW, NULL)) {
1837 return cxneg (X_0_1I);
1838 } else {
1839 complex_64 w;
1840 if (xsigerr (!cxrec (cxcos (z), &w), XEDOM, "cxtan()")) {
1841 return X_0_0I;
1842 } else {
1843 return cxmul (cxsin (z), w);
1844 }
1845 }
1846 }
1847
1848 complex_64 cxasin (complex_64 z)
1849 {
1850 complex_64 w = cxsqrt (cxsub (X_1_0I, cxsqr (z)));
1851 real_32 ls = cxabs (cxsum (cxdrot (z), w));
1852 real_32 rs = xmul (X_VSV, cxabs (z));
1853 if (xreal_cmp (&ls, &rs) < 0) {
1854 w = cxdrot (cxlog (cxsub (w, cxdrot (z))));
1855 } else {
1856 w = cxrrot (cxlog (cxsum (cxdrot (z), w)));
1857 }
1858 return w;
1859 }
1860
1861 complex_64 cxacos (complex_64 z)
1862 {
1863 complex_64 w = cxsqrt (cxsub (X_1_0I, cxsqr (z)));
1864 real_32 ls = cxabs (cxsum (z, cxdrot (w)));
1865 real_32 rs = xmul (X_VSV, cxabs (z));
1866 if (xreal_cmp (&ls, &rs) < 0) {
1867 w = cxdrot (cxlog (cxsub (z, cxdrot (w))));
1868 } else {
1869 w = cxrrot (cxlog (cxsum (z, cxdrot (w))));
1870 }
1871 return w;
1872 }
1873
1874 complex_64 cxatan (complex_64 z)
1875 {
1876 real_32 mod = xadd (xabs (z.im), X_1, 1);
1877 int_4 errcond = xsgn (&(z.re)) == 0 && xsgn (&mod) == 0;
1878 if (xsigerr (errcond, XEDOM, "cxatan()")) {
1879 return X_0_0I;
1880 } else {
1881 // This way, cxatan() works fine also with complex numbers very far from the origin.
1882 complex_64 w;
1883 mod = cxabs (z);
1884 if (xreal_cmp (&mod, &X_VGV) > 0) {
1885 w = cxsqrt (cxsum (X_1_0I, cxsqr (z)));
1886 w = cxdiv (cxsum (X_1_0I, cxdrot (z)), w);
1887 w = cxrrot (cxlog (w));
1888 } else {
1889 w = cxdiv (cxsum (X_1_0I, cxdrot (z)), cxsub (X_1_0I, cxdrot (z)));
1890 w = cxrrot (cxlog_sqrt (w));
1891 }
1892 return w;
1893 }
1894 }
1895
1896 complex_64 cxdbl(complex_16 z)
1897 {
1898 complex_64 zz;
1899 zz.re = dbltox (creal (z));
1900 zz.im = dbltox (cimag (z));
1901 return zz;
1902 }
1903
1904 complex_64 cxquad(complex_32 z)
1905 {
1906 complex_64 zz;
1907 zz.re = dbltox (crealq (z));
1908 zz.im = dbltox (cimagq (z));
1909 return zz;
1910 }
1911
1912 complex_64 _cquadtop (complex_64 *zz, complex_32 z)
1913 {
1914 zz->re = dbltox (crealq (z));
1915 zz->im = dbltox (cimagq (z));
1916 return *zz;
1917 }
1918
1919 complex_64 cxreal32(real_32 z)
1920 {
1921 complex_64 zz;
1922 zz.re = z;
1923 zz.im = X_0;
1924 return zz;
1925 }
1926
1927 complex_64 _coctotop(complex_64 *zz, real_32 z)
1928 {
1929 zz->re = z;
1930 zz->im = X_0;
1931 return *zz;
1932 }
1933
1934 // VIF additions (REAL*32)
1935
1936 real_32 xtenup (int_4 n)
1937 {
1938 if (n == 0) {
1939 return X_1;
1940 } else if (n == 1) {
1941 return X_10;
1942 } else if (n == -1) {
1943 return X_1_OVER_10;
1944 }
1945 real_32 s = X_10, t;
1946 unsigned k, m;
1947 t = X_1;
1948 if (n < 0) {
1949 m = -n;
1950 if ((xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xtenup()"))) {
1951 return X_0;
1952 }
1953 s = xdiv (X_1, s);
1954 } else {
1955 m = n;
1956 }
1957 if (m != 0) {
1958 k = 1;
1959 while (1) {
1960 if (k & m) {
1961 t = xmul (s, t);
1962 }
1963 if ((k <<= 1) <= m) {
1964 s = xmul (s, s);
1965 } else {
1966 break;
1967 }
1968 }
1969 } else {
1970 xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xtenup()");
1971 }
1972 return t;
1973 }
1974
1975 real_32 xcotan (real_32 x)
1976 {
1977 // Intrinsic function so arguments are not pointers.
1978 return xdiv (X_1, xtan (x));
1979 }
1980
1981 real_32 xacotan (real_32 x)
1982 {
1983 // Intrinsic function so arguments are not pointers.
1984 return xatan (xdiv (X_1, x));
1985 }
1986
1987 real_32 _xsgn (real_32 a, real_32 b)
1988 {
1989 // Intrinsic function so arguments are not pointers.
1990 real_32 x = (xgetsgn (&a) == 0 ? a : xneg (a));
1991 return (xgetsgn (&b) == 0 ? x : xneg (x));
1992 }
1993
1994 real_32 _zabs_64 (real_32 re, real_32 im)
1995 {
1996 // Intrinsic function so arguments are not pointers.
1997 return cxabs (CMPLXX (re, im));
1998 }
1999
2000 // Conversion.
2001
2002 real_32 inttox (int_4 n)
2003 {
2004 REAL32 pe;
2005 unt_2 *pc, e;
2006 unt_4 k, h;
2007 bzero (pe, sizeof (pe));
2008 k = ABS (n);
2009 pc = (unt_2 *) &k;
2010 if (n == 0) {
2011 return *(real_32 *) pe;
2012 }
2013
2014 #if __BYTE_ORDER == __LITTLE_ENDIAN
2015 pe[1] = *(pc + 1);
2016 pe[2] = *pc;
2017 #else
2018 pe[1] = *pc;
2019 pe[2] = *(pc + 1);
2020 #endif
2021
2022 for (e = 0, h = 1; h <= k && e < ((8 * sizeof (unt_4)) - 1); h <<= 1, ++e);
2023 if (h <= k) {
2024 e += 1;
2025 }
2026 *pe = X_EXPO_BIAS + e - 1;
2027 if (n < 0) {
2028 *pe |= X_SIGN_MASK;
2029 }
2030 xlshift ((8 * sizeof (unt_4)) - e, pe + 1, FLT256_LEN);
2031 return *(real_32 *) pe;
2032 }
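
// Worked example (illustrative): inttox (3) stores 3 = 1.1 (binary) * 2^1 as
//
//    value[0] = 0x4000                 biased exponent 16383 + 1
//    value[1] = 0xc000                 mantissa 1.1 with explicit leading 1
//
// with the remaining mantissa words zero.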
2033
2034 real_4 xtoflt (real_32 s)
2035 {
2036 // An extended floating point number is represented as a combination of the
2037 // following elements:
2038 //
2039 // sign bit(s): 0 -> positive, 1 -> negative ;
2040 // exponent(e): 15-bit biased integer (bias=16383) ;
2041 // mantissa(m): 7 (or more) words of 16 bit length with the
2042 // leading 1 explicitly represented.
2043 //
2044 // Thus f = (-1)^s * 2^[e-16383] * m, with 1 <= m < 2.
2045 //
2046 // This format supports a dynamic range of:
2047 //
2048 // 2^16384 > f > 2^[-16383] or
2049 // 1.19*10^4932 > f > 1.68*10^-[4932].
2050 //
2051 // Special values of the exponent are:
2052 //
2053 // all ones -> infinity (floating point overflow)
2054 // all zeros -> number = zero.
2055 //
2056 // Underflow in operations is handled by a flush to zero. Thus, a number with
2057 // the exponent zero and nonzero mantissa is invalid (not-a-number).
2058 //
2059 //
2060 union {unt_2 pe[2]; real_4 f;} v;
2061 unt_2 *pc, u;
2062 int_2 i, e;
2063 pc = SEXP (s);
2064 u = *pc & X_SIGN_MASK;
2065 e = (*pc & X_EXPO_MASK) - xF_bias;
2066 //
2067 // u is the sign of the number s.
2068 // e == (exponent of s) + 127
2069 //
2070 if (e >= xF_max) {
2071 return (!u ? FLT_MAX : -FLT_MAX);
2072 }
2073 if (e <= 0) {
2074 return 0.;
2075 }
2076 for (i = 0; i < 2; v.pe[i] = *++pc, i++);
2077 // In the IEEE 754 Standard the leading 1 is not represented.
2078 v.pe[0] &= X_EXPO_MASK;
2079 // Now in pe[0],pe[1] we have 31 bits of mantissa.
2080 // But only the first 23 of them must be put in the
2081 // final real_4 number.
2082 xrshift (xF_lex - 1, v.pe, 2);
2083 // We have just loaded the mantissa and now we
2084 // are going to load exponent and sign.
2085 v.pe[0] |= (e << (16 - xF_lex));
2086 v.pe[0] |= u;
2087 #if __BYTE_ORDER == __LITTLE_ENDIAN
2088 u = v.pe[0];
2089 v.pe[0] = v.pe[1];
2090 v.pe[1] = u;
2091 #endif
2092 return v.f;
2093 }
2094
2095 real_32 flttox (real_4 y)
2096 {
2097 REAL32 pe;
2098 unt_2 *pc, u;
2099 int_2 i, e;
2100 if (y < FLT_MIN && y > -FLT_MIN) {
2101 return X_0;
2102 }
2103 bzero (pe, sizeof (pe));
2104 pc = (unt_2 *) &y;
2105 #if __BYTE_ORDER == __LITTLE_ENDIAN
2106 pc += 1;
2107 #endif
2108 u = *pc & X_SIGN_MASK;
2109 e = xF_bias + ((*pc & X_EXPO_MASK) >> (16 - xF_lex));
2110 // Now u is the sign of y and e is the
2111 // biased exponent (exponent + bias).
2112 #if __BYTE_ORDER == __LITTLE_ENDIAN
2113 for (i = 1; i < 3; pe[i] = *pc--, i++);
2114 #else
2115 for (i = 1; i < 3; pe[i] = *pc++, i++);
2116 #endif
2117 pc = pe + 1;
2118 xlshift (xF_lex - 1, pc, 2);
2119 *pc |= X_SIGN_MASK;
2120 // We have just put in pe[1],pe[2] the whole
2121 // mantissa of y with a leading 1.
2122 // Now we have only to put exponent and sign
2123 // in pe[0].
2124 *pe = e;
2125 *pe |= u;
2126 return *(real_32 *) pe;
2127 }
2128
2129 real_8 xtodbl (real_32 s)
2130 {
2131 union {unt_2 pe[4]; real_8 d;} v;
2132 unt_2 *pc, u;
2133 int_2 i, e;
2134 pc = SEXP (s);
2135 u = *pc & X_SIGN_MASK;
2136 e = (*pc & X_EXPO_MASK) - xD_bias;
2137 if (e >= xD_max) {
2138 return (!u ? DBL_MAX : -DBL_MAX);
2139 }
2140 if (e <= 0) {
2141 return 0.;
2142 }
2143 for (i = 0; i < 4; v.pe[i] = *++pc, i++);
2144 v.pe[0] &= X_EXPO_MASK;
2145 xrshift (xD_lex - 1, v.pe, 4);
2146 v.pe[0] |= (e << (16 - xD_lex));
2147 v.pe[0] |= u;
2148 #if __BYTE_ORDER == __LITTLE_ENDIAN
2149 u = v.pe[3];
2150 v.pe[3] = v.pe[0];
2151 v.pe[0] = u;
2152 u = v.pe[2];
2153 v.pe[2] = v.pe[1];
2154 v.pe[1] = u;
2155 #endif
2156 return v.d;
2157 }
2158
2159 real_32 dbltox (real_8 y)
2160 {
2161 REAL32 pe;
2162 unt_2 *pc, u;
2163 int_2 i, e;
2164 if (y < DBL_MIN && y > -DBL_MIN) {
2165 return X_0;
2166 }
2167 bzero (pe, sizeof (pe));
2168 pc = (unt_2 *) &y;
2169 #if __BYTE_ORDER == __LITTLE_ENDIAN
2170 pc += 3;
2171 #endif
2172 u = *pc & X_SIGN_MASK;
2173 e = xD_bias + ((*pc & X_EXPO_MASK) >> (16 - xD_lex));
2174 #if __BYTE_ORDER == __LITTLE_ENDIAN
2175 for (i = 1; i < 5; pe[i] = *pc--, i++);
2176 #else
2177 for (i = 1; i < 5; pe[i] = *pc++, i++);
2178 #endif
2179 pc = pe + 1;
2180 xlshift (xD_lex - 1, pc, 4);
2181 *pc |= X_SIGN_MASK;
2182 *pe = e;
2183 *pe |= u;
2184 return *(real_32 *) pe;
2185 }
2186
2187 real_32 _quadtox (real_16 x)
2188 {
2189 // Intrinsic function so arguments are not pointers.
2190 REAL32 z;
2191 unt_2 *y;
2192 int_4 i;
2193 if (x == 0.0q) {
2194 return X_0;
2195 }
2196 int_4 sinf = isinfq (x);
2197 if (sinf == 1) {
2198 return X_PLUS_INF;
2199 } else if (sinf == -1) {
2200 return X_MINUS_INF;
2201 }
2202 if (isnanq (x)) {
2203 return X_NAN;
2204 }
2205 bzero (z, sizeof (z));
2206 y = (unt_2 *) &x;
2207 for (i = 0; i <= 7; i++) {
2208 #if __BYTE_ORDER == __LITTLE_ENDIAN
2209 z[i] = y[7 - i];
2210 #else
2211 z[i] = y[i];
2212 #endif
2213 }
2214 // real_16 keeps the leading mantissa bit implicit; the HPA format stores it explicitly.
2215 unt_2 cy = 0x8000;
2216 for (i = 1; i < 8; i++) {
2217 if (z[i] & 0x1) {
2218 z[i] = (z[i] >> 1) | cy;
2219 cy = 0x8000;
2220 } else {
2221 z[i] = (z[i] >> 1) | cy;
2222 cy = 0x0;
2223 }
2224 }
2225 z[8] |= cy;
2226 return *(real_32 *) z;
2227 }
2228
2229 real_32 _quadtop (real_32 *z, real_16 x)
2230 {
2231 *z = _quadtox (x);
2232 return *z;
2233 }
2234
2235 real_16 _xtoquad (real_32 x)
2236 {
2237 // Intrinsic function so arguments are not pointers.
2238 REAL16 y;
2239 REAL32 z;
2240 int_4 i;
2241 memcpy (z, x.value, sizeof (real_32));
2242 // Catch NaN, +-Inf is handled correctly.
2243 if (xisNaN (&x)) {
2244 z[0] = 0x7fff;
2245 z[1] = 0xffff;
2246 }
2247 // real_16 keeps the leading mantissa bit implicit; the HPA format stores it explicitly.
2248 unt_2 cy = (z[8] & 0x8000 ? 0x1 : 0x0);
2249 for (i = 7; i > 0; i--) {
2250 if (z[i] & 0x8000) {
2251 z[i] = (z[i] << 1) | cy;
2252 cy = 0x1;
2253 } else {
2254 z[i] = (z[i] << 1) | cy;
2255 cy = 0x0;
2256 }
2257 }
2258 for (i = 0; i < 8; i++) {
2259 #if __BYTE_ORDER == __LITTLE_ENDIAN
2260 y[i] = z[7 - i];
2261 #else
2262 y[i] = z[i];
2263 #endif
2264 }
2265 // Avoid 'dereferencing type-punned pointer will break strict-aliasing rules'
2266 real_16 u;
2267 memcpy (&u, &y[0], sizeof (real_16));
2268 return u;
2269 }
2270
2271 real_32 strtox (char *s, char **end)
2272 {
2273 // This routine replaces the HPA solution - MvdV.
2274 // Algol 68 Genie employs the same algorithm.
2275 //
2276 // Initialize.
2277 #define N (FLT256_DIG + FLT256_GUARD)
2278 errno = 0;
2279 real_32 y[N];
2280 if (end != NULL && (*end) != NULL) {
2281 (*end) = &(s[0]);
2282 }
2283 while (isspace (s[0])) {
2284 s++;
2285 }
2286 real_32 W;
2287 if (s[0] == '-') {
2288 W = X_MINUS_1;
2289 s++;
2290 } else if (s[0] == '+') {
2291 W = X_1;
2292 s++;
2293 } else {
2294 W = X_1;
2295 }
2296 // Scan mantissa digits and put them into "y".
2297 while (s[0] == '0') {
2298 s++;
2299 }
2300 int dot = -1, pos = 0, pow = 0;
2301 while (pow < N && (isdigit (s[pos]) || s[pos] == '.')) {
2302 if (s[pos] == '.') {
2303 dot = pos;
2304 } else {
2305 switch (s[pos]) {
2306 case '0': y[pow] = X_0; break;
2307 case '1': y[pow] = W; break;
2308 case '2': y[pow] = xmul (X_2, W); break;
2309 case '3': y[pow] = xmul (X_3, W); break;
2310 case '4': y[pow] = xmul (X_4, W); break;
2311 case '5': y[pow] = xmul (X_5, W); break;
2312 case '6': y[pow] = xmul (X_6, W); break;
2313 case '7': y[pow] = xmul (X_7, W); break;
2314 case '8': y[pow] = xmul (X_8, W); break;
2315 case '9': y[pow] = xmul (X_9, W); break;
2316 }
2317 W = xdiv (W, X_10);
2318 pow++;
2319 }
2320 pos++;
2321 }
2322 // Skip trailing digits.
2323 while (isdigit (s[pos])) {
2324 pos++;
2325 }
2326 if (end != NULL && (*end) != NULL) {
2327 (*end) = &(s[pos]);
2328 }
2329 // Sum from low to high to preserve precision.
2330 real_32 sum = X_0;
2331 for (int k = pow - 1; k >= 0; k--) {
2332 sum = xsum (sum, y[k]);
2333 }
2334 // Optional exponent.
2335 int expo = 0;
2336 while (s[pos] == ' ') {
2337 pos++;
2338 }
2339 if (tolower (s[pos]) == 'e' || tolower (s[pos]) == 'q' || tolower (s[pos]) == 'x') {
2340 pos++;
2341 if (isdigit (s[pos]) || s[pos] == '-' || s[pos] == '+') {
2342 expo = (int) strtol (&(s[pos]), end, 10);
2343 }
2344 }
2345 // Standardize.
2346 if (dot >= 0) {
2347 expo += dot - 1;
2348 } else {
2349 expo += pow - 1;
2350 }
2351 while (xnot0 (&sum) && xlt1 (sum)) {
2352 sum = xmul (sum, X_10);
2353 expo--;
2354 }
2355 if (errno == 0) {
2356 return xmul (sum, xtenup (expo));
2357 } else {
2358 return X_0;
2359 }
2360 #undef N
2361 }
2362
2363 real_32 atox (char *q)
2364 {
2365 return strtox (q, NULL);
2366 }
2367
2368 int_4 _xint4 (real_32 x_)
2369 {
2370 static real_16 y_;
2371 y_ = _xtoquad (x_);
2372 return (int_4) _qnint (y_);
2373 }
2374
2375 int_4 _xnint4 (real_32 x_)
2376 {
2377 static real_16 y_;
2378 if (xgt (x_, X_0)) {
2379 y_ = _xtoquad (xsum (x_, X_1_OVER_2));
2380 }
2381 else {
2382 y_ = _xtoquad (xsub (x_, X_1_OVER_2));
2383 }
2384 return (int_4) (y_);
2385 }
2386
2387 int_8 _xint8 (real_32 x_)
2388 {
2389 static real_16 y_;
2390 y_ = _xtoquad (x_);
2391 return (int_8) (y_);
2392 }
2393
2394 int_8 _xnint8 (real_32 x_)
2395 {
2396 static real_16 y_;
2397 if (xgt (x_, X_0)) {
2398 y_ = _xtoquad (xsum (x_, X_1_OVER_2));
2399 }
2400 else {
2401 y_ = _xtoquad (xsub (x_, X_1_OVER_2));
2402 }
2403 return (int_8) (y_);
2404 }
2405
2406 void _xdump (real_32 *z)
2407 {
2408 printf ("{{");
2409 for (int i = 0; i <= FLT256_LEN; i++) {
2410 printf("0x%04x", (z->value)[i]);
2411 if (i < FLT256_LEN) {
2412 printf(", ");
2413 }
2414 }
2415 printf ("}}\n");
2416 fflush (stdout);
2417 }
2418
2419 real_32 _x1mach (int_4 *i)
2420 {
2421 switch (*i) {
2422 // d1mach(1) = b**(emin-1), the smallest positive magnitude.
2423 case 1: return FLT256_MIN;
2424 // d1mach(2) = b**emax*(1 - b**(-t)), the largest magnitude.
2425 case 2: return FLT256_MAX;
2426 // d1mach(3) = b**(-t), the smallest relative spacing.
2427 case 3: return FLT256_EPSILON_HALF;
2428 // d1mach(4) = b**(1-t), the largest relative spacing.
2429 case 4: return FLT256_EPSILON;
2430 // d1mach(5) = log10(b)
2431 case 5: return X_LOG10_2;
2432 //
2433 default: return X_0;
2434 }
2435 }
2436
|