rts-real32.c
1 //! @file rts-real32.c
2 //! @author (see below)
3 //
4 //! @section Copyright
5 //
6 // This file is part of VIF - vintage FORTRAN compiler.
7 // Copyright 2020-2025 J. Marcel van der Veer <algol68g@xs4all.nl>.
8 //
9 //! @section License
10 //
11 // This program is free software; you can redistribute it and/or modify it
12 // under the terms of the GNU General Public License as published by the
13 // Free Software Foundation; either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful, but
17 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 // more details. You should have received a copy of the GNU General Public
20 // License along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22 //! @section Synopsis
23 //!
24 //! Runtime support for REAL*32 and COMPLEX*64.
25
26 // The code is based on the HPA Library, available from:
27 // <http://download-mirror.savannah.gnu.org/releases/hpalib/>
28 //
29 // Copyright (C) 2000 Daniel A. Atkinson <DanAtk@aol.com>
30 // Copyright (C) 2004 Ivano Primi <ivprimi@libero.it>
31 // Copyright (C) 2022 Marcel van der Veer <algol68g@xs4all.nl> - VIF version.
32 //
33 // HPA code is adapted to work with VIF to implement REAL*32 and COMPLEX*64.
34 // HPA was chosen since it stores a 256-bit float as a 256-bit struct, which
35 // is convenient for FORTRAN.
36 //
37 // The HPA Library is free software; you can redistribute it and/or
38 // modify it under the terms of the GNU Lesser General Public
39 // License as published by the Free Software Foundation; either
40 // version 2.1 of the License, or (at your option) any later version.
41 //
42 // The HPA Library is distributed in the hope that it will be useful,
43 // but WITHOUT ANY WARRANTY; without even the implied warranty of
44 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
45 // Lesser General Public License for more details.
46 //
47 // You should have received a copy of the GNU Lesser General Public
48 // License along with the HPA Library; if not, write to the Free
49 // Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
50 // 02110-1301 USA.
51
52 // The functions forming the HPA library are all implemented in a portable
53 // fashion in the C language. The IEEE 754 standard for floating point
54 // hardware and software is assumed in the PC/Unix version of this library.
55 // A REAL*32 number is represented as a combination of the following elements:
56 //
57 // sign bit(s): 0 -> positive, 1 -> negative ;
58 // exponent(e): 15-bit biased integer (bias=16383) ;
59 // mantissa(m): 15 words of 16 bit length *including* leading 1.
60 //
61 // The range of representable numbers is then given by
62 //
63 // 2^16384 > x > 2^[-16383] => 1.19*10^4932 > x > 1.68*10^-[4932]
64 //
65 // Special values of the exponent are:
66 //
67 // all ones -> infinity (floating point overflow)
68 // all zeros -> number = zero.
69 //
70 // Underflow in operations is handled by a flush to zero. Thus, a number
71 // with the exponent zero and nonzero mantissa is invalid (not-a-number).
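// As a worked example of this layout: 1.0 has exponent 0, so its first word
// holds the biased exponent 16383 = 0x3fff with the sign bit clear, and its
// mantissa is stored, leading 1 included, as 0x8000 0x0000 ... 0x0000;
// 0.5 differs only in the exponent word, which becomes 0x3ffe.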
72 // From the point of view of the HPA library, a complex number is a
73 // structure formed by two REAL*32 numbers.
74 //
75 // HPA cannot extend the working precision beyond its preset, hardcoded format.
76 // Hence some math routines will not achieve full precision.
77
78 #include <vif.h>
79 #include <rts-real32.h>
80
81 static const char *errmsg[] = {
82 "No error",
83 "Division by zero",
84 "Out of domain",
85 "Bad exponent",
86 "Floating point overflow",
87 "Invalid error code"
88 };
89
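// SEXP (z) yields a pointer to the first 16-bit word of a real_32, the word
// that packs the sign bit and the 15-bit biased exponent; the mantissa words
// follow it in z.value.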
90 #define SEXP(z) ((unt_2 *) &(z.value))
91
92 void _pi32 (real_32 *x)
93 {
94 *x = X_PI;
95 }
96
97 int_4 xsigerr (int_4 errcond, int_4 errcode, const char *where)
98 {
99 // errcond must be the result of evaluating an error condition.
100 // errcode, which describes the type of the error,
101 // should always be one of XEDIV, XEDOM, XEBADEXP or XFPOFLOW.
102 if (errcond == 0) {
103 errcode = 0;
104 }
105 if (errcode < 0 || errcode > XNERR) {
106 errcode = XEINV;
107 }
108 if (errcode != 0) {
109 RTE (where, errmsg[errcode]);
110 }
111 return errcode;
112 }
113
114 // Elementary stuff.
115
116 inline real_32 xneg (real_32 s)
117 {
118 unt_2 *p = SEXP (s);
119 *p ^= X_SIGN_MASK;
120 return s;
121 }
122
123 inline real_32 xabs (real_32 s)
124 {
125 unt_2 *p = SEXP (s);
126 *p &= X_EXPO_MASK;
127 return s;
128 }
129
130 inline int_4 xgetexp (const real_32 * ps)
131 {
132 unt_2 *q = (unt_2 *) &(ps->value);
133 return (*q & X_EXPO_MASK) - X_EXPO_BIAS;
134 }
135
136 inline int_4 xgetsgn (const real_32 * ps)
137 {
138 unt_2 *q = (unt_2 *) &(ps->value);
139 return (*q & X_SIGN_MASK);
140 }
141
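// Three-way comparison: returns -1, 0 or 1 when *pa is less than, equal to
// or greater than *pb. Values with an all-zero mantissa and a finite
// exponent are treated as zero.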
142 int_4 xreal_cmp (const real_32 * pa, const real_32 * pb)
143 {
144 unt_2 e, k, *p, *q, p0, q0;
145 int_4 m;
146 p = (unt_2 *) &(pa->value);
147 q = (unt_2 *) &(pb->value);
148 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
149 if (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK) {
150 // *pa is actually zero
151 p0 = 0;
152 } else {
153 p0 = *p;
154 }
155 for (m = 1; m <= FLT256_LEN && q[m] == 0; m++);
156 if (m > FLT256_LEN && (*q & X_EXPO_MASK) < X_EXPO_MASK) {
157 // *pb is actually zero
158 q0 = 0;
159 } else {
160 q0 = *q;
161 }
162 e = p0 & X_SIGN_MASK;
163 k = q0 & X_SIGN_MASK;
164 if (e && !k) {
165 return -1;
166 } else if (!e && k) {
167 return 1;
168 } else { // *pa and *pb have the same sign
169 m = (e) ? -1 : 1;
170 e = p0 & X_EXPO_MASK;
171 k = q0 & X_EXPO_MASK;
172 if (e > k) {
173 return m;
174 } else if (e < k) {
175 return -m;
176 } else {
177 for (e = 0; *(++p) == *(++q) && e < FLT256_LEN; ++e);
178 if (e < FLT256_LEN) {
179 return (*p > *q ? m : -m);
180 } else {
181 return 0;
182 }
183 }
184 }
185 }
186
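// xreal_2 (s, m) scales s by 2**m by adjusting the biased exponent; it
// returns zero on underflow and +-Inf (raising XFPOFLOW) on overflow.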
187 real_32 xreal_2 (real_32 s, int_4 m)
188 {
189 unt_2 *p = SEXP (s);
190 int_4 e;
191 for (e = 1; e <= FLT256_LEN && p[e] == 0; e++);
192 if (e <= FLT256_LEN) {
193 e = *p & X_EXPO_MASK; // biased exponent
194 if (e + m < 0) {
195 return X_0;
196 } else if ((xsigerr (e + m >= X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
197 return ((s.value[0] & X_SIGN_MASK) ? X_MINUS_INF : X_PLUS_INF);
198 } else {
199 *p += m;
200 return s;
201 }
202 } else { // s is zero or +-Inf
203 return s;
204 }
205 }
206
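// xsfmod (s, &p) splits s: the integer part (at most 15 bits) is stored in
// *p, or -1 when the exponent is 15 or more, and the renormalized remainder
// is the function result. xexp2 below uses it to separate the integer part
// of the exponent from its fraction.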
207 real_32 xsfmod (real_32 s, int_4 *p)
208 {
209 unt_2 *pa = SEXP (s);
210 unt_2 *pb = pa + 1;
211 int_2 e, k;
212 e = (*pa & X_EXPO_MASK) - X_EXPO_BIAS;
213 if ((xsigerr (e >= 15, XFPOFLOW, NO_TEXT))) {
214 *p = -1;
215 return s;
216 } else if (e < 0) {
217 *p = 0;
218 return s;
219 }
220 *p = *pb >> (15 - e);
221 xlshift (++e, pb, FLT256_LEN);
222 *pa -= e;
223 for (e = 0; *pb == 0 && e < xMax_p; ++pb, e += 16);
224 if (e == xMax_p) {
225 return X_0;
226 }
227 for (k = 0; !((*pb << k) & X_SIGN_MASK); ++k);
228 if ((k += e)) {
229 xlshift (k, pa + 1, FLT256_LEN);
230 *pa -= k;
231 }
232 return s;
233 }
234
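// xlshift and xrshift shift the m-word vector at pm by n bits to the left,
// respectively to the right, filling the vacated positions with zero bits.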
235 void xlshift (int_4 n, unt_2 *pm, int_4 m)
236 {
237 unt_2 *pc = pm + m - 1;
238 if (n < 16 * m) {
239 unt_2 *pa = pm + n / 16;
240 m = n % 16;
241 n = 16 - m;
242 while (pa < pc) {
243 *pm = (*pa++) << m;
244 *pm++ |= *pa >> n;
245 }
246 *pm++ = *pa << m;
247 }
248 while (pm <= pc) {
249 *pm++ = 0;
250 }
251 }
252
253 void xrshift (int_4 n, unt_2 *pm, int_4 m)
254 {
255 unt_2 *pc = pm + m - 1;
256 if (n < 16 * m) {
257 unt_2 *pa = pc - n / 16;
258 m = n % 16;
259 n = 16 - m;
260 while (pa > pm) {
261 *pc = (*pa--) >> m;
262 *pc-- |= *pa << n;
263 }
264 *pc-- = *pa >> m;
265 }
266 while (pc >= pm) {
267 *pc-- = 0;
268 }
269 }
270
271 real_32 _xI (real_32 x)
272 {
273 // Intrinsic function so arguments are not pointers.
274 return x;
275 }
276
277 real_32 _aintx (real_32 x)
278 {
279 // Intrinsic function so arguments are not pointers.
280 return xtrunc (x);
281 }
282
283 real_32 _anintx (real_32 x)
284 {
285 // Intrinsic function so arguments are not pointers.
286 if (xgetsgn (&x) == 0) {
287 return xtrunc (xsum (x, X_1_OVER_2));
288 } else {
289 return xtrunc (xsub (x, X_1_OVER_2));
290 }
291 }
292
293 real_32 _xmax (real_32 a, real_32 b)
294 {
295 // Intrinsic function so arguments are not pointers.
296 if (xge (a, b)) {
297 return a;
298 } else {
299 return b;
300 }
301 }
302
303 real_32 _xmin (real_32 a, real_32 b)
304 {
305 // Intrinsic function so arguments are not pointers.
306 if (xle (a, b)) {
307 return a;
308 } else {
309 return b;
310 }
311 }
312
313 real_32 _xmod (real_32 a, real_32 b)
314 {
315 // Intrinsic function so arguments are not pointers.
316 real_32 q = xdiv (a, b);
317 if (xsgn (&q) >= 0) {
318 q = xfloor (q);
319 } else {
320 q = xneg (xfloor (xneg (q)));
321 }
322 return xadd (a, xmul (b, q), 1);
323 }
324
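// xadd (s, t, f) returns s + t when f is zero and s - t when f is nonzero.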
325 real_32 xadd (real_32 s, real_32 t, int_4 f)
326 {
327 REAL32 pe;
328 unt_2 h, u;
329 unt_2 *pc, *pf = pe;
330 unt_4 n = 0;
331 unt_2 *pa = SEXP (s);
332 unt_2 *pb = SEXP (t);
333 int_2 e = *pa & X_EXPO_MASK;
334 int_2 k = *pb & X_EXPO_MASK;
335 if (f != 0) {
336 *pb ^= X_SIGN_MASK;
337 }
338 u = (*pb ^ *pa) & X_SIGN_MASK;
339 f = 0;
340 if (e > k) {
341 if ((k = e - k) >= xMax_p) {
342 return s;
343 }
344 xrshift (k, pb + 1, FLT256_LEN);
345 } else if (e < k) {
346 if ((e = k - e) >= xMax_p) {
347 return t;
348 }
349 xrshift (e, pa + 1, FLT256_LEN);
350 e = k;
351 pc = pa;
352 pa = pb;
353 pb = pc;
354 } else if (u != 0) {
355 for (pc = pa, pf = pb; *(++pc) == *(++pf) && f < FLT256_LEN; ++f);
356 if (f >= FLT256_LEN) {
357 return X_0;
358 }
359 if (*pc < *pf) {
360 pc = pa;
361 pa = pb;
362 pb = pc;
363 }
364 pf = pe + f;
365 }
366 h = *pa & X_SIGN_MASK;
367 if (u != 0) {
368 for (pc = pb + FLT256_LEN; pc > pb; --pc) {
369 *pc = ~(*pc);
370 }
371 n = 1L;
372 }
373 for (pc = pe + FLT256_LEN, pa += FLT256_LEN, pb += FLT256_LEN; pc > pf;) {
374 n += *pa;
375 pa--;
376 n += *pb;
377 pb--;
378 *pc = n;
379 pc--;
380 n >>= 16;
381 }
382 if (u != 0) {
383 for (; *(++pc) == 0; ++f);
384 for (k = 0; !((*pc << k) & X_SIGN_MASK); ++k);
385 if ((k += 16 * f)) {
386 if ((e -= k) <= 0) {
387 return X_0;
388 }
389 xlshift (k, pe + 1, FLT256_LEN);
390 }
391 } else {
392 if (n != 0) {
393 ++e;
394 if ((xsigerr (e == (short) X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
395 return (!h ? X_PLUS_INF : X_MINUS_INF);
396 }
397 ++pf;
398 xrshift (1, pf, FLT256_LEN);
399 *pf |= X_SIGN_MASK;
400 }
401 }
402 *pe = e;
403 *pe |= h;
404 return *(real_32 *) pe;
405 }
406
407 real_32 xmul (real_32 s, real_32 t)
408 {
409 unt_2 pe[FLT256_LEN + 2], *q0, *q1, h;
410 unt_2 *pa, *pb, *pc;
411 unt_4 m, n, p;
412 int_2 e;
413 int_2 k;
414 q0 = SEXP (s);
415 q1 = SEXP (t);
416 e = (*q0 & X_EXPO_MASK) - X_EXPO_BIAS;
417 k = (*q1 & X_EXPO_MASK) + 1;
418 if ((xsigerr (e > (short) X_EXPO_MASK - k, XFPOFLOW, NO_TEXT))) {
419 return (((s.value[0] & X_SIGN_MASK) ^ (t.value[0] & X_SIGN_MASK)) ? X_MINUS_INF : X_PLUS_INF);
420 }
421 if ((e += k) <= 0) {
422 return X_0;
423 }
424 h = (*q0 ^ *q1) & X_SIGN_MASK;
425 for (++q1, k = FLT256_LEN, p = n = 0L, pc = pe + FLT256_LEN + 1; k > 0; --k) {
426 for (pa = q0 + k, pb = q1; pa > q0;) {
427 m = *pa--;
428 m *= *pb++;
429 n += (m & 0xffffL);
430 p += (m >> 16);
431 }
432 *pc-- = n;
433 n = p + (n >> 16);
434 p = 0L;
435 }
436 *pc = n;
437 if (!(*pc & X_SIGN_MASK)) {
438 --e;
439 if (e <= 0) {
440 return X_0;
441 }
442 xlshift (1, pc, FLT256_LEN + 1);
443 }
444 if ((xsigerr (e == (short) X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
445 return (!h ? X_PLUS_INF : X_MINUS_INF);
446 }
447 *pe = e;
448 *pe |= h;
449 return *(real_32 *) pe;
450 }
451
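// xdiv (s, t) seeds the reciprocal of t with a double precision estimate,
// refines it with xItt_div Newton-Raphson steps a <- a * (2 - a * t), and
// finally multiplies by s.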
452 real_32 xdiv (real_32 s, real_32 t)
453 {
454 unt_2 *pd = SEXP (s), *pc = SEXP (t);
455 // Next makes xdiv robust at extreme exponents - MvdV.
456 if ((*pd & X_EXPO_MASK) == (*pc & X_EXPO_MASK)) {
457 *pd &= ~X_EXPO_MASK;
458 *pc &= ~X_EXPO_MASK;
459 }
460 // HPA implementation.
461 unt_2 e = *pc;
462 *pc = X_EXPO_BIAS;
463 if ((xsigerr (xreal_cmp (&t, &X_0) == 0, XEDIV, "xdiv()"))) {
464 return X_0;
465 } else {
466 real_32 a = dbltox (1 / xtodbl (t));
467 *pc = e;
468 pc = SEXP (a);
469 *pc += X_EXPO_BIAS - (e & X_EXPO_MASK);
470 *pc |= e & X_SIGN_MASK;
471 for (unt_2 i = 0; i < xItt_div; ++i) {
472 a = xmul (a, xadd (X_2, xmul (a, t), 1));
473 }
474 return xmul (s, a);
475 }
476 }
477
478 real_32 xevtch (real_32 z, real_32 * a, int_4 m)
479 {
480 real_32 *p, f, t, tp, w;
481 w = xreal_2 (z, 1);
482 t = X_0;
483 tp = X_0;
484 for (p = a + m; p > a;) {
485 f = xadd (*p--, xadd (xmul (w, t), tp, 1), 0);
486 tp = t;
487 t = f;
488 }
489 return xadd (*p, xadd (xmul (z, t), tp, 1), 0);
490 }
491
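// xexp2 (x) splits x with xsfmod into an integer part k and a fraction,
// rescales the fraction by ln 2, evaluates exp there with a continued
// fraction, and then adds k to the exponent of the result.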
492 real_32 xexp2 (real_32 x)
493 {
494 real_32 s, d, f;
495 unt_2 *pf = SEXP (x);
496 int_4 m, k;
497 if (xreal_cmp (&x, &xE2min) < 0) {
498 return X_0;
499 } else if ((xsigerr (xreal_cmp (&x, &xE2max) > 0, XFPOFLOW, NO_TEXT))) {
500 return X_PLUS_INF;
501 } else {
502 m = (*pf & X_SIGN_MASK) ? 1 : 0;
503 x = xsfmod (x, &k);
504 if (m != 0) {
505 k *= -1;
506 }
507 // -X_EXPO_BIAS <= k <= +X_EXPO_BIAS
508 x = xmul (x, X_LN_2);
509 if (xgetexp (&x) > -X_EXPO_BIAS) {
510 x = xreal_2 (x, -1);
511 s = xmul (x, x);
512 f = X_0;
513 for (d = inttox (m = xMS_exp); m > 1; m -= 2, d = inttox (m)) {
514 f = xdiv (s, xadd (d, f, 0));
515 }
516 f = xdiv (x, xadd (d, f, 0));
517 f = xdiv (xadd (d, f, 0), xadd (d, f, 1));
518 } else {
519 f = X_1;
520 }
521 pf = SEXP (f);
522 if (-k > *pf) {
523 return X_0;
524 } else {
525 *pf += k;
526 if ((xsigerr (*pf >= X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
527 return X_PLUS_INF;
528 } else {
529 return f;
530 }
531 }
532 }
533 }
534
535 real_32 xexp (real_32 z)
536 {
537 return xexp2 (xmul (z, X_LOG2_E));
538 }
539
540 real_32 xexp10 (real_32 z)
541 {
542 return xexp2 (xmul (z, X_LOG2_10));
543 }
544
545 real_32 xfmod (real_32 s, real_32 t, real_32 * q)
546 {
547 if ((xsigerr (xreal_cmp (&t, &X_0) == 0, XEDIV, "xfmod()"))) {
548 return X_0;
549 } else {
550 unt_2 *p, mask = 0xffff;
551 int_2 e, i;
552 int_4 u;
553 *q = xdiv (s, t);
554 p = (unt_2 *) &(q->value);
555 u = (*p & X_SIGN_MASK) ? 0 : 1;
556 e = (*p &= X_EXPO_MASK); // biased exponent of *q
557 e = e < X_EXPO_BIAS ? 0 : e - X_EXPO_BIAS + 1;
558 for (i = 1; e / 16 > 0; i++, e -= 16);
559 if (i <= FLT256_LEN) {
560 // e = 0, ..., 15
561 mask <<= 16 - e;
562 p[i] &= mask;
563 for (i++; i <= FLT256_LEN; p[i] = 0, i++);
564 }
565 // Now *q == abs(quotient of (s/t))
566 return xadd (s, xmul (t, *q), u);
567 }
568 }
569
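// xfrexp (s, &p) is the analogue of frexp (): it returns a value with
// magnitude in [0.5, 1) and stores in *p the exponent such that
// s == result * 2**(*p).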
570 real_32 xfrexp (real_32 s, int_4 *p)
571 {
572 unt_2 *ps = SEXP (s), u;
573 *p = (*ps & X_EXPO_MASK) - X_EXPO_BIAS + 1;
574 u = *ps & X_SIGN_MASK;
575 *ps = X_EXPO_BIAS - 1;
576 *ps |= u;
577 return s;
578 }
579
580 static void nullify (int_4 skip, unt_2 *p, int_4 k)
581 {
582 // After skipping the first 'skip' bits of the vector 'p',
583 // this function nullifies all the remaining ones. 'k' is
584 // the number of words forming the vector p.
585 // Warning: 'skip' must be positive !
586 int_4 i;
587 unt_2 mask = 0xffff;
588 for (i = 0; skip / 16 > 0; i++, skip -= 16);
589 if (i < k) {
590 // skip = 0, ..., 15
591 mask <<= 16 - skip;
592 p[i] &= mask;
593 for (i++; i < k; p[i] = 0, i++);
594 }
595 }
596
597 static void canonic_form (real_32 * px)
598 {
599 unt_2 *p, u;
600 int_2 e, i, j, skip;
601 p = (unt_2 *) &(px->value);
602 e = (*p & X_EXPO_MASK); // biased exponent of x
603 u = (*p & X_SIGN_MASK); // sign of x
604 if (e < X_EXPO_BIAS - 1) {
605 return;
606 } else {
607 unt_2 mask = 0xffff;
608 // e >= X_EXPO_BIAS - 1
609 for (i = 1, skip = e + 1 - X_EXPO_BIAS; skip / 16 > 0; i++, skip -= 16);
610 if (i <= FLT256_LEN) {
611 // skip = 0, ..., 15
612 mask >>= skip;
613 if ((p[i] & mask) != mask) {
614 return;
615 } else {
616 for (j = i + 1; j <= FLT256_LEN && p[j] == 0xffff; j++);
617 if (j > FLT256_LEN) {
618 p[i] -= mask;
619 for (j = i + 1; j <= FLT256_LEN; p[j] = 0, j++);
620 if (!(p[1] & 0x8000)) {
621 p[1] = 0x8000;
622 *p = ++e;
623 *p |= u;
624 } else if (u != 0) {
625 *px = xadd (*px, X_1, 1);
626 } else {
627 *px = xadd (*px, X_1, 0);
628 }
629 }
630 }
631 }
632 }
633 }
634
635 real_32 xfrac (real_32 x)
636 {
637 // xfrac(x) returns the fractional part of the number x.
638 // xfrac(x) has the same sign as x.
639 unt_2 u, *p;
640 int_2 e;
641 int_4 n;
642 canonic_form (&x);
643 p = SEXP (x);
644 e = (*p & X_EXPO_MASK); // biased exponent of x
645 if (e < X_EXPO_BIAS) {
646 return x; // The integer part of x is zero
647 } else {
648 u = *p & X_SIGN_MASK; // sign of x
649 n = e - X_EXPO_BIAS + 1;
650 xlshift (n, p + 1, FLT256_LEN);
651 e = X_EXPO_BIAS - 1;
652 // Now I have to take into account the rule
653 // of the leading one.
654 while (e > 0 && !(p[1] & X_SIGN_MASK)) {
655 xlshift (1, p + 1, FLT256_LEN);
656 e -= 1;
657 }
658 // Now p+1 points to the fractional part of x,
659 // u is its sign, e is its biased exponent.
660 p[0] = e;
661 p[0] |= u;
662 return *(real_32 *) p;
663 }
664 }
665
666 real_32 xtrunc (real_32 x)
667 {
668 // xtrunc(x) returns the integer part of the number x.
669 // xtrunc(x) has the same sign as x.
670 unt_2 *p;
671 int_2 e;
672 canonic_form (&x);
673 p = SEXP (x);
674 e = (*p & X_EXPO_MASK); // biased exponent of x
675 if (e < X_EXPO_BIAS) {
676 return X_0; // The integer part of x is zero
677 } else {
678 nullify (e - X_EXPO_BIAS + 1, p + 1, FLT256_LEN);
679 return *(real_32 *) p;
680 }
681 }
682
683 real_32 xround (real_32 x)
684 {
685 return xtrunc (xadd (x, X_RND_CORR, x.value[0] & X_SIGN_MASK));
686 }
687
688 real_32 xceil (real_32 x)
689 {
690 unt_2 *ps = SEXP (x);
691 if ((*ps & X_SIGN_MASK)) {
692 return xtrunc (x);
693 } else {
694 real_32 y = xfrac (x);
695 // y has the same sign as x (see above).
696 return (xreal_cmp (&y, &X_0) > 0 ? xadd (xtrunc (x), X_1, 0) : x);
697 }
698 }
699
700 real_32 xfloor (real_32 x)
701 {
702 unt_2 *ps = SEXP (x);
703 if ((*ps & X_SIGN_MASK)) {
704 real_32 y = xfrac (x);
705 // y has the same sign as x (see above).
706 return (xreal_cmp (&y, &X_0) < 0 ? xadd (xtrunc (x), X_1, 1) : x);
707 } else {
708 return xtrunc (x);
709 }
710 }
711
712 static void xadd_correction (real_32 * px, int_4 k)
713 {
714 int_2 e = (px->value[0] & X_EXPO_MASK) - X_EXPO_BIAS;
715 // e = (e > 0 ? e : 0);
716 *px = xadd (*px, xreal_2 (X_FIX_CORR, e), k);
717 }
718
719 real_32 xfix (real_32 x)
720 {
721 unt_2 *p;
722 int_2 e;
723 xadd_correction (&x, x.value[0] & X_SIGN_MASK);
724 p = SEXP (x);
725 e = (*p & X_EXPO_MASK); // biased exponent of x
726 if (e < X_EXPO_BIAS) {
727 return X_0; // The integer part of x is zero
728 } else {
729 nullify (e - X_EXPO_BIAS + 1, p + 1, FLT256_LEN);
730 return *(real_32 *) p;
731 }
732 }
733
734 real_32 xtanh (real_32 z)
735 {
736 real_32 s, d, f;
737 int_4 m, k;
738 if ((k = xgetexp (&z)) > xK_tanh) {
739 if (xgetsgn (&z)) {
740 return xneg (X_1);
741 } else {
742 return X_1;
743 }
744 }
745 if (k < xK_lin) {
746 return z;
747 }
748 ++k;
749 if (k > 0) {
750 z = xreal_2 (z, -k);
751 }
752 s = xmul (z, z);
753 f = X_0;
754 for (d = inttox (m = xMS_hyp); m > 1;) {
755 f = xdiv (s, xadd (d, f, 0));
756 d = inttox (m -= 2);
757 }
758 f = xdiv (z, xadd (d, f, 0));
759 for (; k > 0; --k) {
760 f = xdiv (xreal_2 (f, 1), xadd (d, xmul (f, f), 0));
761 }
762 return f;
763 }
764
765 real_32 xsinh (real_32 z)
766 {
767 int_4 k;
768 if ((k = xgetexp (&z)) < xK_lin) {
769 return z;
770 } else if (k < 0) {
771 z = xtanh (xreal_2 (z, -1));
772 return xdiv (xreal_2 (z, 1), xadd (X_1, xmul (z, z), 1));
773 } else {
774 z = xexp (z);
775 return xreal_2 (xadd (z, xdiv (X_1, z), 1), -1);
776 }
777 }
778
779 real_32 xcosh (real_32 z)
780 {
781 if (xgetexp (&z) < xK_lin) {
782 return X_1;
783 }
784 z = xexp (z);
785 return xreal_2 (xadd (z, xdiv (X_1, z), 0), -1);
786 }
787
788 real_32 xatanh (real_32 x)
789 {
790 real_32 y = x;
791 y.value[0] &= X_EXPO_MASK; // Now y == abs(x)
792 if ((xsigerr (xreal_cmp (&y, &X_1) >= 0, XEDOM, "xatanh"))) {
793 return ((x.value[0] & X_SIGN_MASK) ? X_MINUS_INF : X_PLUS_INF);
794 } else {
795 y = xdiv (xadd (X_1, x, 0), xadd (X_1, x, 1));
796 return xreal_2 (xlog (y), -1);
797 }
798 }
799
800 real_32 xasinh (real_32 x)
801 {
802 real_32 y = xmul (x, x);
803 y = xsqrt (xadd (X_1, y, 0));
804 if ((x.value[0] & X_SIGN_MASK)) {
805 return xneg (xlog (xadd (y, x, 1)));
806 } else {
807 return xlog (xadd (x, y, 0));
808 }
809 }
810
811 real_32 xacosh (real_32 x)
812 {
813 if ((xsigerr (xreal_cmp (&x, &X_1) < 0, XEDOM, "xacosh()"))) {
814 return X_0;
815 } else {
816 real_32 y = xmul (x, x);
817 y = xsqrt (xadd (y, X_1, 1));
818 return xlog (xadd (x, y, 0));
819 }
820 }
821
822 real_32 xatan (real_32 z)
823 {
824 real_32 s, f;
825 int_4 k, m;
826 if ((k = xgetexp (&z)) < xK_lin) {
827 return z;
828 }
829 if (k >= 0) {
830 // k>=0 is equivalent to abs(z) >= 1.0
831 z = xdiv (X_1, z);
832 m = 1;
833 } else {
834 m = 0;
835 }
836 f = dbltox (atan (xtodbl (z)));
837 s = xadd (X_1, xmul (z, z), 0);
838 for (k = 0; k < xItt_div; ++k) {
839 f = xadd (f, xdiv (xadd (z, xtan (f), 1), s), 0);
840 }
841 if (m != 0) {
842 if (xgetsgn (&f)) {
843 return xadd (xneg (X_PI_OVER_2), f, 1);
844 } else {
845 return xadd (X_PI_OVER_2, f, 1);
846 }
847 } else {
848 return f;
849 }
850 }
851
852 real_32 xasin (real_32 z)
853 {
854 real_32 u = z;
855 u.value[0] &= X_EXPO_MASK;
856 if ((xsigerr (xreal_cmp (&u, &X_1) > 0, XEDOM, "xasin()"))) {
857 return ((xgetsgn (&z)) ? xneg (X_PI_OVER_2) : X_PI_OVER_2);
858 } else {
859 if (xgetexp (&z) < xK_lin) {
860 return z;
861 }
862 u = xsqrt (xadd (X_1, xmul (z, z), 1));
863 if (xgetexp (&u) == -X_EXPO_BIAS) {
864 return ((xgetsgn (&z)) ? xneg (X_PI_OVER_2) : X_PI_OVER_2);
865 }
866 return xatan (xdiv (z, u));
867 }
868 }
869
870 real_32 xacos (real_32 z)
871 {
872 real_32 u = z;
873 u.value[0] &= X_EXPO_MASK;
874 if ((xsigerr (xreal_cmp (&u, &X_1) > 0, XEDOM, "xacos()"))) {
875 return ((xgetsgn (&z)) ? X_PI : X_0);
876 } else {
877 if (xgetexp (&z) == -X_EXPO_BIAS) {
878 return X_PI_OVER_2;
879 }
880 u = xsqrt (xadd (X_1, xmul (z, z), 1));
881 u = xatan (xdiv (u, z));
882 if (xgetsgn (&z)) {
883 return xadd (X_PI, u, 0);
884 } else {
885 return u;
886 }
887 }
888 }
889 // Kindly added by A. Haumer 2010-04-09
890
891 real_32 xatan2 (real_32 y, real_32 x)
892 {
893 int_4 rs, is;
894 rs = xsgn (&x);
895 is = xsgn (&y);
896 if (rs > 0) {
897 return xatan (xdiv (y, x));
898 } else if (rs < 0) {
899 x.value[0] ^= X_SIGN_MASK;
900 y.value[0] ^= X_SIGN_MASK;
901 if (is >= 0) {
902 return xadd (X_PI, xatan (xdiv (y, x)), 0);
903 } else {
904 return xadd (xatan (xdiv (y, x)), X_PI, 1);
905 }
906 } else { // x is zero !
907 if (!xsigerr (is == 0, XEDOM, "xatan2()")) {
908 return (is > 0 ? X_PI_OVER_2 : xneg (X_PI_OVER_2));
909 } else {
910 return X_0; // Dummy value :)
911 }
912 }
913 }
914
915 real_32 xlog (real_32 z)
916 {
917 real_32 f, h;
918 int_4 k, m;
919 if ((xsigerr ((xgetsgn (&z)) || xgetexp (&z) == -X_EXPO_BIAS, XEDOM, "xlog()"))) {
920 return X_MINUS_INF;
921 } else if (xreal_cmp (&z, &X_1) == 0) {
922 return X_0;
923 } else {
924 z = xfrexp (z, &m);
925 z = xmul (z, X_SQRT_2);
926 z = xdiv (xadd (z, X_1, 1), xadd (z, X_1, 0));
927 h = xreal_2 (z, 1);
928 z = xmul (z, z);
929 for (f = h, k = 1; xgetexp (&h) > -xMax_p;) {
930 h = xmul (h, z);
931 f = xadd (f, xdiv (h, inttox (k += 2)), 0);
932 }
933 return xadd (f, xmul (X_LN_2, dbltox (m - .5)), 0);
934 }
935 }
936
937 real_32 xlog2 (real_32 z)
938 {
939 real_32 f, h;
940 int_4 k, m;
941 if ((xsigerr ((xgetsgn (&z)) || xgetexp (&z) == -X_EXPO_BIAS, XEDOM, "xlog2()"))) {
942 return X_MINUS_INF;
943 } else if (xreal_cmp (&z, &X_1) == 0) {
944 return X_0;
945 } else {
946 z = xfrexp (z, &m);
947 z = xmul (z, X_SQRT_2);
948 z = xdiv (xadd (z, X_1, 1), xadd (z, X_1, 0));
949 h = xreal_2 (z, 1);
950 z = xmul (z, z);
951 for (f = h, k = 1; xgetexp (&h) > -xMax_p;) {
952 h = xmul (h, z);
953 f = xadd (f, xdiv (h, inttox (k += 2)), 0);
954 }
955 return xadd (xmul (f, X_LOG2_E), dbltox (m - .5), 0);
956 }
957 }
958
959 real_32 xlog10 (real_32 z)
960 {
961 real_32 w = xlog (z);
962 if (xreal_cmp (&w, &X_MINUS_INF) <= 0) {
963 return X_MINUS_INF;
964 } else {
965 return xmul (w, X_LOG10_E);
966 }
967 }
968
969 logical_4 xeq (real_32 x1, real_32 x2)
970 {
971 return (xreal_cmp (&x1, &x2) == 0);
972 }
973
974 logical_4 xneq (real_32 x1, real_32 x2)
975 {
976 return (xreal_cmp (&x1, &x2) != 0);
977 }
978
979 logical_4 xgt (real_32 x1, real_32 x2)
980 {
981 return (xreal_cmp (&x1, &x2) > 0);
982 }
983
984 logical_4 xge (real_32 x1, real_32 x2)
985 {
986 return (xreal_cmp (&x1, &x2) >= 0);
987 }
988
989 logical_4 xlt (real_32 x1, real_32 x2)
990 {
991 return (xreal_cmp (&x1, &x2) < 0);
992 }
993
994 logical_4 xle (real_32 x1, real_32 x2)
995 {
996 return (xreal_cmp (&x1, &x2) <= 0);
997 }
998
999
1000 logical_4 xis_nan (const real_32 * u)
1001 {
1002 // xis_nan (&x) returns TRUE if and only if x is not a valid number
1003 unt_2 *p = (unt_2 *) &(u->value);
1004 if (*p != 0) {
1005 return FALSE;
1006 } else {
1007 int_4 i;
1008 for (i = 1; i <= FLT256_LEN && p[i] == 0x0; i++);
1009 return i <= FLT256_LEN;
1010 }
1011 }
1012
1013 logical_4 xis0 (const real_32 * u)
1014 {
1015 unt_2 *p = (unt_2 *) &(u->value);
1016 int_4 m;
1017 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1018 return (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK ? TRUE : FALSE);
1019 }
1020
1021 logical_4 xnot0 (const real_32 * u)
1022 {
1023 unt_2 *p = (unt_2 *) &(u->value);
1024 int_4 m;
1025 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1026 return (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK ? FALSE : TRUE);
1027 }
1028
1029 logical_4 xlt1 (const real_32 u)
1030 {
1031 return (xlt (xabs (u), X_1));
1032 }
1033
1034 int_4 xsgn (const real_32 * u)
1035 {
1036 unt_2 *p = (unt_2 *) &(u->value);
1037 int_4 m;
1038 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1039 if ((m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK) || !*p) {
1040 return 0;
1041 } else {
1042 return ((*p & X_SIGN_MASK) ? -1 : 1);
1043 }
1044 }
1045
1046 logical_4 xis_pinf (const real_32 * u)
1047 {
1048 return (*u->value == X_EXPO_MASK);
1049 }
1050
1051 logical_4 xis_minf (const real_32 * u)
1052 {
1053 return (*u->value == (X_EXPO_MASK | X_SIGN_MASK));
1054 }
1055
1056 logical_4 xisordnumb (const real_32 * u)
1057 {
1058 int_4 isNaN, isfinite;
1059 unt_2 *p = (unt_2 *) &(u->value);
1060 if (*p != 0) {
1061 isNaN = FALSE;
1062 } else {
1063 int_4 i;
1064 for (i = 1; i <= FLT256_LEN && p[i] == 0x0; i++);
1065 isNaN = i <= FLT256_LEN;
1066 }
1067 isfinite = (*p & X_EXPO_MASK) < X_EXPO_MASK;
1068 return (!isNaN && (isfinite) ? TRUE : FALSE);
1069 }
1070
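// xpwr (s, n) raises s to the integer power n by binary (square-and-multiply)
// exponentiation; for negative n it first replaces s by 1 / s.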
1071 real_32 xpwr (real_32 s, int_4 n)
1072 {
1073 real_32 t;
1074 unsigned k, m;
1075 t = X_1;
1076 if (n < 0) {
1077 m = -n;
1078 if ((xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()"))) {
1079 return X_0;
1080 }
1081 s = xdiv (X_1, s);
1082 } else {
1083 m = n;
1084 }
1085 if (m != 0) {
1086 k = 1;
1087 while (1) {
1088 if (k & m) {
1089 t = xmul (s, t);
1090 }
1091 if ((k <<= 1) <= m) {
1092 s = xmul (s, s);
1093 } else {
1094 break;
1095 }
1096 }
1097 } else {
1098 xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()");
1099 }
1100 return t;
1101 }
1102
1103 real_32 xpow (real_32 x, real_32 y)
1104 {
1105 if (xsigerr ((xgetsgn (&x)) || xgetexp (&x) == -X_EXPO_BIAS, XEDOM, "xpow()")) {
1106 return X_0;
1107 } else {
1108 return xexp2 (xmul (xlog2 (x), y));
1109 }
1110 }
1111
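// xsqrt (z) halves the exponent, seeds the mantissa with the double
// precision square root, and polishes the result with xItt_div Newton steps
// s <- s + (z - s * s) / (2 * s).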
1112 real_32 xsqrt (real_32 z)
1113 {
1114 real_32 s, h;
1115 int_2 m, e;
1116 unt_2 *pc;
1117 if ((xsigerr ((xgetsgn (&z)), XEDOM, "xsqrt()"))) {
1118 return X_0;
1119 } else {
1120 pc = SEXP (z);
1121 if (*pc == 0) {
1122 return X_0;
1123 }
1124 e = *pc - X_EXPO_BIAS;
1125 *pc = X_EXPO_BIAS + (e % 2);
1126 e /= 2;
1127 s = dbltox (sqrt (xtodbl (z)));
1128 for (m = 0; m < xItt_div; ++m) {
1129 h = xdiv (xadd (z, xmul (s, s), 1), xreal_2 (s, 1));
1130 s = xadd (s, h, 0);
1131 }
1132 pc = SEXP (s);
1133 *pc += e;
1134 return s;
1135 }
1136 }
1137
1138 static int_4 xodd (real_32 x)
1139 {
1140 unt_2 *p = SEXP (x);
1141 int_2 e, i;
1142 e = (*p & X_EXPO_MASK) - X_EXPO_BIAS; // exponent of x
1143 if (e < 0) {
1144 return 0;
1145 } else {
1146 for (i = 1; e / 16 > 0; i++, e -= 16);
1147 // Now e = 0, ..., 15
1148 return (i <= FLT256_LEN ? p[i] & 0x8000 >> e : 0);
1149 }
1150 }
1151
1152 real_32 xtan (real_32 z)
1153 {
1154 int_4 k, m;
1155 z = rred (z, 't', &k);
1156 if ((xsigerr (xreal_cmp (&z, &X_PI_OVER_2) >= 0, XEDOM, "xtan()"))) {
1157 return (!k ? X_PLUS_INF : X_MINUS_INF);
1158 } else {
1159 if (xreal_cmp (&z, &X_PI_OVER_4) == 1) {
1160 m = 1;
1161 z = xadd (X_PI_OVER_2, z, 1);
1162 } else {
1163 m = 0;
1164 }
1165 if (k != 0) {
1166 z = xneg (c_tan (z));
1167 } else {
1168 z = c_tan (z);
1169 }
1170 if (m != 0) {
1171 return xdiv (X_1, z);
1172 } else {
1173 return z;
1174 }
1175 }
1176 }
1177
1178 real_32 xcos (real_32 z)
1179 {
1180 int_4 k;
1181 z = rred (z, 'c', &k);
1182 if (xgetexp (&z) < xK_lin) {
1183 if (k != 0) {
1184 return xneg (X_1);
1185 } else {
1186 return X_1;
1187 }
1188 }
1189 z = c_tan (xreal_2 (z, -1));
1190 z = xmul (z, z);
1191 z = xdiv (xadd (X_1, z, 1), xadd (X_1, z, 0));
1192 if (k != 0) {
1193 return xneg (z);
1194 } else {
1195 return z;
1196 }
1197 }
1198
1199 real_32 xsin (real_32 z)
1200 {
1201 int_4 k;
1202 z = rred (z, 's', &k);
1203 if (xgetexp (&z) >= xK_lin) {
1204 z = c_tan (xreal_2 (z, -1));
1205 z = xdiv (xreal_2 (z, 1), xadd (X_1, xmul (z, z), 0));
1206 }
1207 if (k != 0) {
1208 return xneg (z);
1209 } else {
1210 return z;
1211 }
1212 }
1213
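// c_tan evaluates tan (z) for a reduced argument with the continued fraction
// tan z = z / (1 - z^2 / (3 - z^2 / (5 - ...))), truncated at xMS_trg terms.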
1214 static real_32 c_tan (real_32 z)
1215 {
1216 real_32 s, f, d;
1217 int_4 m;
1218 unt_2 k;
1219 if (xgetexp (&z) < xK_lin)
1220 return z;
1221 s = xneg (xmul (z, z));
1222 for (k = 1; k <= FLT256_LEN && s.value[k] == 0; k++);
1223 if ((xsigerr (s.value[0] == 0xffff && k > FLT256_LEN, XFPOFLOW, NO_TEXT))) {
1224 return X_0;
1225 } else {
1226 f = X_0;
1227 for (d = inttox (m = xMS_trg); m > 1;) {
1228 f = xdiv (s, xadd (d, f, 0));
1229 d = inttox (m -= 2);
1230 }
1231 return xdiv (z, xadd (d, f, 0));
1232 }
1233 }
1234
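// rred reduces the argument z of sin ('s'), cos ('c') or tan ('t') to the
// range [0, pi/2] and reports in *ps whether the result of the reduced
// computation must be negated to account for sign and quadrant.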
1235 static real_32 rred (real_32 z, int_4 kf, int_4 *ps)
1236 {
1237 real_32 is, q;
1238 if (xgetsgn (&z)) {
1239 z = xneg (z);
1240 is = X_1;
1241 } else {
1242 is = X_0;
1243 }
1244 z = xfmod (z, X_PI, &q);
1245 if (kf == 't') {
1246 q = is;
1247 } else if (kf == 's') {
1248 q = xadd (q, is, 0);
1249 }
1250 if (xreal_cmp (&z, &X_PI_OVER_2) == 1) {
1251 z = xadd (X_PI, z, 1);
1252 if (kf == 'c' || kf == 't') {
1253 q = xadd (q, X_1, 0);
1254 }
1255 }
1256 *ps = (xodd (q)) ? 1 : 0;
1257 return z;
1258 }
1259
1260 // COMPLEX*64
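// A complex_64 is a pair of real_32 fields re and im, so the routines below
// reduce complex arithmetic to the REAL*32 primitives above. An illustrative
// sketch of building and multiplying two constants:
//
//   complex_64 z = cxmul (cxreset (X_1, X_2), cxreset (X_3, X_1));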
1261
1262 complex_64 cxadd (complex_64 z1, complex_64 z2, int_4 k)
1263 {
1264 complex_64 w;
1265 w.re = xadd (z1.re, z2.re, k);
1266 w.im = xadd (z1.im, z2.im, k);
1267 return w;
1268 }
1269
1270 complex_64 cxsum (complex_64 z1, complex_64 z2)
1271 {
1272 complex_64 w;
1273 w.re = xadd (z1.re, z2.re, 0);
1274 w.im = xadd (z1.im, z2.im, 0);
1275 return w;
1276 }
1277
1278 complex_64 cxsub (complex_64 z1, complex_64 z2)
1279 {
1280 complex_64 w;
1281 w.re = xadd (z1.re, z2.re, 1);
1282 w.im = xadd (z1.im, z2.im, 1);
1283 return w;
1284 }
1285
1286 complex_64 cxmul (complex_64 z1, complex_64 z2)
1287 {
1288 complex_64 w;
1289 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 1);
1290 w.im = xadd (xmul (z1.im, z2.re), xmul (z1.re, z2.im), 0);
1291 return w;
1292 }
1293
1294 complex_64 cxrmul (real_32 c, complex_64 z)
1295 {
1296 complex_64 w;
1297 w.re = xmul (c, z.re);
1298 w.im = xmul (c, z.im);
1299 return w;
1300 }
1301
1302 complex_64 cxdrot (complex_64 z)
1303 {
1304 complex_64 y;
1305 y.re = z.im;
1306 y.im = z.re;
1307 // Multiplication by +i
1308 y.re.value[0] ^= X_SIGN_MASK;
1309 return y;
1310 }
1311
1312 complex_64 cxrrot (complex_64 z)
1313 {
1314 complex_64 y;
1315 y.re = z.im;
1316 y.im = z.re;
1317 // Multiplication by -i
1318 y.im.value[0] ^= X_SIGN_MASK;
1319 return y;
1320 }
1321
1322 complex_64 cxdiv (complex_64 z1, complex_64 z2)
1323 {
1324 int_4 tv = cxrec (z2, &z2);
1325 if (!xsigerr (!tv, XEDIV, "cxdiv()")) {
1326 complex_64 w;
1327 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 1);
1328 w.im = xadd (xmul (z1.im, z2.re), xmul (z1.re, z2.im), 0);
1329 return w;
1330 } else {
1331 return X_0_0I;
1332 }
1333 }
1334
1335 complex_64 cxinv (complex_64 z)
1336 {
1337 int_4 tv = cxrec (z, &z);
1338 if (!xsigerr (!tv, XEDOM, "cxinv()")) {
1339 return z;
1340 } else {
1341 return X_0_0I;
1342 }
1343 }
1344
1345 complex_64 cxsqr (complex_64 z)
1346 {
1347 complex_64 w;
1348 w.re = xadd (xmul (z.re, z.re), xmul (z.im, z.im), 1);
1349 w.im = xmul (X_2, xmul (z.im, z.re));
1350 return w;
1351 }
1352
1353 complex_64 cxsqrt (complex_64 z)
1354 {
1355 complex_64 w;
1356 if (xsgn (&(z.re)) == 0 && xsgn (&(z.im)) == 0) {
1357 w = X_0_0I;
1358 } else if (xis0 (&(z.im))) {
1359 real_32 s = xsqrt (xabs (z.re));
1360 if (xsgn (&(z.re)) == -1) {
1361 w = (complex_64) {X_0, s};
1362 } else {
1363 w = (complex_64) {s, X_0};
1364 }
1365 } else {
1366 real_32 mod = xsqrt (cxabs (z)), arg = xreal_2 (cxarg (z), -1);
1367 w.re = xmul (mod, xcos (arg));
1368 w.im = xmul (mod, xsin (arg));
1369 }
1370 return w;
1371 }
1372
1373 complex_64 cxreset (real_32 re, real_32 im)
1374 {
1375 complex_64 y;
1376 y.re = re;
1377 y.im = im;
1378 return y;
1379 }
1380
1381 complex_64 cxconv (real_32 x)
1382 {
1383 complex_64 y;
1384 y.re = x;
1385 y.im = X_0;
1386 return y;
1387 }
1388
1389 real_32 cxreal (complex_64 z)
1390 {
1391 return z.re;
1392 }
1393
1394 real_32 cximag (complex_64 z)
1395 {
1396 return z.im;
1397 }
1398
1399 complex_64 cxswap (complex_64 z)
1400 {
1401 complex_64 y;
1402 y.re = z.im;
1403 y.im = z.re;
1404 return y;
1405 }
1406
1407 complex_64 cxneg (complex_64 z)
1408 {
1409 z.re.value[0] ^= X_SIGN_MASK;
1410 z.im.value[0] ^= X_SIGN_MASK;
1411 return z;
1412 }
1413
1414 complex_64 cxconj (complex_64 z)
1415 {
1416 return (z.im.value[0] ^= X_SIGN_MASK, z);
1417 }
1418
1419 #define XBOUND (FLT256_LEN * 16 + 8)
1420
1421 real_32 cxabs (complex_64 z)
1422 {
1423 if (xreal_cmp (&(z.re), &X_0) == 0 && xreal_cmp (&(z.im), &X_0) == 0) {
1424 return X_0;
1425 } else {
1426 int_4 ea = (z.re.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1427 int_4 eb = (z.im.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1428 if (ea > eb + XBOUND) {
1429 return z.re;
1430 } else if (eb > ea + XBOUND) {
1431 return z.im;
1432 } else {
1433 z.re.value[0] -= eb;
1434 z.im.value[0] = X_EXPO_BIAS;
1435 real_32 x = xsqrt (xadd (xmul (z.re, z.re), xmul (z.im, z.im), 0));
1436 x.value[0] += eb;
1437 return x;
1438 }
1439 }
1440 }
1441
1442 real_32 cxarg (complex_64 z)
1443 {
1444 int_4 rs = xsgn (&(z.re)), is = xsgn (&(z.im));
1445 if (rs > 0) {
1446 return xatan (xdiv (z.im, z.re));
1447 } else if (rs < 0) {
1448 z.re.value[0] ^= X_SIGN_MASK;
1449 z.im.value[0] ^= X_SIGN_MASK;
1450 if (is >= 0) {
1451 return xadd (X_PI, xatan (xdiv (z.im, z.re)), 0);
1452 } else {
1453 return xadd (xatan (xdiv (z.im, z.re)), X_PI, 1);
1454 }
1455 } else { // z.re is zero !
1456 if (!xsigerr (is == 0, XEDOM, "cxarg()")) {
1457 return (is > 0 ? X_PI_OVER_2 : xneg (X_PI_OVER_2));
1458 } else {
1459 return xneg (X_PI_OVER_2); // Dummy value :)
1460 }
1461 }
1462 }
1463
1464 logical_4 cxrec (complex_64 z, complex_64 * w)
1465 {
1466 if (xreal_cmp (&(z.re), &X_0) == 0 && xreal_cmp (&(z.im), &X_0) == 0) {
1467 return FALSE;
1468 } else {
1469 int_4 sa = z.re.value[0] & X_SIGN_MASK;
1470 int_4 sb = z.im.value[0] & X_SIGN_MASK;
1471 int_4 ea = (z.re.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1472 int_4 eb = (z.im.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1473 real_32 x;
1474 if (ea > eb + XBOUND) {
1475 x = z.re;
1476 } else if (eb > ea + XBOUND) {
1477 x = z.im;
1478 } else {
1479 z.re.value[0] -= eb;
1480 z.im.value[0] = X_EXPO_BIAS;
1481 x = xsqrt (xadd (xmul (z.re, z.re), xmul (z.im, z.im), 0));
1482 x.value[0] += eb;
1483 z.re.value[0] += eb;
1484 z.im.value[0] += eb;
1485 }
1486 w->re = xdiv (xdiv (z.re, x), x);
1487 w->im = xdiv (xdiv (z.im, x), x);
1488 w->re.value[0] |= sa;
1489 w->im.value[0] |= X_SIGN_MASK ^ sb;
1490 return TRUE;
1491 }
1492 }
1493
1494 complex_64 cxexp (complex_64 z)
1495 {
1496 complex_64 w;
1497 w.re = xmul (xexp (z.re), xcos (z.im));
1498 w.im = xmul (xexp (z.re), xsin (z.im));
1499 return w;
1500 }
1501
1502 complex_64 cxlog (complex_64 z)
1503 {
1504 real_32 mod;
1505 complex_64 w;
1506 mod = cxabs (z);
1507 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog()")) {
1508 return X_0_0I;
1509 } else {
1510 w.re = xlog (mod);
1511 w.im = cxarg (z);
1512 return w;
1513 }
1514 }
1515
1516 complex_64 cxlog10 (complex_64 z)
1517 {
1518 real_32 mod;
1519 complex_64 w;
1520 mod = cxabs (z);
1521 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog10()")) {
1522 return X_0_0I;
1523 } else {
1524 w.re = xlog10 (mod);
1525 w.im = xmul (cxarg (z), X_LOG10_E);
1526 return w;
1527 }
1528 }
1529
1530 complex_64 cxlog2 (complex_64 z)
1531 {
1532 real_32 mod;
1533 complex_64 w;
1534 mod = cxabs (z);
1535 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog2()")) {
1536 return X_0_0I;
1537 } else {
1538 w.re = xlog2 (mod);
1539 w.im = xmul (cxarg (z), X_LOG2_E);
1540 return w;
1541 }
1542 }
1543
1544 complex_64 cxlog_sqrt (complex_64 z)
1545 {
1546 real_32 mod = cxabs (z);
1547 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog_sqrt()")) {
1548 return X_0_0I;
1549 } else {
1550 complex_64 w;
1551 w.re = xreal_2 (xlog (mod), -1);
1552 w.im = xreal_2 (cxarg (z), -1);
1553 return w;
1554 }
1555 }
1556
1557 complex_64 cxsinh (complex_64 z)
1558 {
1559 complex_64 w = cxsub (cxexp (z), cxexp (cxneg (z)));
1560 w.re = xreal_2 (w.re, -1);
1561 w.im = xreal_2 (w.im, -1);
1562 return w;
1563 }
1564
1565 complex_64 cxcosh (complex_64 z)
1566 {
1567 complex_64 w = cxsum (cxexp (z), cxexp (cxneg (z)));
1568 w.re = xreal_2 (w.re, -1);
1569 w.im = xreal_2 (w.im, -1);
1570 return w;
1571 }
1572
1573 complex_64 cxtanh (complex_64 z)
1574 {
1575 if (xsigerr (xreal_cmp (&(z.re), &xEmax) > 0, XFPOFLOW, NO_TEXT)) {
1576 return X_1_0I;
1577 } else if (xsigerr (xreal_cmp (&(z.re), &xEmin) < 0, XFPOFLOW, NO_TEXT)) {
1578 return cxneg (X_1_0I);
1579 } else {
1580 complex_64 w;
1581 if (xsigerr (!cxrec (cxcosh (z), &w), XEDOM, "cxtanh()")) {
1582 return X_0_0I;
1583 } else {
1584 return cxmul (cxsinh (z), w);
1585 }
1586 }
1587 }
1588
1589 complex_64 cxasinh (complex_64 z)
1590 {
1591 // This way cxasinh() also works correctly for real numbers very near to -oo.
1592 complex_64 w = cxsqrt (cxsum (X_1_0I, cxsqr (z)));
1593 real_32 ls = cxabs (cxsum (z, w));
1594 real_32 rs = xmul (X_VSV, cxabs (z));
1595 if (xreal_cmp (&ls, &rs) < 0) {
1596 return cxneg (cxlog (cxsub (w, z)));
1597 } else {
1598 return cxlog (cxsum (z, w));
1599 }
1600 }
1601
1602 complex_64 cxacosh (complex_64 z)
1603 {
1604 complex_64 w = cxsqrt (cxsub (cxsqr (z), X_1_0I));
1605 real_32 ls = cxabs (cxsum (z, w));
1606 real_32 rs = xmul (X_VSV, cxabs (z));
1607 if (xreal_cmp (&ls, &rs) < 0) {
1608 return cxneg (cxlog (cxsub (z, w)));
1609 } else {
1610 return cxlog (cxsum (z, w));
1611 }
1612 }
1613
1614 complex_64 cxatanh (complex_64 z)
1615 {
1616 real_32 t = xadd (xabs (z.re), X_1, 1);
1617 int_4 errcond = xsgn (&(z.im)) == 0 && xsgn (&t) == 0;
1618 if (xsigerr (errcond, XEDOM, "cxatanh()")) {
1619 return X_0_0I;
1620 } else {
1621 complex_64 w = cxdiv (cxsum (X_1_0I, z), cxsub (X_1_0I, z));
1622 w = cxlog_sqrt (w);
1623 return w;
1624 }
1625 }
1626
1627 complex_64 cxgdiv (complex_64 z1, complex_64 z2)
1628 {
1629 z1.re = xround (z1.re);
1630 z1.im = xround (z1.im);
1631 z2.re = xround (z2.re);
1632 z2.im = xround (z2.im);
1633 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1634 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxgdiv()")) {
1635 return X_0_0I;
1636 } else {
1637 complex_64 w;
1638 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1639 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1640 w.re = xround (xdiv (w.re, mod2));
1641 w.im = xround (xdiv (w.im, mod2));
1642 return w;
1643 }
1644 }
1645
1646 complex_64 cxidiv (complex_64 z1, complex_64 z2)
1647 {
1648 z1.re = xround (z1.re);
1649 z1.im = xround (z1.im);
1650 z2.re = xround (z2.re);
1651 z2.im = xround (z2.im);
1652 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1653 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxidiv()")) {
1654 return X_0_0I;
1655 } else {
1656 complex_64 w;
1657 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1658 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1659 w.re = xfix (xdiv (w.re, mod2));
1660 w.im = xfix (xdiv (w.im, mod2));
1661 return w;
1662 }
1663 }
1664
1665 complex_64 cxgmod (complex_64 z1, complex_64 z2)
1666 {
1667 z1.re = xround (z1.re);
1668 z1.im = xround (z1.im);
1669 z2.re = xround (z2.re);
1670 z2.im = xround (z2.im);
1671 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1672 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxgmod()")) {
1673 return X_0_0I;
1674 } else {
1675 complex_64 w, z;
1676 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1677 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1678 w.re = xround (xdiv (w.re, mod2));
1679 w.im = xround (xdiv (w.im, mod2));
1680 z.re = xadd (xmul (w.re, z2.re), xmul (w.im, z2.im), 1);
1681 z.im = xadd (xmul (w.im, z2.re), xmul (w.re, z2.im), 0);
1682 w.re = xround (xadd (z1.re, z.re, 1));
1683 w.im = xround (xadd (z1.im, z.im, 1));
1684 return w;
1685 }
1686 }
1687
1688 complex_64 cxmod (complex_64 z1, complex_64 z2)
1689 {
1690 z1.re = xround (z1.re);
1691 z1.im = xround (z1.im);
1692 z2.re = xround (z2.re);
1693 z2.im = xround (z2.im);
1694 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1695 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxmod()")) {
1696 return X_0_0I;
1697 } else {
1698 complex_64 w, z;
1699 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1700 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1701 w.re = xfix (xdiv (w.re, mod2));
1702 w.im = xfix (xdiv (w.im, mod2));
1703 z.re = xadd (xmul (w.re, z2.re), xmul (w.im, z2.im), 1);
1704 z.im = xadd (xmul (w.im, z2.re), xmul (w.re, z2.im), 0);
1705 w.re = xround (xadd (z1.re, z.re, 1));
1706 w.im = xround (xadd (z1.im, z.im, 1));
1707 return w;
1708 }
1709 }
1710
1711 static void unit_root (int_4 i, int_4 n, real_32 * a, real_32 * b)
1712 {
1713 // We assume n != 0
1714 i %= n;
1715 *a = xdiv (xmul (xreal_2 (inttox (i), 1), X_PI), inttox (n));
1716 *b = xsin (*a);
1717 *a = xcos (*a);
1718 if (xgetexp (b) < -80) {
1719 *b = X_0;
1720 }
1721 if (xgetexp (a) < -80) {
1722 *a = X_0;
1723 }
1724 }
1725
1726 complex_64 cxpwr (complex_64 z, int_4 n)
1727 {
1728 real_32 mod = cxabs (z);
1729 if (xsgn (&mod) <= 0) {
1730 (void) xsigerr (n <= 0, XEBADEXP, "cxpwr()");
1731 return X_0_0I;
1732 } else {
1733 real_32 arg = xmul (inttox (n), cxarg (z));
1734 mod = xpwr (mod, n);
1735 complex_64 w;
1736 w.re = xmul (mod, xcos (arg));
1737 w.im = xmul (mod, xsin (arg));
1738 return w;
1739 }
1740 }
1741
1742 complex_64 cxroot (complex_64 z, int_4 i, int_4 n)
1743 {
1744 if (xsigerr (n == 0, XEBADEXP, "cxroot()")) {
1745 return X_0_0I;
1746 } else {
1747 real_32 mod = cxabs (z);
1748 if (xsgn (&mod) <= 0) {
1749 (void) xsigerr (n < 0, XEBADEXP, "cxroot()");
1750 return X_0_0I;
1751 } else { // mod > 0
1752 real_32 arg = xdiv (cxarg (z), inttox (n));
1753 real_32 e = xdiv (X_1, inttox (n)); // 1/n
1754 // x^e = exp(e*log(x)) for any x > 0
1755 mod = xexp (xmul (e, xlog (mod)));
1756 complex_64 w, zz;
1757 w.re = xmul (mod, xcos (arg));
1758 w.im = xmul (mod, xsin (arg));
1759 real_32 a, b;
1760 unit_root (i, n, &a, &b);
1761 zz.re = xadd (xmul (w.re, a), xmul (w.im, b), 1);
1762 zz.im = xadd (xmul (w.im, a), xmul (w.re, b), 0);
1763 return zz;
1764 }
1765 }
1766 }
1767
1768 complex_64 cxpow (complex_64 z1, complex_64 z2)
1769 {
1770 real_32 mod = cxabs (z1);
1771 if (xsgn (&mod) <= 0) {
1772 (void) xsigerr (xsgn (&z2.re) <= 0, XEBADEXP, "cxpow()");
1773 return X_0_0I;
1774 } else {
1775 real_32 arg = cxarg (z1);
1776 real_32 a = xadd (xmul (z2.re, xlog (mod)), xmul (z2.im, arg), 1);
1777 real_32 b = xadd (xmul (z2.re, arg), xmul (z2.im, xlog (mod)), 0);
1778 complex_64 w;
1779 w.re = xmul (xexp (a), xcos (b));
1780 w.im = xmul (xexp (a), xsin (b));
1781 return w;
1782 }
1783 }
1784
1785 logical_4 cxis0 (const complex_64 * z)
1786 {
1787 return (xis0 (&z->re) && xis0 (&z->im));
1788 }
1789
1790 logical_4 cxnot0 (const complex_64 * z)
1791 {
1792 return (xnot0 (&z->re) || xnot0 (&z->im));
1793 }
1794
1795 logical_4 cxeq (complex_64 z1, complex_64 z2)
1796 {
1797 return (xreal_cmp (&z1.re, &z2.re) == 0 && xreal_cmp (&z1.im, &z2.im) == 0);
1798 }
1799
1800 logical_4 cxneq (complex_64 z1, complex_64 z2)
1801 {
1802 return (xreal_cmp (&z1.re, &z2.re) != 0 || xreal_cmp (&z1.im, &z2.im) != 0);
1803 }
1804
1805 complex_64 cxsin (complex_64 z)
1806 {
1807 complex_64 w1, w2;
1808 w1 = cxdrot (z); // Now w1= i*z, where i={0,1}
1809 w2 = cxrrot (z); // Now w2= -i*z, where i={0,1}
1810 w1 = cxsub (cxexp (w1), cxexp (w2));
1811 w2.re = xreal_2 (w1.im, -1);
1812 w1.re.value[0] ^= X_SIGN_MASK;
1813 w2.im = xreal_2 (w1.re, -1); // Now w2= (exp(i*z)-exp(-i*z))/2i
1814 return w2;
1815 }
1816
1817 complex_64 cxcos (complex_64 z)
1818 {
1819 complex_64 w1, w2;
1820 w1 = cxdrot (z); // Now w1= i*z, where i={0,1}
1821 w2 = cxrrot (z); // Now w2= -i*z, where i={0,1}
1822 w1 = cxsum (cxexp (w1), cxexp (w2));
1823 w2.re = xreal_2 (w1.re, -1);
1824 w2.im = xreal_2 (w1.im, -1);
1825 return w2;
1826 }
1827
1828 complex_64 cxtan (complex_64 z)
1829 {
1830 if (xsigerr (xreal_cmp (&(z.im), &xEmax) > 0, XFPOFLOW, NO_TEXT)) {
1831 return X_0_1I;
1832 } else if (xsigerr (xreal_cmp (&(z.im), &xEmin) < 0, XFPOFLOW, NO_TEXT)) {
1833 return cxneg (X_0_1I);
1834 } else {
1835 complex_64 w;
1836 if (xsigerr (!cxrec (cxcos (z), &w), XEDOM, "cxtan()")) {
1837 return X_0_0I;
1838 } else {
1839 return cxmul (cxsin (z), w);
1840 }
1841 }
1842 }
1843
1844 complex_64 cxasin (complex_64 z)
1845 {
1846 complex_64 w = cxsqrt (cxsub (X_1_0I, cxsqr (z)));
1847 real_32 ls = cxabs (cxsum (cxdrot (z), w));
1848 real_32 rs = xmul (X_VSV, cxabs (z));
1849 if (xreal_cmp (&ls, &rs) < 0) {
1850 w = cxdrot (cxlog (cxsub (w, cxdrot (z))));
1851 } else {
1852 w = cxrrot (cxlog (cxsum (cxdrot (z), w)));
1853 }
1854 return w;
1855 }
1856
1857 complex_64 cxacos (complex_64 z)
1858 {
1859 complex_64 w = cxsqrt (cxsub (X_1_0I, cxsqr (z)));
1860 real_32 ls = cxabs (cxsum (z, cxdrot (w)));
1861 real_32 rs = xmul (X_VSV, cxabs (z));
1862 if (xreal_cmp (&ls, &rs) < 0) {
1863 w = cxdrot (cxlog (cxsub (z, cxdrot (w))));
1864 } else {
1865 w = cxrrot (cxlog (cxsum (z, cxdrot (w))));
1866 }
1867 return w;
1868 }
1869
1870 complex_64 cxatan (complex_64 z)
1871 {
1872 real_32 mod = xadd (xabs (z.im), X_1, 1);
1873 int_4 errcond = xsgn (&(z.re)) == 0 && xsgn (&mod) == 0;
1874 if (xsigerr (errcond, XEDOM, "cxatan()")) {
1875 return X_0_0I;
1876 } else {
1877 // This way, cxatan() also works correctly for complex numbers very far from the origin.
1878 complex_64 w;
1879 mod = cxabs (z);
1880 if (xreal_cmp (&mod, &X_VGV) > 0) {
1881 w = cxsqrt (cxsum (X_1_0I, cxsqr (z)));
1882 w = cxdiv (cxsum (X_1_0I, cxdrot (z)), w);
1883 w = cxrrot (cxlog (w));
1884 } else {
1885 w = cxdiv (cxsum (X_1_0I, cxdrot (z)), cxsub (X_1_0I, cxdrot (z)));
1886 w = cxrrot (cxlog_sqrt (w));
1887 }
1888 return w;
1889 }
1890 }
1891
1892 complex_64 cxflt(complex_8 z)
1893 {
1894 complex_64 zz;
1895 zz.re = flttox (creal (z));
1896 zz.im = flttox (cimag (z));
1897 return zz;
1898 }
1899
1900 complex_64 cxdbl(complex_16 z)
1901 {
1902 complex_64 zz;
1903 zz.re = dbltox (creal (z));
1904 zz.im = dbltox (cimag (z));
1905 return zz;
1906 }
1907
1908 complex_64 cxquad(complex_32 z)
1909 {
1910 complex_64 zz;
1911 zz.re = dbltox (crealq (z));
1912 zz.im = dbltox (cimagq (z));
1913 return zz;
1914 }
1915
1916 complex_32 qxcmplx(real_32 re, real_32 im)
1917 {
1918 complex_32 zz;
1919 __real__ zz = xtoquad (re);
1920 __imag__ zz = xtoquad (im);
1921 return zz;
1922 }
1923
1924 complex_64 _cquadtop (complex_64 *zz, complex_32 z)
1925 {
1926 zz->re = dbltox (crealq (z));
1927 zz->im = dbltox (cimagq (z));
1928 return *zz;
1929 }
1930
1931 complex_64 cxreal32(real_32 z)
1932 {
1933 complex_64 zz;
1934 zz.re = z;
1935 zz.im = X_0;
1936 return zz;
1937 }
1938
1939 complex_64 _coctotop(complex_64 *zz, real_32 z)
1940 {
1941 zz->re = z;
1942 zz->im = X_0;
1943 return *zz;
1944 }
1945
1946 // VIF additions (REAL*32)
1947
1948 real_32 xtenup (int_4 n)
1949 {
1950 if (n == 0) {
1951 return X_1;
1952 } else if (n == 1) {
1953 return X_10;
1954 } else if (n == -1) {
1955 return X_1_OVER_10;
1956 }
1957 real_32 s = X_10, t;
1958 unsigned k, m;
1959 t = X_1;
1960 if (n < 0) {
1961 m = -n;
1962 if ((xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()"))) {
1963 return X_0;
1964 }
1965 s = xdiv (X_1, s);
1966 } else {
1967 m = n;
1968 }
1969 if (m != 0) {
1970 k = 1;
1971 while (1) {
1972 if (k & m) {
1973 t = xmul (s, t);
1974 }
1975 if ((k <<= 1) <= m) {
1976 s = xmul (s, s);
1977 } else {
1978 break;
1979 }
1980 }
1981 } else {
1982 xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()");
1983 }
1984 return t;
1985 }
1986
1987 real_32 xcotan (real_32 x)
1988 {
1989 // Intrinsic function so arguments are not pointers.
1990 return xdiv (X_1, xtan (x));
1991 }
1992
1993 real_32 xacotan (real_32 x)
1994 {
1995 // Intrinsic function so arguments are not pointers.
1996 return xatan (xdiv (X_1, x));
1997 }
1998
1999 real_32 _xsgn (real_32 a, real_32 b)
2000 {
2001 // Intrinsic function so arguments are not pointers.
2002 real_32 x = (xgetsgn (&a) == 0 ? a : xneg (a));
2003 return (xgetsgn (&b) == 0 ? x : xneg (x));
2004 }
2005
2006 real_32 _zabs_64 (real_32 re, real_32 im)
2007 {
2008 // Intrinsic function so arguments are not pointers.
2009 return cxabs (CMPLXX (re, im));
2010 }
2011
2012 real_32 _xhypot (real_32 a, real_32 b)
2013 {
2014 if (xis0 (&a) && xis0 (&b)) {
2015 return X_0;
2016 } else if (xgt (a, b)) {
2017 real_32 q = xdiv (b, a);
2018 real_32 z = xsqrt (xsum (X_1, xmul (q, q)));
2019 return xmul (xabs (a), z);
2020 } else {
2021 real_32 q = xdiv (a, b);
2022 real_32 z = xsqrt (xsum (X_1, xmul (q, q)));
2023 return xmul (xabs (b), z);
2024 }
2025 }
2026
2027 // Conversion.
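// An illustrative round trip through this conversion layer (sketch only):
//
//   real_32 x = dbltox (2.718281828459045);
//   real_8 d = xtodbl (x); // back to REAL*8
//   real_16 q = xtoquad (x); // up to REAL*16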
2028
2029 real_32 inttox (int_4 n)
2030 {
2031 REAL32 pe;
2032 unt_2 *pc, e;
2033 unt_4 k, h;
2034 bzero (pe, sizeof (pe));
2035 k = ABS (n);
2036 pc = (unt_2 *) &k;
2037 if (n == 0) {
2038 return *(real_32 *) pe;
2039 }
2040
2041 #if __BYTE_ORDER == __LITTLE_ENDIAN
2042 pe[1] = *(pc + 1);
2043 pe[2] = *pc;
2044 #else
2045 pe[1] = *pc;
2046 pe[2] = *(pc + 1);
2047 #endif
2048
2049 for (e = 0, h = 1; h <= k && e < ((8 * sizeof (unt_4)) - 1); h <<= 1, ++e);
2050 if (h <= k) {
2051 e += 1;
2052 }
2053 *pe = X_EXPO_BIAS + e - 1;
2054 if (n < 0) {
2055 *pe |= X_SIGN_MASK;
2056 }
2057 xlshift ((8 * sizeof (unt_4)) - e, pe + 1, FLT256_LEN);
2058 return *(real_32 *) pe;
2059 }
2060
2061 real_4 xtoflt (real_32 s)
2062 {
2063 // An extended floating point number is represented as a combination of the
2064 // following elements:
2065 //
2066 // sign bit(s): 0 -> positive, 1 -> negative ;
2067 // exponent(e): 15-bit biased integer (bias=16383) ;
2068 // mantissa(m): 7 (or more) words of 16 bit length with the
2069 // leading 1 explicitly represented.
2070 //
2071 // Thus f = (-1)^s * 2^[e-16383] * m, with 1 <= m < 2.
2072 //
2073 // This format supports a dynamic range of:
2074 //
2075 // 2^16384 > f > 2^[-16383] or
2076 // 1.19*10^4932 > f > 1.68*10^-[4932].
2077 //
2078 // Special values of the exponent are:
2079 //
2080 // all ones -> infinity (floating point overflow)
2081 // all zeros -> number = zero.
2082 //
2083 // Underflow in operations is handled by a flush to zero. Thus, a number with
2084 // the exponent zero and nonzero mantissa is invalid (not-a-number).
2085 //
2086 //
2087 union {unt_2 pe[2]; real_4 f;} v;
2088 unt_2 *pc, u;
2089 int_2 i, e;
2090 pc = SEXP (s);
2091 u = *pc & X_SIGN_MASK;
2092 e = (*pc & X_EXPO_MASK) - xF_bias;
2093 //
2094 // u is the sign of the number s.
2095 // e == (exponent of s) + 127
2096 //
2097 if (e >= xF_max) {
2098 return (!u ? FLT_MAX : -FLT_MAX);
2099 }
2100 if (e <= 0) {
2101 return 0.;
2102 }
2103 for (i = 0; i < 2; v.pe[i] = *++pc, i++);
2104 // In the IEEE 754 Standard the leading 1 is not represented.
2105 v.pe[0] &= X_EXPO_MASK;
2106 // Now in pe[0],pe[1] we have 31 bits of mantissa.
2107 // But only the first 23 of them must be put in the
2108 // final real_4 number.
2109 xrshift (xF_lex - 1, v.pe, 2);
2110 // We have just loaded the mantissa and now we
2111 // are going to load exponent and sign.
2112 v.pe[0] |= (e << (16 - xF_lex));
2113 v.pe[0] |= u;
2114 #if __BYTE_ORDER == __LITTLE_ENDIAN
2115 u = v.pe[0];
2116 v.pe[0] = v.pe[1];
2117 v.pe[1] = u;
2118 #endif
2119 return v.f;
2120 }
2121
2122 real_32 flttox (real_4 y)
2123 {
2124 REAL32 pe;
2125 unt_2 *pc, u;
2126 int_2 i, e;
2127 if (y < FLT_MIN && y > -FLT_MIN) {
2128 return X_0;
2129 }
2130 bzero (pe, sizeof (pe));
2131 pc = (unt_2 *) &y;
2132 #if __BYTE_ORDER == __LITTLE_ENDIAN
2133 pc += 1;
2134 #endif
2135 u = *pc & X_SIGN_MASK;
2136 e = xF_bias + ((*pc & X_EXPO_MASK) >> (16 - xF_lex));
2137 // Now u is the sign of y and e is the
2138 // biased exponent (exponent + bias).
2139 #if __BYTE_ORDER == __LITTLE_ENDIAN
2140 for (i = 1; i < 3; pe[i] = *pc--, i++);
2141 #else
2142 for (i = 1; i < 3; pe[i] = *pc++, i++);
2143 #endif
2144 pc = pe + 1;
2145 xlshift (xF_lex - 1, pc, 2);
2146 *pc |= X_SIGN_MASK;
2147 // We have just put in pe[1],pe[2] the whole
2148 // mantissa of y with a leading 1.
2149 // Now we only have to put the exponent and sign
2150 // in pe[0].
2151 *pe = e;
2152 *pe |= u;
2153 return *(real_32 *) pe;
2154 }
2155
2156 real_8 xtodbl (real_32 s)
2157 {
2158 union {unt_2 pe[4]; real_8 d;} v;
2159 unt_2 *pc, u;
2160 int_2 i, e;
2161 pc = SEXP (s);
2162 u = *pc & X_SIGN_MASK;
2163 e = (*pc & X_EXPO_MASK) - xD_bias;
2164 if (e >= xD_max) {
2165 return (!u ? DBL_MAX : -DBL_MAX);
2166 }
2167 if (e <= 0) {
2168 return 0.;
2169 }
2170 for (i = 0; i < 4; v.pe[i] = *++pc, i++);
2171 v.pe[0] &= X_EXPO_MASK;
2172 xrshift (xD_lex - 1, v.pe, 4);
2173 v.pe[0] |= (e << (16 - xD_lex));
2174 v.pe[0] |= u;
2175 #if __BYTE_ORDER == __LITTLE_ENDIAN
2176 u = v.pe[3];
2177 v.pe[3] = v.pe[0];
2178 v.pe[0] = u;
2179 u = v.pe[2];
2180 v.pe[2] = v.pe[1];
2181 v.pe[1] = u;
2182 #endif
2183 return v.d;
2184 }
2185
2186 real_32 dbltox (real_8 y)
2187 {
2188 REAL32 pe;
2189 unt_2 *pc, u;
2190 int_2 i, e;
2191 if (y < DBL_MIN && y > -DBL_MIN) {
2192 return X_0;
2193 }
2194 bzero (pe, sizeof (pe));
2195 pc = (unt_2 *) &y;
2196 #if __BYTE_ORDER == __LITTLE_ENDIAN
2197 pc += 3;
2198 #endif
2199 u = *pc & X_SIGN_MASK;
2200 e = xD_bias + ((*pc & X_EXPO_MASK) >> (16 - xD_lex));
2201 #if __BYTE_ORDER == __LITTLE_ENDIAN
2202 for (i = 1; i < 5; pe[i] = *pc--, i++);
2203 #else
2204 for (i = 1; i < 5; pe[i] = *pc++, i++);
2205 #endif
2206 pc = pe + 1;
2207 xlshift (xD_lex - 1, pc, 4);
2208 *pc |= X_SIGN_MASK;
2209 *pe = e;
2210 *pe |= u;
2211 return *(real_32 *) pe;
2212 }
2213
2214 real_32 quadtox (real_16 x)
2215 {
2216 // Intrinsic function so arguments are not pointers.
2217 REAL32 z;
2218 unt_2 *y;
2219 int_4 i;
2220 if (x == 0.0q) {
2221 return X_0;
2222 }
2223 int_4 sinf = isinfq (x);
2224 if (sinf == 1) {
2225 return X_PLUS_INF;
2226 } else if (sinf == -1) {
2227 return X_MINUS_INF;
2228 }
2229 if (isnanq (x)) {
2230 return X_NAN;
2231 }
2232 bzero (z, sizeof (z));
2233 y = (unt_2 *) &x;
2234 for (i = 0; i <= 7; i++) {
2235 #if __BYTE_ORDER == __LITTLE_ENDIAN
2236 z[i] = y[7 - i];
2237 #else
2238 z[i] = y[i];
2239 #endif
2240 }
2241 // real_16 keeps the leading mantissa bit implicit, the HPA format stores it explicitly.
2242 unt_2 cy = 0x8000;
2243 for (i = 1; i < 8; i++) {
2244 if (z[i] & 0x1) {
2245 z[i] = (z[i] >> 1) | cy;
2246 cy = 0x8000;
2247 } else {
2248 z[i] = (z[i] >> 1) | cy;
2249 cy = 0x0;
2250 }
2251 }
2252 z[8] |= cy;
2253 return *(real_32 *) z;
2254 }
2255
2256 real_32 _quadtop (real_32 *z, real_16 x)
2257 {
2258 *z = quadtox (x);
2259 return *z;
2260 }
2261
2262 real_16 xtoquad (real_32 x)
2263 {
2264 // Intrinsic function so arguments are not pointers.
2265 REAL16 y;
2266 REAL32 z;
2267 int_4 i;
2268 memcpy (z, x.value, sizeof (real_32));
2269 // Catch NaN, +-Inf is handled correctly.
2270 if (xis_nan (&x)) {
2271 z[0] = 0x7fff;
2272 z[1] = 0xffff;
2273 }
2274 // real_16 keeps the leading mantissa bit implicit, the HPA format stores it explicitly.
2275 unt_2 cy = (z[8] & 0x8000 ? 0x1 : 0x0);
2276 for (i = 7; i > 0; i--) {
2277 if (z[i] & 0x8000) {
2278 z[i] = (z[i] << 1) | cy;
2279 cy = 0x1;
2280 } else {
2281 z[i] = (z[i] << 1) | cy;
2282 cy = 0x0;
2283 }
2284 }
2285 for (i = 0; i < 8; i++) {
2286 #if __BYTE_ORDER == __LITTLE_ENDIAN
2287 y[i] = z[7 - i];
2288 #else
2289 y[i] = z[i];
2290 #endif
2291 }
2292 // Avoid 'dereferencing type-punned pointer will break strict-aliasing rules'
2293 real_16 u;
2294 memcpy (&u, &y[0], sizeof (real_16));
2295 return u;
2296 }
2297
2298 real_16 cxrealq (complex_64 z)
2299 {
2300 return xtoquad (cxreal (z));
2301 }
2302
2303 real_16 cximagq (complex_64 z)
2304 {
2305 return xtoquad (cximag (z));
2306 }
2307
2308 real_32 strtox (char *s, char **end)
2309 {
2310 // This routine replaces the HPA solution - MvdV.
2311 // Algol 68 Genie employs the same algorithm.
2312 //
2313 // Initialize.
2314 #define N (FLT256_DIG + FLT256_GUARD)
2315 errno = 0;
2316 real_32 y[N];
2317 if (end != NO_REF_TEXT && (*end) != NO_TEXT) {
2318 (*end) = &(s[0]);
2319 }
2320 while (isspace (s[0])) {
2321 s++;
2322 }
2323 real_32 W;
2324 if (s[0] == '-') {
2325 W = X_MINUS_1;
2326 s++;
2327 } else if (s[0] == '+') {
2328 W = X_1;
2329 s++;
2330 } else {
2331 W = X_1;
2332 }
2333 // Scan mantissa digits and put them into "y".
2334 while (s[0] == '0') {
2335 s++;
2336 }
2337 int dot = -1, pos = 0, pow = 0;
2338 while (pow < N && (isdigit (s[pos]) || s[pos] == '.')) {
2339 if (s[pos] == '.') {
2340 dot = pos;
2341 } else {
2342 switch (s[pos]) {
2343 case '0': y[pow] = X_0; break;
2344 case '1': y[pow] = W; break;
2345 case '2': y[pow] = xmul (X_2, W); break;
2346 case '3': y[pow] = xmul (X_3, W); break;
2347 case '4': y[pow] = xmul (X_4, W); break;
2348 case '5': y[pow] = xmul (X_5, W); break;
2349 case '6': y[pow] = xmul (X_6, W); break;
2350 case '7': y[pow] = xmul (X_7, W); break;
2351 case '8': y[pow] = xmul (X_8, W); break;
2352 case '9': y[pow] = xmul (X_9, W); break;
2353 }
2354 W = xdiv (W, X_10);
2355 pow++;
2356 }
2357 pos++;
2358 }
2359 // Skip trailing digits.
2360 while (isdigit (s[pos])) {
2361 pos++;
2362 }
2363 if (end != NO_REF_TEXT && (*end) != NO_TEXT) {
2364 (*end) = &(s[pos]);
2365 }
2366 // Sum from low to high to preserve precision.
2367 real_32 sum = X_0;
2368 for (int k = pow - 1; k >= 0; k--) {
2369 sum = xsum (sum, y[k]);
2370 }
2371 // Optional exponent.
2372 int expo = 0;
2373 while (s[pos] == ' ') {
2374 pos++;
2375 }
2376 if (tolower (s[pos]) == 'e' || tolower (s[pos]) == 'q' || tolower (s[pos]) == 'x') {
2377 pos++;
2378 if (isdigit (s[pos]) || s[pos] == '-' || s[pos] == '+') {
2379 expo = (int) strtol (&(s[pos]), end, 10);
2380 }
2381 }
2382 // Standardize.
2383 if (dot >= 0) {
2384 expo += dot - 1;
2385 } else {
2386 expo += pow - 1;
2387 }
2388 while (xnot0 (&sum) && xlt1 (sum)) {
2389 sum = xmul (sum, X_10);
2390 expo--;
2391 }
2392 if (errno == 0) {
2393 return xmul (sum, xtenup (expo));
2394 } else {
2395 return X_0;
2396 }
2397 #undef N
2398 }
2399
2400 real_32 atox (char *q)
2401 {
2402 return strtox (q, NO_REF_TEXT);
2403 }
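// Illustrative use of the string conversion (sketch only):
//
//   real_32 pi = atox ("3.14159265358979323846264338327950288");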
2404
2405 real_32 _xdimx (real_32 x, real_32 y)
2406 {
2407 if (xgt (x, y)) {
2408 return xsub (x, y);
2409 } else {
2410 return X_0;
2411 }
2412 }
2413
2414 int_4 _xint4 (real_32 x_)
2415 {
2416 static real_16 y_;
2417 y_ = xtoquad (x_);
2418 return (int_4) _anintq (y_);
2419 }
2420
2421 int_4 _xnint4 (real_32 x_)
2422 {
2423 static real_16 y_;
2424 if (xgt (x_, X_0)) {
2425 y_ = xtoquad (xsum (x_, X_1_OVER_2));
2426 }
2427 else {
2428 y_ = xtoquad (xsub (x_, X_1_OVER_2));
2429 }
2430 return (int_4) (y_);
2431 }
2432
2433 int_8 _xint8 (real_32 x_)
2434 {
2435 static real_16 y_;
2436 y_ = xtoquad (x_);
2437 return (int_8) (y_);
2438 }
2439
2440 int_8 _xnint8 (real_32 x_)
2441 {
2442 static real_16 y_;
2443 if (xgt (x_, X_0)) {
2444 y_ = xtoquad (xsum (x_, X_1_OVER_2));
2445 }
2446 else {
2447 y_ = xtoquad (xsub (x_, X_1_OVER_2));
2448 }
2449 return (int_8) (y_);
2450 }
2451
2452 void _xdump (real_32 *z)
2453 {
2454 printf ("{{");
2455 for (int i = 0; i <= FLT256_LEN; i++) {
2456 printf("0x%04x", (z->value)[i]);
2457 if (i < FLT256_LEN) {
2458 printf(", ");
2459 }
2460 }
2461 printf ("}}\n");
2462 fflush (stdout);
2463 }
2464
2465 real_32 _x1mach (int_4 *i)
2466 {
2467 switch (*i) {
2468 // d1mach(1) = b**(emin-1), the smallest positive magnitude.
2469 case 1: return FLT256_MIN;
2470 // d1mach(2) = b**emax*(1 - b**(-t)), the largest magnitude.
2471 case 2: return FLT256_MAX;
2472 // d1mach(3) = b**(-t), the smallest relative spacing.
2473 case 3: return FLT256_EPSILON_HALF;
2474 // d1mach(4) = b**(1-t), the largest relative spacing.
2475 case 4: return FLT256_EPSILON;
2476 // d1mach(5) = log10(b)
2477 case 5: return X_LOG10_2;
2478 //
2479 default: return X_0;
2480 }
2481 }
2482
|