1 //! @file rts-real32.c
2 //! @author (see below)
3 //
4 //! @section Copyright
5 //
6 // This file is part of VIF - vintage FORTRAN compiler.
7 // Copyright 2020-2025 J. Marcel van der Veer <algol68g@xs4all.nl>.
8 //
9 //! @section License
10 //
11 // This program is free software; you can redistribute it and/or modify it
12 // under the terms of the GNU General Public License as published by the
13 // Free Software Foundation; either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful, but
17 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 // more details. You should have received a copy of the GNU General Public
20 // License along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22 //! @section Synopsis
23 //!
24 //! Runtime support for REAL*32 and COMPLEX*64.
25
26 // The code is based on the HPA Library, available from:
27 // <http://download-mirror.savannah.gnu.org/releases/hpalib/>
28 //
29 // Copyright (C) 2000 Daniel A. Atkinson <DanAtk@aol.com>
30 // Copyright (C) 2004 Ivano Primi <ivprimi@libero.it>
31 // Copyright (C) 2022 Marcel van der Veer <algol68g@xs4all.nl> - VIF version.
32 //
33 // HPA code is adapted to work with VIF to implement REAL*32 and COMPLEX*64.
34 // HPA was chosen since it stores a 256-bit float as a 256-bit struct, which
35 // is convenient for FORTRAN.
36 //
37 // The HPA Library is free software; you can redistribute it and/or
38 // modify it under the terms of the GNU Lesser General Public
39 // License as published by the Free Software Foundation; either
40 // version 2.1 of the License, or (at your option) any later version.
41 //
42 // The HPA Library is distributed in the hope that it will be useful,
43 // but WITHOUT ANY WARRANTY; without even the implied warranty of
44 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
45 // Lesser General Public License for more details.
46 //
47 // You should have received a copy of the GNU Lesser General Public
48 // License along with the HPA Library; if not, write to the Free
49 // Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
50 // 02110-1301 USA.
51
52 // The functions forming the HPA library are all implemented in a portable
53 // fashion in the C language. The IEEE 754 standard for floating point
54 // hardware and software is assumed in the PC/Unix version of this library.
55 // A REAL*32 number is represented as a combination of the following elements:
56 //
57 // sign bit(s): 0 -> positive, 1 -> negative ;
58 // exponent(e): 15-bit biased integer (bias=16383) ;
59 // mantissa(m): 15 words of 16 bit length *including* leading 1.
60 //
61 // The range of representable numbers is then given by
62 //
63 // 2^16384 > x > 2^(-16383) => 1.19*10^4932 > x > 1.68*10^(-4932)
64 //
65 // Special values of the exponent are:
66 //
67 // all ones -> infinity (floating point overflow)
68 // all zeros -> number = zero.
69 //
70 // Underflow in operations is handled by a flush to zero. Thus, a number
71 // with the exponent zero and nonzero mantissa is invalid (not-a-number).
72 // From the point of view of the HPA library, a complex number is a
73 // structure formed by two REAL*32 numbers.
74 //
75 // HPA cannot extend the working precision beyond its preset, hardcoded value.
76 // Hence some math routines will not achieve full precision.
77
78 #include <vif.h>
79 #include <rts-real32.h>
80
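// A minimal sketch of how the representation described above can be inspected.
// VIF_REAL32_DEMO is a hypothetical guard, not part of the VIF build; the
// sketch assumes the prototypes of the routines defined further down in this
// file are also visible through rts-real32.h.

#ifdef VIF_REAL32_DEMO

static void demo_real32_layout (void)
{
// 1.0 should dump as the biased exponent 0x3fff (16383) followed by the
// mantissa with its explicit leading 1: 0x8000, then zero words.
  real_32 one = dbltox (1.0);
  _xdump (&one);
// Round trip through double precision after a small computation.
  real_32 third = xdiv (one, inttox (3));
  printf ("%.17g\n", xtodbl (xmul (third, inttox (3))));
}

#endif
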
81 static const char *errmsg[] = {
82 "No error",
83 "Division by zero",
84 "Out of domain",
85 "Bad exponent",
86 "Floating point overflow",
87 "Invalid error code"
88 };
89
90 #define SEXP(z) ((unt_2 *) &(z.value))
91
92 void _pi32 (real_32 *x)
93 {
94 *x = X_PI;
95 }
96
97 int_4 xsigerr (int_4 errcond, int_4 errcode, const char *where)
98 {
99 // errcond must come from the evaluation of an error condition.
100 // errcode, which should describe the type of the error,
101 // should always be one of XEDIV, XEDOM, XEBADEXP and XFPOFLOW.
102 if (errcond == 0) {
103 errcode = 0;
104 }
105 if (errcode < 0 || errcode > XNERR) {
106 errcode = XEINV;
107 }
108 if (errcode != 0) {
109 RTE (where, errmsg[errcode]);
110 }
111 return errcode;
112 }
113
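// A hedged sketch of the calling convention used throughout this file: the
// return value of xsigerr is the (possibly corrected) error code, zero when
// the condition is false, so guards typically read
//
//   if (xsigerr (bad_condition, XEDOM, "caller()")) {
//     return X_0;
//   }
//
// where bad_condition and "caller()" are placeholders.
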
114 // Elementary stuff.
115
116 inline real_32 xneg (real_32 s)
117 {
118 unt_2 *p = SEXP (s);
119 *p ^= X_SIGN_MASK;
120 return s;
121 }
122
123 inline real_32 xabs (real_32 s)
124 {
125 unt_2 *p = SEXP (s);
126 *p &= X_EXPO_MASK;
127 return s;
128 }
129
130 inline int_4 xgetexp (const real_32 * ps)
131 {
132 unt_2 *q = (unt_2 *) &(ps->value);
133 return (*q & X_EXPO_MASK) - X_EXPO_BIAS;
134 }
135
136 inline int_4 xgetsgn (const real_32 * ps)
137 {
138 unt_2 *q = (unt_2 *) &(ps->value);
139 return (*q & X_SIGN_MASK);
140 }
141
142 int_4 xreal_cmp (const real_32 * pa, const real_32 * pb)
143 {
144 unt_2 e, k, *p, *q, p0, q0;
145 int_4 m;
146 p = (unt_2 *) &(pa->value);
147 q = (unt_2 *) &(pb->value);
148 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
149 if (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK) {
150 // *pa is actually zero
151 p0 = 0;
152 } else {
153 p0 = *p;
154 }
155 for (m = 1; m <= FLT256_LEN && q[m] == 0; m++);
156 if (m > FLT256_LEN && (*q & X_EXPO_MASK) < X_EXPO_MASK) {
157 // *pb is actually zero
158 q0 = 0;
159 } else {
160 q0 = *q;
161 }
162 e = p0 & X_SIGN_MASK;
163 k = q0 & X_SIGN_MASK;
164 if (e && !k) {
165 return -1;
166 } else if (!e && k) {
167 return 1;
168 } else { // *pa and *pb have the same sign
169 m = (e) ? -1 : 1;
170 e = p0 & X_EXPO_MASK;
171 k = q0 & X_EXPO_MASK;
172 if (e > k) {
173 return m;
174 } else if (e < k) {
175 return -m;
176 } else {
177 for (e = 0; *(++p) == *(++q) && e < FLT256_LEN; ++e);
178 if (e < FLT256_LEN) {
179 return (*p > *q ? m : -m);
180 } else {
181 return 0;
182 }
183 }
184 }
185 }
186
187 real_32 xreal_2 (real_32 s, int_4 m)
188 {
189 unt_2 *p = SEXP (s);
190 int_4 e;
191 for (e = 1; e <= FLT256_LEN && p[e] == 0; e++);
192 if (e <= FLT256_LEN) {
193 e = *p & X_EXPO_MASK; // biased exponent
194 if (e + m < 0) {
195 return X_0;
196 } else if ((xsigerr (e + m >= X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
197 return ((s.value[0] & X_SIGN_MASK) ? X_MINUS_INF : X_PLUS_INF);
198 } else {
199 *p += m;
200 return s;
201 }
202 } else { // s is zero or +-Inf
203 return s;
204 }
205 }
206
207 real_32 xsfmod (real_32 s, int_4 *p)
208 {
209 unt_2 *pa = SEXP (s);
210 unt_2 *pb = pa + 1;
211 int_2 e, k;
212 e = (*pa & X_EXPO_MASK) - X_EXPO_BIAS;
213 if ((xsigerr (e >= 15, XFPOFLOW, NO_TEXT))) {
214 *p = -1;
215 return s;
216 } else if (e < 0) {
217 *p = 0;
218 return s;
219 }
220 *p = *pb >> (15 - e);
221 xlshift (++e, pb, FLT256_LEN);
222 *pa -= e;
223 for (e = 0; *pb == 0 && e < xMax_p; ++pb, e += 16);
224 if (e == xMax_p) {
225 return X_0;
226 }
227 for (k = 0; !((*pb << k) & X_SIGN_MASK); ++k);
228 if ((k += e)) {
229 xlshift (k, pa + 1, FLT256_LEN);
230 *pa -= k;
231 }
232 return s;
233 }
234
235 void xlshift (int_4 n, unt_2 *pm, int_4 m)
236 {
237 unt_2 *pc = pm + m - 1;
238 if (n < 16 * m) {
239 unt_2 *pa = pm + n / 16;
240 m = n % 16;
241 n = 16 - m;
242 while (pa < pc) {
243 *pm = (*pa++) << m;
244 *pm++ |= *pa >> n;
245 }
246 *pm++ = *pa << m;
247 }
248 while (pm <= pc) {
249 *pm++ = 0;
250 }
251 }
252
253 void xrshift (int_4 n, unt_2 *pm, int_4 m)
254 {
255 unt_2 *pc = pm + m - 1;
256 if (n < 16 * m) {
257 unt_2 *pa = pc - n / 16;
258 m = n % 16;
259 n = 16 - m;
260 while (pa > pm) {
261 *pc = (*pa--) >> m;
262 *pc-- |= *pa << n;
263 }
264 *pc-- = *pa >> m;
265 }
266 while (pc >= pm) {
267 *pc-- = 0;
268 }
269 }
270
271 real_32 _xI (real_32 x)
272 {
273 // Intrinsic function so arguments are not pointers.
274 return x;
275 }
276
277 real_32 _aintx (real_32 x)
278 {
279 // Intrinsic function so arguments are not pointers.
280 return xtrunc (x);
281 }
282
283 real_32 _anintx (real_32 x)
284 {
285 // Intrinsic function so arguments are not pointers.
286 if (xgetsgn (&x) == 0) {
287 return xtrunc (xsum (x, X_1_OVER_2));
288 } else {
289 return xtrunc (xsub (x, X_1_OVER_2));
290 }
291 }
292
293 real_32 _xmax (real_32 a, real_32 b)
294 {
295 // Intrinsic function so arguments are not pointers.
296 if (xge (a, b)) {
297 return a;
298 } else {
299 return b;
300 }
301 }
302
303 real_32 _xmin (real_32 a, real_32 b)
304 {
305 // Intrinsic function so arguments are not pointers.
306 if (xle (a, b)) {
307 return a;
308 } else {
309 return b;
310 }
311 }
312
313 real_32 _xmod (real_32 a, real_32 b)
314 {
315 // Intrinsic function so arguments are not pointers.
316 real_32 q = xdiv (a, b);
317 if (xsgn (&q) >= 0) {
318 q = xfloor (q);
319 } else {
320 q = xneg (xfloor (xneg (q)));
321 }
322 return xadd (a, xmul (b, q), 1);
323 }
324
325 real_32 xadd (real_32 s, real_32 t, int_4 f)
326 {
327 REAL32 pe;
328 unt_2 h, u;
329 unt_2 *pc, *pf = pe;
330 unt_4 n = 0;
331 unt_2 *pa = SEXP (s);
332 unt_2 *pb = SEXP (t);
333 int_2 e = *pa & X_EXPO_MASK;
334 int_2 k = *pb & X_EXPO_MASK;
335 if (f != 0) {
336 *pb ^= X_SIGN_MASK;
337 }
338 u = (*pb ^ *pa) & X_SIGN_MASK;
339 f = 0;
340 if (e > k) {
341 if ((k = e - k) >= xMax_p) {
342 return s;
343 }
344 xrshift (k, pb + 1, FLT256_LEN);
345 } else if (e < k) {
346 if ((e = k - e) >= xMax_p) {
347 return t;
348 }
349 xrshift (e, pa + 1, FLT256_LEN);
350 e = k;
351 pc = pa;
352 pa = pb;
353 pb = pc;
354 } else if (u != 0) {
355 for (pc = pa, pf = pb; *(++pc) == *(++pf) && f < FLT256_LEN; ++f);
356 if (f >= FLT256_LEN) {
357 return X_0;
358 }
359 if (*pc < *pf) {
360 pc = pa;
361 pa = pb;
362 pb = pc;
363 }
364 pf = pe + f;
365 }
366 h = *pa & X_SIGN_MASK;
367 if (u != 0) {
368 for (pc = pb + FLT256_LEN; pc > pb; --pc) {
369 *pc = ~(*pc);
370 }
371 n = 1L;
372 }
373 for (pc = pe + FLT256_LEN, pa += FLT256_LEN, pb += FLT256_LEN; pc > pf;) {
374 n += *pa;
375 pa--;
376 n += *pb;
377 pb--;
378 *pc = n;
379 pc--;
380 n >>= 16;
381 }
382 if (u != 0) {
383 for (; *(++pc) == 0; ++f);
384 for (k = 0; !((*pc << k) & X_SIGN_MASK); ++k);
385 if ((k += 16 * f)) {
386 if ((e -= k) <= 0) {
387 return X_0;
388 }
389 xlshift (k, pe + 1, FLT256_LEN);
390 }
391 } else {
392 if (n != 0) {
393 ++e;
394 if ((xsigerr (e == (short) X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
395 return (!h ? X_PLUS_INF : X_MINUS_INF);
396 }
397 ++pf;
398 xrshift (1, pf, FLT256_LEN);
399 *pf |= X_SIGN_MASK;
400 }
401 }
402 *pe = e;
403 *pe |= h;
404 return *(real_32 *) pe;
405 }
406
407 real_32 xmul (real_32 s, real_32 t)
408 {
409 unt_2 pe[FLT256_LEN + 2], *q0, *q1, h;
410 unt_2 *pa, *pb, *pc;
411 unt_4 m, n, p;
412 int_2 e;
413 int_2 k;
414 q0 = SEXP (s);
415 q1 = SEXP (t);
416 e = (*q0 & X_EXPO_MASK) - X_EXPO_BIAS;
417 k = (*q1 & X_EXPO_MASK) + 1;
418 if ((xsigerr (e > (short) X_EXPO_MASK - k, XFPOFLOW, NO_TEXT))) {
419 return (((s.value[0] & X_SIGN_MASK) ^ (t.value[0] & X_SIGN_MASK)) ? X_MINUS_INF : X_PLUS_INF);
420 }
421 if ((e += k) <= 0) {
422 return X_0;
423 }
424 h = (*q0 ^ *q1) & X_SIGN_MASK;
425 for (++q1, k = FLT256_LEN, p = n = 0L, pc = pe + FLT256_LEN + 1; k > 0; --k) {
426 for (pa = q0 + k, pb = q1; pa > q0;) {
427 m = *pa--;
428 m *= *pb++;
429 n += (m & 0xffffL);
430 p += (m >> 16);
431 }
432 *pc-- = n;
433 n = p + (n >> 16);
434 p = 0L;
435 }
436 *pc = n;
437 if (!(*pc & X_SIGN_MASK)) {
438 --e;
439 if (e <= 0) {
440 return X_0;
441 }
442 xlshift (1, pc, FLT256_LEN + 1);
443 }
444 if ((xsigerr (e == (short) X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
445 return (!h ? X_PLUS_INF : X_MINUS_INF);
446 }
447 *pe = e;
448 *pe |= h;
449 return *(real_32 *) pe;
450 }
451
452 real_32 xdiv (real_32 s, real_32 t)
453 {
454 unt_2 *pd = SEXP (s), *pc = SEXP (t);
455 // Next makes xdiv robust at extreme exponents - MvdV.
456 if ((*pd & X_EXPO_MASK) == (*pc & X_EXPO_MASK)) {
457 *pd &= ~X_EXPO_MASK;
458 *pc &= ~X_EXPO_MASK;
459 }
460 // HPA implementation.
461 unt_2 e = *pc;
462 *pc = X_EXPO_BIAS;
463 if ((xsigerr (xreal_cmp (&t, &X_0) == 0, XEDIV, "xdiv()"))) {
464 return X_0;
465 } else {
466 real_32 a = dbltox (1 / xtodbl (t));
467 *pc = e;
468 pc = SEXP (a);
469 *pc += X_EXPO_BIAS - (e & X_EXPO_MASK);
470 *pc |= e & X_SIGN_MASK;
471 for (unt_2 i = 0; i < xItt_div; ++i) {
472 a = xmul (a, xadd (X_2, xmul (a, t), 1));
473 }
474 return xmul (s, a);
475 }
476 }
477
478
479 real_32 xevtch (real_32 z, real_32 * a, int_4 m)
480 {
481 real_32 *p, f, t, tp, w;
482 w = xreal_2 (z, 1);
483 t = X_0;
484 tp = X_0;
485 for (p = a + m; p > a;) {
486 f = xadd (*p--, xadd (xmul (w, t), tp, 1), 0);
487 tp = t;
488 t = f;
489 }
490 return xadd (*p, xadd (xmul (z, t), tp, 1), 0);
491 }
492
493 real_32 xexp2 (real_32 x)
494 {
495 real_32 s, d, f;
496 unt_2 *pf = SEXP (x);
497 int_4 m, k;
498 if (xreal_cmp (&x, &xE2min) < 0) {
499 return X_0;
500 } else if ((xsigerr (xreal_cmp (&x, &xE2max) > 0, XFPOFLOW, NO_TEXT))) {
501 return X_PLUS_INF;
502 } else {
503 m = (*pf & X_SIGN_MASK) ? 1 : 0;
504 x = xsfmod (x, &k);
505 if (m != 0) {
506 k *= -1;
507 }
508 // -X_EXPO_BIAS <= k <= +X_EXPO_BIAS
509 x = xmul (x, X_LN_2);
510 if (xgetexp (&x) > -X_EXPO_BIAS) {
511 x = xreal_2 (x, -1);
512 s = xmul (x, x);
513 f = X_0;
514 for (d = inttox (m = xMS_exp); m > 1; m -= 2, d = inttox (m)) {
515 f = xdiv (s, xadd (d, f, 0));
516 }
517 f = xdiv (x, xadd (d, f, 0));
518 f = xdiv (xadd (d, f, 0), xadd (d, f, 1));
519 } else {
520 f = X_1;
521 }
522 pf = SEXP (f);
523 if (-k > *pf) {
524 return X_0;
525 } else {
526 *pf += k;
527 if ((xsigerr (*pf >= X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
528 return X_PLUS_INF;
529 } else {
530 return f;
531 }
532 }
533 }
534 }
535
536 real_32 xexp (real_32 z)
537 {
538 return xexp2 (xmul (z, X_LOG2_E));
539 }
540
541 real_32 xexp10 (real_32 z)
542 {
543 return xexp2 (xmul (z, X_LOG2_10));
544 }
545
546 real_32 xfmod (real_32 s, real_32 t, real_32 * q)
547 {
548 if ((xsigerr (xreal_cmp (&t, &X_0) == 0, XEDIV, "xfmod()"))) {
549 return X_0;
550 } else {
551 unt_2 *p, mask = 0xffff;
552 int_2 e, i;
553 int_4 u;
554 *q = xdiv (s, t);
555 p = (unt_2 *) &(q->value);
556 u = (*p & X_SIGN_MASK) ? 0 : 1;
557 e = (*p &= X_EXPO_MASK); // biased exponent of *q
558 e = e < X_EXPO_BIAS ? 0 : e - X_EXPO_BIAS + 1;
559 for (i = 1; e / 16 > 0; i++, e -= 16);
560 if (i <= FLT256_LEN) {
561 // e = 0, ..., 15
562 mask <<= 16 - e;
563 p[i] &= mask;
564 for (i++; i <= FLT256_LEN; p[i] = 0, i++);
565 }
566 // Now *q == abs(quotient of (s/t))
567 return xadd (s, xmul (t, *q), u);
568 }
569 }
570
571 real_32 xfrexp (real_32 s, int_4 *p)
572 {
573 unt_2 *ps = SEXP (s), u;
574 *p = (*ps & X_EXPO_MASK) - X_EXPO_BIAS + 1;
575 u = *ps & X_SIGN_MASK;
576 *ps = X_EXPO_BIAS - 1;
577 *ps |= u;
578 return s;
579 }
580
581 static void nullify (int_4 skip, unt_2 *p, int_4 k)
582 {
583 // After skipping the first 'skip' bits of the vector 'p',
584 // this function nullifies all the remaining ones. 'k' is
585 // the number of words forming the vector p.
586 // Warning: 'skip' must be positive !
587 int_4 i;
588 unt_2 mask = 0xffff;
589 for (i = 0; skip / 16 > 0; i++, skip -= 16);
590 if (i < k) {
591 // skip = 0, ..., 15
592 mask <<= 16 - skip;
593 p[i] &= mask;
594 for (i++; i < k; p[i] = 0, i++);
595 }
596 }
597
598 static void canonic_form (real_32 * px)
599 {
600 unt_2 *p, u;
601 int_2 e, i, j, skip;
602 p = (unt_2 *) &(px->value);
603 e = (*p & X_EXPO_MASK); // biased exponent of x
604 u = (*p & X_SIGN_MASK); // sign of x
605 if (e < X_EXPO_BIAS - 1) {
606 return;
607 } else {
608 unt_2 mask = 0xffff;
609 // e >= X_EXPO_BIAS - 1
610 for (i = 1, skip = e + 1 - X_EXPO_BIAS; skip / 16 > 0; i++, skip -= 16);
611 if (i <= FLT256_LEN) {
612 // skip = 0, ..., 15
613 mask >>= skip;
614 if ((p[i] & mask) != mask) {
615 return;
616 } else {
617 for (j = i + 1; j <= FLT256_LEN && p[j] == 0xffff; j++);
618 if (j > FLT256_LEN) {
619 p[i] -= mask;
620 for (j = i + 1; j <= FLT256_LEN; p[j] = 0, j++);
621 if (!(p[1] & 0x8000)) {
622 p[1] = 0x8000;
623 *p = ++e;
624 *p |= u;
625 } else if (u != 0) {
626 *px = xadd (*px, X_1, 1);
627 } else {
628 *px = xadd (*px, X_1, 0);
629 }
630 }
631 }
632 }
633 }
634 }
635
636 real_32 xfrac (real_32 x)
637 {
638 // xfrac(x) returns the fractional part of the number x.
639 // xfrac(x) has the same sign as x.
640 unt_2 u, *p;
641 int_2 e;
642 int_4 n;
643 canonic_form (&x);
644 p = SEXP (x);
645 e = (*p & X_EXPO_MASK); // biased exponent of x
646 if (e < X_EXPO_BIAS) {
647 return x; // The integer part of x is zero
648 } else {
649 u = *p & X_SIGN_MASK; // sign of x
650 n = e - X_EXPO_BIAS + 1;
651 xlshift (n, p + 1, FLT256_LEN);
652 e = X_EXPO_BIAS - 1;
653 // Now I have to take into account the rule
654 // of the leading one.
655 while (e > 0 && !(p[1] & X_SIGN_MASK)) {
656 xlshift (1, p + 1, FLT256_LEN);
657 e -= 1;
658 }
659 // Now p+1 points to the fractional part of x,
660 // u is its sign, e is its biased exponent.
661 p[0] = e;
662 p[0] |= u;
663 return *(real_32 *) p;
664 }
665 }
666
667 real_32 xtrunc (real_32 x)
668 {
669 // xtrunc(x) returns the integer part of the number x.
670 // xtrunc(x) has the same sign as x.
671 unt_2 *p;
672 int_2 e;
673 canonic_form (&x);
674 p = SEXP (x);
675 e = (*p & X_EXPO_MASK); // biased exponent of x
676 if (e < X_EXPO_BIAS) {
677 return X_0; // The integer part of x is zero
678 } else {
679 nullify (e - X_EXPO_BIAS + 1, p + 1, FLT256_LEN);
680 return *(real_32 *) p;
681 }
682 }
683
684 real_32 xround (real_32 x)
685 {
686 return xtrunc (xadd (x, X_RND_CORR, x.value[0] & X_SIGN_MASK));
687 }
688
689 real_32 xceil (real_32 x)
690 {
691 unt_2 *ps = SEXP (x);
692 if ((*ps & X_SIGN_MASK)) {
693 return xtrunc (x);
694 } else {
695 real_32 y = xfrac (x);
696 // y has the same sign as x (see above).
697 return (xreal_cmp (&y, &X_0) > 0 ? xadd (xtrunc (x), X_1, 0) : x);
698 }
699 }
700
701 real_32 xfloor (real_32 x)
702 {
703 unt_2 *ps = SEXP (x);
704 if ((*ps & X_SIGN_MASK)) {
705 real_32 y = xfrac (x);
706 // y has the same sign as x (see above).
707 return (xreal_cmp (&y, &X_0) < 0 ? xadd (xtrunc (x), X_1, 1) : x);
708 } else {
709 return xtrunc (x);
710 }
711 }
712
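// A small sketch of the rounding conventions implemented above. The
// VIF_REAL32_DEMO guard is hypothetical and not part of the build; dbltox and
// xtodbl are defined further down and assumed to be declared in rts-real32.h.

#ifdef VIF_REAL32_DEMO

static void demo_real32_rounding (void)
{
  real_32 x = dbltox (-2.75);
// Expected output: xfrac -0.75, xtrunc -2, xround -3, xceil -2, xfloor -3.
  printf ("%g %g %g %g %g\n",
    xtodbl (xfrac (x)), xtodbl (xtrunc (x)), xtodbl (xround (x)),
    xtodbl (xceil (x)), xtodbl (xfloor (x)));
}

#endif
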
713 static void xadd_correction (real_32 * px, int_4 k)
714 {
715 int_2 e = (px->value[0] & X_EXPO_MASK) - X_EXPO_BIAS;
716 // e = (e > 0 ? e : 0);
717 *px = xadd (*px, xreal_2 (X_FIX_CORR, e), k);
718 }
719
720 real_32 xfix (real_32 x)
721 {
722 unt_2 *p;
723 int_2 e;
724 xadd_correction (&x, x.value[0] & X_SIGN_MASK);
725 p = SEXP (x);
726 e = (*p & X_EXPO_MASK); // biased exponent of x
727 if (e < X_EXPO_BIAS) {
728 return X_0; // The integer part of x is zero
729 } else {
730 nullify (e - X_EXPO_BIAS + 1, p + 1, FLT256_LEN);
731 return *(real_32 *) p;
732 }
733 }
734
735 real_32 xtanh (real_32 z)
736 {
737 real_32 s, d, f;
738 int_4 m, k;
739 if ((k = xgetexp (&z)) > xK_tanh) {
740 if (xgetsgn (&z)) {
741 return xneg (X_1);
742 } else {
743 return X_1;
744 }
745 }
746 if (k < xK_lin) {
747 return z;
748 }
749 ++k;
750 if (k > 0) {
751 z = xreal_2 (z, -k);
752 }
753 s = xmul (z, z);
754 f = X_0;
755 for (d = inttox (m = xMS_hyp); m > 1;) {
756 f = xdiv (s, xadd (d, f, 0));
757 d = inttox (m -= 2);
758 }
759 f = xdiv (z, xadd (d, f, 0));
760 for (; k > 0; --k) {
761 f = xdiv (xreal_2 (f, 1), xadd (d, xmul (f, f), 0));
762 }
763 return f;
764 }
765
766 real_32 xsinh (real_32 z)
767 {
768 int_4 k;
769 if ((k = xgetexp (&z)) < xK_lin) {
770 return z;
771 } else if (k < 0) {
772 z = xtanh (xreal_2 (z, -1));
773 return xdiv (xreal_2 (z, 1), xadd (X_1, xmul (z, z), 1));
774 } else {
775 z = xexp (z);
776 return xreal_2 (xadd (z, xdiv (X_1, z), 1), -1);
777 }
778 }
779
780 real_32 xcosh (real_32 z)
781 {
782 if (xgetexp (&z) < xK_lin) {
783 return X_1;
784 }
785 z = xexp (z);
786 return xreal_2 (xadd (z, xdiv (X_1, z), 0), -1);
787 }
788
789 real_32 xatanh (real_32 x)
790 {
791 real_32 y = x;
792 y.value[0] &= X_EXPO_MASK; // Now y == abs(x)
793 if ((xsigerr (xreal_cmp (&y, &X_1) >= 0, XEDOM, "xatanh"))) {
794 return ((x.value[0] & X_SIGN_MASK) ? X_MINUS_INF : X_PLUS_INF);
795 } else {
796 y = xdiv (xadd (X_1, x, 0), xadd (X_1, x, 1));
797 return xreal_2 (xlog (y), -1);
798 }
799 }
800
801 real_32 xasinh (real_32 x)
802 {
803 real_32 y = xmul (x, x);
804 y = xsqrt (xadd (X_1, y, 0));
805 if ((x.value[0] & X_SIGN_MASK)) {
806 return xneg (xlog (xadd (y, x, 1)));
807 } else {
808 return xlog (xadd (x, y, 0));
809 }
810 }
811
812 real_32 xacosh (real_32 x)
813 {
814 if ((xsigerr (xreal_cmp (&x, &X_1) < 0, XEDOM, "xacosh()"))) {
815 return X_0;
816 } else {
817 real_32 y = xmul (x, x);
818 y = xsqrt (xadd (y, X_1, 1));
819 return xlog (xadd (x, y, 0));
820 }
821 }
822
823 real_32 xatan (real_32 z)
824 {
825 real_32 s, f;
826 int_4 k, m;
827 if ((k = xgetexp (&z)) < xK_lin) {
828 return z;
829 }
830 if (k >= 0) {
831 // k>=0 is equivalent to abs(z) >= 1.0
832 z = xdiv (X_1, z);
833 m = 1;
834 } else {
835 m = 0;
836 }
837 f = dbltox (atan (xtodbl (z)));
838 s = xadd (X_1, xmul (z, z), 0);
839 for (k = 0; k < xItt_div; ++k) {
840 f = xadd (f, xdiv (xadd (z, xtan (f), 1), s), 0);
841 }
842 if (m != 0) {
843 if (xgetsgn (&f)) {
844 return xadd (xneg (X_PI_OVER_2), f, 1);
845 } else {
846 return xadd (X_PI_OVER_2, f, 1);
847 }
848 } else {
849 return f;
850 }
851 }
852
853 real_32 xasin (real_32 z)
854 {
855 real_32 u = z;
856 u.value[0] &= X_EXPO_MASK;
857 if ((xsigerr (xreal_cmp (&u, &X_1) > 0, XEDOM, "xasin()"))) {
858 return ((xgetsgn (&z)) ? xneg (X_PI_OVER_2) : X_PI_OVER_2);
859 } else {
860 if (xgetexp (&z) < xK_lin) {
861 return z;
862 }
863 u = xsqrt (xadd (X_1, xmul (z, z), 1));
864 if (xgetexp (&u) == -X_EXPO_BIAS) {
865 return ((xgetsgn (&z)) ? xneg (X_PI_OVER_2) : X_PI_OVER_2);
866 }
867 return xatan (xdiv (z, u));
868 }
869 }
870
871 real_32 xacos (real_32 z)
872 {
873 real_32 u = z;
874 u.value[0] &= X_EXPO_MASK;
875 if ((xsigerr (xreal_cmp (&u, &X_1) > 0, XEDOM, "xacos()"))) {
876 return ((xgetsgn (&z)) ? X_PI : X_0);
877 } else {
878 if (xgetexp (&z) == -X_EXPO_BIAS) {
879 return X_PI_OVER_2;
880 }
881 u = xsqrt (xadd (X_1, xmul (z, z), 1));
882 u = xatan (xdiv (u, z));
883 if (xgetsgn (&z)) {
884 return xadd (X_PI, u, 0);
885 } else {
886 return u;
887 }
888 }
889 }
890
891 // Kindly added by A.Haumer 2010-04-09
892 real_32 xatan2 (real_32 y, real_32 x)
893 {
894 int_4 rs, is;
895 rs = xsgn (&x);
896 is = xsgn (&y);
897 if (rs > 0) {
898 return xatan (xdiv (y, x));
899 } else if (rs < 0) {
900 x.value[0] ^= X_SIGN_MASK;
901 y.value[0] ^= X_SIGN_MASK;
902 if (is >= 0) {
903 return xadd (X_PI, xatan (xdiv (y, x)), 0);
904 } else {
905 return xadd (xatan (xdiv (y, x)), X_PI, 1);
906 }
907 } else { // x is zero !
908 if (!xsigerr (is == 0, XEDOM, "xatan2()")) {
909 return (is > 0 ? X_PI_OVER_2 : xneg (X_PI_OVER_2));
910 } else {
911 return X_0; // Dummy value :)
912 }
913 }
914 }
915
916 real_32 xlog (real_32 z)
917 {
918 real_32 f, h;
919 int_4 k, m;
920 if ((xsigerr ((xgetsgn (&z)) || xgetexp (&z) == -X_EXPO_BIAS, XEDOM, "xlog()"))) {
921 return X_MINUS_INF;
922 } else if (xreal_cmp (&z, &X_1) == 0) {
923 return X_0;
924 } else {
925 z = xfrexp (z, &m);
926 z = xmul (z, X_SQRT_2);
927 z = xdiv (xadd (z, X_1, 1), xadd (z, X_1, 0));
928 h = xreal_2 (z, 1);
929 z = xmul (z, z);
930 for (f = h, k = 1; xgetexp (&h) > -xMax_p;) {
931 h = xmul (h, z);
932 f = xadd (f, xdiv (h, inttox (k += 2)), 0);
933 }
934 return xadd (f, xmul (X_LN_2, dbltox (m - .5)), 0);
935 }
936 }
937
938 real_32 xlog2 (real_32 z)
939 {
940 real_32 f, h;
941 int_4 k, m;
942 if ((xsigerr ((xgetsgn (&z)) || xgetexp (&z) == -X_EXPO_BIAS, XEDOM, "xlog2()"))) {
943 return X_MINUS_INF;
944 } else if (xreal_cmp (&z, &X_1) == 0) {
945 return X_0;
946 } else {
947 z = xfrexp (z, &m);
948 z = xmul (z, X_SQRT_2);
949 z = xdiv (xadd (z, X_1, 1), xadd (z, X_1, 0));
950 h = xreal_2 (z, 1);
951 z = xmul (z, z);
952 for (f = h, k = 1; xgetexp (&h) > -xMax_p;) {
953 h = xmul (h, z);
954 f = xadd (f, xdiv (h, inttox (k += 2)), 0);
955 }
956 return xadd (xmul (f, X_LOG2_E), dbltox (m - .5), 0);
957 }
958 }
959
960 real_32 xlog10 (real_32 z)
961 {
962 real_32 w = xlog (z);
963 if (xreal_cmp (&w, &X_MINUS_INF) <= 0) {
964 return X_MINUS_INF;
965 } else {
966 return xmul (w, X_LOG10_E);
967 }
968 }
969
970 logical_4 xeq (real_32 x1, real_32 x2)
971 {
972 return (xreal_cmp (&x1, &x2) == 0);
973 }
974
975 logical_4 xneq (real_32 x1, real_32 x2)
976 {
977 return (xreal_cmp (&x1, &x2) != 0);
978 }
979
980 logical_4 xgt (real_32 x1, real_32 x2)
981 {
982 return (xreal_cmp (&x1, &x2) > 0);
983 }
984
985 logical_4 xge (real_32 x1, real_32 x2)
986 {
987 return (xreal_cmp (&x1, &x2) >= 0);
988 }
989
990 logical_4 xlt (real_32 x1, real_32 x2)
991 {
992 return (xreal_cmp (&x1, &x2) < 0);
993 }
994
995 logical_4 xle (real_32 x1, real_32 x2)
996 {
997 return (xreal_cmp (&x1, &x2) <= 0);
998 }
999
1000
1001 logical_4 xis_nan (const real_32 * u)
1002 {
1003 // xis_nan (&x) returns TRUE if and only if x is not a valid number
1004 unt_2 *p = (unt_2 *) &(u->value);
1005 if (*p != 0) {
1006 return FALSE;
1007 } else {
1008 int_4 i;
1009 for (i = 1; i <= FLT256_LEN && p[i] == 0x0; i++);
1010 return i <= FLT256_LEN;
1011 }
1012 }
1013
1014 logical_4 xis0 (const real_32 * u)
1015 {
1016 unt_2 *p = (unt_2 *) &(u->value);
1017 int_4 m;
1018 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1019 return (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK ? TRUE : FALSE);
1020 }
1021
1022 logical_4 xnot0 (const real_32 * u)
1023 {
1024 unt_2 *p = (unt_2 *) &(u->value);
1025 int_4 m;
1026 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1027 return (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK ? FALSE : TRUE);
1028 }
1029
1030 logical_4 xlt1 (const real_32 u)
1031 {
1032 return (xlt (xabs (u), X_1));
1033 }
1034
1035 int_4 xsgn (const real_32 * u)
1036 {
1037 unt_2 *p = (unt_2 *) &(u->value);
1038 int_4 m;
1039 for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1040 if ((m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK) || !*p) {
1041 return 0;
1042 } else {
1043 return ((*p & X_SIGN_MASK) ? -1 : 1);
1044 }
1045 }
1046
1047 logical_4 xis_pinf (const real_32 * u)
1048 {
1049 return (*u->value == X_EXPO_MASK);
1050 }
1051
1052 logical_4 xis_minf (const real_32 * u)
1053 {
1054 return (*u->value == (X_EXPO_MASK | X_SIGN_MASK));
1055 }
1056
1057 logical_4 xisordnumb (const real_32 * u)
1058 {
1059 int_4 isNaN, isfinite;
1060 unt_2 *p = (unt_2 *) &(u->value);
1061 if (*p != 0) {
1062 isNaN = FALSE;
1063 } else {
1064 int_4 i;
1065 for (i = 1; i <= FLT256_LEN && p[i] == 0x0; i++);
1066 isNaN = i <= FLT256_LEN;
1067 }
1068 isfinite = (*p & X_EXPO_MASK) < X_EXPO_MASK;
1069 return (!isNaN && (isfinite) ? TRUE : FALSE);
1070 }
1071
1072 real_32 xpwr (real_32 s, int_4 n)
1073 {
1074 real_32 t;
1075 unsigned k, m;
1076 t = X_1;
1077 if (n < 0) {
1078 m = -n;
1079 if ((xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()"))) {
1080 return X_0;
1081 }
1082 s = xdiv (X_1, s);
1083 } else {
1084 m = n;
1085 }
1086 if (m != 0) {
1087 k = 1;
1088 while (1) {
1089 if (k & m) {
1090 t = xmul (s, t);
1091 }
1092 if ((k <<= 1) <= m) {
1093 s = xmul (s, s);
1094 } else {
1095 break;
1096 }
1097 }
1098 } else {
1099 xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()");
1100 }
1101 return t;
1102 }
1103
1104 real_32 xpow (real_32 x, real_32 y)
1105 {
1106 if (xsigerr ((xgetsgn (&x)) || xgetexp (&x) == -X_EXPO_BIAS, XEDOM, "xpow()")) {
1107 return X_0;
1108 } else {
1109 return xexp2 (xmul (xlog2 (x), y));
1110 }
1111 }
1112
1113 real_32 xsqrt (real_32 z)
1114 {
1115 real_32 s, h;
1116 int_2 m, e;
1117 unt_2 *pc;
1118 if ((xsigerr ((xgetsgn (&z)), XEDOM, "xsqrt()"))) {
1119 return X_0;
1120 } else {
1121 pc = SEXP (z);
1122 if (*pc == 0) {
1123 return X_0;
1124 }
1125 e = *pc - X_EXPO_BIAS;
1126 *pc = X_EXPO_BIAS + (e % 2);
1127 e /= 2;
1128 s = dbltox (sqrt (xtodbl (z)));
1129 for (m = 0; m < xItt_div; ++m) {
1130 h = xdiv (xadd (z, xmul (s, s), 1), xreal_2 (s, 1));
1131 s = xadd (s, h, 0);
1132 }
1133 pc = SEXP (s);
1134 *pc += e;
1135 return s;
1136 }
1137 }
1138
1139 static int_4 xodd (real_32 x)
1140 {
1141 unt_2 *p = SEXP (x);
1142 int_2 e, i;
1143 e = (*p & X_EXPO_MASK) - X_EXPO_BIAS; // exponent of x
1144 if (e < 0) {
1145 return 0;
1146 } else {
1147 for (i = 1; e / 16 > 0; i++, e -= 16);
1148 // Now e = 0, ..., 15
1149 return (i <= FLT256_LEN ? p[i] & 0x8000 >> e : 0);
1150 }
1151 }
1152
1153 real_32 xtan (real_32 z)
1154 {
1155 int_4 k, m;
1156 z = rred (z, 't', &k);
1157 if ((xsigerr (xreal_cmp (&z, &X_PI_OVER_2) >= 0, XEDOM, "xtan()"))) {
1158 return (!k ? X_PLUS_INF : X_MINUS_INF);
1159 } else {
1160 if (xreal_cmp (&z, &X_PI_OVER_4) == 1) {
1161 m = 1;
1162 z = xadd (X_PI_OVER_2, z, 1);
1163 } else {
1164 m = 0;
1165 }
1166 if (k != 0) {
1167 z = xneg (c_tan (z));
1168 } else {
1169 z = c_tan (z);
1170 }
1171 if (m != 0) {
1172 return xdiv (X_1, z);
1173 } else {
1174 return z;
1175 }
1176 }
1177 }
1178
1179 real_32 xcos (real_32 z)
1180 {
1181 int_4 k;
1182 z = rred (z, 'c', &k);
1183 if (xgetexp (&z) < xK_lin) {
1184 if (k != 0) {
1185 return xneg (X_1);
1186 } else {
1187 return X_1;
1188 }
1189 }
1190 z = c_tan (xreal_2 (z, -1));
1191 z = xmul (z, z);
1192 z = xdiv (xadd (X_1, z, 1), xadd (X_1, z, 0));
1193 if (k != 0) {
1194 return xneg (z);
1195 } else {
1196 return z;
1197 }
1198 }
1199
1200 real_32 xsin (real_32 z)
1201 {
1202 int_4 k;
1203 z = rred (z, 's', &k);
1204 if (xgetexp (&z) >= xK_lin) {
1205 z = c_tan (xreal_2 (z, -1));
1206 z = xdiv (xreal_2 (z, 1), xadd (X_1, xmul (z, z), 0));
1207 }
1208 if (k != 0) {
1209 return xneg (z);
1210 } else {
1211 return z;
1212 }
1213 }
1214
1215 static real_32 c_tan (real_32 z)
1216 {
1217 real_32 s, f, d;
1218 int_4 m;
1219 unt_2 k;
1220 if (xgetexp (&z) < xK_lin)
1221 return z;
1222 s = xneg (xmul (z, z));
1223 for (k = 1; k <= FLT256_LEN && s.value[k] == 0; k++);
1224 if ((xsigerr (s.value[0] == 0xffff && k > FLT256_LEN, XFPOFLOW, NO_TEXT))) {
1225 return X_0;
1226 } else {
1227 f = X_0;
1228 for (d = inttox (m = xMS_trg); m > 1;) {
1229 f = xdiv (s, xadd (d, f, 0));
1230 d = inttox (m -= 2);
1231 }
1232 return xdiv (z, xadd (d, f, 0));
1233 }
1234 }
1235
1236 static real_32 rred (real_32 z, int_4 kf, int_4 *ps)
1237 {
1238 real_32 is, q;
1239 if (xgetsgn (&z)) {
1240 z = xneg (z);
1241 is = X_1;
1242 } else {
1243 is = X_0;
1244 }
1245 z = xfmod (z, X_PI, &q);
1246 if (kf == 't') {
1247 q = is;
1248 } else if (kf == 's') {
1249 q = xadd (q, is, 0);
1250 }
1251 if (xreal_cmp (&z, &X_PI_OVER_2) == 1) {
1252 z = xadd (X_PI, z, 1);
1253 if (kf == 'c' || kf == 't') {
1254 q = xadd (q, X_1, 0);
1255 }
1256 }
1257 *ps = (xodd (q)) ? 1 : 0;
1258 return z;
1259 }
1260
1261 // COMPLEX*64
1262
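// A hedged usage sketch of the COMPLEX*64 routines defined below (their
// prototypes are assumed to be visible through rts-real32.h):
//
//   complex_64 z = cxreset (inttox (1), inttox (2));   // 1 + 2i
//   complex_64 w = cxreset (inttox (3), inttox (4));   // 3 + 4i
//   complex_64 p = cxmul (z, w);                       // -5 + 10i
//   real_32 r = cxabs (w);                             // 5
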
1263 complex_64 cxadd (complex_64 z1, complex_64 z2, int_4 k)
1264 {
1265 complex_64 w;
1266 w.re = xadd (z1.re, z2.re, k);
1267 w.im = xadd (z1.im, z2.im, k);
1268 return w;
1269 }
1270
1271 complex_64 cxsum (complex_64 z1, complex_64 z2)
1272 {
1273 complex_64 w;
1274 w.re = xadd (z1.re, z2.re, 0);
1275 w.im = xadd (z1.im, z2.im, 0);
1276 return w;
1277 }
1278
1279 complex_64 cxsub (complex_64 z1, complex_64 z2)
1280 {
1281 complex_64 w;
1282 w.re = xadd (z1.re, z2.re, 1);
1283 w.im = xadd (z1.im, z2.im, 1);
1284 return w;
1285 }
1286
1287 complex_64 cxmul (complex_64 z1, complex_64 z2)
1288 {
1289 complex_64 w;
1290 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 1);
1291 w.im = xadd (xmul (z1.im, z2.re), xmul (z1.re, z2.im), 0);
1292 return w;
1293 }
1294
1295 complex_64 cxrmul (real_32 c, complex_64 z)
1296 {
1297 complex_64 w;
1298 w.re = xmul (c, z.re);
1299 w.im = xmul (c, z.im);
1300 return w;
1301 }
1302
1303 complex_64 cxdrot (complex_64 z)
1304 {
1305 complex_64 y;
1306 y.re = z.im;
1307 y.im = z.re;
1308 // Multiplication by +i
1309 y.re.value[0] ^= X_SIGN_MASK;
1310 return y;
1311 }
1312
1313 complex_64 cxrrot (complex_64 z)
1314 {
1315 complex_64 y;
1316 y.re = z.im;
1317 y.im = z.re;
1318 // Multiplication by -i
1319 y.im.value[0] ^= X_SIGN_MASK;
1320 return y;
1321 }
1322
1323 complex_64 cxdiv (complex_64 z1, complex_64 z2)
1324 {
1325 int_4 tv = cxrec (z2, &z2);
1326 if (!xsigerr (!tv, XEDIV, "cxdiv()")) {
1327 complex_64 w;
1328 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 1);
1329 w.im = xadd (xmul (z1.im, z2.re), xmul (z1.re, z2.im), 0);
1330 return w;
1331 } else {
1332 return X_0_0I;
1333 }
1334 }
1335
1336 complex_64 cxinv (complex_64 z)
1337 {
1338 int_4 tv = cxrec (z, &z);
1339 if (!xsigerr (!tv, XEDOM, "cxinv()")) {
1340 return z;
1341 } else {
1342 return X_0_0I;
1343 }
1344 }
1345
1346 complex_64 cxsqr (complex_64 z)
1347 {
1348 complex_64 w;
1349 w.re = xadd (xmul (z.re, z.re), xmul (z.im, z.im), 1);
1350 w.im = xmul (X_2, xmul (z.im, z.re));
1351 return w;
1352 }
1353
1354 complex_64 cxsqrt (complex_64 z)
1355 {
1356 complex_64 w;
1357 if (xsgn (&(z.re)) == 0 && xsgn (&(z.im)) == 0) {
1358 w = X_0_0I;
1359 } else if (xis0 (&(z.im))) {
1360 real_32 s = xsqrt (xabs (z.re));
1361 if (xsgn (&(z.re)) == -1) {
1362 w = (complex_64) {X_0, s};
1363 } else {
1364 w = (complex_64) {s, X_0};
1365 }
1366 } else {
1367 real_32 mod = xsqrt (cxabs (z)), arg = xreal_2 (cxarg (z), -1);
1368 w.re = xmul (mod, xcos (arg));
1369 w.im = xmul (mod, xsin (arg));
1370 }
1371 return w;
1372 }
1373
1374 complex_64 cxreset (real_32 re, real_32 im)
1375 {
1376 complex_64 y;
1377 y.re = re;
1378 y.im = im;
1379 return y;
1380 }
1381
1382 complex_64 cxconv (real_32 x)
1383 {
1384 complex_64 y;
1385 y.re = x;
1386 y.im = X_0;
1387 return y;
1388 }
1389
1390 real_32 cxreal (complex_64 z)
1391 {
1392 return z.re;
1393 }
1394
1395 real_32 cximag (complex_64 z)
1396 {
1397 return z.im;
1398 }
1399
1400 complex_64 cxswap (complex_64 z)
1401 {
1402 complex_64 y;
1403 y.re = z.im;
1404 y.im = z.re;
1405 return y;
1406 }
1407
1408 complex_64 cxneg (complex_64 z)
1409 {
1410 z.re.value[0] ^= X_SIGN_MASK;
1411 z.im.value[0] ^= X_SIGN_MASK;
1412 return z;
1413 }
1414
1415 complex_64 cxconj (complex_64 z)
1416 {
1417 return (z.im.value[0] ^= X_SIGN_MASK, z);
1418 }
1419
1420 #define XBOUND FLT256_LEN * 16 + 8
1421
1422 real_32 cxabs (complex_64 z)
1423 {
1424 if (xreal_cmp (&(z.re), &X_0) == 0 && xreal_cmp (&(z.im), &X_0) == 0) {
1425 return X_0;
1426 } else {
1427 int_4 ea = (z.re.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1428 int_4 eb = (z.im.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1429 if (ea > eb + XBOUND) {
1430 return z.re;
1431 } else if (eb > ea + XBOUND) {
1432 return z.im;
1433 } else {
1434 z.re.value[0] -= eb;
1435 z.im.value[0] = X_EXPO_BIAS;
1436 real_32 x = xsqrt (xadd (xmul (z.re, z.re), xmul (z.im, z.im), 0));
1437 x.value[0] += eb;
1438 return x;
1439 }
1440 }
1441 }
1442
1443 real_32 cxarg (complex_64 z)
1444 {
1445 int_4 rs = xsgn (&(z.re)), is = xsgn (&(z.im));
1446 if (rs > 0) {
1447 return xatan (xdiv (z.im, z.re));
1448 } else if (rs < 0) {
1449 z.re.value[0] ^= X_SIGN_MASK;
1450 z.im.value[0] ^= X_SIGN_MASK;
1451 if (is >= 0) {
1452 return xadd (X_PI, xatan (xdiv (z.im, z.re)), 0);
1453 } else {
1454 return xadd (xatan (xdiv (z.im, z.re)), X_PI, 1);
1455 }
1456 } else { // z.re is zero !
1457 if (!xsigerr (is == 0, XEDOM, "cxarg()")) {
1458 return (is > 0 ? X_PI_OVER_2 : xneg (X_PI_OVER_2));
1459 } else {
1460 return xneg (X_PI_OVER_2); // Dummy value :)
1461 }
1462 }
1463 }
1464
1465 logical_4 cxrec (complex_64 z, complex_64 * w)
1466 {
1467 if (xreal_cmp (&(z.re), &X_0) == 0 && xreal_cmp (&(z.im), &X_0) == 0) {
1468 return FALSE;
1469 } else {
1470 int_4 sa = z.re.value[0] & X_SIGN_MASK;
1471 int_4 sb = z.im.value[0] & X_SIGN_MASK;
1472 int_4 ea = (z.re.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1473 int_4 eb = (z.im.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1474 real_32 x;
1475 if (ea > eb + XBOUND) {
1476 x = z.re;
1477 } else if (eb > ea + XBOUND) {
1478 x = z.im;
1479 } else {
1480 z.re.value[0] -= eb;
1481 z.im.value[0] = X_EXPO_BIAS;
1482 x = xsqrt (xadd (xmul (z.re, z.re), xmul (z.im, z.im), 0));
1483 x.value[0] += eb;
1484 z.re.value[0] += eb;
1485 z.im.value[0] += eb;
1486 }
1487 w->re = xdiv (xdiv (z.re, x), x);
1488 w->im = xdiv (xdiv (z.im, x), x);
1489 w->re.value[0] |= sa;
1490 w->im.value[0] |= X_SIGN_MASK ^ sb;
1491 return TRUE;
1492 }
1493 }
1494
1495 complex_64 cxexp (complex_64 z)
1496 {
1497 complex_64 w;
1498 w.re = xmul (xexp (z.re), xcos (z.im));
1499 w.im = xmul (xexp (z.re), xsin (z.im));
1500 return w;
1501 }
1502
1503 complex_64 cxlog (complex_64 z)
1504 {
1505 real_32 mod;
1506 complex_64 w;
1507 mod = cxabs (z);
1508 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog()")) {
1509 return X_0_0I;
1510 } else {
1511 w.re = xlog (mod);
1512 w.im = cxarg (z);
1513 return w;
1514 }
1515 }
1516
1517 complex_64 cxlog10 (complex_64 z)
1518 {
1519 real_32 mod;
1520 complex_64 w;
1521 mod = cxabs (z);
1522 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog10()")) {
1523 return X_0_0I;
1524 } else {
1525 w.re = xlog10 (mod);
1526 w.im = xmul (cxarg (z), X_LOG10_E);
1527 return w;
1528 }
1529 }
1530
1531 complex_64 cxlog2 (complex_64 z)
1532 {
1533 real_32 mod;
1534 complex_64 w;
1535 mod = cxabs (z);
1536 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog2()")) {
1537 return X_0_0I;
1538 } else {
1539 w.re = xlog2 (mod);
1540 w.im = xmul (cxarg (z), X_LOG2_E);
1541 return w;
1542 }
1543 }
1544
1545 complex_64 cxlog_sqrt (complex_64 z)
1546 {
1547 real_32 mod = cxabs (z);
1548 if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog_sqrt()")) {
1549 return X_0_0I;
1550 } else {
1551 complex_64 w;
1552 w.re = xreal_2 (xlog (mod), -1);
1553 w.im = xreal_2 (cxarg (z), -1);
1554 return w;
1555 }
1556 }
1557
1558 complex_64 cxsinh (complex_64 z)
1559 {
1560 complex_64 w = cxsub (cxexp (z), cxexp (cxneg (z)));
1561 w.re = xreal_2 (w.re, -1);
1562 w.im = xreal_2 (w.im, -1);
1563 return w;
1564 }
1565
1566 complex_64 cxcosh (complex_64 z)
1567 {
1568 complex_64 w = cxsum (cxexp (z), cxexp (cxneg (z)));
1569 w.re = xreal_2 (w.re, -1);
1570 w.im = xreal_2 (w.im, -1);
1571 return w;
1572 }
1573
1574 complex_64 cxtanh (complex_64 z)
1575 {
1576 if (xsigerr (xreal_cmp (&(z.re), &xEmax) > 0, XFPOFLOW, NO_TEXT)) {
1577 return X_1_0I;
1578 } else if (xsigerr (xreal_cmp (&(z.re), &xEmin) < 0, XFPOFLOW, NO_TEXT)) {
1579 return cxneg (X_1_0I);
1580 } else {
1581 complex_64 w;
1582 if (xsigerr (!cxrec (cxcosh (z), &w), XEDOM, "cxtanh()")) {
1583 return X_0_0I;
1584 } else {
1585 return cxmul (cxsinh (z), w);
1586 }
1587 }
1588 }
1589
1590 complex_64 cxasinh (complex_64 z)
1591 {
1592 // This way cxasinh() also stays accurate for real numbers very close to -inf, where z + w would cancel badly.
1593 complex_64 w = cxsqrt (cxsum (X_1_0I, cxsqr (z)));
1594 real_32 ls = cxabs (cxsum (z, w));
1595 real_32 rs = xmul (X_VSV, cxabs (z));
1596 if (xreal_cmp (&ls, &rs) < 0) {
1597 return cxneg (cxlog (cxsub (w, z)));
1598 } else {
1599 return cxlog (cxsum (z, w));
1600 }
1601 }
1602
1603 complex_64 cxacosh (complex_64 z)
1604 {
1605 complex_64 w = cxsqrt (cxsub (cxsqr (z), X_1_0I));
1606 real_32 ls = cxabs (cxsum (z, w));
1607 real_32 rs = xmul (X_VSV, cxabs (z));
1608 if (xreal_cmp (&ls, &rs) < 0) {
1609 return cxneg (cxlog (cxsub (z, w)));
1610 } else {
1611 return cxlog (cxsum (z, w));
1612 }
1613 }
1614
1615 complex_64 cxatanh (complex_64 z)
1616 {
1617 real_32 t = xadd (xabs (z.re), X_1, 1);
1618 int_4 errcond = xsgn (&(z.im)) == 0 && xsgn (&t) == 0;
1619 if (xsigerr (errcond, XEDOM, "cxatanh()")) {
1620 return X_0_0I;
1621 } else {
1622 complex_64 w = cxdiv (cxsum (X_1_0I, z), cxsub (X_1_0I, z));
1623 w = cxlog_sqrt (w);
1624 return w;
1625 }
1626 }
1627
1628 complex_64 cxgdiv (complex_64 z1, complex_64 z2)
1629 {
1630 z1.re = xround (z1.re);
1631 z1.im = xround (z1.im);
1632 z2.re = xround (z2.re);
1633 z2.im = xround (z2.im);
1634 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1635 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxgdiv()")) {
1636 return X_0_0I;
1637 } else {
1638 complex_64 w;
1639 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1640 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1641 w.re = xround (xdiv (w.re, mod2));
1642 w.im = xround (xdiv (w.im, mod2));
1643 return w;
1644 }
1645 }
1646
1647 complex_64 cxidiv (complex_64 z1, complex_64 z2)
1648 {
1649 z1.re = xround (z1.re);
1650 z1.im = xround (z1.im);
1651 z2.re = xround (z2.re);
1652 z2.im = xround (z2.im);
1653 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1654 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxidiv()")) {
1655 return X_0_0I;
1656 } else {
1657 complex_64 w;
1658 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1659 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1660 w.re = xfix (xdiv (w.re, mod2));
1661 w.im = xfix (xdiv (w.im, mod2));
1662 return w;
1663 }
1664 }
1665
1666 complex_64 cxgmod (complex_64 z1, complex_64 z2)
1667 {
1668 z1.re = xround (z1.re);
1669 z1.im = xround (z1.im);
1670 z2.re = xround (z2.re);
1671 z2.im = xround (z2.im);
1672 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1673 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxgmod()")) {
1674 return X_0_0I;
1675 } else {
1676 complex_64 w, z;
1677 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1678 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1679 w.re = xround (xdiv (w.re, mod2));
1680 w.im = xround (xdiv (w.im, mod2));
1681 z.re = xadd (xmul (w.re, z2.re), xmul (w.im, z2.im), 1);
1682 z.im = xadd (xmul (w.im, z2.re), xmul (w.re, z2.im), 0);
1683 w.re = xround (xadd (z1.re, z.re, 1));
1684 w.im = xround (xadd (z1.im, z.im, 1));
1685 return w;
1686 }
1687 }
1688
1689 complex_64 cxmod (complex_64 z1, complex_64 z2)
1690 {
1691 z1.re = xround (z1.re);
1692 z1.im = xround (z1.im);
1693 z2.re = xround (z2.re);
1694 z2.im = xround (z2.im);
1695 real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1696 if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxmod()")) {
1697 return X_0_0I;
1698 } else {
1699 complex_64 w, z;
1700 w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1701 w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1702 w.re = xfix (xdiv (w.re, mod2));
1703 w.im = xfix (xdiv (w.im, mod2));
1704 z.re = xadd (xmul (w.re, z2.re), xmul (w.im, z2.im), 1);
1705 z.im = xadd (xmul (w.im, z2.re), xmul (w.re, z2.im), 0);
1706 w.re = xround (xadd (z1.re, z.re, 1));
1707 w.im = xround (xadd (z1.im, z.im, 1));
1708 return w;
1709 }
1710 }
1711
1712 static void unit_root (int_4 i, int_4 n, real_32 * a, real_32 * b)
1713 {
1714 // We assume n != 0
1715 i %= n;
1716 *a = xdiv (xmul (xreal_2 (inttox (i), 1), X_PI), inttox (n));
1717 *b = xsin (*a);
1718 *a = xcos (*a);
1719 if (xgetexp (b) < -80) {
1720 *b = X_0;
1721 }
1722 if (xgetexp (a) < -80) {
1723 *a = X_0;
1724 }
1725 }
1726
1727 complex_64 cxpwr (complex_64 z, int_4 n)
1728 {
1729 real_32 mod = cxabs (z);
1730 if (xsgn (&mod) <= 0) {
1731 (void) xsigerr (n <= 0, XEBADEXP, "cxpwr()");
1732 return X_0_0I;
1733 } else {
1734 real_32 arg = xmul (inttox (n), cxarg (z));
1735 mod = xpwr (mod, n);
1736 complex_64 w;
1737 w.re = xmul (mod, xcos (arg));
1738 w.im = xmul (mod, xsin (arg));
1739 return w;
1740 }
1741 }
1742
1743 complex_64 cxroot (complex_64 z, int_4 i, int_4 n)
1744 {
1745 if (xsigerr (n == 0, XEBADEXP, "cxroot()")) {
1746 return X_0_0I;
1747 } else {
1748 real_32 mod = cxabs (z);
1749 if (xsgn (&mod) <= 0) {
1750 (void) xsigerr (n < 0, XEBADEXP, "cxroot()");
1751 return X_0_0I;
1752 } else { // mod > 0
1753 real_32 arg = xdiv (cxarg (z), inttox (n));
1754 real_32 e = xdiv (X_1, inttox (n)); // 1/n
1755 // x^e = exp(e*log(x)) for any x > 0
1756 mod = xexp (xmul (e, xlog (mod)));
1757 complex_64 w, zz;
1758 w.re = xmul (mod, xcos (arg));
1759 w.im = xmul (mod, xsin (arg));
1760 real_32 a, b;
1761 unit_root (i, n, &a, &b);
1762 zz.re = xadd (xmul (w.re, a), xmul (w.im, b), 1);
1763 zz.im = xadd (xmul (w.im, a), xmul (w.re, b), 0);
1764 return zz;
1765 }
1766 }
1767 }
1768
1769 complex_64 cxpow (complex_64 z1, complex_64 z2)
1770 {
1771 real_32 mod = cxabs (z1);
1772 if (xsgn (&mod) <= 0) {
1773 (void) xsigerr (xsgn (&z2.re) <= 0, XEBADEXP, "cxpow()");
1774 return X_0_0I;
1775 } else {
1776 real_32 arg = cxarg (z1);
1777 real_32 a = xadd (xmul (z2.re, xlog (mod)), xmul (z2.im, arg), 1);
1778 real_32 b = xadd (xmul (z2.re, arg), xmul (z2.im, xlog (mod)), 0);
1779 complex_64 w;
1780 w.re = xmul (xexp (a), xcos (b));
1781 w.im = xmul (xexp (a), xsin (b));
1782 return w;
1783 }
1784 }
1785
1786 logical_4 cxis0 (const complex_64 * z)
1787 {
1788 return (xis0 (&z->re) && xis0 (&z->im));
1789 }
1790
1791 logical_4 cxnot0 (const complex_64 * z)
1792 {
1793 return (xnot0 (&z->re) || xnot0 (&z->im));
1794 }
1795
1796 logical_4 cxeq (complex_64 z1, complex_64 z2)
1797 {
1798 return (xreal_cmp (&z1.re, &z2.re) == 0 && xreal_cmp (&z1.im, &z2.im) == 0);
1799 }
1800
1801 logical_4 cxneq (complex_64 z1, complex_64 z2)
1802 {
1803 return (xreal_cmp (&z1.re, &z2.re) != 0 || xreal_cmp (&z1.im, &z2.im) != 0);
1804 }
1805
1806 complex_64 cxsin (complex_64 z)
1807 {
1808 complex_64 w1, w2;
1809 w1 = cxdrot (z); // Now w1= i*z, where i={0,1}
1810 w2 = cxrrot (z); // Now w2= -i*z, where i={0,1}
1811 w1 = cxsub (cxexp (w1), cxexp (w2));
1812 w2.re = xreal_2 (w1.im, -1);
1813 w1.re.value[0] ^= X_SIGN_MASK;
1814 w2.im = xreal_2 (w1.re, -1); // Now w2= (exp(i*z)-exp(-i*z))/2i
1815 return w2;
1816 }
1817
1818 complex_64 cxcos (complex_64 z)
1819 {
1820 complex_64 w1, w2;
1821 w1 = cxdrot (z); // Now w1= i*z, where i={0,1}
1822 w2 = cxrrot (z); // Now w2= -i*z, where i={0,1}
1823 w1 = cxsum (cxexp (w1), cxexp (w2));
1824 w2.re = xreal_2 (w1.re, -1);
1825 w2.im = xreal_2 (w1.im, -1);
1826 return w2;
1827 }
1828
1829 complex_64 cxtan (complex_64 z)
1830 {
1831 if (xsigerr (xreal_cmp (&(z.im), &xEmax) > 0, XFPOFLOW, NO_TEXT)) {
1832 return X_0_1I;
1833 } else if (xsigerr (xreal_cmp (&(z.im), &xEmin) < 0, XFPOFLOW, NO_TEXT)) {
1834 return cxneg (X_0_1I);
1835 } else {
1836 complex_64 w;
1837 if (xsigerr (!cxrec (cxcos (z), &w), XEDOM, "cxtan()")) {
1838 return X_0_0I;
1839 } else {
1840 return cxmul (cxsin (z), w);
1841 }
1842 }
1843 }
1844
1845 complex_64 cxasin (complex_64 z)
1846 {
1847 complex_64 w = cxsqrt (cxsub (X_1_0I, cxsqr (z)));
1848 real_32 ls = cxabs (cxsum (cxdrot (z), w));
1849 real_32 rs = xmul (X_VSV, cxabs (z));
1850 if (xreal_cmp (&ls, &rs) < 0) {
1851 w = cxdrot (cxlog (cxsub (w, cxdrot (z))));
1852 } else {
1853 w = cxrrot (cxlog (cxsum (cxdrot (z), w)));
1854 }
1855 return w;
1856 }
1857
1858 complex_64 cxacos (complex_64 z)
1859 {
1860 complex_64 w = cxsqrt (cxsub (X_1_0I, cxsqr (z)));
1861 real_32 ls = cxabs (cxsum (z, cxdrot (w)));
1862 real_32 rs = xmul (X_VSV, cxabs (z));
1863 if (xreal_cmp (&ls, &rs) < 0) {
1864 w = cxdrot (cxlog (cxsub (z, cxdrot (w))));
1865 } else {
1866 w = cxrrot (cxlog (cxsum (z, cxdrot (w))));
1867 }
1868 return w;
1869 }
1870
1871 complex_64 cxatan (complex_64 z)
1872 {
1873 real_32 mod = xadd (xabs (z.im), X_1, 1);
1874 int_4 errcond = xsgn (&(z.re)) == 0 && xsgn (&mod) == 0;
1875 if (xsigerr (errcond, XEDOM, "cxatan()")) {
1876 return X_0_0I;
1877 } else {
1878 // This way, cxatan() also works correctly for complex numbers very far from the origin.
1879 complex_64 w;
1880 mod = cxabs (z);
1881 if (xreal_cmp (&mod, &X_VGV) > 0) {
1882 w = cxsqrt (cxsum (X_1_0I, cxsqr (z)));
1883 w = cxdiv (cxsum (X_1_0I, cxdrot (z)), w);
1884 w = cxrrot (cxlog (w));
1885 } else {
1886 w = cxdiv (cxsum (X_1_0I, cxdrot (z)), cxsub (X_1_0I, cxdrot (z)));
1887 w = cxrrot (cxlog_sqrt (w));
1888 }
1889 return w;
1890 }
1891 }
1892
1893 complex_64 cxflt(complex_8 z)
1894 {
1895 complex_64 zz;
1896 zz.re = flttox (creal (z));
1897 zz.im = flttox (cimag (z));
1898 return zz;
1899 }
1900
1901 complex_64 cxdbl(complex_16 z)
1902 {
1903 complex_64 zz;
1904 zz.re = dbltox (creal (z));
1905 zz.im = dbltox (cimag (z));
1906 return zz;
1907 }
1908
1909 complex_64 cxquad(complex_32 z)
1910 {
1911 complex_64 zz;
1912 zz.re = dbltox (crealq (z));
1913 zz.im = dbltox (cimagq (z));
1914 return zz;
1915 }
1916
1917 complex_32 qxcmplx(real_32 re, real_32 im)
1918 {
1919 complex_32 zz;
1920 __real__ zz = xtoquad (re);
1921 __imag__ zz = xtoquad (im);
1922 return zz;
1923 }
1924
1925 complex_64 _cquadtop (complex_64 *zz, complex_32 z)
1926 {
1927 zz->re = dbltox (crealq (z));
1928 zz->im = dbltox (cimagq (z));
1929 return *zz;
1930 }
1931
1932 complex_64 cxreal32(real_32 z)
1933 {
1934 complex_64 zz;
1935 zz.re = z;
1936 zz.im = X_0;
1937 return zz;
1938 }
1939
1940 complex_64 _coctotop(complex_64 *zz, real_32 z)
1941 {
1942 zz->re = z;
1943 zz->im = X_0;
1944 return *zz;
1945 }
1946
1947 // VIF additions (REAL*32)
1948
1949 real_32 xtenup (int_4 n)
1950 {
1951 if (n == 0) {
1952 return X_1;
1953 } else if (n == 1) {
1954 return X_10;
1955 } else if (n == -1) {
1956 return X_1_OVER_10;
1957 }
1958 real_32 s = X_10, t;
1959 unsigned k, m;
1960 t = X_1;
1961 if (n < 0) {
1962 m = -n;
1963 if ((xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xtenup()"))) {
1964 return X_0;
1965 }
1966 s = xdiv (X_1, s);
1967 } else {
1968 m = n;
1969 }
1970 if (m != 0) {
1971 k = 1;
1972 while (1) {
1973 if (k & m) {
1974 t = xmul (s, t);
1975 }
1976 if ((k <<= 1) <= m) {
1977 s = xmul (s, s);
1978 } else {
1979 break;
1980 }
1981 }
1982 } else {
1983 xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xtenup()");
1984 }
1985 return t;
1986 }
1987
1988 real_32 xcotan (real_32 x)
1989 {
1990 // Intrinsic function so arguments are not pointers.
1991 return xdiv (X_1, xtan (x));
1992 }
1993
1994 real_32 xacotan (real_32 x)
1995 {
1996 // Intrinsic function so arguments are not pointers.
1997 return xatan (xdiv (X_1, x));
1998 }
1999
2000 real_32 _xsgn (real_32 a, real_32 b)
2001 {
2002 // Intrinsic function so arguments are not pointers.
2003 real_32 x = (xgetsgn (&a) == 0 ? a : xneg (a));
2004 return (xgetsgn (&b) == 0 ? x : xneg (x));
2005 }
2006
2007 real_32 _zabs_64 (real_32 re, real_32 im)
2008 {
2009 // Intrinsic function so arguments are not pointers.
2010 return cxabs (CMPLXX (re, im));
2011 }
2012
2013 real_32 _xhypot (real_32 a, real_32 b)
2014 {
2015 if (xis0 (&a) && xis0 (&b)) {
2016 return X_0;
2017 } else if (xgt (a, b)) {
2018 real_32 q = xdiv (b, a);
2019 real_32 z = xsqrt (xsum (X_1, xmul (q, q)));
2020 return xmul (xabs (a), z);
2021 } else {
2022 real_32 q = xdiv (a, b);
2023 real_32 z = xsqrt (xsum (X_1, xmul (q, q)));
2024 return xmul (xabs (b), z);
2025 }
2026 }
2027
2028 // Conversion.
2029
2030 real_32 inttox (int_4 n)
2031 {
2032 REAL32 pe;
2033 unt_2 *pc, e;
2034 unt_4 k, h;
2035 bzero (pe, sizeof (pe));
2036 k = ABS (n);
2037 pc = (unt_2 *) &k;
2038 if (n == 0) {
2039 return *(real_32 *) pe;
2040 }
2041
2042 #if __BYTE_ORDER == __LITTLE_ENDIAN
2043 pe[1] = *(pc + 1);
2044 pe[2] = *pc;
2045 #else
2046 pe[1] = *pc;
2047 pe[2] = *(pc + 1);
2048 #endif
2049
2050 for (e = 0, h = 1; h <= k && e < ((8 * sizeof (unt_4)) - 1); h <<= 1, ++e);
2051 if (h <= k) {
2052 e += 1;
2053 }
2054 *pe = X_EXPO_BIAS + e - 1;
2055 if (n < 0) {
2056 *pe |= X_SIGN_MASK;
2057 }
2058 xlshift ((8 * sizeof (unt_4)) - e, pe + 1, FLT256_LEN);
2059 return *(real_32 *) pe;
2060 }
2061
2062 real_4 xtoflt (real_32 s)
2063 {
2064 // An extended floating point number is represented as a combination of the
2065 // following elements:
2066 //
2067 // sign bit(s): 0 -> positive, 1 -> negative ;
2068 // exponent(e): 15-bit biased integer (bias=16383) ;
2069 // mantissa(m): 7 (or more) words of 16 bit length with the
2070 // leading 1 explicitly represented.
2071 //
2072 // Thus f = (-1)^s * 2^(e-16383) * m, with 1 <= m < 2.
2073 //
2074 // This format supports a dynamic range of:
2075 //
2076 // 2^16384 > f > 2^(-16383) or
2077 // 1.19*10^4932 > f > 1.68*10^(-4932).
2078 //
2079 // Special values of the exponent are:
2080 //
2081 // all ones -> infinity (floating point overflow)
2082 // all zeros -> number = zero.
2083 //
2084 // Underflow in operations is handled by a flush to zero. Thus, a number with
2085 // the exponent zero and nonzero mantissa is invalid (not-a-number).
2086 //
2087 //
2088 union {unt_2 pe[2]; real_4 f;} v;
2089 unt_2 *pc, u;
2090 int_2 i, e;
2091 pc = SEXP (s);
2092 u = *pc & X_SIGN_MASK;
2093 e = (*pc & X_EXPO_MASK) - xF_bias;
2094 //
2095 // u is the sign of the number s.
2096 // e == (exponent of s) + 127
2097 //
2098 if (e >= xF_max) {
2099 return (!u ? FLT_MAX : -FLT_MAX);
2100 }
2101 if (e <= 0) {
2102 return 0.;
2103 }
2104 for (i = 0; i < 2; v.pe[i] = *++pc, i++);
2105 // In the IEEE 754 Standard the leading 1 is not represented.
2106 v.pe[0] &= X_EXPO_MASK;
2107 // Now in pe[0], pe[1] we have 31 bits of mantissa,
2108 // but only the first 23 of them must be put in the
2109 // final real_4 number.
2110 xrshift (xF_lex - 1, v.pe, 2);
2111 // We have just loaded the mantissa and now we
2112 // are going to load exponent and sign.
2113 v.pe[0] |= (e << (16 - xF_lex));
2114 v.pe[0] |= u;
2115 #if __BYTE_ORDER == __LITTLE_ENDIAN
2116 u = v.pe[0];
2117 v.pe[0] = v.pe[1];
2118 v.pe[1] = u;
2119 #endif
2120 return v.f;
2121 }
2122
2123 real_32 flttox (real_4 y)
2124 {
2125 REAL32 pe;
2126 unt_2 *pc, u;
2127 int_2 i, e;
2128 if (y < FLT_MIN && y > -FLT_MIN) {
2129 return X_0;
2130 }
2131 bzero (pe, sizeof (pe));
2132 pc = (unt_2 *) &y;
2133 #if __BYTE_ORDER == __LITTLE_ENDIAN
2134 pc += 1;
2135 #endif
2136 u = *pc & X_SIGN_MASK;
2137 e = xF_bias + ((*pc & X_EXPO_MASK) >> (16 - xF_lex));
2138 // Now u is the sign of y and e is the
2139 // biased exponent (exponent + bias).
2140 #if __BYTE_ORDER == __LITTLE_ENDIAN
2141 for (i = 1; i < 3; pe[i] = *pc--, i++);
2142 #else
2143 for (i = 1; i < 3; pe[i] = *pc++, i++);
2144 #endif
2145 pc = pe + 1;
2146 xlshift (xF_lex - 1, pc, 2);
2147 *pc |= X_SIGN_MASK;
2148 // We have just put in pe[1],pe[2] the whole
2149 // mantissa of y with a leading 1.
2150 // Now we only have to put exponent and sign
2151 // in pe[0].
2152 *pe = e;
2153 *pe |= u;
2154 return *(real_32 *) pe;
2155 }
2156
2157 real_8 xtodbl (real_32 s)
2158 {
2159 union {unt_2 pe[4]; real_8 d;} v;
2160 unt_2 *pc, u;
2161 int_2 i, e;
2162 pc = SEXP (s);
2163 u = *pc & X_SIGN_MASK;
2164 e = (*pc & X_EXPO_MASK) - xD_bias;
2165 if (e >= xD_max) {
2166 return (!u ? DBL_MAX : -DBL_MAX);
2167 }
2168 if (e <= 0) {
2169 return 0.;
2170 }
2171 for (i = 0; i < 4; v.pe[i] = *++pc, i++);
2172 v.pe[0] &= X_EXPO_MASK;
2173 xrshift (xD_lex - 1, v.pe, 4);
2174 v.pe[0] |= (e << (16 - xD_lex));
2175 v.pe[0] |= u;
2176 #if __BYTE_ORDER == __LITTLE_ENDIAN
2177 u = v.pe[3];
2178 v.pe[3] = v.pe[0];
2179 v.pe[0] = u;
2180 u = v.pe[2];
2181 v.pe[2] = v.pe[1];
2182 v.pe[1] = u;
2183 #endif
2184 return v.d;
2185 }
2186
2187 real_32 dbltox (real_8 y)
2188 {
2189 REAL32 pe;
2190 unt_2 *pc, u;
2191 int_2 i, e;
2192 if (y < DBL_MIN && y > -DBL_MIN) {
2193 return X_0;
2194 }
2195 bzero (pe, sizeof (pe));
2196 pc = (unt_2 *) &y;
2197 #if __BYTE_ORDER == __LITTLE_ENDIAN
2198 pc += 3;
2199 #endif
2200 u = *pc & X_SIGN_MASK;
2201 e = xD_bias + ((*pc & X_EXPO_MASK) >> (16 - xD_lex));
2202 #if __BYTE_ORDER == __LITTLE_ENDIAN
2203 for (i = 1; i < 5; pe[i] = *pc--, i++);
2204 #else
2205 for (i = 1; i < 5; pe[i] = *pc++, i++);
2206 #endif
2207 pc = pe + 1;
2208 xlshift (xD_lex - 1, pc, 4);
2209 *pc |= X_SIGN_MASK;
2210 *pe = e;
2211 *pe |= u;
2212 return *(real_32 *) pe;
2213 }
2214
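// A short round-trip sketch for the four conversions above, under the same
// hypothetical VIF_REAL32_DEMO guard; 1.25 and -0.3125 are exactly
// representable in real_4, real_8 and REAL*32, so both round trips should be
// exact.

#ifdef VIF_REAL32_DEMO

static void demo_real32_conversion (void)
{
  real_4 f = 1.25f;
  real_8 d = -0.3125;
  printf ("%g %g\n", (double) xtoflt (flttox (f)), xtodbl (dbltox (d)));
}

#endif
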
2215 real_32 quadtox (real_16 x)
2216 {
2217 // Intrinsic function so arguments are not pointers.
2218 REAL32 z;
2219 unt_2 *y;
2220 int_4 i;
2221 if (x == 0.0q) {
2222 return X_0;
2223 }
2224 int_4 sinf = isinfq (x);
2225 if (sinf == 1) {
2226 return X_PLUS_INF;
2227 } else if (sinf == -1) {
2228 return X_MINUS_INF;
2229 }
2230 if (isnanq (x)) {
2231 return X_NAN;
2232 }
2233 bzero (z, sizeof (z));
2234 y = (unt_2 *) &x;
2235 for (i = 0; i <= 7; i++) {
2236 #if __BYTE_ORDER == __LITTLE_ENDIAN
2237 z[i] = y[7 - i];
2238 #else
2239 z[i] = y[i];
2240 #endif
2241 }
2242 // real_16 keeps the leading mantissa bit implicit, the HPA format stores it explicitly.
2243 unt_2 cy = 0x8000;
2244 for (i = 1; i < 8; i++) {
2245 if (z[i] & 0x1) {
2246 z[i] = (z[i] >> 1) | cy;
2247 cy = 0x8000;
2248 } else {
2249 z[i] = (z[i] >> 1) | cy;
2250 cy = 0x0;
2251 }
2252 }
2253 z[8] |= cy;
2254 return *(real_32 *) z;
2255 }
2256
2257 real_32 _quadtop (real_32 *z, real_16 x)
2258 {
2259 *z = quadtox (x);
2260 return *z;
2261 }
2262
2263 real_16 xtoquad (real_32 x)
2264 {
2265 // Intrinsic function so arguments are not pointers.
2266 REAL16 y;
2267 REAL32 z;
2268 int_4 i;
2269 memcpy (z, x.value, sizeof (real_32));
2270 // Catch NaN, +-Inf is handled correctly.
2271 if (xis_nan (&x)) {
2272 z[0] = 0x7fff;
2273 z[1] = 0xffff;
2274 }
2275 // real_16 keeps the leading mantissa bit implicit, the HPA format stores it explicitly.
2276 unt_2 cy = (z[8] & 0x8000 ? 0x1 : 0x0);
2277 for (i = 7; i > 0; i--) {
2278 if (z[i] & 0x8000) {
2279 z[i] = (z[i] << 1) | cy;
2280 cy = 0x1;
2281 } else {
2282 z[i] = (z[i] << 1) | cy;
2283 cy = 0x0;
2284 }
2285 }
2286 for (i = 0; i < 8; i++) {
2287 #if __BYTE_ORDER == __LITTLE_ENDIAN
2288 y[i] = z[7 - i];
2289 #else
2290 y[i] = z[i];
2291 #endif
2292 }
2293 // Avoid 'dereferencing type-punned pointer will break strict-aliasing rules'
2294 real_16 u;
2295 memcpy (&u, &y[0], sizeof (real_16));
2296 return u;
2297 }
2298
2299 real_16 cxrealq (complex_64 z)
2300 {
2301 return xtoquad (cxreal (z));
2302 }
2303
2304 real_16 cximagq (complex_64 z)
2305 {
2306 return xtoquad (cximag (z));
2307 }
2308
2309 real_32 strtox (char *s, char **end)
2310 {
2311 // This routine replaces the HPA solution - MvdV.
2312 // Algol 68 Genie employs the same algorithm.
2313 //
2314 // Initialize.
2315 #define N (FLT256_DIG + FLT256_GUARD)
2316 errno = 0;
2317 real_32 y[N];
2318 if (end != NO_REF_TEXT && (*end) != NO_TEXT) {
2319 (*end) = &(s[0]);
2320 }
2321 while (isspace (s[0])) {
2322 s++;
2323 }
2324 real_32 W;
2325 if (s[0] == '-') {
2326 W = X_MINUS_1;
2327 s++;
2328 } else if (s[0] == '+') {
2329 W = X_1;
2330 s++;
2331 } else {
2332 W = X_1;
2333 }
2334 // Scan mantissa digits and put them into "y".
2335 while (s[0] == '0') {
2336 s++;
2337 }
2338 int dot = -1, pos = 0, pow = 0;
2339 while (pow < N && (isdigit (s[pos]) || s[pos] == '.')) {
2340 if (s[pos] == '.') {
2341 dot = pos;
2342 } else {
2343 switch (s[pos]) {
2344 case '0': y[pow] = X_0; break;
2345 case '1': y[pow] = W; break;
2346 case '2': y[pow] = xmul (X_2, W); break;
2347 case '3': y[pow] = xmul (X_3, W); break;
2348 case '4': y[pow] = xmul (X_4, W); break;
2349 case '5': y[pow] = xmul (X_5, W); break;
2350 case '6': y[pow] = xmul (X_6, W); break;
2351 case '7': y[pow] = xmul (X_7, W); break;
2352 case '8': y[pow] = xmul (X_8, W); break;
2353 case '9': y[pow] = xmul (X_9, W); break;
2354 }
2355 W = xdiv (W, X_10);
2356 pow++;
2357 }
2358 pos++;
2359 }
2360 // Skip trailing digits.
2361 while (isdigit (s[pos])) {
2362 pos++;
2363 }
2364 if (end != NO_REF_TEXT && (*end) != NO_TEXT) {
2365 (*end) = &(s[pos]);
2366 }
2367 // Sum from low to high to preserve precision.
2368 real_32 sum = X_0;
2369 for (int k = pow - 1; k >= 0; k--) {
2370 sum = xsum (sum, y[k]);
2371 }
2372 // Optional exponent.
2373 int expo = 0;
2374 while (s[pos] == ' ') {
2375 pos++;
2376 }
2377 if (tolower (s[pos]) == 'e' || tolower (s[pos]) == 'q' || tolower (s[pos]) == 'x') {
2378 pos++;
2379 if (isdigit (s[pos]) || s[pos] == '-' || s[pos] == '+') {
2380 expo = (int) strtol (&(s[pos]), end, 10);
2381 }
2382 }
2383 // Standardize.
2384 if (dot >= 0) {
2385 expo += dot - 1;
2386 } else {
2387 expo += pow - 1;
2388 }
2389 while (xnot0 (&sum) && xlt1 (sum)) {
2390 sum = xmul (sum, X_10);
2391 expo--;
2392 }
2393 if (errno == 0) {
2394 return xmul (sum, xtenup (expo));
2395 } else {
2396 return X_0;
2397 }
2398 #undef N
2399 }
2400
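// A hedged usage sketch for the parser above (atox, defined next, is the
// wrapper that discards the end pointer; xtodbl only makes the result
// printable):
//
//   char buf[] = " -1.25e+2";
//   real_32 v = atox (buf);              // -125
//   printf ("%g\n", xtodbl (v));
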
2401 real_32 atox (char *q)
2402 {
2403 return strtox (q, NO_REF_TEXT);
2404 }
2405
2406 real_32 _xdimx (real_32 x, real_32 y)
2407 {
2408 if (xgt (x, y)) {
2409 return xsub (x, y);
2410 } else {
2411 return X_0;
2412 }
2413 }
2414
2415 int_4 _xint4 (real_32 x_)
2416 {
2417 static real_16 y_;
2418 y_ = xtoquad (x_);
2419 return (int_4) _anintq (y_);
2420 }
2421
2422 int_4 _xnint4 (real_32 x_)
2423 {
2424 static real_16 y_;
2425 if (xgt (x_, X_0)) {
2426 y_ = xtoquad (xsum (x_, X_1_OVER_2));
2427 }
2428 else {
2429 y_ = xtoquad (xsub (x_, X_1_OVER_2));
2430 }
2431 return (int_4) (y_);
2432 }
2433
2434 int_8 _xint8 (real_32 x_)
2435 {
2436 static real_16 y_;
2437 y_ = xtoquad (x_);
2438 return (int_8) (y_);
2439 }
2440
2441 int_8 _xnint8 (real_32 x_)
2442 {
2443 static real_16 y_;
2444 if (xgt (x_, X_0)) {
2445 y_ = xtoquad (xsum (x_, X_1_OVER_2));
2446 }
2447 else {
2448 y_ = xtoquad (xsub (x_, X_1_OVER_2));
2449 }
2450 return (int_8) (y_);
2451 }
2452
2453 void _xdump (real_32 *z)
2454 {
2455 printf ("{{");
2456 for (int i = 0; i <= FLT256_LEN; i++) {
2457 printf("0x%04x", (z->value)[i]);
2458 if (i < FLT256_LEN) {
2459 printf(", ");
2460 }
2461 }
2462 printf ("}}\n");
2463 fflush (stdout);
2464 }
2465
2466 real_32 _x1mach (int_4 *i)
2467 {
2468 switch (*i) {
2469 // d1mach(1) = b**(emin-1), the smallest positive magnitude.
2470 case 1: return FLT256_MIN;
2471 // d1mach(2) = b**emax*(1 - b**(-t)), the largest magnitude.
2472 case 2: return FLT256_MAX;
2473 // d1mach(3) = b**(-t), the smallest relative spacing.
2474 case 3: return FLT256_EPSILON_HALF;
2475 // d1mach(4) = b**(1-t), the largest relative spacing.
2476 case 4: return FLT256_EPSILON;
2477 // d1mach(5) = log10(b)
2478 case 5: return X_LOG10_2;
2479 //
2480 default: return X_0;
2481 }
2482 }
2483
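// Hedged note: _x1mach mirrors the classic FORTRAN D1MACH machine-constants
// interface, e.g.
//
//   int_4 four = 4;
//   real_32 eps = _x1mach (&four);   // FLT256_EPSILON, the largest relative spacing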