rts-real32.c

     
   1  //! @file rts-real32.c
   2  //! @author (see below)
   3  //
   4  //! @section Copyright
   5  //
   6  // This file is part of VIF - vintage FORTRAN compiler.
   7  // Copyright 2020-2024 J. Marcel van der Veer <algol68g@xs4all.nl>.
   8  //
   9  //! @section License
  10  //
  11  // This program is free software; you can redistribute it and/or modify it 
  12  // under the terms of the GNU General Public License as published by the 
  13  // Free Software Foundation; either version 3 of the License, or 
  14  // (at your option) any later version.
  15  //
  16  // This program is distributed in the hope that it will be useful, but 
  17  // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  18  // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
  19  // more details. You should have received a copy of the GNU General Public 
  20  // License along with this program. If not, see <http://www.gnu.org/licenses/>.
  21  
  22  //! @section Synopsis
  23  //!
  24  //! Runtime support for REAL*32 and COMPLEX*64.
  25  
  26  // The code is based on the HPA Library, available from:
  27  //   <http://download-mirror.savannah.gnu.org/releases/hpalib/>
  28  //
  29  //   Copyright (C) 2000 Daniel A. Atkinson <DanAtk@aol.com>
  30  //   Copyright (C) 2004 Ivano Primi <ivprimi@libero.it>  
  31  //   Copyright (C) 2022 Marcel van der Veer <algol68g@xs4all.nl> - VIF version.
  32  //
  33  // HPA code is adapted to work with VIF to implement REAL*32 and COMPLEX*64.
  34  // HPA was chosen because it stores a 256-bit float as a 256-bit struct, which
  35  // is convenient for FORTRAN.
  36  //
  37  // The HPA Library is free software; you can redistribute it and/or
  38  // modify it under the terms of the GNU Lesser General Public
  39  // License as published by the Free Software Foundation; either
  40  // version 2.1 of the License, or (at your option) any later version.
  41  //
  42  // The HPA Library is distributed in the hope that it will be useful,
  43  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  44  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  45  // Lesser General Public License for more details.
  46  //
  47  // You should have received a copy of the GNU Lesser General Public
  48  // License along with the HPA Library; if not, write to the Free
  49  // Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
  50  // 02110-1301 USA.
  51  
  52  // The functions forming the HPA library are all implemented in a portable 
  53  // fashion in the C language. The IEEE 754 standard for floating point 
  54  // hardware and software is assumed in the PC/Unix version of this library. 
  55  // A REAL*32 number is represented as a combination of the following elements:
  56  //
  57  //   sign bit(s): 0 -> positive, 1 -> negative ;
  58  //   exponent(e): 15-bit biased integer (bias=16383) ;
  59  //   mantissa(m): 15 words of 16 bits each, *including* the leading 1. 
  60  //
  61  // The range of representable numbers is then given by
  62  //
  63  //   2^16384 > x > 2^(-16383), i.e. 1.19*10^4932 > x > 1.68*10^(-4932)
  64  // 
  65  // Special values of the exponent are:
  66  //
  67  //  all ones -> infinity (floating point overflow)
  68  //  all zeros -> number = zero. 
  69  //
  70  // Underflow in operations is handled by a flush to zero. Thus, a number 
  71  // with the exponent zero and nonzero mantissa is invalid (not-a-number). 
  72  // From the point of view of the HPA library, a complex number is a 
  73  // structure formed by two REAL*32 numbers.
  74  //
  75  // HPA cannot extend precision beyond the preset, hardcoded precision.
  76  // Hence some math routines will not achieve full precision.
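      //
      // As a worked example of this layout (illustrative only, derived from the
      // description above): the value 1.0 is stored with sign bit 0, biased
      // exponent 16383 (true exponent 0) and mantissa 1.000..., i.e. a leading
      // 16-bit word holding sign and exponent, a first mantissa word 0x8000 and
      // fourteen zero words, since x = (-1)^s * 2^(e-16383) * m with m = 1.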
  77  
  78  #include <vif.h>
  79  #include <rts-real32.h>
  80  
  81  static const char *errmsg[] = {
  82    "No error",
  83    "Division by zero",
  84    "Out of domain",
  85    "Bad exponent",
  86    "Floating point overflow",
  87    "Invalid error code"
  88  };
  89  
  90  #define SEXP(z) ((unt_2 *) &(z.value))
  91  
  92  void _pi32 (real_32 *x)
  93  {
  94    *x = X_PI;
  95  }
  96  
  97  int_4 xsigerr (int_4 errcond, int_4 errcode, const char *where)
  98  {
  99  // errcond must come from the evaluation of an error condition.
 100  // errcode, which should describe the type of the error,
 101  // should always be one of XEDIV, XEDOM, XEBADEXP and XFPOFLOW.
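      // Typical use in the routines below: pass the already evaluated condition
      // together with the error class and the location string, for example
      // xsigerr (xreal_cmp (&t, &X_0) == 0, XEDIV, "xdiv()").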
 102    if (errcond == 0) {
 103      errcode = 0;
 104    }
 105    if (errcode < 0 || errcode > XNERR) {
 106      errcode = XEINV;
 107    }
 108    if (errcode != 0) {
 109      RTE (where, errmsg[errcode]);
 110    }
 111    return errcode;
 112  }
 113  
 114  // Elementary stuff.
 115  
 116  inline real_32 xneg (real_32 s)
 117  {
 118    unt_2 *p = SEXP (s);
 119    *p ^= X_SIGN_MASK;
 120    return s;
 121  }
 122  
 123  inline real_32 xabs (real_32 s)
 124  {
 125    unt_2 *p = SEXP (s);
 126    *p &= X_EXPO_MASK;
 127    return s;
 128  }
 129  
 130  inline int_4 xgetexp (const real_32 * ps)
 131  {
 132    unt_2 *q = (unt_2 *) &(ps->value);
 133    return (*q & X_EXPO_MASK) - X_EXPO_BIAS;
 134  }
 135  
 136  inline int_4 xgetsgn (const real_32 * ps)
 137  {
 138    unt_2 *q = (unt_2 *) &(ps->value);
 139    return (*q & X_SIGN_MASK);
 140  }
 141  
 142  int_4 xreal_cmp (const real_32 * pa, const real_32 * pb)
 143  {
 144    unt_2 e, k, *p, *q, p0, q0;
 145    int_4 m;
 146    p = (unt_2 *) &(pa->value);
 147    q = (unt_2 *) &(pb->value);
 148    for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
 149    if (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK) {
 150  // *pa is actually zero 
 151      p0 = 0;
 152    } else {
 153      p0 = *p;
 154    }
 155    for (m = 1; m <= FLT256_LEN && q[m] == 0; m++);
 156    if (m > FLT256_LEN && (*q & X_EXPO_MASK) < X_EXPO_MASK) {
 157  // *pb is actually zero 
 158      q0 = 0;
 159    } else {
 160      q0 = *q;
 161    }
 162    e = p0 & X_SIGN_MASK;
 163    k = q0 & X_SIGN_MASK;
 164    if (e && !k) {
 165      return -1;
 166    } else if (!e && k) {
 167      return 1;
 168    } else {                        // *pa and *pb have the same sign 
 169      m = (e) ? -1 : 1;
 170      e = p0 & X_EXPO_MASK;
 171      k = q0 & X_EXPO_MASK;
 172      if (e > k) {
 173        return m;
 174      } else if (e < k) {
 175        return -m;
 176      } else {
 177        for (e = 0; *(++p) == *(++q) && e < FLT256_LEN; ++e);
 178        if (e < FLT256_LEN) {
 179          return (*p > *q ? m : -m);
 180        } else {
 181          return 0;
 182        }
 183      }
 184    }
 185  }
 186  
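      // xreal_2 (s, m) scales by a power of two: it returns s * 2^m by adjusting
      // the biased exponent only, flushing to zero on underflow and signalling
      // XFPOFLOW (returning +-Inf) on overflow.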
 187  real_32 xreal_2 (real_32 s, int_4 m)
 188  {
 189    unt_2 *p = SEXP (s);
 190    int_4 e;
 191    for (e = 1; e <= FLT256_LEN && p[e] == 0; e++);
 192    if (e <= FLT256_LEN) {
 193      e = *p & X_EXPO_MASK;            // biased exponent 
 194      if (e + m < 0) {
 195        return X_0;
 196      } else if ((xsigerr (e + m >= X_EXPO_MASK, XFPOFLOW, NULL))) {
 197        return ((s.value[0] & X_SIGN_MASK) ? X_MINUS_INF : X_PLUS_INF);
 198      } else {
 199        *p += m;
 200        return s;
 201      }
 202    } else {                        // s is zero or +-Inf 
 203      return s;
 204    }
 205  }
 206  
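      // Argument-reduction helper (used by xexp2 below): provided the exponent
      // of s is below 15, the integer part of |s| is stored in *p and the
      // renormalized remainder is returned; *p is set to -1 on overflow and to
      // 0 when |s| < 1.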
 207  real_32 xsfmod (real_32 s, int_4 *p)
 208  {
 209    unt_2 *pa = SEXP (s);
 210    unt_2 *pb = pa + 1;
 211    int_2 e, k;
 212    e = (*pa & X_EXPO_MASK) - X_EXPO_BIAS;
 213    if ((xsigerr (e >= 15, XFPOFLOW, NULL))) {
 214      *p = -1;
 215      return s;
 216    } else if (e < 0) {
 217      *p = 0;
 218      return s;
 219    }
 220    *p = *pb >> (15 - e);
 221    xlshift (++e, pb, FLT256_LEN);
 222    *pa -= e;
 223    for (e = 0; *pb == 0 && e < xMax_p; ++pb, e += 16);
 224    if (e == xMax_p) {
 225      return X_0;
 226    }
 227    for (k = 0; !((*pb << k) & X_SIGN_MASK); ++k);
 228    if ((k += e)) {
 229      xlshift (k, pa + 1, FLT256_LEN);
 230      *pa -= k;
 231    }
 232    return s;
 233  }
 234  
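      // Shift the m-word mantissa array pm left by n bits; vacated words are
      // zero filled.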
 235  void xlshift (int_4 n, unt_2 *pm, int_4 m)
 236  {
 237    unt_2 *pc = pm + m - 1;
 238    if (n < 16 * m) {
 239      unt_2 *pa = pm + n / 16;
 240      m = n % 16;
 241      n = 16 - m;
 242      while (pa < pc) {
 243        *pm = (*pa++) << m;
 244        *pm++ |= *pa >> n;
 245      }
 246      *pm++ = *pa << m;
 247    }
 248    while (pm <= pc) {
 249      *pm++ = 0;
 250    }
 251  }
 252  
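      // Shift the m-word mantissa array pm right by n bits; vacated words are
      // zero filled.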
 253  void xrshift (int_4 n, unt_2 *pm, int_4 m)
 254  {
 255    unt_2 *pc = pm + m - 1;
 256    if (n < 16 * m) {
 257      unt_2 *pa = pc - n / 16;
 258      m = n % 16;
 259      n = 16 - m;
 260      while (pa > pm) {
 261        *pc = (*pa--) >> m;
 262        *pc-- |= *pa << n;
 263      }
 264      *pc-- = *pa >> m;
 265    }
 266    while (pc >= pm) {
 267      *pc-- = 0;
 268    }
 269  }
 270  
 271  real_32 _xI (real_32 x) 
 272  {
 273  // Intrinsic function so arguments are not pointers.
 274    return x;
 275  }
 276  
 277  real_32 _xnint (real_32 x)
 278  {
 279  // Intrinsic function so arguments are not pointers.
 280    if (xgetsgn (&x) == 0) {
 281      return xfloor (xadd (x, X_1_OVER_2, FALSE));
 282    } else {
 283      return xneg (xfloor (xadd (X_1_OVER_2, x, TRUE)));
 284    }
 285  }
 286  
 287  real_32 _xint (real_32 x) 
 288  {
 289  // Intrinsic function so arguments are not pointers.
 290    if (xgetsgn (&x) == 0) {
 291      return xtrunc (x);
 292    } else {
 293      return xneg (xtrunc (x));
 294    }
 295  }
 296  
 297  real_32 _xmax (real_32 a, real_32 b) 
 298  {
 299  // Intrinsic function so arguments are not pointers.
 300    if (xge (a, b)) {
 301      return a;
 302    } else {
 303      return b;
 304    }
 305  }
 306  
 307  real_32 _xmin (real_32 a, real_32 b) 
 308  {
 309  // Intrinsic function so arguments are not pointers.
 310    if (xle (a, b)) {
 311      return a;
 312    } else {
 313      return b;
 314    }
 315  }
 316  
 317  real_32 _xmod (real_32 a, real_32 b)
 318  {
 319  // Intrinsic function so arguments are not pointers.
 320    real_32 q = xdiv (a, b);
 321    if (xsgn (&q) >= 0) {
 322      q = xfloor (q);
 323    } else {
 324      q = xneg (xfloor (xneg (q)));
 325    }
 326    return xadd (a, xmul (b, q), 1);
 327  }
 328  
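      // xadd (s, t, f) returns s + t when f == 0 and s - t when f != 0; it is
      // the basic add/subtract kernel used throughout this file.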
 329  real_32 xadd (real_32 s, real_32 t, int_4 f)
 330  {
 331    REAL32 pe;
 332    unt_2 h, u;
 333    unt_2 *pc, *pf = pe;
 334    unt_4 n = 0;
 335    unt_2 *pa = SEXP (s);
 336    unt_2 *pb = SEXP (t);
 337    int_2 e = *pa & X_EXPO_MASK;
 338    int_2 k = *pb & X_EXPO_MASK;
 339    if (f != 0) {
 340      *pb ^= X_SIGN_MASK;
 341    }
 342    u = (*pb ^ *pa) & X_SIGN_MASK;
 343    f = 0;
 344    if (e > k) {
 345      if ((k = e - k) >= xMax_p) {
 346        return s;
 347      }
 348      xrshift (k, pb + 1, FLT256_LEN);
 349    } else if (e < k) {
 350      if ((e = k - e) >= xMax_p) {
 351        return t;
 352      }
 353      xrshift (e, pa + 1, FLT256_LEN);
 354      e = k;
 355      pc = pa;
 356      pa = pb;
 357      pb = pc;
 358    } else if (u != 0) {
 359      for (pc = pa, pf = pb; *(++pc) == *(++pf) && f < FLT256_LEN; ++f);
 360      if (f >= FLT256_LEN) {
 361        return X_0;
 362      }
 363      if (*pc < *pf) {
 364        pc = pa;
 365        pa = pb;
 366        pb = pc;
 367      }
 368      pf = pe + f;
 369    }
 370    h = *pa & X_SIGN_MASK;
 371    if (u != 0) {
 372      for (pc = pb + FLT256_LEN; pc > pb; --pc) {
 373        *pc = ~(*pc);
 374      }
 375      n = 1L;
 376    }
 377    for (pc = pe + FLT256_LEN, pa += FLT256_LEN, pb += FLT256_LEN; pc > pf;) {
 378      n += *pa;
 379      pa--;
 380      n += *pb;
 381      pb--;
 382      *pc = n;
 383      pc--;
 384      n >>= 16;
 385    }
 386    if (u != 0) {
 387      for (; *(++pc) == 0; ++f);
 388      for (k = 0; !((*pc << k) & X_SIGN_MASK); ++k);
 389      if ((k += 16 * f)) {
 390        if ((e -= k) <= 0) {
 391          return X_0;
 392        }
 393        xlshift (k, pe + 1, FLT256_LEN);
 394      }
 395    } else {
 396      if (n != 0) {
 397        ++e;
 398        if ((xsigerr (e == (short) X_EXPO_MASK, XFPOFLOW, NULL))) {
 399          return (!h ? X_PLUS_INF : X_MINUS_INF);
 400        }
 401        ++pf;
 402        xrshift (1, pf, FLT256_LEN);
 403        *pf |= X_SIGN_MASK;
 404      }
 405    }
 406    *pe = e;
 407    *pe |= h;
 408    return *(real_32 *) pe;
 409  }
 410  
 411  real_32 xmul (real_32 s, real_32 t)
 412  {
 413    unt_2 pe[FLT256_LEN + 2], *q0, *q1, h;
 414    unt_2 *pa, *pb, *pc;
 415    unt_4 m, n, p;
 416    int_2 e;
 417    int_2 k;
 418    q0 = SEXP (s);
 419    q1 = SEXP (t);
 420    e = (*q0 & X_EXPO_MASK) - X_EXPO_BIAS;
 421    k = (*q1 & X_EXPO_MASK) + 1;
 422    if ((xsigerr (e > (short) X_EXPO_MASK - k, XFPOFLOW, NULL))) {
 423      return (((s.value[0] & X_SIGN_MASK) ^ (t.value[0] & X_SIGN_MASK)) ? X_MINUS_INF : X_PLUS_INF);
 424    }
 425    if ((e += k) <= 0) {
 426      return X_0;
 427    }
 428    h = (*q0 ^ *q1) & X_SIGN_MASK;
 429    for (++q1, k = FLT256_LEN, p = n = 0L, pc = pe + FLT256_LEN + 1; k > 0; --k) {
 430      for (pa = q0 + k, pb = q1; pa > q0;) {
 431        m = *pa--;
 432        m *= *pb++;
 433        n += (m & 0xffffL);
 434        p += (m >> 16);
 435      }
 436      *pc-- = n;
 437      n = p + (n >> 16);
 438      p = 0L;
 439    }
 440    *pc = n;
 441    if (!(*pc & X_SIGN_MASK)) {
 442      --e;
 443      if (e <= 0) {
 444        return X_0;
 445      }
 446      xlshift (1, pc, FLT256_LEN + 1);
 447    }
 448    if ((xsigerr (e == (short) X_EXPO_MASK, XFPOFLOW, NULL))) {
 449      return (!h ? X_PLUS_INF : X_MINUS_INF);
 450    }
 451    *pe = e;
 452    *pe |= h;
 453    return *(real_32 *) pe;
 454  }
 455  
 456  real_32 xdiv (real_32 s, real_32 t)
 457  {
 458    unt_2 *pd = SEXP (s), *pc = SEXP (t);
 459  // Next makes xdiv robust at extreme exponents - MvdV.
 460    if ((*pd & X_EXPO_MASK) == (*pc & X_EXPO_MASK)) {
 461      *pd &= ~X_EXPO_MASK; 
 462      *pc &= ~X_EXPO_MASK; 
 463    }
 464  // HPA implementation.
 465    unt_2 e = *pc;
 466    *pc = X_EXPO_BIAS;
 467    if ((xsigerr (xreal_cmp (&t, &X_0) == 0, XEDIV, "xdiv()"))) {
 468      return X_0;
 469    } else {
 470      real_32 a = dbltox (1 / xtodbl (t));
 471      *pc = e;
 472      pc = SEXP (a);
 473      *pc += X_EXPO_BIAS - (e & X_EXPO_MASK);
 474      *pc |= e & X_SIGN_MASK;
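      // Refine the seed reciprocal with Newton-Raphson: a <- a * (2 - a * t);
      // each pass roughly doubles the number of correct bits.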
 475      for (unt_2 i = 0; i < xItt_div; ++i) {
 476        a = xmul (a, xadd (X_2, xmul (a, t), 1));
 477      }
 478      return xmul (s, a);
 479    }
 480  }
 481  
 482  
 483  real_32 xevtch (real_32 z, real_32 * a, int_4 m)
 484  {
 485    real_32 *p, f, t, tp, w;
 486    w = xreal_2 (z, 1);
 487    t = X_0;
 488    tp = X_0;
 489    for (p = a + m; p > a;) {
 490      f = xadd (*p--, xadd (xmul (w, t), tp, 1), 0);
 491      tp = t;
 492      t = f;
 493    }
 494    return xadd (*p, xadd (xmul (z, t), tp, 1), 0);
 495  }
 496  
 497  real_32 xexp2 (real_32 x)
 498  {
 499    real_32 s, d, f;
 500    unt_2 *pf = SEXP (x);
 501    int_4 m, k;
 502    if (xreal_cmp (&x, &xE2min) < 0) {
 503      return X_0;
 504    } else if ((xsigerr (xreal_cmp (&x, &xE2max) > 0, XFPOFLOW, NULL))) {
 505      return X_PLUS_INF;
 506    } else {
 507      m = (*pf & X_SIGN_MASK) ? 1 : 0;
 508      x = xsfmod (x, &k);
 509      if (m != 0) {
 510        k *= -1;
 511      }
 512  // -X_EXPO_BIAS <= k <= +X_EXPO_BIAS 
 513      x = xmul (x, X_LN_2);
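      // Here x has been reduced so that 2^(original x) = 2^k * exp(x); exp(x) is
      // evaluated below through the continued fraction for tanh(x/2) and the
      // identity exp(x) = (1 + tanh(x/2)) / (1 - tanh(x/2)).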
 514      if (xgetexp (&x) > -X_EXPO_BIAS) {
 515        x = xreal_2 (x, -1);
 516        s = xmul (x, x);
 517        f = X_0;
 518        for (d = inttox (m = xMS_exp); m > 1; m -= 2, d = inttox (m)) {
 519          f = xdiv (s, xadd (d, f, 0));
 520        }
 521        f = xdiv (x, xadd (d, f, 0));
 522        f = xdiv (xadd (d, f, 0), xadd (d, f, 1));
 523      } else {
 524        f = X_1;
 525      }
 526      pf = SEXP (f);
 527      if (-k > *pf) {
 528        return X_0;
 529      } else {
 530        *pf += k;
 531        if ((xsigerr (*pf >= X_EXPO_MASK, XFPOFLOW, NULL))) {
 532          return X_PLUS_INF;
 533        } else {
 534          return f;
 535        }
 536      }
 537    }
 538  }
 539  
 540  real_32 xexp (real_32 z)
 541  {
 542    return xexp2 (xmul (z, X_LOG2_E));
 543  }
 544  
 545  real_32 xexp10 (real_32 z)
 546  {
 547    return xexp2 (xmul (z, X_LOG2_10));
 548  }
 549  
 550  real_32 xfmod (real_32 s, real_32 t, real_32 * q)
 551  {
 552    if ((xsigerr (xreal_cmp (&t, &X_0) == 0, XEDIV, "xfmod()"))) {
 553      return X_0;
 554    } else {
 555      unt_2 *p, mask = 0xffff;
 556      int_2 e, i;
 557      int_4 u;
 558      *q = xdiv (s, t);
 559      p = (unt_2 *) &(q->value);
 560      u = (*p & X_SIGN_MASK) ? 0 : 1;
 561      e = (*p &= X_EXPO_MASK);         // biased exponent of *q 
 562      e = e < X_EXPO_BIAS ? 0 : e - X_EXPO_BIAS + 1;
 563      for (i = 1; e / 16 > 0; i++, e -= 16);
 564      if (i <= FLT256_LEN) {
 565  // e = 0, ..., 15 
 566        mask <<= 16 - e;
 567        p[i] &= mask;
 568        for (i++; i <= FLT256_LEN; p[i] = 0, i++);
 569      }
 570  // Now *q == abs(quotient of (s/t)) 
 571      return xadd (s, xmul (t, *q), u);
 572    }
 573  }
 574  
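      // xfrexp (s, p) is the REAL*32 analogue of frexp(): it returns the
      // fraction of s with magnitude in [0.5, 1) and stores the corresponding
      // power of two in *p, so that s == result * 2^(*p).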
 575  real_32 xfrexp (real_32 s, int_4 *p)
 576  {
 577    unt_2 *ps = SEXP (s), u;
 578    *p = (*ps & X_EXPO_MASK) - X_EXPO_BIAS + 1;
 579    u = *ps & X_SIGN_MASK;
 580    *ps = X_EXPO_BIAS - 1;
 581    *ps |= u;
 582    return s;
 583  }
 584  
 585  static void nullify (int_4 skip, unt_2 *p, int_4 k)
 586  {
 587  // After skipping the first 'skip' bits of the vector 'p',
 588  // this function nullifies all the remaining ones. 'k' is
 589  // the number of words forming the vector p.
 590  // Warning: 'skip' must be positive!
 591    int_4 i;
 592    unt_2 mask = 0xffff;
 593    for (i = 0; skip / 16 > 0; i++, skip -= 16);
 594    if (i < k) {
 595  // skip = 0, ..., 15 
 596      mask <<= 16 - skip;
 597      p[i] &= mask;
 598      for (i++; i < k; p[i] = 0, i++);
 599    }
 600  }
 601  
 602  static void canonic_form (real_32 * px)
 603  {
 604    unt_2 *p, u;
 605    int_2 e, i, j, skip;
 606    p = (unt_2 *) &(px->value);
 607    e = (*p & X_EXPO_MASK);            // biased exponent of x 
 608    u = (*p & X_SIGN_MASK);            // sign of x            
 609    if (e < X_EXPO_BIAS - 1) {
 610      return;
 611    } else {
 612      unt_2 mask = 0xffff;
 613  // e >= X_EXPO_BIAS - 1 
 614      for (i = 1, skip = e + 1 - X_EXPO_BIAS; skip / 16 > 0; i++, skip -= 16);
 615      if (i <= FLT256_LEN) {
 616  // skip = 0, ..., 15 
 617        mask >>= skip;
 618        if ((p[i] & mask) != mask) {
 619          return;
 620        } else {
 621          for (j = i + 1; j <= FLT256_LEN && p[j] == 0xffff; j++);
 622          if (j > FLT256_LEN) {
 623            p[i] -= mask;
 624            for (j = i + 1; j <= FLT256_LEN; p[j] = 0, j++);
 625            if (!(p[1] & 0x8000)) {
 626              p[1] = 0x8000;
 627              *p = ++e;
 628              *p |= u;
 629            } else if (u != 0) {
 630              *px = xadd (*px, X_1, 1);
 631            } else {
 632              *px = xadd (*px, X_1, 0);
 633            }
 634          }
 635        }
 636      }
 637    }
 638  }
 639  
 640  real_32 xfrac (real_32 x)
 641  {
 642  // xfrac(x) returns the fractional part of the number x.
 643  // xfrac(x) has the same sign as x.  
 644    unt_2 u, *p;
 645    int_2 e;
 646    int_4 n;
 647    canonic_form (&x);
 648    p = SEXP (x);
 649    e = (*p & X_EXPO_MASK);            // biased exponent of x 
 650    if (e < X_EXPO_BIAS) {
 651      return x;                   // The integer part of x is zero 
 652    } else {
 653      u = *p & X_SIGN_MASK;            // sign of x 
 654      n = e - X_EXPO_BIAS + 1;
 655      xlshift (n, p + 1, FLT256_LEN);
 656      e = X_EXPO_BIAS - 1;
 657  // Now I have to take into account
 658  // the rule of the leading one.
 659      while (e > 0 && !(p[1] & X_SIGN_MASK)) {
 660        xlshift (1, p + 1, FLT256_LEN);
 661        e -= 1;
 662      }
 663  // Now p+1 points to the fractional part of x,
 664  // u is its sign, e is its biased exponent.
 665      p[0] = e;
 666      p[0] |= u;
 667      return *(real_32 *) p;
 668    }
 669  }
 670  
 671  real_32 xtrunc (real_32 x)
 672  {
 673  // xtrunc(x) returns the integer part of the number x.
 674  // xtrunc(x) has the same sign as x.  
 675    unt_2 *p;
 676    int_2 e;
 677    canonic_form (&x);
 678    p = SEXP (x);
 679    e = (*p & X_EXPO_MASK);            // biased exponent of x 
 680    if (e < X_EXPO_BIAS) {
 681      return X_0;               // The integer part of x is zero 
 682    } else {
 683      nullify (e - X_EXPO_BIAS + 1, p + 1, FLT256_LEN);
 684      return *(real_32 *) p;
 685    }
 686  }
 687  
 688  real_32 xround (real_32 x)
 689  {
 690    return xtrunc (xadd (x, X_RND_CORR, x.value[0] & X_SIGN_MASK));
 691  }
 692  
 693  real_32 xceil (real_32 x)
 694  {
 695    unt_2 *ps = SEXP (x);
 696    if ((*ps & X_SIGN_MASK)) {
 697      return xtrunc (x);
 698    } else {
 699      real_32 y = xfrac (x);
 700  // y has the same sign as x (see above). 
 701      return (xreal_cmp (&y, &X_0) > 0 ? xadd (xtrunc (x), X_1, 0) : x);
 702    }
 703  }
 704  
 705  real_32 xfloor (real_32 x)
 706  {
 707    unt_2 *ps = SEXP (x);
 708    if ((*ps & X_SIGN_MASK)) {
 709      real_32 y = xfrac (x);
 710  // y has the same sign as x (see above). 
 711      return (xreal_cmp (&y, &X_0) < 0 ? xadd (xtrunc (x), X_1, 1) : x);
 712    } else {
 713      return xtrunc (x);
 714    }
 715  }
 716  
 717  static void xadd_correction (real_32 * px, int_4 k)
 718  {
 719    int_2 e = (px->value[0] & X_EXPO_MASK) - X_EXPO_BIAS;
 720  //   e = (e > 0 ? e : 0);   
 721    *px = xadd (*px, xreal_2 (X_FIX_CORR, e), k);
 722  }
 723  
 724  real_32 xfix (real_32 x)
 725  {
 726    unt_2 *p;
 727    int_2 e;
 728    xadd_correction (&x, x.value[0] & X_SIGN_MASK);
 729    p = SEXP (x);
 730    e = (*p & X_EXPO_MASK);            // biased exponent of x 
 731    if (e < X_EXPO_BIAS) {
 732      return X_0;               // The integer part of x is zero 
 733    } else {
 734      nullify (e - X_EXPO_BIAS + 1, p + 1, FLT256_LEN);
 735      return *(real_32 *) p;
 736    }
 737  }
 738  
 739  real_32 xtanh (real_32 z)
 740  {
 741    real_32 s, d, f;
 742    int_4 m, k;
 743    if ((k = xgetexp (&z)) > xK_tanh) {
 744      if (xgetsgn (&z)) {
 745        return xneg (X_1);
 746      } else {
 747        return X_1;
 748      }
 749    }
 750    if (k < xK_lin) {
 751      return z;
 752    }
 753    ++k;
 754    if (k > 0) {
 755      z = xreal_2 (z, -k);
 756    }
 757    s = xmul (z, z);
 758    f = X_0;
 759    for (d = inttox (m = xMS_hyp); m > 1;) {
 760      f = xdiv (s, xadd (d, f, 0));
 761      d = inttox (m -= 2);
 762    }
 763    f = xdiv (z, xadd (d, f, 0));
 764    for (; k > 0; --k) {
 765      f = xdiv (xreal_2 (f, 1), xadd (d, xmul (f, f), 0));
 766    }
 767    return f;
 768  }
 769  
 770  real_32 xsinh (real_32 z)
 771  {
 772    int_4 k;
 773    if ((k = xgetexp (&z)) < xK_lin) {
 774      return z;
 775    } else if (k < 0) {
 776      z = xtanh (xreal_2 (z, -1));
 777      return xdiv (xreal_2 (z, 1), xadd (X_1, xmul (z, z), 1));
 778    } else {
 779      z = xexp (z);
 780      return xreal_2 (xadd (z, xdiv (X_1, z), 1), -1);
 781    }
 782  }
 783  
 784  real_32 xcosh (real_32 z)
 785  {
 786    if (xgetexp (&z) < xK_lin) {
 787      return X_1;
 788    }
 789    z = xexp (z);
 790    return xreal_2 (xadd (z, xdiv (X_1, z), 0), -1);
 791  }
 792  
 793  real_32 xatanh (real_32 x)
 794  {
 795    real_32 y = x;
 796    y.value[0] &= X_EXPO_MASK;           // Now y == abs(x) 
 797    if ((xsigerr (xreal_cmp (&y, &X_1) >= 0, XEDOM, "xatanh"))) {
 798      return ((x.value[0] & X_SIGN_MASK) ? X_MINUS_INF : X_PLUS_INF);
 799    } else {
 800      y = xdiv (xadd (X_1, x, 0), xadd (X_1, x, 1));
 801      return xreal_2 (xlog (y), -1);
 802    }
 803  }
 804  
 805  real_32 xasinh (real_32 x)
 806  {
 807    real_32 y = xmul (x, x);
 808    y = xsqrt (xadd (X_1, y, 0));
 809    if ((x.value[0] & X_SIGN_MASK)) {
 810      return xneg (xlog (xadd (y, x, 1)));
 811    } else {
 812      return xlog (xadd (x, y, 0));
 813    }
 814  }
 815  
 816  real_32 xacosh (real_32 x)
 817  {
 818    if ((xsigerr (xreal_cmp (&x, &X_1) < 0, XEDOM, "xacosh()"))) {
 819      return X_0;
 820    } else {
 821      real_32 y = xmul (x, x);
 822      y = xsqrt (xadd (y, X_1, 1));
 823      return xlog (xadd (x, y, 0));
 824    }
 825  }
 826  
 827  real_32 xatan (real_32 z)
 828  {
 829    real_32 s, f;
 830    int_4 k, m;
 831    if ((k = xgetexp (&z)) < xK_lin) {
 832      return z;
 833    }
 834    if (k >= 0) {
 835  // k>=0 is equivalent to abs(z) >= 1.0 
 836      z = xdiv (X_1, z);
 837      m = 1;
 838    } else {
 839      m = 0;
 840    }
 841    f = dbltox (atan (xtodbl (z)));
 842    s = xadd (X_1, xmul (z, z), 0);
 843    for (k = 0; k < xItt_div; ++k) {
 844      f = xadd (f, xdiv (xadd (z, xtan (f), 1), s), 0);
 845    }
 846    if (m != 0) {
 847      if (xgetsgn (&f)) {
 848        return xadd (xneg (X_PI_OVER_2), f, 1);
 849      } else {
 850        return xadd (X_PI_OVER_2, f, 1);
 851      }
 852    } else {
 853      return f;
 854    }
 855  }
 856  
 857  real_32 xasin (real_32 z)
 858  {
 859    real_32 u = z;
 860    u.value[0] &= X_EXPO_MASK;
 861    if ((xsigerr (xreal_cmp (&u, &X_1) > 0, XEDOM, "xasin()"))) {
 862      return ((xgetsgn (&z)) ? xneg (X_PI_OVER_2) : X_PI_OVER_2);
 863    } else {
 864      if (xgetexp (&z) < xK_lin) {
 865        return z;
 866      }
 867      u = xsqrt (xadd (X_1, xmul (z, z), 1));
 868      if (xgetexp (&u) == -X_EXPO_BIAS) {
 869        return ((xgetsgn (&z)) ? xneg (X_PI_OVER_2) : X_PI_OVER_2);
 870      }
 871      return xatan (xdiv (z, u));
 872    }
 873  }
 874  
 875  real_32 xacos (real_32 z)
 876  {
 877    real_32 u = z;
 878    u.value[0] &= X_EXPO_MASK;
 879    if ((xsigerr (xreal_cmp (&u, &X_1) > 0, XEDOM, "xacos()"))) {
 880      return ((xgetsgn (&z)) ? X_PI : X_0);
 881    } else {
 882      if (xgetexp (&z) == -X_EXPO_BIAS) {
 883        return X_PI_OVER_2;
 884      }
 885      u = xsqrt (xadd (X_1, xmul (z, z), 1));
 886      u = xatan (xdiv (u, z));
 887      if (xgetsgn (&z)) {
 888        return xadd (X_PI, u, 0);
 889      } else {
 890        return u;
 891      }
 892    }
 893  }
 894  // Kindly added by A.Haumer 2010-04-09
 895  
 896  real_32 xatan2 (real_32 y, real_32 x)
 897  {
 898    int_4 rs, is;
 899    rs = xsgn (&x);
 900    is = xsgn (&y);
 901    if (rs > 0) {
 902      return xatan (xdiv (y, x));
 903    } else if (rs < 0) {
 904      x.value[0] ^= X_SIGN_MASK;
 905      y.value[0] ^= X_SIGN_MASK;
 906      if (is >= 0) {
 907        return xadd (X_PI, xatan (xdiv (y, x)), 0);
 908      } else {
 909        return xadd (xatan (xdiv (y, x)), X_PI, 1);
 910      }
 911    } else {                      // x is zero ! 
 912      if (!xsigerr (is == 0, XEDOM, "xatan2()")) {
 913        return (is > 0 ? X_PI_OVER_2 : xneg (X_PI_OVER_2));
 914      } else {
 915        return X_0;             // Dummy value :) 
 916      }
 917    }
 918  }
 919  
 920  real_32 xlog (real_32 z)
 921  {
 922    real_32 f, h;
 923    int_4 k, m;
 924    if ((xsigerr ((xgetsgn (&z)) || xgetexp (&z) == -X_EXPO_BIAS, XEDOM, "xlog()"))) {
 925      return X_MINUS_INF;
 926    } else if (xreal_cmp (&z, &X_1) == 0) {
 927      return X_0;
 928    } else {
 929      z = xfrexp (z, &m);
 930      z = xmul (z, X_SQRT_2);
 931      z = xdiv (xadd (z, X_1, 1), xadd (z, X_1, 0));
 932      h = xreal_2 (z, 1);
 933      z = xmul (z, z);
 934      for (f = h, k = 1; xgetexp (&h) > -xMax_p;) {
 935        h = xmul (h, z);
 936        f = xadd (f, xdiv (h, inttox (k += 2)), 0);
 937      }
 938      return xadd (f, xmul (X_LN_2, dbltox (m - .5)), 0);
 939    }
 940  }
 941  
 942  real_32 xlog2 (real_32 z)
 943  {
 944    real_32 f, h;
 945    int_4 k, m;
 946    if ((xsigerr ((xgetsgn (&z)) || xgetexp (&z) == -X_EXPO_BIAS, XEDOM, "xlog2()"))) {
 947      return X_MINUS_INF;
 948    } else if (xreal_cmp (&z, &X_1) == 0) {
 949      return X_0;
 950    } else {
 951      z = xfrexp (z, &m);
 952      z = xmul (z, X_SQRT_2);
 953      z = xdiv (xadd (z, X_1, 1), xadd (z, X_1, 0));
 954      h = xreal_2 (z, 1);
 955      z = xmul (z, z);
 956      for (f = h, k = 1; xgetexp (&h) > -xMax_p;) {
 957        h = xmul (h, z);
 958        f = xadd (f, xdiv (h, inttox (k += 2)), 0);
 959      }
 960      return xadd (xmul (f, X_LOG2_E), dbltox (m - .5), 0);
 961    }
 962  }
 963  
 964  real_32 xlog10 (real_32 z)
 965  {
 966    real_32 w = xlog (z);
 967    if (xreal_cmp (&w, &X_MINUS_INF) <= 0) {
 968      return X_MINUS_INF;
 969    } else {
 970      return xmul (w, X_LOG10_E);
 971    }
 972  }
 973  
 974  int_4 xeq (real_32 x1, real_32 x2)
 975  {
 976    return (xreal_cmp (&x1, &x2) == 0);
 977  }
 978  
 979  int_4 xneq (real_32 x1, real_32 x2)
 980  {
 981    return (xreal_cmp (&x1, &x2) != 0);
 982  }
 983  
 984  int_4 xgt (real_32 x1, real_32 x2)
 985  {
 986    return (xreal_cmp (&x1, &x2) > 0);
 987  }
 988  
 989  int_4 xge (real_32 x1, real_32 x2)
 990  {
 991    return (xreal_cmp (&x1, &x2) >= 0);
 992  }
 993  
 994  int_4 xlt (real_32 x1, real_32 x2)
 995  {
 996    return (xreal_cmp (&x1, &x2) < 0);
 997  }
 998  
 999  int_4 xle (real_32 x1, real_32 x2)
1000  {
1001    return (xreal_cmp (&x1, &x2) <= 0);
1002  }
1003  // xisNaN (&x) returns 1 if and only if x is not a valid number 
1004  
1005  int_4 xisNaN (const real_32 * u)
1006  {
1007    unt_2 *p = (unt_2 *) &(u->value);
1008    if (*p != 0) {
1009      return 0;
1010    } else {
1011      int_4 i;
1012      for (i = 1; i <= FLT256_LEN && p[i] == 0x0; i++);
1013      return (i <= FLT256_LEN ? 1 : 0);
1014    }
1015  }
1016  
1017  int_4 xis0 (const real_32 * u)
1018  {
1019    unt_2 *p = (unt_2 *) &(u->value);
1020    int_4 m;
1021    for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1022    return (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK ? 1 : 0);
1023  }
1024  
1025  int_4 xnot0 (const real_32 * u)
1026  {
1027    unt_2 *p = (unt_2 *) &(u->value);
1028    int_4 m;
1029    for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1030    return (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK ? 0 : 1);
1031  }
1032  
1033  int_4 xlt1 (const real_32 u)
1034  {
1035    return (xlt (xabs (u), X_1));
1036  }
1037  
1038  int_4 xsgn (const real_32 * u)
1039  {
1040    unt_2 *p = (unt_2 *) &(u->value);
1041    int_4 m;
1042    for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
1043    if ((m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK) || !*p) {
1044      return 0;
1045    } else {
1046      return ((*p & X_SIGN_MASK) ? -1 : 1);
1047    }
1048  }
1049  
1050  int_4 xisPinf (const real_32 * u)
1051  {
1052    return (*u->value == X_EXPO_MASK ? 1 : 0);
1053  }
1054  
1055  int_4 xisMinf (const real_32 * u)
1056  {
1057    return (*u->value == (X_EXPO_MASK | X_SIGN_MASK) ? 1 : 0);
1058  }
1059  
1060  int_4 xisordnumb (const real_32 * u)
1061  {
1062    int_4 isNaN, isfinite;
1063    unt_2 *p = (unt_2 *) &(u->value);
1064    if (*p != 0) {
1065      isNaN = 0;
1066    } else {
1067      int_4 i;
1068      for (i = 1; i <= FLT256_LEN && p[i] == 0x0; i++);
1069      isNaN = i <= FLT256_LEN;
1070    }
1071    isfinite = (*p & X_EXPO_MASK) < X_EXPO_MASK;
1072    return (!isNaN && (isfinite) ? 1 : 0);
1073  }
1074  
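      // xpwr (s, n) computes s**n for integer n by binary (square-and-multiply)
      // exponentiation; for negative n the reciprocal 1/s is raised to -n.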
1075  real_32 xpwr (real_32 s, int_4 n)
1076  {
1077    real_32 t;
1078    unsigned k, m;
1079    t = X_1;
1080    if (n < 0) {
1081      m = -n;
1082      if ((xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()"))) {
1083        return X_0;
1084      }
1085      s = xdiv (X_1, s);
1086    } else {
1087      m = n;
1088    }
1089    if (m != 0) {
1090      k = 1;
1091      while (1) {
1092        if (k & m) {
1093          t = xmul (s, t);
1094        }
1095        if ((k <<= 1) <= m) {
1096          s = xmul (s, s);
1097        } else {
1098          break;
1099        }
1100      }
1101    } else {
1102      xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()");
1103    }
1104    return t;
1105  }
1106  
1107  real_32 xpow (real_32 x, real_32 y)
1108  {
1109    if (xsigerr ((xgetsgn (&x)) || xgetexp (&x) == -X_EXPO_BIAS, XEDOM, "xpow()")) {
1110      return X_0;
1111    } else {
1112      return xexp2 (xmul (xlog2 (x), y));
1113    }
1114  }
1115  
1116  real_32 xsqrt (real_32 z)
1117  {
1118    real_32 s, h;
1119    int_2 m, e;
1120    unt_2 *pc;
1121    if ((xsigerr ((xgetsgn (&z)), XEDOM, "xsqrt()"))) {
1122      return X_0;
1123    } else {
1124      pc = SEXP (z);
1125      if (*pc == 0) {
1126        return X_0;
1127      }
1128      e = *pc - X_EXPO_BIAS;
1129      *pc = X_EXPO_BIAS + (e % 2);
1130      e /= 2;
1131      s = dbltox (sqrt (xtodbl (z)));
1132      for (m = 0; m < xItt_div; ++m) {
1133        h = xdiv (xadd (z, xmul (s, s), 1), xreal_2 (s, 1));
1134        s = xadd (s, h, 0);
1135      }
1136      pc = SEXP (s);
1137      *pc += e;
1138      return s;
1139    }
1140  }
1141  
1142  static int_4 xodd (real_32 x)
1143  {
1144    unt_2 *p = SEXP (x);
1145    int_2 e, i;
1146    e = (*p & X_EXPO_MASK) - X_EXPO_BIAS;    // exponent of x 
1147    if (e < 0) {
1148      return 0;
1149    } else {
1150      for (i = 1; e / 16 > 0; i++, e -= 16);
1151  // Now e = 0, ..., 15 
1152      return (i <= FLT256_LEN ? p[i] & 0x8000 >> e : 0);
1153    }
1154  }
1155  
1156  real_32 xtan (real_32 z)
1157  {
1158    int_4 k, m;
1159    z = rred (z, 't', &k);
1160    if ((xsigerr (xreal_cmp (&z, &X_PI_OVER_2) >= 0, XEDOM, "xtan()"))) {
1161      return (!k ? X_PLUS_INF : X_MINUS_INF);
1162    } else {
1163      if (xreal_cmp (&z, &X_PI_OVER_4) == 1) {
1164        m = 1;
1165        z = xadd (X_PI_OVER_2, z, 1);
1166      } else {
1167        m = 0;
1168      }
1169      if (k != 0) {
1170        z = xneg (c_tan (z));
1171      } else {
1172        z = c_tan (z);
1173      }
1174      if (m != 0) {
1175        return xdiv (X_1, z);
1176      } else {
1177        return z;
1178      }
1179    }
1180  }
1181  
1182  real_32 xcos (real_32 z)
1183  {
1184    int_4 k;
1185    z = rred (z, 'c', &k);
1186    if (xgetexp (&z) < xK_lin) {
1187      if (k != 0) {
1188        return xneg (X_1);
1189      } else {
1190        return X_1;
1191      }
1192    }
1193    z = c_tan (xreal_2 (z, -1));
1194    z = xmul (z, z);
1195    z = xdiv (xadd (X_1, z, 1), xadd (X_1, z, 0));
1196    if (k != 0) {
1197      return xneg (z);
1198    } else {
1199      return z;
1200    }
1201  }
1202  
1203  real_32 xsin (real_32 z)
1204  {
1205    int_4 k;
1206    z = rred (z, 's', &k);
1207    if (xgetexp (&z) >= xK_lin) {
1208      z = c_tan (xreal_2 (z, -1));
1209      z = xdiv (xreal_2 (z, 1), xadd (X_1, xmul (z, z), 0));
1210    }
1211    if (k != 0) {
1212      return xneg (z);
1213    } else {
1214      return z;
1215    }
1216  }
1217  
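      // Kernel used by the trigonometric routines: for a reduced argument it
      // evaluates tan(z) through the continued fraction
      // tan(z) = z / (1 - z^2/(3 - z^2/(5 - ...))).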
1218  static real_32 c_tan (real_32 z)
1219  {
1220    real_32 s, f, d;
1221    int_4 m;
1222    unt_2 k;
1223    if (xgetexp (&z) < xK_lin)
1224      return z;
1225    s = xneg (xmul (z, z));
1226    for (k = 1; k <= FLT256_LEN && s.value[k] == 0; k++);
1227    if ((xsigerr (s.value[0] == 0xffff && k > FLT256_LEN, XFPOFLOW, NULL))) {
1228      return X_0;
1229    } else {
1230      f = X_0;
1231      for (d = inttox (m = xMS_trg); m > 1;) {
1232        f = xdiv (s, xadd (d, f, 0));
1233        d = inttox (m -= 2);
1234      }
1235      return xdiv (z, xadd (d, f, 0));
1236    }
1237  }
1238  
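      // Argument reduction for xsin/xcos/xtan: folds z into [0, pi/2] using the
      // period and symmetry of the requested function (kf is 's', 'c' or 't')
      // and sets *ps when the reduced result must be negated.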
1239  static real_32 rred (real_32 z, int_4 kf, int_4 *ps)
1240  {
1241    real_32 is, q;
1242    if (xgetsgn (&z)) {
1243      z = xneg (z);
1244      is = X_1;
1245    } else {
1246      is = X_0;
1247    }
1248    z = xfmod (z, X_PI, &q);
1249    if (kf == 't') {
1250      q = is;
1251    } else if (kf == 's') {
1252      q = xadd (q, is, 0);
1253    }
1254    if (xreal_cmp (&z, &X_PI_OVER_2) == 1) {
1255      z = xadd (X_PI, z, 1);
1256      if (kf == 'c' || kf == 't') {
1257        q = xadd (q, X_1, 0);
1258      }
1259    }
1260    *ps = (xodd (q)) ? 1 : 0;
1261    return z;
1262  }
1263  
1264  // COMPLEX*64
1265  
1266  complex_64 cxadd (complex_64 z1, complex_64 z2, int_4 k)
1267  {
1268    complex_64 w;
1269    w.re = xadd (z1.re, z2.re, k);
1270    w.im = xadd (z1.im, z2.im, k);
1271    return w;
1272  }
1273  
1274  complex_64 cxsum (complex_64 z1, complex_64 z2)
1275  {
1276    complex_64 w;
1277    w.re = xadd (z1.re, z2.re, 0);
1278    w.im = xadd (z1.im, z2.im, 0);
1279    return w;
1280  }
1281  
1282  complex_64 cxsub (complex_64 z1, complex_64 z2)
1283  {
1284    complex_64 w;
1285    w.re = xadd (z1.re, z2.re, 1);
1286    w.im = xadd (z1.im, z2.im, 1);
1287    return w;
1288  }
1289  
1290  complex_64 cxmul (complex_64 z1, complex_64 z2)
1291  {
1292    complex_64 w;
1293    w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 1);
1294    w.im = xadd (xmul (z1.im, z2.re), xmul (z1.re, z2.im), 0);
1295    return w;
1296  }
1297  
1298  complex_64 cxrmul (real_32 c, complex_64 z)
1299  {
1300    complex_64 w;
1301    w.re = xmul (c, z.re);
1302    w.im = xmul (c, z.im);
1303    return w;
1304  }
1305  
1306  complex_64 cxdrot (complex_64 z)
1307  {
1308    complex_64 y;
1309    y.re = z.im;
1310    y.im = z.re;
1311  // Multiplication by +i 
1312    y.re.value[0] ^= X_SIGN_MASK;
1313    return y;
1314  }
1315  
1316  complex_64 cxrrot (complex_64 z)
1317  {
1318    complex_64 y;
1319    y.re = z.im;
1320    y.im = z.re;
1321  // Multiplication by -i 
1322    y.im.value[0] ^= X_SIGN_MASK;
1323    return y;
1324  }
1325  
1326  complex_64 cxdiv (complex_64 z1, complex_64 z2)
1327  {
1328    int_4 tv = cxrec (z2, &z2);
1329    if (!xsigerr (!tv, XEDIV, "cxdiv()")) {
1330      complex_64 w;
1331      w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 1);
1332      w.im = xadd (xmul (z1.im, z2.re), xmul (z1.re, z2.im), 0);
1333      return w;
1334    } else {
1335      return X_0_0I;
1336    }
1337  }
1338  
1339  complex_64 cxinv (complex_64 z)
1340  {
1341    int_4 tv = cxrec (z, &z);
1342    if (!xsigerr (!tv, XEDOM, "cxinv()")) {
1343      return z;
1344    } else {
1345      return X_0_0I;
1346    }
1347  }
1348  
1349  complex_64 cxsqr (complex_64 z)
1350  {
1351    complex_64 w;
1352    w.re = xadd (xmul (z.re, z.re), xmul (z.im, z.im), 1);
1353    w.im = xmul (X_2, xmul (z.im, z.re));
1354    return w;
1355  }
1356  
1357  complex_64 cxsqrt (complex_64 z)
1358  {
1359    complex_64 w;
1360    if (xsgn (&(z.re)) == 0 && xsgn (&(z.im)) == 0) {
1361      w = X_0_0I;
1362    } else if (xis0 (&(z.im))) {
1363      real_32 s = xsqrt (xabs (z.re));
1364      if (xsgn (&(z.re)) == -1) {
1365        w = (complex_64) {X_0, s};
1366      } else {
1367        w = (complex_64) {s, X_0};
1368      }
1369    } else {
1370      real_32 mod = xsqrt (cxabs (z)), arg = xreal_2 (cxarg (z), -1);
1371      w.re = xmul (mod, xcos (arg));
1372      w.im = xmul (mod, xsin (arg));
1373    }   
1374    return w;
1375  }
1376  
1377  complex_64 cxreset (real_32 re, real_32 im)
1378  {
1379    complex_64 y;
1380    y.re = re;
1381    y.im = im;
1382    return y;
1383  }
1384  
1385  complex_64 cxconv (real_32 x)
1386  {
1387    complex_64 y;
1388    y.re = x;
1389    y.im = X_0;
1390    return y;
1391  }
1392  
1393  real_32 cxre (complex_64 z)
1394  {
1395    return z.re;
1396  }
1397  
1398  real_32 cxim (complex_64 z)
1399  {
1400    return z.im;
1401  }
1402  
1403  complex_64 cxswap (complex_64 z)
1404  {
1405    complex_64 y;
1406    y.re = z.im;
1407    y.im = z.re;
1408    return y;
1409  }
1410  
1411  complex_64 cxneg (complex_64 z)
1412  {
1413    z.re.value[0] ^= X_SIGN_MASK;
1414    z.im.value[0] ^= X_SIGN_MASK;
1415    return z;
1416  }
1417  
1418  complex_64 cxconj (complex_64 z)
1419  {
1420    return (z.im.value[0] ^= X_SIGN_MASK, z);
1421  }
1422  
1423  #define XBOUND  FLT256_LEN * 16 + 8
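      // If the binary exponents of the two components differ by more than XBOUND
      // (the mantissa length in bits plus a small margin), the smaller component
      // cannot affect the result at this precision and is ignored.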
1424  
1425  real_32 cxabs (complex_64 z)
1426  {
1427    if (xreal_cmp (&(z.re), &X_0) == 0 && xreal_cmp (&(z.im), &X_0) == 0) {
1428      return X_0;
1429    } else {
1430      int_4 ea = (z.re.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1431      int_4 eb = (z.im.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1432      if (ea > eb + XBOUND) {
1433        return z.re;
1434      } else if (eb > ea + XBOUND) {
1435        return z.im;
1436      } else {
1437        z.re.value[0] -= eb;
1438        z.im.value[0] = X_EXPO_BIAS;
1439        real_32 x = xsqrt (xadd (xmul (z.re, z.re), xmul (z.im, z.im), 0));
1440        x.value[0] += eb;
1441        return x;
1442      }
1443    }
1444  }
1445  
1446  real_32 cxarg (complex_64 z)
1447  {
1448    int_4 rs = xsgn (&(z.re)), is = xsgn (&(z.im));
1449    if (rs > 0) {
1450      return xatan (xdiv (z.im, z.re));
1451    } else if (rs < 0) {
1452      z.re.value[0] ^= X_SIGN_MASK;
1453      z.im.value[0] ^= X_SIGN_MASK;
1454      if (is >= 0) {
1455        return xadd (X_PI, xatan (xdiv (z.im, z.re)), 0);
1456      } else {
1457        return xadd (xatan (xdiv (z.im, z.re)), X_PI, 1);
1458      }
1459    } else {                      // z.re is zero ! 
1460      if (!xsigerr (is == 0, XEDOM, "cxarg()")) {
1461        return (is > 0 ? X_PI_OVER_2 : xneg (X_PI_OVER_2));
1462      } else {
1463        return xneg (X_PI_OVER_2);       // Dummy value :)
1464      } 
1465    }
1466  }
1467  
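      // cxrec (z, w) stores 1/z = conj(z)/|z|^2 in *w and returns 1, or returns
      // 0 when z is zero; the intermediate scaling by the modulus helps avoid
      // overflow when the two components have very different magnitudes.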
1468  int_4 cxrec (complex_64 z, complex_64 * w)
1469  {
1470    if (xreal_cmp (&(z.re), &X_0) == 0 && xreal_cmp (&(z.im), &X_0) == 0) {
1471      return 0;
1472    } else {
1473      int_4 sa = z.re.value[0] & X_SIGN_MASK;
1474      int_4 sb = z.im.value[0] & X_SIGN_MASK;
1475      int_4 ea = (z.re.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1476      int_4 eb = (z.im.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
1477      real_32 x;
1478      if (ea > eb + XBOUND) {
1479        x = z.re;
1480      } else if (eb > ea + XBOUND) {
1481        x = z.im;
1482      } else {
1483        z.re.value[0] -= eb;
1484        z.im.value[0] = X_EXPO_BIAS;
1485        x = xsqrt (xadd (xmul (z.re, z.re), xmul (z.im, z.im), 0));
1486        x.value[0] += eb;
1487        z.re.value[0] += eb;
1488        z.im.value[0] += eb;
1489      }
1490      w->re = xdiv (xdiv (z.re, x), x);
1491      w->im = xdiv (xdiv (z.im, x), x);
1492      w->re.value[0] |= sa;
1493      w->im.value[0] |= X_SIGN_MASK ^ sb;
1494      return 1;
1495    }
1496  }
1497  
1498  complex_64 cxexp (complex_64 z)
1499  {
1500    complex_64 w;
1501    w.re = xmul (xexp (z.re), xcos (z.im));
1502    w.im = xmul (xexp (z.re), xsin (z.im));
1503    return w;
1504  }
1505  
1506  complex_64 cxlog (complex_64 z)
1507  {
1508    real_32 mod;
1509    complex_64 w;
1510    mod = cxabs (z);
1511    if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog()")) {
1512      return X_0_0I;
1513    } else {
1514      w.re = xlog (mod);
1515      w.im = cxarg (z);
1516      return w;
1517    }
1518  }
1519  
1520  complex_64 cxlog10 (complex_64 z)
1521  {
1522    real_32 mod;
1523    complex_64 w;
1524    mod = cxabs (z);
1525    if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog10()")) {
1526      return X_0_0I;
1527    } else {
1528      w.re = xlog10 (mod);
1529      w.im = xmul (cxarg (z), X_LOG10_E);
1530      return w;
1531    }
1532  }
1533  
1534  complex_64 cxlog2 (complex_64 z)
1535  {
1536    real_32 mod;
1537    complex_64 w;
1538    mod = cxabs (z);
1539    if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog2()")) {
1540      return X_0_0I;
1541    } else {
1542      w.re = xlog2 (mod);
1543      w.im = xmul (cxarg (z), X_LOG2_E);
1544      return w;
1545    }
1546  }
1547  
1548  complex_64 cxlog_sqrt (complex_64 z)
1549  {
1550    real_32 mod = cxabs (z);
1551    if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog_sqrt()")) {
1552      return X_0_0I;
1553    } else {
1554      complex_64 w;
1555      w.re = xreal_2 (xlog (mod), -1);
1556      w.im = xreal_2 (cxarg (z), -1);
1557      return w;
1558    }
1559  }
1560  
1561  complex_64 cxsinh (complex_64 z)
1562  {
1563    complex_64 w = cxsub (cxexp (z), cxexp (cxneg (z)));
1564    w.re = xreal_2 (w.re, -1);
1565    w.im = xreal_2 (w.im, -1);
1566    return w;
1567  }
1568  
1569  complex_64 cxcosh (complex_64 z)
1570  {
1571    complex_64 w = cxsum (cxexp (z), cxexp (cxneg (z)));
1572    w.re = xreal_2 (w.re, -1);
1573    w.im = xreal_2 (w.im, -1);
1574    return w;
1575  }
1576  
1577  complex_64 cxtanh (complex_64 z)
1578  {
1579    if (xsigerr (xreal_cmp (&(z.re), &xEmax) > 0, XFPOFLOW, NULL)) {
1580      return X_1_0I;
1581    } else if (xsigerr (xreal_cmp (&(z.re), &xEmin) < 0, XFPOFLOW, NULL)) {
1582      return cxneg (X_1_0I);
1583    } else {
1584      complex_64 w;
1585      if (xsigerr (!cxrec (cxcosh (z), &w), XEDOM, "cxtanh()")) {
1586        return X_0_0I;
1587      } else {
1588        return cxmul (cxsinh (z), w);
1589      }
1590    }
1591  }
1592  
1593  complex_64 cxasinh (complex_64 z)
1594  {
1595  // This way cxasinh() also works correctly for real numbers very near to -oo.
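      // asinh(z) = log(z + sqrt(1 + z^2)); when z + w would suffer heavy
      // cancellation (|z + w| much smaller than |z|) the equivalent form
      // -log(w - z) is used instead.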
1596    complex_64 w = cxsqrt (cxsum (X_1_0I, cxsqr (z)));
1597    real_32 ls = cxabs (cxsum (z, w));
1598    real_32 rs = xmul (X_VSV, cxabs (z));
1599    if (xreal_cmp (&ls, &rs) < 0) {
1600      return cxneg (cxlog (cxsub (w, z)));
1601    } else {
1602      return cxlog (cxsum (z, w));
1603    }
1604  }
1605  
1606  complex_64 cxacosh (complex_64 z)
1607  {
1608    complex_64 w = cxsqrt (cxsub (cxsqr (z), X_1_0I));
1609    real_32 ls = cxabs (cxsum (z, w));
1610    real_32 rs = xmul (X_VSV, cxabs (z));
1611    if (xreal_cmp (&ls, &rs) < 0) {
1612      return cxneg (cxlog (cxsub (z, w)));
1613    } else {
1614      return cxlog (cxsum (z, w));
1615    }
1616  }
1617  
1618  complex_64 cxatanh (complex_64 z)
1619  {
1620    real_32 t = xadd (xabs (z.re), X_1, 1);
1621    int_4 errcond = xsgn (&(z.im)) == 0 && xsgn (&t) == 0;
1622    if (xsigerr (errcond, XEDOM, "cxatanh()")) {
1623      return X_0_0I;
1624    } else {
1625      complex_64 w = cxdiv (cxsum (X_1_0I, z), cxsub (X_1_0I, z));
1626      w = cxlog_sqrt (w);
1627      return w;
1628    }
1629  }
1630  
1631  complex_64 cxgdiv (complex_64 z1, complex_64 z2)
1632  {
1633    z1.re = xround (z1.re);
1634    z1.im = xround (z1.im);
1635    z2.re = xround (z2.re);
1636    z2.im = xround (z2.im);
1637    real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1638    if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NULL) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxgdiv()")) {
1639      return X_0_0I;
1640    } else {
1641      complex_64 w;
1642      w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1643      w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1644      w.re = xround (xdiv (w.re, mod2));
1645      w.im = xround (xdiv (w.im, mod2));
1646      return w;
1647    }
1648  }
1649  
1650  complex_64 cxidiv (complex_64 z1, complex_64 z2)
1651  {
1652    z1.re = xround (z1.re);
1653    z1.im = xround (z1.im);
1654    z2.re = xround (z2.re);
1655    z2.im = xround (z2.im);
1656    real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1657    if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NULL) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxidiv()")) {
1658      return X_0_0I;
1659    } else {
1660      complex_64 w;
1661      w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1662      w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1663      w.re = xfix (xdiv (w.re, mod2));
1664      w.im = xfix (xdiv (w.im, mod2));
1665      return w;
1666    }
1667  }
1668  
1669  complex_64 cxgmod (complex_64 z1, complex_64 z2)
1670  {
1671    z1.re = xround (z1.re);
1672    z1.im = xround (z1.im);
1673    z2.re = xround (z2.re);
1674    z2.im = xround (z2.im);
1675    real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1676    if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NULL) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxgmod()")) {
1677      return X_0_0I;
1678    } else {
1679      complex_64 w, z;
1680      w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1681      w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1682      w.re = xround (xdiv (w.re, mod2));
1683      w.im = xround (xdiv (w.im, mod2));
1684      z.re = xadd (xmul (w.re, z2.re), xmul (w.im, z2.im), 1);
1685      z.im = xadd (xmul (w.im, z2.re), xmul (w.re, z2.im), 0);
1686      w.re = xround (xadd (z1.re, z.re, 1));
1687      w.im = xround (xadd (z1.im, z.im, 1));
1688      return w;
1689    }
1690  }
1691  
1692  complex_64 cxmod (complex_64 z1, complex_64 z2)
1693  {
1694    z1.re = xround (z1.re);
1695    z1.im = xround (z1.im);
1696    z2.re = xround (z2.re);
1697    z2.im = xround (z2.im);
1698    real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
1699    if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NULL) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxmod()")) {
1700      return X_0_0I;
1701    } else {
1702      complex_64 w, z;
1703      w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
1704      w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
1705      w.re = xfix (xdiv (w.re, mod2));
1706      w.im = xfix (xdiv (w.im, mod2));
1707      z.re = xadd (xmul (w.re, z2.re), xmul (w.im, z2.im), 1);
1708      z.im = xadd (xmul (w.im, z2.re), xmul (w.re, z2.im), 0);
1709      w.re = xround (xadd (z1.re, z.re, 1));
1710      w.im = xround (xadd (z1.im, z.im, 1));
1711      return w;
1712    }
1713  }
1714  
1715  static void unit_root (int_4 i, int_4 n, real_32 * a, real_32 * b)
1716  {
1717  // We assume n != 0 
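      // Computes the i-th n-th root of unity: *a = cos(2*pi*i/n) and
      // *b = sin(2*pi*i/n); components of extremely small magnitude are
      // flushed to zero.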
1718    i %= n;
1719    *a = xdiv (xmul (xreal_2 (inttox (i), 1), X_PI), inttox (n));
1720    *b = xsin (*a);
1721    *a = xcos (*a);
1722    if (xgetexp (b) < -80) {
1723      *b = X_0;
1724    }
1725    if (xgetexp (a) < -80) {
1726      *a = X_0;
1727    }
1728  }
1729  
1730  complex_64 cxpwr (complex_64 z, int_4 n)
1731  {
1732    real_32 mod = cxabs (z);
1733    if (xsgn (&mod) <= 0) {
1734      (void) xsigerr (n <= 0, XEBADEXP, "cxpwr()");
1735      return X_0_0I;
1736    } else {
1737      real_32 arg = xmul (inttox (n), cxarg (z));
1738      mod = xpwr (mod, n);
1739      complex_64 w;
1740      w.re = xmul (mod, xcos (arg));
1741      w.im = xmul (mod, xsin (arg));
1742      return w;
1743    }
1744  }
1745  
1746  complex_64 cxroot (complex_64 z, int_4 i, int_4 n)
1747  {
1748    if (xsigerr (n == 0, XEBADEXP, "cxroot()")) {
1749      return X_0_0I;
1750    } else {
1751      real_32 mod = cxabs (z);
1752      if (xsgn (&mod) <= 0) {
1753        (void) xsigerr (n < 0, XEBADEXP, "cxroot()");
1754        return X_0_0I;
1755      } else {                    // mod > 0 
1756        real_32 arg = xdiv (cxarg (z), inttox (n));
1757        real_32 e = xdiv (X_1, inttox (n));      // 1/n 
1758  // x^e = exp(e*log(x)) for any x > 0 
1759        mod = xexp (xmul (e, xlog (mod)));
1760        complex_64 w, zz;
1761        w.re = xmul (mod, xcos (arg));
1762        w.im = xmul (mod, xsin (arg));
1763        real_32 a, b;
1764        unit_root (i, n, &a, &b);
1765        zz.re = xadd (xmul (w.re, a), xmul (w.im, b), 1);
1766        zz.im = xadd (xmul (w.im, a), xmul (w.re, b), 0);
1767        return zz;
1768      }
1769    }
1770  }
1771  
1772  complex_64 cxpow (complex_64 z1, complex_64 z2)
1773  {
1774    real_32 mod = cxabs (z1);
1775    if (xsgn (&mod) <= 0) {
1776      (void) xsigerr (xsgn (&z2.re) <= 0, XEBADEXP, "cxpow()");
1777      return X_0_0I;
1778    } else {
1779      real_32 arg = cxarg (z1);
1780      real_32 a = xadd (xmul (z2.re, xlog (mod)), xmul (z2.im, arg), 1);
1781      real_32 b = xadd (xmul (z2.re, arg), xmul (z2.im, xlog (mod)), 0);
1782      complex_64 w;
1783      w.re = xmul (xexp (a), xcos (b));
1784      w.im = xmul (xexp (a), xsin (b));
1785      return w;
1786    }
1787  }
1788  
1789  int_4 cxis0 (const complex_64 * z)
1790  {
1791    return (xis0 (&z->re) && xis0 (&z->im));
1792  }
1793  
1794  int_4 cxnot0 (const complex_64 * z)
1795  {
1796    return (xnot0 (&z->re) || xnot0 (&z->im));
1797  }
1798  
1799  int_4 cxeq (complex_64 z1, complex_64 z2)
1800  {
1801    return (xreal_cmp (&z1.re, &z2.re) == 0 && xreal_cmp (&z1.im, &z2.im) == 0);
1802  }
1803  
1804  int_4 cxneq (complex_64 z1, complex_64 z2)
1805  {
1806    return (xreal_cmp (&z1.re, &z2.re) != 0 || xreal_cmp (&z1.im, &z2.im) != 0);
1807  }
1808  
1809  complex_64 cxsin (complex_64 z)
1810  {
1811    complex_64 w1, w2;
1812    w1 = cxdrot (z);              // Now w1= i*z,  where i={0,1} 
1813    w2 = cxrrot (z);              // Now w2= -i*z, where i={0,1} 
1814    w1 = cxsub (cxexp (w1), cxexp (w2));
1815    w2.re = xreal_2 (w1.im, -1);
1816    w1.re.value[0] ^= X_SIGN_MASK;
1817    w2.im = xreal_2 (w1.re, -1);     // Now w2= (exp(i*z)-exp(-i*z))/2i 
1818    return w2;
1819  }
1820  
1821  complex_64 cxcos (complex_64 z)
1822  {
1823    complex_64 w1, w2;
1824    w1 = cxdrot (z);              // Now w1=  i*z,  where i={0,1} 
1825    w2 = cxrrot (z);              // Now w2= -i*z, where i={0,1} 
1826    w1 = cxsum (cxexp (w1), cxexp (w2));
1827    w2.re = xreal_2 (w1.re, -1);
1828    w2.im = xreal_2 (w1.im, -1);
1829    return w2;
1830  }
1831  
1832  complex_64 cxtan (complex_64 z)
1833  {
1834    if (xsigerr (xreal_cmp (&(z.im), &xEmax) > 0, XFPOFLOW, NULL)) {
1835      return X_0_1I;
1836    } else if (xsigerr (xreal_cmp (&(z.im), &xEmin) < 0, XFPOFLOW, NULL)) {
1837      return cxneg (X_0_1I);
1838    } else {
1839      complex_64 w;
1840      if (xsigerr (!cxrec (cxcos (z), &w), XEDOM, "cxtan()")) {
1841        return X_0_0I;
1842      } else {
1843        return cxmul (cxsin (z), w);
1844      }
1845    }
1846  }
1847  
1848  complex_64 cxasin (complex_64 z)
1849  {
1850    complex_64 w = cxsqrt (cxsub (X_1_0I, cxsqr (z)));
1851    real_32 ls = cxabs (cxsum (cxdrot (z), w));
1852    real_32 rs = xmul (X_VSV, cxabs (z));
1853    if (xreal_cmp (&ls, &rs) < 0) {
1854      w = cxdrot (cxlog (cxsub (w, cxdrot (z))));
1855    } else {
1856      w = cxrrot (cxlog (cxsum (cxdrot (z), w)));
1857    }
1858    return w;
1859  }
1860  
1861  complex_64 cxacos (complex_64 z)
1862  {
1863    complex_64 w = cxsqrt (cxsub (X_1_0I, cxsqr (z)));
1864    real_32 ls = cxabs (cxsum (z, cxdrot (w)));
1865    real_32 rs = xmul (X_VSV, cxabs (z));
1866    if (xreal_cmp (&ls, &rs) < 0) {
1867      w = cxdrot (cxlog (cxsub (z, cxdrot (w))));
1868    } else {
1869      w = cxrrot (cxlog (cxsum (z, cxdrot (w))));
1870    }
1871    return w;
1872  }
1873  
1874  complex_64 cxatan (complex_64 z)
1875  {
1876    real_32 mod = xadd (xabs (z.im), X_1, 1);
1877    int_4 errcond = xsgn (&(z.re)) == 0 && xsgn (&mod) == 0;
1878    if (xsigerr (errcond, XEDOM, "cxatan()")) {
1879      return X_0_0I;
1880    } else {
1881  // This way, cxatan() also works correctly for complex numbers very far from the origin.
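      // atan(z) = -i * log(sqrt((1 + i*z) / (1 - i*z))); for large |z| the
      // numerically safer equivalent form (1 + i*z) / sqrt(1 + z^2) is used
      // inside the logarithm.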
1882      complex_64 w;
1883      mod = cxabs (z);
1884      if (xreal_cmp (&mod, &X_VGV) > 0) {
1885        w = cxsqrt (cxsum (X_1_0I, cxsqr (z)));
1886        w = cxdiv (cxsum (X_1_0I, cxdrot (z)), w);
1887        w = cxrrot (cxlog (w));
1888      } else {
1889        w = cxdiv (cxsum (X_1_0I, cxdrot (z)), cxsub (X_1_0I, cxdrot (z)));
1890        w = cxrrot (cxlog_sqrt (w));
1891      }
1892      return w;
1893    }
1894  }
1895  
1896  complex_64 cxdbl(complex_16 z)
1897  {
1898    complex_64 zz;
1899    zz.re = dbltox (creal (z));
1900    zz.im = dbltox (cimag (z));
1901    return zz;
1902  }
1903  
1904  complex_64 cxquad(complex_32 z)
1905  {
1906    complex_64 zz;
1907    zz.re = dbltox (crealq (z));
1908    zz.im = dbltox (cimagq (z));
1909    return zz;
1910  }
1911  
1912  complex_64 _cquadtop (complex_64 *zz, complex_32 z)
1913  {
1914    zz->re = dbltox (crealq (z));
1915    zz->im = dbltox (cimagq (z));
1916    return *zz;
1917  }
1918  
1919  complex_64 cxreal32(real_32 z)
1920  {
1921    complex_64 zz;
1922    zz.re = z;
1923    zz.im = X_0;
1924    return zz;
1925  }
1926  
1927  complex_64 _coctotop(complex_64 *zz, real_32 z)
1928  {
1929    zz->re = z;
1930    zz->im = X_0;
1931    return *zz;
1932  }
1933  
1934  // VIF additions (REAL*32)
1935  
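      // xtenup (n) returns 10**n, computed by binary exponentiation of 10
      // (the same square-and-multiply scheme as xpwr above).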
1936  real_32 xtenup (int_4 n)
1937  {
1938    if (n == 0) {
1939      return X_1;
1940    } else if (n == 1) {
1941      return X_10;
1942    } else if (n == -1) {
1943      return X_1_OVER_10;
1944    }
1945    real_32 s = X_10, t;
1946    unsigned k, m;
1947    t = X_1;
1948    if (n < 0) {
1949      m = -n;
1950      if ((xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xtenup()"))) {
1951        return X_0;
1952      }
1953      s = xdiv (X_1, s);
1954    } else {
1955      m = n;
1956    }
1957    if (m != 0) {
1958      k = 1;
1959      while (1) {
1960        if (k & m) {
1961          t = xmul (s, t);
1962        }
1963        if ((k <<= 1) <= m) {
1964          s = xmul (s, s);
1965        } else {
1966          break;
1967        }
1968      }
1969    } else {
1970      xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xtenup()");
1971    }
1972    return t;
1973  }
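
      // xtenup (n) computes 10**n by binary (square-and-multiply) exponentiation.
      // For n = 13 = 0b1101 the loop forms t = 10 * 10^4 * 10^8 while squaring s,
      // six multiplications in all instead of twelve. A purely illustrative check,
      // not part of the library:
      //
      //   real_32 a = xtenup (13), b = X_1;
      //   for (int_4 i = 0; i < 13; i++) b = xmul (b, X_10);
      //   // xreal_cmp (&a, &b) now returns 0, both products being exact.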
1974  
1975  real_32 xcotan (real_32 x)
1976  {
1977  // Intrinsic function so arguments are not pointers.
1978    return xdiv (X_1, xtan (x));
1979  }
1980  
1981  real_32 xacotan (real_32 x)
1982  {
1983  // Intrinsic function so arguments are not pointers.
1984    return xatan (xdiv (X_1, x));
1985  }
1986  
1987  real_32 _xsgn (real_32 a, real_32 b)
1988  {
1989  // Intrinsic function so arguments are not pointers.
1990    real_32 x = (xgetsgn (&a) == 0 ? a : xneg (a));
1991    return (xgetsgn (&b) == 0 ? x : xneg (x));
1992  }
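
      // _xsgn (a, b) is the FORTRAN intrinsic SIGN (a, b): the magnitude of a with
      // the sign of b, e.g. SIGN (-3.0, 2.0) = 3.0 and SIGN (3.0, -2.0) = -3.0.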
1993  
1994  real_32 _zabs_64 (real_32 re, real_32 im)
1995  {
1996  // Intrinsic function so arguments are not pointers.
1997    return cxabs (CMPLXX (re, im));
1998  }
1999  
2000  // Conversion.
2001  
2002  real_32 inttox (int_4 n)
2003  {
2004    REAL32 pe;
2005    unt_2 *pc, e;
2006    unt_4 k, h;
2007    bzero (pe, sizeof (pe));
2008    k = ABS (n);
2009    pc = (unt_2 *) &k;
2010    if (n == 0) {
2011      return *(real_32 *) pe;
2012    }
2013  
2014    #if __BYTE_ORDER == __LITTLE_ENDIAN
2015      pe[1] = *(pc + 1);
2016      pe[2] = *pc;
2017    #else
2018      pe[1] = *pc;
2019      pe[2] = *(pc + 1);
2020    #endif
2021  
2022    for (e = 0, h = 1; h <= k && e < ((8 * sizeof (unt_4)) - 1); h <<= 1, ++e);
2023    if (h <= k) {
2024      e += 1;
2025    }
2026    *pe = X_EXPO_BIAS + e - 1;
2027    if (n < 0) {
2028      *pe |= X_SIGN_MASK;
2029    }
2030    xlshift ((8 * sizeof (unt_4)) - e, pe + 1, FLT256_LEN);
2031    return *(real_32 *) pe;
2032  }
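
      // Worked example for inttox(): n = 5 has bit length e = 3, so the stored
      // exponent word is X_EXPO_BIAS + 2 and xlshift() moves the magnitude up by
      // 32 - 3 = 29 bits, leaving mantissa 0b101... = 1.25; indeed 1.25 * 2^2 = 5.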
2033  
2034  real_4 xtoflt (real_32 s)
2035  {
2036  // An extended floating point number is represented as a combination of the
2037  // following elements:
2038  //
2039  //   sign bit(s): 0 -> positive, 1 -> negative ;
2040  //   exponent(e): 15-bit biased integer (bias=16383) ;
2041  //   mantissa(m): 7 (or more) words of 16 bit length with the
2042  //                leading 1 explicitly represented.
2043  //
2044  //   Thus  f = (-1)^s*2^[e-16383] *m ,  with 1 <= m < 2 .
2045  //
2046  // This format supports a dynamic range of:
2047  //
2048  //   2^16384 > f > 2^[-16383]  or
2049  //   1.19*10^4932 > f > 1.68*10^-[4932].
2050  //
2051  // Special values of the exponent are:
2052  //
2053  //   all ones -> infinity (floating point overflow)
2054  //   all zeros -> number = zero.
2055  //
2056  // Underflow in operations is handled by a flush to zero. Thus, a number with
2057  // the exponent zero and nonzero mantissa is invalid (not-a-number).
2058  //
2059  //
2060    union {unt_2 pe[2]; real_4 f;} v;
2061    unt_2 *pc, u;
2062    int_2 i, e;
2063    pc = SEXP (s);
2064    u = *pc & X_SIGN_MASK;
2065    e = (*pc & X_EXPO_MASK) - xF_bias;
2066  //
2067  // u is the sign of the number s.
2068  // e == (exponent of s) + 127 
2069  //
2070    if (e >= xF_max) {
2071      return (!u ? FLT_MAX : -FLT_MAX);
2072    }
2073    if (e <= 0) {
2074      return 0.;
2075    }
2076    for (i = 0; i < 2; v.pe[i] = *++pc, i++);
2077  // In the IEEE 754 Standard the leading 1 is not represented.                    
2078    v.pe[0] &= X_EXPO_MASK;
2079  // Now pe[0],pe[1] hold 31 bits of mantissa,
2080  // but only the first 23 of them go into the
2081  // final real_4 number.
2082    xrshift (xF_lex - 1, v.pe, 2);
2083  // We have just loaded the mantissa and now we 
2084  // are going to load exponent and sign.        
2085    v.pe[0] |= (e << (16 - xF_lex));
2086    v.pe[0] |= u;
2087  #if __BYTE_ORDER == __LITTLE_ENDIAN
2088    u = v.pe[0];
2089    v.pe[0] = v.pe[1];
2090    v.pe[1] = u;
2091  #endif
2092    return v.f;
2093  }
2094  
2095  real_32 flttox (real_4 y)
2096  {
2097    REAL32 pe; 
2098    unt_2 *pc, u;
2099    int_2 i, e;
2100    if (y < FLT_MIN && y > -FLT_MIN) {
2101      return X_0;
2102    }
2103    bzero (pe, sizeof (pe));
2104    pc = (unt_2 *) &y;
2105  #if __BYTE_ORDER == __LITTLE_ENDIAN
2106    pc += 1;
2107  #endif
2108    u = *pc & X_SIGN_MASK;
2109    e = xF_bias + ((*pc & X_EXPO_MASK) >> (16 - xF_lex));
2110  // Now u is the sign of y and e is the
2111  // biased exponent (exponent + bias).  
2112  #if __BYTE_ORDER == __LITTLE_ENDIAN
2113    for (i = 1; i < 3; pe[i] = *pc--, i++);
2114  #else
2115    for (i = 1; i < 3; pe[i] = *pc++, i++);
2116  #endif
2117    pc = pe + 1;
2118    xlshift (xF_lex - 1, pc, 2);
2119    *pc |= X_SIGN_MASK;
2120  // We have just put in pe[1],pe[2] the whole 
2121  // mantissa of y with a leading 1.           
2122  // Now we have only to put exponent and sign 
2123  // in pe[0].                                 
2124    *pe = e;
2125    *pe |= u;
2126    return *(real_32 *) pe;
2127  }
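
      // Round-trip note (illustrative): flttox() followed by xtoflt() reproduces
      // every normal real_4 exactly, since the 24-bit mantissa fits in the HPA
      // mantissa and xtoflt() merely truncates; subnormals are flushed to zero.
      //
      //   real_4 f = 0.15625f;            // 1.25 * 2^-3, exact in binary
      //   real_32 x = flttox (f);
      //   // xtoflt (x) == f holds here.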
2128  
2129  real_8 xtodbl (real_32 s)
2130  {
2131    union {unt_2 pe[4]; real_8 d;} v;
2132    unt_2 *pc, u;
2133    int_2 i, e;
2134    pc = SEXP (s);
2135    u = *pc & X_SIGN_MASK;
2136    e = (*pc & X_EXPO_MASK) - xD_bias;
2137    if (e >= xD_max) {
2138      return (!u ? DBL_MAX : -DBL_MAX);
2139    }
2140    if (e <= 0) {
2141      return 0.;
2142    }
2143    for (i = 0; i < 4; v.pe[i] = *++pc, i++);
2144    v.pe[0] &= X_EXPO_MASK;
2145    xrshift (xD_lex - 1, v.pe, 4);
2146    v.pe[0] |= (e << (16 - xD_lex));
2147    v.pe[0] |= u;
2148  #if __BYTE_ORDER == __LITTLE_ENDIAN
2149    u = v.pe[3];
2150    v.pe[3] = v.pe[0];
2151    v.pe[0] = u;
2152    u = v.pe[2];
2153    v.pe[2] = v.pe[1];
2154    v.pe[1] = u;
2155  #endif
2156    return v.d;
2157  }
2158  
2159  real_32 dbltox (real_8 y)
2160  {
2161    REAL32 pe;
2162    unt_2 *pc, u;
2163    int_2 i, e;
2164    if (y < DBL_MIN && y > -DBL_MIN) {
2165      return X_0;
2166    }
2167    bzero (pe, sizeof (pe));
2168    pc = (unt_2 *) &y;
2169  #if __BYTE_ORDER == __LITTLE_ENDIAN
2170    pc += 3;
2171  #endif
2172    u = *pc & X_SIGN_MASK;
2173    e = xD_bias + ((*pc & X_EXPO_MASK) >> (16 - xD_lex));
2174  #if __BYTE_ORDER == __LITTLE_ENDIAN
2175    for (i = 1; i < 5; pe[i] = *pc--, i++);
2176  #else
2177    for (i = 1; i < 5; pe[i] = *pc++, i++);
2178  #endif
2179    pc = pe + 1;
2180    xlshift (xD_lex - 1, pc, 4);
2181    *pc |= X_SIGN_MASK;
2182    *pe = e;
2183    *pe |= u;
2184    return *(real_32 *) pe;
2185  }
2186  
2187  real_32 _quadtox (real_16 x)
2188  {
2189  // Intrinsic function so arguments are not pointers.
2190    REAL32 z;
2191    unt_2 *y;
2192    int_4 i;
2193    if (x == 0.0q) {
2194      return X_0;
2195    }
2196    int_4 sinf = isinfq (x);
2197    if (sinf == 1) {
2198      return X_PLUS_INF;
2199    } else if (sinf == -1) {
2200      return X_MINUS_INF;
2201    }
2202    if (isnanq (x)) {
2203      return X_NAN;
2204    }
2205    bzero (z, sizeof (z));
2206    y = (unt_2 *) &x;
2207    for (i = 0; i <= 7; i++) {
2208  #if __BYTE_ORDER == __LITTLE_ENDIAN
2209      z[i] = y[7 - i];
2210  #else
2211      z[i] = y[i];
2212  #endif
2213    }
2214  // real_16 (IEEE binary128) keeps its leading mantissa bit implicit; the HPA format stores it explicitly.
2215    unt_2 cy = 0x8000;
2216    for (i = 1; i < 8; i++) {
2217      if (z[i] & 0x1) {
2218        z[i] = (z[i] >> 1) | cy;
2219        cy = 0x8000;
2220      } else {
2221        z[i] = (z[i] >> 1) | cy;
2222        cy = 0x0;
2223      }
2224    }
2225    z[8] |= cy;
2226    return *(real_32 *) z;
2227  }
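
      // The shift loop in _quadtox() makes the hidden bit explicit: IEEE binary128
      // and the HPA format share a 15-bit exponent with bias 16383, so word 0 is
      // copied as-is, the 112 fraction bits are shifted right by one, and 0x8000
      // enters as the explicit leading bit. For x = 1.0q this yields
      // {0x3fff, 0x8000, 0, ...}, i.e. X_1.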
2228  
2229  real_32 _quadtop (real_32 *z, real_16 x)
2230  {
2231    *z = _quadtox (x);
2232    return *z;
2233  }
2234  
2235  real_16 _xtoquad (real_32 x)
2236  {
2237  // Intrinsic function so arguments are not pointers.
2238    REAL16 y;
2239    REAL32 z;
2240    int_4 i;
2241    memcpy (z, x.value, sizeof (real_32));
2242  // Catch NaN, +-Inf is handled correctly.
2243    if (xisNaN (&x)) {
2244      z[0] = 0x7fff;
2245      z[1] = 0xffff;
2246    }
2247  // real_16 (IEEE binary128) keeps its leading mantissa bit implicit; the HPA format stores it explicitly.
2248    unt_2 cy = (z[8] & 0x8000 ? 0x1 : 0x0);
2249    for (i = 7; i > 0; i--) {
2250      if (z[i] & 0x8000) {
2251        z[i] = (z[i] << 1) | cy;
2252        cy = 0x1;
2253      } else {
2254        z[i] = (z[i] << 1) | cy;
2255        cy = 0x0;
2256      }
2257    }
2258    for (i = 0; i < 8; i++) {
2259  #if __BYTE_ORDER == __LITTLE_ENDIAN
2260      y[i] = z[7 - i];
2261  #else
2262      y[i] = z[i];
2263  #endif
2264    }
2265  // Avoid 'dereferencing type-punned pointer will break strict-aliasing rules'
2266    real_16 u;
2267    memcpy (&u, &y[0], sizeof (real_16));
2268    return u;
2269  }
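
      // Because the exponent fields match, _quadtox() and _xtoquad() only move the
      // hidden bit: _xtoquad (_quadtox (x)) == x for every normal real_16, while
      // the opposite direction truncates the HPA mantissa from 240 to 113 bits.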
2270  
2271  real_32 strtox (char *s, char **end)
2272  {
2273  // This routine replaces the HPA solution - MvdV.
2274  // Algol 68 Genie employs the same algorithm.
2275  //
2276  // Initialize.
2277  #define N (FLT256_DIG + FLT256_GUARD)
2278    errno = 0;
2279    real_32 y[N];
2280    if (end != NULL && (*end) != NULL) {
2281      (*end) = &(s[0]);
2282    }
2283    while (isspace (s[0])) {
2284      s++;
2285    }
2286    real_32 W;
2287    if (s[0] == '-') {
2288      W = X_MINUS_1;
2289      s++;
2290    } else if (s[0] == '+') {
2291      W = X_1;
2292      s++;
2293    } else {
2294      W = X_1;
2295    }
2296  // Scan mantissa digits and put them into "y".
2297    while (s[0] == '0') {
2298      s++;
2299    }
2300    int dot = -1, pos = 0, pow = 0;
2301    while (pow < N && (isdigit (s[pos]) || s[pos] == '.')) {
2302      if (s[pos] == '.') {
2303        dot = pos;
2304      } else {
2305        switch (s[pos]) {
2306          case '0': y[pow] = X_0; break;
2307          case '1': y[pow] = W; break;
2308          case '2': y[pow] = xmul (X_2, W); break;
2309          case '3': y[pow] = xmul (X_3, W); break;
2310          case '4': y[pow] = xmul (X_4, W); break;
2311          case '5': y[pow] = xmul (X_5, W); break;
2312          case '6': y[pow] = xmul (X_6, W); break;
2313          case '7': y[pow] = xmul (X_7, W); break;
2314          case '8': y[pow] = xmul (X_8, W); break;
2315          case '9': y[pow] = xmul (X_9, W); break;
2316        }
2317        W = xdiv (W, X_10);
2318        pow++;
2319      }
2320      pos++;
2321    }
2322  // Skip trailing digits.
2323    while (isdigit (s[pos])) {
2324      pos++;
2325    }
2326    if (end != NULL && (*end) != NULL) {
2327      (*end) = &(s[pos]);
2328    }
2329  // Sum from low to high to preserve precision.
2330    real_32 sum = X_0;
2331    for (int k = pow - 1; k >= 0; k--) {
2332      sum = xsum (sum, y[k]);
2333    }
2334  // Optional exponent.
2335    int expo = 0;
2336    while (s[pos] == ' ') {
2337      pos++;
2338    }
2339    if (tolower (s[pos]) == 'e' || tolower (s[pos]) == 'q' || tolower (s[pos]) == 'x') {
2340      pos++;
2341      if (isdigit (s[pos]) || s[pos] == '-' || s[pos] == '+') {
2342        expo = (int) strtol (&(s[pos]), end, 10);
2343      }
2344    }
2345  // Standardize.
2346    if (dot >= 0) {
2347      expo += dot - 1;
2348    } else {
2349      expo += pow - 1;
2350    }
2351    while (xnot0 (&sum) && xlt1 (sum)) {
2352      sum = xmul (sum, X_10);
2353      expo--;
2354    }
2355    if (errno == 0) {
2356      return xmul (sum, xtenup (expo));
2357    } else {
2358      return X_0;
2359    }
2360  #undef N
2361  }
2362  
2363  real_32 atox (char *q)
2364  {
2365    return strtox (q, NULL);
2366  }
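
      // Usage sketch (illustrative only). The exponent marker may be 'e', 'q' or
      // 'x' in either case; the code above only writes through end when it is
      // non-NULL, so a caller that wants the tail must pass a valid pointer:
      //
      //   real_32 pi = atox ("3.14159265358979323846264338327950q0");
      //   char *tail = "";
      //   real_32 h = strtox ("6.62607015x-34 J s", &tail);   // tail -> " J s"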
2367  
2368  int_4 _xint4 (real_32 x_)
2369  {
2370    static real_16 y_;
2371    y_ = _xtoquad (x_);
2372    return (int_4) _qnint (y_);
2373  }
2374  
2375  int_4 _xnint4 (real_32 x_)
2376  {
2377    static real_16 y_;
2378    if (xgt (x_, X_0)) {
2379      y_ = _xtoquad (xsum (x_, X_1_OVER_2));
2380    }
2381    else {
2382      y_ = _xtoquad (xsub (x_, X_1_OVER_2));
2383    }
2384    return (int_4) (y_);
2385  }
2386  
2387  int_8 _xint8 (real_32 x_)
2388  {
2389    static real_16 y_;
2390    y_ = _xtoquad (x_);
2391    return (int_8) (y_);
2392  }
2393  
2394  int_8 _xnint8 (real_32 x_)
2395  {
2396    static real_16 y_;
2397    if (xgt (x_, X_0)) {
2398      y_ = _xtoquad (xsum (x_, X_1_OVER_2));
2399    }
2400    else {
2401      y_ = _xtoquad (xsub (x_, X_1_OVER_2));
2402    }
2403    return (int_8) (y_);
2404  }
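
      // _xnint4() and _xnint8() implement NINT: adding or subtracting one half
      // before the truncating cast rounds halfway cases away from zero, so
      // NINT (2.5) = 3 and NINT (-2.5) = -3, as FORTRAN requires.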
2405  
2406  void _xdump (real_32 *z)
2407  {
2408    printf ("{{");
2409    for (int i = 0; i <= FLT256_LEN; i++) {
2410      printf ("0x%04x", (z->value)[i]);
2411      if (i < FLT256_LEN) {
2412        printf (", ");
2413      }
2414    }
2415    printf ("}}\n");
2416    fflush (stdout);
2417  }
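
      // Example (illustrative): dumping X_1 shows the biased exponent 0x3fff
      // followed by the explicit leading mantissa bit.
      //
      //   real_32 one = X_1;
      //   _xdump (&one);
      //   // prints {{0x3fff, 0x8000, 0x0000, ... , 0x0000}}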
2418  
2419  real_32 _x1mach (int_4 *i)
2420  {
2421    switch (*i) {
2422  //    d1mach(1) = b**(emin-1), the smallest positive magnitude. 
2423    case 1: return FLT256_MIN;
2424  //    d1mach(2) = b**emax*(1 - b**(-t)), the largest magnitude. 
2425    case 2: return FLT256_MAX;
2426  //    d1mach(3) = b**(-t), the smallest relative spacing. 
2427    case 3: return FLT256_EPSILON_HALF;
2428  //    d1mach(4) = b**(1-t), the largest relative spacing. 
2429    case 4: return FLT256_EPSILON;
2430  //    d1mach(5) = log10(b) 
2431    case 5: return X_LOG10_2;
2432  //
2433    default: return X_0;
2434    }
2435  }
2436  