//! @file rts-real32.c
//! @author (see below)
//
//! @section Copyright
//
// This file is part of VIF - vintage FORTRAN compiler.
// Copyright 2020-2025 J. Marcel van der Veer <algol68g@xs4all.nl>.
//
//! @section License
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details. You should have received a copy of the GNU General Public
// License along with this program. If not, see <http://www.gnu.org/licenses/>.

//! @section Synopsis
//!
//! Runtime support for REAL*32 and COMPLEX*64.

// The code is based on the HPA Library, available from:
//   <http://download-mirror.savannah.gnu.org/releases/hpalib/>
//
//   Copyright (C) 2000 Daniel A. Atkinson <DanAtk@aol.com>
//   Copyright (C) 2004 Ivano Primi <ivprimi@libero.it>
//   Copyright (C) 2022 Marcel van der Veer <algol68g@xs4all.nl> - VIF version.
//
// HPA code is adapted to work with VIF to implement REAL*32 and COMPLEX*64.
// HPA was chosen since it stores a 256-bit float as a 256-bit struct, which
// is convenient for FORTRAN.
//
// The HPA Library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// The HPA Library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with the HPA Library; if not, write to the Free
// Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
// 02110-1301 USA.

// The functions forming the HPA library are all implemented in a portable
// fashion in the C language. The IEEE 754 standard for floating point
// hardware and software is assumed in the PC/Unix version of this library.
// A REAL*32 number is represented as a combination of the following elements:
//
//   sign bit(s): 0 -> positive, 1 -> negative;
//   exponent(e): 15-bit biased integer (bias = 16383);
//   mantissa(m): 15 words of 16-bit length *including* the leading 1.
//
// The range of representable numbers is then given by
//
//   2^16384 > x > 2^(-16383), that is 1.19*10^4932 > x > 1.68*10^(-4932).
//
// Special values of the exponent are:
//
//   all ones  -> infinity (floating point overflow)
//   all zeros -> number = zero.
//
// Underflow in operations is handled by a flush to zero. Thus, a number
// with exponent zero and a nonzero mantissa is invalid (not-a-number).
// From the point of view of the HPA library, a complex number is a
// structure formed by two REAL*32 numbers.
//
// HPA cannot extend precision beyond the preset, hardcoded precision.
// Hence some math routines will not achieve full precision.

#include <vif.h>
#include <rts-real32.h>
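
// A minimal sketch (not used by the runtime) of how the fields described
// above sit in the value[] words of a real_32; the function name and the
// out-parameters are illustrative only.
static void xexample_fields (const real_32 *ps, int_4 *sign, int_4 *exp2, const unt_2 **mant)
{
  const unt_2 *q = (const unt_2 *) &(ps->value);
  *sign = (*q & X_SIGN_MASK) ? 1 : 0;                 // 1 -> negative
  *exp2 = (int_4) (*q & X_EXPO_MASK) - X_EXPO_BIAS;   // unbiased exponent
  *mant = q + 1;                                      // FLT256_LEN words, leading 1 included
}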

static const char *errmsg[] = {
  "No error",
  "Division by zero",
  "Out of domain",
  "Bad exponent",
  "Floating point overflow",
  "Invalid error code"
};

#define SEXP(z) ((unt_2 *) &(z.value))

void _pi32 (real_32 *x)
{
  *x = X_PI;
}

int_4 xsigerr (int_4 errcond, int_4 errcode, const char *where)
{
// errcond must come from the evaluation of an error condition.
// errcode, which describes the type of the error, should always be
// one of XEDIV, XEDOM, XEBADEXP or XFPOFLOW.
  if (errcond == 0) {
    errcode = 0;
  }
  if (errcode < 0 || errcode > XNERR) {
    errcode = XEINV;
  }
  if (errcode != 0) {
    RTE (where, errmsg[errcode]);
  }
  return errcode;
}
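
// The guard pattern used throughout this runtime: evaluate the error
// condition at the call site, let xsigerr() raise the runtime error, and
// return a dummy value. A hypothetical domain check would read:
//
//   if ((xsigerr (xreal_cmp (&x, &X_0) < 0, XEDOM, "f()"))) {
//     return X_0;  // dummy value
//   }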

// Elementary stuff.

inline real_32 xneg (real_32 s)
{
  unt_2 *p = SEXP (s);
  *p ^= X_SIGN_MASK;
  return s;
}

inline real_32 xabs (real_32 s)
{
  unt_2 *p = SEXP (s);
  *p &= X_EXPO_MASK;
  return s;
}

inline int_4 xgetexp (const real_32 * ps)
{
  unt_2 *q = (unt_2 *) &(ps->value);
  return (*q & X_EXPO_MASK) - X_EXPO_BIAS;
}

inline int_4 xgetsgn (const real_32 * ps)
{
  unt_2 *q = (unt_2 *) &(ps->value);
  return (*q & X_SIGN_MASK);
}

int_4 xreal_cmp (const real_32 * pa, const real_32 * pb)
{
  unt_2 e, k, *p, *q, p0, q0;
  int_4 m;
  p = (unt_2 *) &(pa->value);
  q = (unt_2 *) &(pb->value);
  for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
  if (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK) {
// *pa is actually zero 
    p0 = 0;
  } else {
    p0 = *p;
  }
  for (m = 1; m <= FLT256_LEN && q[m] == 0; m++);
  if (m > FLT256_LEN && (*q & X_EXPO_MASK) < X_EXPO_MASK) {
// *pb is actually zero 
    q0 = 0;
  } else {
    q0 = *q;
  }
  e = p0 & X_SIGN_MASK;
  k = q0 & X_SIGN_MASK;
  if (e && !k) {
    return -1;
  } else if (!e && k) {
    return 1;
  } else {                        // *pa and *pb have the same sign 
    m = (e) ? -1 : 1;
    e = p0 & X_EXPO_MASK;
    k = q0 & X_EXPO_MASK;
    if (e > k) {
      return m;
    } else if (e < k) {
      return -m;
    } else {
      for (e = 0; *(++p) == *(++q) && e < FLT256_LEN; ++e);
      if (e < FLT256_LEN) {
        return (*p > *q ? m : -m);
      } else {
        return 0;
      }
    }
  }
}

real_32 xreal_2 (real_32 s, int_4 m)
{
  unt_2 *p = SEXP (s);
  int_4 e;
  for (e = 1; e <= FLT256_LEN && p[e] == 0; e++);
  if (e <= FLT256_LEN) {
    e = *p & X_EXPO_MASK;            // biased exponent 
    if (e + m < 0) {
      return X_0;
    } else if ((xsigerr (e + m >= X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
      return ((s.value[0] & X_SIGN_MASK) ? X_MINUS_INF : X_PLUS_INF);
    } else {
      *p += m;
      return s;
    }
  } else {                        // s is zero or +-Inf 
    return s;
  }
}

real_32 xsfmod (real_32 s, int_4 *p)
{
  unt_2 *pa = SEXP (s);
  unt_2 *pb = pa + 1;
  int_2 e, k;
  e = (*pa & X_EXPO_MASK) - X_EXPO_BIAS;
  if ((xsigerr (e >= 15, XFPOFLOW, NO_TEXT))) {
    *p = -1;
    return s;
  } else if (e < 0) {
    *p = 0;
    return s;
  }
  *p = *pb >> (15 - e);
  xlshift (++e, pb, FLT256_LEN);
  *pa -= e;
  for (e = 0; *pb == 0 && e < xMax_p; ++pb, e += 16);
  if (e == xMax_p) {
    return X_0;
  }
  for (k = 0; !((*pb << k) & X_SIGN_MASK); ++k);
  if ((k += e)) {
    xlshift (k, pa + 1, FLT256_LEN);
    *pa -= k;
  }
  return s;
}

void xlshift (int_4 n, unt_2 *pm, int_4 m)
{
  unt_2 *pc = pm + m - 1;
  if (n < 16 * m) {
    unt_2 *pa = pm + n / 16;
    m = n % 16;
    n = 16 - m;
    while (pa < pc) {
      *pm = (*pa++) << m;
      *pm++ |= *pa >> n;
    }
    *pm++ = *pa << m;
  }
  while (pm <= pc) {
    *pm++ = 0;
  }
}

void xrshift (int_4 n, unt_2 *pm, int_4 m)
{
  unt_2 *pc = pm + m - 1;
  if (n < 16 * m) {
    unt_2 *pa = pc - n / 16;
    m = n % 16;
    n = 16 - m;
    while (pa > pm) {
      *pc = (*pa--) >> m;
      *pc-- |= *pa << n;
    }
    *pc-- = *pa >> m;
  }
  while (pc >= pm) {
    *pc-- = 0;
  }
}
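
// Illustrative sketch (not used by the runtime) of the multiword shifts:
// bits shifted past either end of the vector are lost and vacated word
// positions are zero filled. The names below are local to this example.
static void xexample_shift (void)
{
  unt_2 v[3] = {0x1234, 0x5678, 0x9abc};
  xlshift (4, v, 3);   // v = {0x2345, 0x6789, 0xabc0}; top nibble 0x1 is lost
  xrshift (4, v, 3);   // v = {0x0234, 0x5678, 0x9abc}; the lost bits stay lost
}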

real_32 _xI (real_32 x) 
{
// Intrinsic function so arguments are not pointers.
  return x;
}

real_32 _aintx (real_32 x) 
{
// Intrinsic function so arguments are not pointers.
  return xtrunc (x);
}

real_32 _anintx (real_32 x) 
{
// Intrinsic function so arguments are not pointers.
  if (xgetsgn (&x) == 0) {
    return xtrunc (xsum (x, X_1_OVER_2));
  } else {
    return xtrunc (xsub (x, X_1_OVER_2));
  }
}

real_32 _xmax (real_32 a, real_32 b) 
{
// Intrinsic function so arguments are not pointers.
  if (xge (a, b)) {
    return a;
  } else {
    return b;
  }
}

real_32 _xmin (real_32 a, real_32 b) 
{
// Intrinsic function so arguments are not pointers.
  if (xle (a, b)) {
    return a;
  } else {
    return b;
  }
}

real_32 _xmod (real_32 a, real_32 b)
{
// Intrinsic function so arguments are not pointers.
  real_32 q = xdiv (a, b);
  if (xsgn (&q) >= 0) {
    q = xfloor (q);
  } else {
    q = xneg (xfloor (xneg (q)));
  }
  return xadd (a, xmul (b, q), 1);
}

real_32 xadd (real_32 s, real_32 t, int_4 f)
{
  REAL32 pe;
  unt_2 h, u;
  unt_2 *pc, *pf = pe;
  unt_4 n = 0;
  unt_2 *pa = SEXP (s);
  unt_2 *pb = SEXP (t);
  int_2 e = *pa & X_EXPO_MASK;
  int_2 k = *pb & X_EXPO_MASK;
  if (f != 0) {
    *pb ^= X_SIGN_MASK;
  }
  u = (*pb ^ *pa) & X_SIGN_MASK;
  f = 0;
  if (e > k) {
    if ((k = e - k) >= xMax_p) {
      return s;
    }
    xrshift (k, pb + 1, FLT256_LEN);
  } else if (e < k) {
    if ((e = k - e) >= xMax_p) {
      return t;
    }
    xrshift (e, pa + 1, FLT256_LEN);
    e = k;
    pc = pa;
    pa = pb;
    pb = pc;
  } else if (u != 0) {
    for (pc = pa, pf = pb; *(++pc) == *(++pf) && f < FLT256_LEN; ++f);
    if (f >= FLT256_LEN) {
      return X_0;
    }
    if (*pc < *pf) {
      pc = pa;
      pa = pb;
      pb = pc;
    }
    pf = pe + f;
  }
  h = *pa & X_SIGN_MASK;
  if (u != 0) {
    for (pc = pb + FLT256_LEN; pc > pb; --pc) {
      *pc = ~(*pc);
    }
    n = 1L;
  }
  for (pc = pe + FLT256_LEN, pa += FLT256_LEN, pb += FLT256_LEN; pc > pf;) {
    n += *pa;
    pa--;
    n += *pb;
    pb--;
    *pc = n;
    pc--;
    n >>= 16;
  }
  if (u != 0) {
    for (; *(++pc) == 0; ++f);
    for (k = 0; !((*pc << k) & X_SIGN_MASK); ++k);
    if ((k += 16 * f)) {
      if ((e -= k) <= 0) {
        return X_0;
      }
      xlshift (k, pe + 1, FLT256_LEN);
    }
  } else {
    if (n != 0) {
      ++e;
      if ((xsigerr (e == (short) X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
        return (!h ? X_PLUS_INF : X_MINUS_INF);
      }
      ++pf;
      xrshift (1, pf, FLT256_LEN);
      *pf |= X_SIGN_MASK;
    }
  }
  *pe = e;
  *pe |= h;
  return *(real_32 *) pe;
}
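
// Note on the flag argument: xadd (s, t, 0) computes s + t, while any
// nonzero f flips the sign of t first, so xadd (s, t, 1) computes s - t.
// The xsum() and xsub() calls in _anintx() above follow this convention
// (presumably thin f = 0 and f = 1 wrappers declared in rts-real32.h).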

real_32 xmul (real_32 s, real_32 t)
{
  unt_2 pe[FLT256_LEN + 2], *q0, *q1, h;
  unt_2 *pa, *pb, *pc;
  unt_4 m, n, p;
  int_2 e;
  int_2 k;
  q0 = SEXP (s);
  q1 = SEXP (t);
  e = (*q0 & X_EXPO_MASK) - X_EXPO_BIAS;
  k = (*q1 & X_EXPO_MASK) + 1;
  if ((xsigerr (e > (short) X_EXPO_MASK - k, XFPOFLOW, NO_TEXT))) {
    return (((s.value[0] & X_SIGN_MASK) ^ (t.value[0] & X_SIGN_MASK)) ? X_MINUS_INF : X_PLUS_INF);
  }
  if ((e += k) <= 0) {
    return X_0;
  }
  h = (*q0 ^ *q1) & X_SIGN_MASK;
  for (++q1, k = FLT256_LEN, p = n = 0L, pc = pe + FLT256_LEN + 1; k > 0; --k) {
    for (pa = q0 + k, pb = q1; pa > q0;) {
      m = *pa--;
      m *= *pb++;
      n += (m & 0xffffL);
      p += (m >> 16);
    }
    *pc-- = n;
    n = p + (n >> 16);
    p = 0L;
  }
  *pc = n;
  if (!(*pc & X_SIGN_MASK)) {
    --e;
    if (e <= 0) {
      return X_0;
    }
    xlshift (1, pc, FLT256_LEN + 1);
  }
  if ((xsigerr (e == (short) X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
    return (!h ? X_PLUS_INF : X_MINUS_INF);
  }
  *pe = e;
  *pe |= h;
  return *(real_32 *) pe;
}

real_32 xdiv (real_32 s, real_32 t)
{
  unt_2 *pd = SEXP (s), *pc = SEXP (t);
// Next makes xdiv robust at extreme exponents - MvdV.
  if ((*pd & X_EXPO_MASK) == (*pc & X_EXPO_MASK)) {
    *pd &= ~X_EXPO_MASK; 
    *pc &= ~X_EXPO_MASK; 
  }
// HPA implementation.
  unt_2 e = *pc;
  *pc = X_EXPO_BIAS;
  if ((xsigerr (xreal_cmp (&t, &X_0) == 0, XEDIV, "xdiv()"))) {
    return X_0;
  } else {
    real_32 a = dbltox (1 / xtodbl (t));
    *pc = e;
    pc = SEXP (a);
    *pc += X_EXPO_BIAS - (e & X_EXPO_MASK);
    *pc |= e & X_SIGN_MASK;
    for (unt_2 i = 0; i < xItt_div; ++i) {
      a = xmul (a, xadd (X_2, xmul (a, t), 1));
    }
    return xmul (s, a);
  }
}
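
// The loop above is Newton-Raphson refinement of the reciprocal: with
// a ~ 1/t, the update a <- a * (2 - a*t) roughly doubles the number of
// correct bits per pass, so a few passes take the 53-bit seed from
// 1 / xtodbl (t) past the 240-bit REAL*32 mantissa.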

real_32 xevtch (real_32 z, real_32 * a, int_4 m)
{
  real_32 *p, f, t, tp, w;
  w = xreal_2 (z, 1);
  t = X_0;
  tp = X_0;
  for (p = a + m; p > a;) {
    f = xadd (*p--, xadd (xmul (w, t), tp, 1), 0);
    tp = t;
    t = f;
  }
  return xadd (*p, xadd (xmul (z, t), tp, 1), 0);
}

real_32 xexp2 (real_32 x)
{
  real_32 s, d, f;
  unt_2 *pf = SEXP (x);
  int_4 m, k;
  if (xreal_cmp (&x, &xE2min) < 0) {
    return X_0;
  } else if ((xsigerr (xreal_cmp (&x, &xE2max) > 0, XFPOFLOW, NO_TEXT))) {
    return X_PLUS_INF;
  } else {
    m = (*pf & X_SIGN_MASK) ? 1 : 0;
    x = xsfmod (x, &k);
    if (m != 0) {
      k *= -1;
    }
// -X_EXPO_BIAS <= k <= +X_EXPO_BIAS 
    x = xmul (x, X_LN_2);
    if (xgetexp (&x) > -X_EXPO_BIAS) {
      x = xreal_2 (x, -1);
      s = xmul (x, x);
      f = X_0;
      for (d = inttox (m = xMS_exp); m > 1; m -= 2, d = inttox (m)) {
        f = xdiv (s, xadd (d, f, 0));
      }
      f = xdiv (x, xadd (d, f, 0));
      f = xdiv (xadd (d, f, 0), xadd (d, f, 1));
    } else {
      f = X_1;
    }
    pf = SEXP (f);
    if (-k > *pf) {
      return X_0;
    } else {
      *pf += k;
      if ((xsigerr (*pf >= X_EXPO_MASK, XFPOFLOW, NO_TEXT))) {
        return X_PLUS_INF;
      } else {
        return f;
      }
    }
  }
}
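
// xexp2() splits x into a signed integer part k and a remainder with
// xsfmod(), evaluates exp(remainder * ln 2) with a continued fraction,
// and finally adds k to the binary exponent, using 2^x = 2^k * e^((x-k) ln 2).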

real_32 xexp (real_32 z)
{
  return xexp2 (xmul (z, X_LOG2_E));
}

real_32 xexp10 (real_32 z)
{
  return xexp2 (xmul (z, X_LOG2_10));
}

real_32 xfmod (real_32 s, real_32 t, real_32 * q)
{
  if ((xsigerr (xreal_cmp (&t, &X_0) == 0, XEDIV, "xfmod()"))) {
    return X_0;
  } else {
    unt_2 *p, mask = 0xffff;
    int_2 e, i;
    int_4 u;
    *q = xdiv (s, t);
    p = (unt_2 *) &(q->value);
    u = (*p & X_SIGN_MASK) ? 0 : 1;
    e = (*p &= X_EXPO_MASK);         // biased exponent of *q 
    e = e < X_EXPO_BIAS ? 0 : e - X_EXPO_BIAS + 1;
    for (i = 1; e / 16 > 0; i++, e -= 16);
    if (i <= FLT256_LEN) {
// e = 0, ..., 15 
      mask <<= 16 - e;
      p[i] &= mask;
      for (i++; i <= FLT256_LEN; p[i] = 0, i++);
    }
// Now *q == abs(quotient of (s/t)) 
    return xadd (s, xmul (t, *q), u);
  }
}

real_32 xfrexp (real_32 s, int_4 *p)
{
  unt_2 *ps = SEXP (s), u;
  *p = (*ps & X_EXPO_MASK) - X_EXPO_BIAS + 1;
  u = *ps & X_SIGN_MASK;
  *ps = X_EXPO_BIAS - 1;
  *ps |= u;
  return s;
}
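
// xfrexp() is the REAL*32 analogue of frexp(3): the result has its biased
// exponent forced to X_EXPO_BIAS - 1, so 0.5 <= |result| < 1 and
// s = result * 2^(*p).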

static void nullify (int_4 skip, unt_2 *p, int_4 k)
{
// After skipping the first 'skip' bits of the vector 'p',
// this function nullifies all the remaining ones. 'k' is
// the number of words forming the vector p.
// Warning: 'skip' must be positive!
  int_4 i;
  unt_2 mask = 0xffff;
  for (i = 0; skip / 16 > 0; i++, skip -= 16);
  if (i < k) {
// skip = 0, ..., 15 
    mask <<= 16 - skip;
    p[i] &= mask;
    for (i++; i < k; p[i] = 0, i++);
  }
}

static void canonic_form (real_32 * px)
{
  unt_2 *p, u;
  int_2 e, i, j, skip;
  p = (unt_2 *) &(px->value);
  e = (*p & X_EXPO_MASK);            // biased exponent of x 
  u = (*p & X_SIGN_MASK);            // sign of x            
  if (e < X_EXPO_BIAS - 1) {
    return;
  } else {
    unt_2 mask = 0xffff;
// e >= X_EXPO_BIAS - 1 
    for (i = 1, skip = e + 1 - X_EXPO_BIAS; skip / 16 > 0; i++, skip -= 16);
    if (i <= FLT256_LEN) {
// skip = 0, ..., 15 
      mask >>= skip;
      if ((p[i] & mask) != mask) {
        return;
      } else {
        for (j = i + 1; j <= FLT256_LEN && p[j] == 0xffff; j++);
        if (j > FLT256_LEN) {
          p[i] -= mask;
          for (j = i + 1; j <= FLT256_LEN; p[j] = 0, j++);
          if (!(p[1] & 0x8000)) {
            p[1] = 0x8000;
            *p = ++e;
            *p |= u;
          } else if (u != 0) {
            *px = xadd (*px, X_1, 1);
          } else {
            *px = xadd (*px, X_1, 0);
          }
        }
      }
    }
  }
}

real_32 xfrac (real_32 x)
{
// xfrac(x) returns the fractional part of the number x.
// xfrac(x) has the same sign as x.  
  unt_2 u, *p;
  int_2 e;
  int_4 n;
  canonic_form (&x);
  p = SEXP (x);
  e = (*p & X_EXPO_MASK);            // biased exponent of x 
  if (e < X_EXPO_BIAS) {
    return x;                   // The integer part of x is zero 
  } else {
    u = *p & X_SIGN_MASK;            // sign of x 
    n = e - X_EXPO_BIAS + 1;
    xlshift (n, p + 1, FLT256_LEN);
    e = X_EXPO_BIAS - 1;
// Now we have to take into account the rule 
// of the leading one.                    
    while (e > 0 && !(p[1] & X_SIGN_MASK)) {
      xlshift (1, p + 1, FLT256_LEN);
      e -= 1;
    }
// Now p+1 points to the fractional part of x, 
// u is its sign, e is its biased exponent.     
    p[0] = e;
    p[0] |= u;
    return *(real_32 *) p;
  }
}

real_32 xtrunc (real_32 x)
{
// xtrunc(x) returns the integer part of the number x.
// xtrunc(x) has the same sign as x.  
  unt_2 *p;
  int_2 e;
  canonic_form (&x);
  p = SEXP (x);
  e = (*p & X_EXPO_MASK);            // biased exponent of x 
  if (e < X_EXPO_BIAS) {
    return X_0;               // The integer part of x is zero 
  } else {
    nullify (e - X_EXPO_BIAS + 1, p + 1, FLT256_LEN);
    return *(real_32 *) p;
  }
}

real_32 xround (real_32 x)
{
  return xtrunc (xadd (x, X_RND_CORR, x.value[0] & X_SIGN_MASK));
}

real_32 xceil (real_32 x)
{
  unt_2 *ps = SEXP (x);
  if ((*ps & X_SIGN_MASK)) {
    return xtrunc (x);
  } else {
    real_32 y = xfrac (x);
// y has the same sign as x (see above). 
    return (xreal_cmp (&y, &X_0) > 0 ? xadd (xtrunc (x), X_1, 0) : x);
  }
}

real_32 xfloor (real_32 x)
{
  unt_2 *ps = SEXP (x);
  if ((*ps & X_SIGN_MASK)) {
    real_32 y = xfrac (x);
// y has the same sign as x (see above). 
    return (xreal_cmp (&y, &X_0) < 0 ? xadd (xtrunc (x), X_1, 1) : x);
  } else {
    return xtrunc (x);
  }
}
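
// Illustrative behaviour of the rounding family on a negative argument,
// assuming X_RND_CORR is one half (round-to-nearest correction):
//
//   real_32 x = dbltox (-2.5);
//   xtrunc (x);   // -2   (towards zero)
//   xround (x);   // -3   (nearest, halves away from zero)
//   xfloor (x);   // -3   (towards -oo)
//   xceil (x);    // -2   (towards +oo)
//   xfrac (x);    // -0.5 (same sign as x)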

static void xadd_correction (real_32 * px, int_4 k)
{
  int_2 e = (px->value[0] & X_EXPO_MASK) - X_EXPO_BIAS;
//   e = (e > 0 ? e : 0);   
  *px = xadd (*px, xreal_2 (X_FIX_CORR, e), k);
}

real_32 xfix (real_32 x)
{
  unt_2 *p;
  int_2 e;
  xadd_correction (&x, x.value[0] & X_SIGN_MASK);
  p = SEXP (x);
  e = (*p & X_EXPO_MASK);            // biased exponent of x 
  if (e < X_EXPO_BIAS) {
    return X_0;               // The integer part of x is zero 
  } else {
    nullify (e - X_EXPO_BIAS + 1, p + 1, FLT256_LEN);
    return *(real_32 *) p;
  }
}

real_32 xtanh (real_32 z)
{
  real_32 s, d, f;
  int_4 m, k;
  if ((k = xgetexp (&z)) > xK_tanh) {
    if (xgetsgn (&z)) {
      return xneg (X_1);
    } else {
      return X_1;
    }
  }
  if (k < xK_lin) {
    return z;
  }
  ++k;
  if (k > 0) {
    z = xreal_2 (z, -k);
  }
  s = xmul (z, z);
  f = X_0;
  for (d = inttox (m = xMS_hyp); m > 1;) {
    f = xdiv (s, xadd (d, f, 0));
    d = inttox (m -= 2);
  }
  f = xdiv (z, xadd (d, f, 0));
  for (; k > 0; --k) {
    f = xdiv (xreal_2 (f, 1), xadd (d, xmul (f, f), 0));
  }
  return f;
}

real_32 xsinh (real_32 z)
{
  int_4 k;
  if ((k = xgetexp (&z)) < xK_lin) {
    return z;
  } else if (k < 0) {
    z = xtanh (xreal_2 (z, -1));
    return xdiv (xreal_2 (z, 1), xadd (X_1, xmul (z, z), 1));
  } else {
    z = xexp (z);
    return xreal_2 (xadd (z, xdiv (X_1, z), 1), -1);
  }
}

real_32 xcosh (real_32 z)
{
  if (xgetexp (&z) < xK_lin) {
    return X_1;
  }
  z = xexp (z);
  return xreal_2 (xadd (z, xdiv (X_1, z), 0), -1);
}

real_32 xatanh (real_32 x)
{
  real_32 y = x;
  y.value[0] &= X_EXPO_MASK;           // Now y == abs(x) 
  if ((xsigerr (xreal_cmp (&y, &X_1) >= 0, XEDOM, "xatanh"))) {
    return ((x.value[0] & X_SIGN_MASK) ? X_MINUS_INF : X_PLUS_INF);
  } else {
    y = xdiv (xadd (X_1, x, 0), xadd (X_1, x, 1));
    return xreal_2 (xlog (y), -1);
  }
}

real_32 xasinh (real_32 x)
{
  real_32 y = xmul (x, x);
  y = xsqrt (xadd (X_1, y, 0));
  if ((x.value[0] & X_SIGN_MASK)) {
    return xneg (xlog (xadd (y, x, 1)));
  } else {
    return xlog (xadd (x, y, 0));
  }
}

real_32 xacosh (real_32 x)
{
  if ((xsigerr (xreal_cmp (&x, &X_1) < 0, XEDOM, "xacosh()"))) {
    return X_0;
  } else {
    real_32 y = xmul (x, x);
    y = xsqrt (xadd (y, X_1, 1));
    return xlog (xadd (x, y, 0));
  }
}

real_32 xatan (real_32 z)
{
  real_32 s, f;
  int_4 k, m;
  if ((k = xgetexp (&z)) < xK_lin) {
    return z;
  }
  if (k >= 0) {
// k >= 0 is equivalent to abs(z) >= 1.0 
    z = xdiv (X_1, z);
    m = 1;
  } else {
    m = 0;
  }
  f = dbltox (atan (xtodbl (z)));
  s = xadd (X_1, xmul (z, z), 0);
  for (k = 0; k < xItt_div; ++k) {
    f = xadd (f, xdiv (xadd (z, xtan (f), 1), s), 0);
  }
  if (m != 0) {
    if (xgetsgn (&f)) {
      return xadd (xneg (X_PI_OVER_2), f, 1);
    } else {
      return xadd (X_PI_OVER_2, f, 1);
    }
  } else {
    return f;
  }
}

real_32 xasin (real_32 z)
{
  real_32 u = z;
  u.value[0] &= X_EXPO_MASK;
  if ((xsigerr (xreal_cmp (&u, &X_1) > 0, XEDOM, "xasin()"))) {
    return ((xgetsgn (&z)) ? xneg (X_PI_OVER_2) : X_PI_OVER_2);
  } else {
    if (xgetexp (&z) < xK_lin) {
      return z;
    }
    u = xsqrt (xadd (X_1, xmul (z, z), 1));
    if (xgetexp (&u) == -X_EXPO_BIAS) {
      return ((xgetsgn (&z)) ? xneg (X_PI_OVER_2) : X_PI_OVER_2);
    }
    return xatan (xdiv (z, u));
  }
}

real_32 xacos (real_32 z)
{
  real_32 u = z;
  u.value[0] &= X_EXPO_MASK;
  if ((xsigerr (xreal_cmp (&u, &X_1) > 0, XEDOM, "xacos()"))) {
    return ((xgetsgn (&z)) ? X_PI : X_0);
  } else {
    if (xgetexp (&z) == -X_EXPO_BIAS) {
      return X_PI_OVER_2;
    }
    u = xsqrt (xadd (X_1, xmul (z, z), 1));
    u = xatan (xdiv (u, z));
    if (xgetsgn (&z)) {
      return xadd (X_PI, u, 0);
    } else {
      return u;
    }
  }
}

// Kindly added by A.Haumer 2010-04-09.

real_32 xatan2 (real_32 y, real_32 x)
{
  int_4 rs, is;
  rs = xsgn (&x);
  is = xsgn (&y);
  if (rs > 0) {
    return xatan (xdiv (y, x));
  } else if (rs < 0) {
    x.value[0] ^= X_SIGN_MASK;
    y.value[0] ^= X_SIGN_MASK;
    if (is >= 0) {
      return xadd (X_PI, xatan (xdiv (y, x)), 0);
    } else {
      return xadd (xatan (xdiv (y, x)), X_PI, 1);
    }
  } else {                      // x is zero ! 
    if (!xsigerr (is == 0, XEDOM, "xatan2()")) {
      return (is > 0 ? X_PI_OVER_2 : xneg (X_PI_OVER_2));
    } else {
      return X_0;             // Dummy value :) 
    }
  }
}
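
// The result of xatan2() lies in (-pi, pi]: the plain arctangent covers
// x > 0, and for x < 0 both arguments are negated and pi is added or
// subtracted depending on the sign of y.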

real_32 xlog (real_32 z)
{
  real_32 f, h;
  int_4 k, m;
  if ((xsigerr ((xgetsgn (&z)) || xgetexp (&z) == -X_EXPO_BIAS, XEDOM, "xlog()"))) {
    return X_MINUS_INF;
  } else if (xreal_cmp (&z, &X_1) == 0) {
    return X_0;
  } else {
    z = xfrexp (z, &m);
    z = xmul (z, X_SQRT_2);
    z = xdiv (xadd (z, X_1, 1), xadd (z, X_1, 0));
    h = xreal_2 (z, 1);
    z = xmul (z, z);
    for (f = h, k = 1; xgetexp (&h) > -xMax_p;) {
      h = xmul (h, z);
      f = xadd (f, xdiv (h, inttox (k += 2)), 0);
    }
    return xadd (f, xmul (X_LN_2, dbltox (m - .5)), 0);
  }
}

real_32 xlog2 (real_32 z)
{
  real_32 f, h;
  int_4 k, m;
  if ((xsigerr ((xgetsgn (&z)) || xgetexp (&z) == -X_EXPO_BIAS, XEDOM, "xlog2()"))) {
    return X_MINUS_INF;
  } else if (xreal_cmp (&z, &X_1) == 0) {
    return X_0;
  } else {
    z = xfrexp (z, &m);
    z = xmul (z, X_SQRT_2);
    z = xdiv (xadd (z, X_1, 1), xadd (z, X_1, 0));
    h = xreal_2 (z, 1);
    z = xmul (z, z);
    for (f = h, k = 1; xgetexp (&h) > -xMax_p;) {
      h = xmul (h, z);
      f = xadd (f, xdiv (h, inttox (k += 2)), 0);
    }
    return xadd (xmul (f, X_LOG2_E), dbltox (m - .5), 0);
  }
}

real_32 xlog10 (real_32 z)
{
  real_32 w = xlog (z);
  if (xreal_cmp (&w, &X_MINUS_INF) <= 0) {
    return X_MINUS_INF;
  } else {
    return xmul (w, X_LOG10_E);
  }
}

logical_4 xeq (real_32 x1, real_32 x2)
{
  return (xreal_cmp (&x1, &x2) == 0);
}

logical_4 xneq (real_32 x1, real_32 x2)
{
  return (xreal_cmp (&x1, &x2) != 0);
}

logical_4 xgt (real_32 x1, real_32 x2)
{
  return (xreal_cmp (&x1, &x2) > 0);
}

logical_4 xge (real_32 x1, real_32 x2)
{
  return (xreal_cmp (&x1, &x2) >= 0);
}

logical_4 xlt (real_32 x1, real_32 x2)
{
  return (xreal_cmp (&x1, &x2) < 0);
}

logical_4 xle (real_32 x1, real_32 x2)
{
  return (xreal_cmp (&x1, &x2) <= 0);
}

logical_4 xis_nan (const real_32 * u)
{
// xis_nan (&x) returns TRUE if and only if x is not a valid number 
  unt_2 *p = (unt_2 *) &(u->value);
  if (*p != 0) {
    return FALSE;
  } else {
    int_4 i;
    for (i = 1; i <= FLT256_LEN && p[i] == 0x0; i++);
    return i <= FLT256_LEN;
  }
}

logical_4 xis0 (const real_32 * u)
{
  unt_2 *p = (unt_2 *) &(u->value);
  int_4 m;
  for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
  return (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK ? TRUE : FALSE);
}

logical_4 xnot0 (const real_32 * u)
{
  unt_2 *p = (unt_2 *) &(u->value);
  int_4 m;
  for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
  return (m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK ? FALSE : TRUE);
}

logical_4 xlt1 (const real_32 u)
{
  return (xlt (xabs (u), X_1));
}

int_4 xsgn (const real_32 * u)
{
  unt_2 *p = (unt_2 *) &(u->value);
  int_4 m;
  for (m = 1; m <= FLT256_LEN && p[m] == 0; m++);
  if ((m > FLT256_LEN && (*p & X_EXPO_MASK) < X_EXPO_MASK) || !*p) {
    return 0;
  } else {
    return ((*p & X_SIGN_MASK) ? -1 : 1);
  }
}

logical_4 xis_pinf (const real_32 * u)
{
  return (*u->value == X_EXPO_MASK);
}

logical_4 xis_minf (const real_32 * u)
{
  return (*u->value == (X_EXPO_MASK | X_SIGN_MASK));
}

logical_4 xisordnumb (const real_32 * u)
{
  int_4 isNaN, isfinite;
  unt_2 *p = (unt_2 *) &(u->value);
  if (*p != 0) {
    isNaN = FALSE;
  } else {
    int_4 i;
    for (i = 1; i <= FLT256_LEN && p[i] == 0x0; i++);
    isNaN = i <= FLT256_LEN;
  }
  isfinite = (*p & X_EXPO_MASK) < X_EXPO_MASK;
  return (!isNaN && (isfinite) ? TRUE : FALSE);
}

real_32 xpwr (real_32 s, int_4 n)
{
  real_32 t;
  unsigned k, m;
  t = X_1;
  if (n < 0) {
    m = -n;
    if ((xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()"))) {
      return X_0;
    }
    s = xdiv (X_1, s);
  } else {
    m = n;
  }
  if (m != 0) {
    k = 1;
    while (1) {
      if (k & m) {
        t = xmul (s, t);
      }
      if ((k <<= 1) <= m) {
        s = xmul (s, s);
      } else {
        break;
      }
    }
  } else {
    xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xpwr()");
  }
  return t;
}
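
// xpwr() is square-and-multiply exponentiation: bit k of |n| decides
// whether the running power s^(2^k) is multiplied into t, e.g.
// s^13 = s^8 * s^4 * s since 13 = 1101 binary. A negative n inverts s
// first; 0^0 and 0^(-n) raise XEBADEXP.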

real_32 xpow (real_32 x, real_32 y)
{
  if (xsigerr ((xgetsgn (&x)) || xgetexp (&x) == -X_EXPO_BIAS, XEDOM, "xpow()")) {
    return X_0;
  } else {
    return xexp2 (xmul (xlog2 (x), y));
  }
}

real_32 xsqrt (real_32 z)
{
  real_32 s, h;
  int_2 m, e;
  unt_2 *pc;
  if ((xsigerr ((xgetsgn (&z)), XEDOM, "xsqrt()"))) {
    return X_0;
  } else {
    pc = SEXP (z);
    if (*pc == 0) {
      return X_0;
    }
    e = *pc - X_EXPO_BIAS;
    *pc = X_EXPO_BIAS + (e % 2);
    e /= 2;
    s = dbltox (sqrt (xtodbl (z)));
    for (m = 0; m < xItt_div; ++m) {
      h = xdiv (xadd (z, xmul (s, s), 1), xreal_2 (s, 1));
      s = xadd (s, h, 0);
    }
    pc = SEXP (s);
    *pc += e;
    return s;
  }
}
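
// The xsqrt() loop is Heron's rule: h = (z - s*s) / (2*s) is the Newton
// step for f(s) = s*s - z, roughly doubling the correct digits per pass.
// The exponent is halved separately so the double precision seed from
// sqrt() stays in range.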

static int_4 xodd (real_32 x)
{
  unt_2 *p = SEXP (x);
  int_2 e, i;
  e = (*p & X_EXPO_MASK) - X_EXPO_BIAS;    // exponent of x 
  if (e < 0) {
    return 0;
  } else {
    for (i = 1; e / 16 > 0; i++, e -= 16);
// Now e = 0, ..., 15 
    return (i <= FLT256_LEN ? p[i] & 0x8000 >> e : 0);
  }
}

real_32 xtan (real_32 z)
{
  int_4 k, m;
  z = rred (z, 't', &k);
  if ((xsigerr (xreal_cmp (&z, &X_PI_OVER_2) >= 0, XEDOM, "xtan()"))) {
    return (!k ? X_PLUS_INF : X_MINUS_INF);
  } else {
    if (xreal_cmp (&z, &X_PI_OVER_4) == 1) {
      m = 1;
      z = xadd (X_PI_OVER_2, z, 1);
    } else {
      m = 0;
    }
    if (k != 0) {
      z = xneg (c_tan (z));
    } else {
      z = c_tan (z);
    }
    if (m != 0) {
      return xdiv (X_1, z);
    } else {
      return z;
    }
  }
}

real_32 xcos (real_32 z)
{
  int_4 k;
  z = rred (z, 'c', &k);
  if (xgetexp (&z) < xK_lin) {
    if (k != 0) {
      return xneg (X_1);
    } else {
      return X_1;
    }
  }
  z = c_tan (xreal_2 (z, -1));
  z = xmul (z, z);
  z = xdiv (xadd (X_1, z, 1), xadd (X_1, z, 0));
  if (k != 0) {
    return xneg (z);
  } else {
    return z;
  }
}

real_32 xsin (real_32 z)
{
  int_4 k;
  z = rred (z, 's', &k);
  if (xgetexp (&z) >= xK_lin) {
    z = c_tan (xreal_2 (z, -1));
    z = xdiv (xreal_2 (z, 1), xadd (X_1, xmul (z, z), 0));
  }
  if (k != 0) {
    return xneg (z);
  } else {
    return z;
  }
}

static real_32 c_tan (real_32 z)
{
  real_32 s, f, d;
  int_4 m;
  unt_2 k;
  if (xgetexp (&z) < xK_lin)
    return z;
  s = xneg (xmul (z, z));
  for (k = 1; k <= FLT256_LEN && s.value[k] == 0; k++);
  if ((xsigerr (s.value[0] == 0xffff && k > FLT256_LEN, XFPOFLOW, NO_TEXT))) {
    return X_0;
  } else {
    f = X_0;
    for (d = inttox (m = xMS_trg); m > 1;) {
      f = xdiv (s, xadd (d, f, 0));
      d = inttox (m -= 2);
    }
    return xdiv (z, xadd (d, f, 0));
  }
}

static real_32 rred (real_32 z, int_4 kf, int_4 *ps)
{
  real_32 is, q;
  if (xgetsgn (&z)) {
    z = xneg (z);
    is = X_1;
  } else {
    is = X_0;
  }
  z = xfmod (z, X_PI, &q);
  if (kf == 't') {
    q = is;
  } else if (kf == 's') {
    q = xadd (q, is, 0);
  }
  if (xreal_cmp (&z, &X_PI_OVER_2) == 1) {
    z = xadd (X_PI, z, 1);
    if (kf == 'c' || kf == 't') {
      q = xadd (q, X_1, 0);
    }
  }
  *ps = (xodd (q)) ? 1 : 0;
  return z;
}
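
// rred() folds z into [0, pi/2] for sin ('s'), cos ('c') or tan ('t'):
// the sign flip and the multiples of pi that were removed are accumulated
// in q, and the parity of q (tested with xodd()) tells the caller whether
// the reduced result must be negated.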

// COMPLEX*64

complex_64 cxadd (complex_64 z1, complex_64 z2, int_4 k)
{
  complex_64 w;
  w.re = xadd (z1.re, z2.re, k);
  w.im = xadd (z1.im, z2.im, k);
  return w;
}

complex_64 cxsum (complex_64 z1, complex_64 z2)
{
  complex_64 w;
  w.re = xadd (z1.re, z2.re, 0);
  w.im = xadd (z1.im, z2.im, 0);
  return w;
}

complex_64 cxsub (complex_64 z1, complex_64 z2)
{
  complex_64 w;
  w.re = xadd (z1.re, z2.re, 1);
  w.im = xadd (z1.im, z2.im, 1);
  return w;
}

complex_64 cxmul (complex_64 z1, complex_64 z2)
{
  complex_64 w;
  w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 1);
  w.im = xadd (xmul (z1.im, z2.re), xmul (z1.re, z2.im), 0);
  return w;
}

complex_64 cxrmul (real_32 c, complex_64 z)
{
  complex_64 w;
  w.re = xmul (c, z.re);
  w.im = xmul (c, z.im);
  return w;
}

complex_64 cxdrot (complex_64 z)
{
  complex_64 y;
  y.re = z.im;
  y.im = z.re;
// Multiplication by +i 
  y.re.value[0] ^= X_SIGN_MASK;
  return y;
}

complex_64 cxrrot (complex_64 z)
{
  complex_64 y;
  y.re = z.im;
  y.im = z.re;
// Multiplication by -i 
  y.im.value[0] ^= X_SIGN_MASK;
  return y;
}

complex_64 cxdiv (complex_64 z1, complex_64 z2)
{
  int_4 tv = cxrec (z2, &z2);
  if (!xsigerr (!tv, XEDIV, "cxdiv()")) {
    complex_64 w;
    w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 1);
    w.im = xadd (xmul (z1.im, z2.re), xmul (z1.re, z2.im), 0);
    return w;
  } else {
    return X_0_0I;
  }
}

complex_64 cxinv (complex_64 z)
{
  int_4 tv = cxrec (z, &z);
  if (!xsigerr (!tv, XEDOM, "cxinv()")) {
    return z;
  } else {
    return X_0_0I;
  }
}

complex_64 cxsqr (complex_64 z)
{
  complex_64 w;
  w.re = xadd (xmul (z.re, z.re), xmul (z.im, z.im), 1);
  w.im = xmul (X_2, xmul (z.im, z.re));
  return w;
}

complex_64 cxsqrt (complex_64 z)
{
  complex_64 w;
  if (xsgn (&(z.re)) == 0 && xsgn (&(z.im)) == 0) {
    w = X_0_0I;
  } else if (xis0 (&(z.im))) {
    real_32 s = xsqrt (xabs (z.re));
    if (xsgn (&(z.re)) == -1) {
      w = (complex_64) {X_0, s};
    } else {
      w = (complex_64) {s, X_0};
    }
  } else {
    real_32 mod = xsqrt (cxabs (z)), arg = xreal_2 (cxarg (z), -1);
    w.re = xmul (mod, xcos (arg));
    w.im = xmul (mod, xsin (arg));
  }
  return w;
}

complex_64 cxreset (real_32 re, real_32 im)
{
  complex_64 y;
  y.re = re;
  y.im = im;
  return y;
}

complex_64 cxconv (real_32 x)
{
  complex_64 y;
  y.re = x;
  y.im = X_0;
  return y;
}

real_32 cxreal (complex_64 z)
{
  return z.re;
}

real_32 cximag (complex_64 z)
{
  return z.im;
}

complex_64 cxswap (complex_64 z)
{
  complex_64 y;
  y.re = z.im;
  y.im = z.re;
  return y;
}

complex_64 cxneg (complex_64 z)
{
  z.re.value[0] ^= X_SIGN_MASK;
  z.im.value[0] ^= X_SIGN_MASK;
  return z;
}

complex_64 cxconj (complex_64 z)
{
  return (z.im.value[0] ^= X_SIGN_MASK, z);
}

// If the binary exponents of the two parts differ by more than XBOUND
// bits, the smaller part is negligible at REAL*32 precision.
#define XBOUND (FLT256_LEN * 16 + 8)

real_32 cxabs (complex_64 z)
{
  if (xreal_cmp (&(z.re), &X_0) == 0 && xreal_cmp (&(z.im), &X_0) == 0) {
    return X_0;
  } else {
    int_4 ea = (z.re.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
    int_4 eb = (z.im.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
    if (ea > eb + XBOUND) {
      return z.re;
    } else if (eb > ea + XBOUND) {
      return z.im;
    } else {
      z.re.value[0] -= eb;
      z.im.value[0] = X_EXPO_BIAS;
      real_32 x = xsqrt (xadd (xmul (z.re, z.re), xmul (z.im, z.im), 0));
      x.value[0] += eb;
      return x;
    }
  }
}

real_32 cxarg (complex_64 z)
{
  int_4 rs = xsgn (&(z.re)), is = xsgn (&(z.im));
  if (rs > 0) {
    return xatan (xdiv (z.im, z.re));
  } else if (rs < 0) {
    z.re.value[0] ^= X_SIGN_MASK;
    z.im.value[0] ^= X_SIGN_MASK;
    if (is >= 0) {
      return xadd (X_PI, xatan (xdiv (z.im, z.re)), 0);
    } else {
      return xadd (xatan (xdiv (z.im, z.re)), X_PI, 1);
    }
  } else {                      // z.re is zero ! 
    if (!xsigerr (is == 0, XEDOM, "cxarg()")) {
      return (is > 0 ? X_PI_OVER_2 : xneg (X_PI_OVER_2));
    } else {
      return xneg (X_PI_OVER_2);       // Dummy value :)
    }
  }
}

logical_4 cxrec (complex_64 z, complex_64 * w)
{
  if (xreal_cmp (&(z.re), &X_0) == 0 && xreal_cmp (&(z.im), &X_0) == 0) {
    return FALSE;
  } else {
    int_4 sa = z.re.value[0] & X_SIGN_MASK;
    int_4 sb = z.im.value[0] & X_SIGN_MASK;
    int_4 ea = (z.re.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
    int_4 eb = (z.im.value[0] &= X_EXPO_MASK) - X_EXPO_BIAS;
    real_32 x;
    if (ea > eb + XBOUND) {
      x = z.re;
    } else if (eb > ea + XBOUND) {
      x = z.im;
    } else {
      z.re.value[0] -= eb;
      z.im.value[0] = X_EXPO_BIAS;
      x = xsqrt (xadd (xmul (z.re, z.re), xmul (z.im, z.im), 0));
      x.value[0] += eb;
      z.re.value[0] += eb;
      z.im.value[0] += eb;
    }
    w->re = xdiv (xdiv (z.re, x), x);
    w->im = xdiv (xdiv (z.im, x), x);
    w->re.value[0] |= sa;
    w->im.value[0] |= X_SIGN_MASK ^ sb;
    return TRUE;
  }
}
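
// cxrec() computes 1/z as conj(z)/|z|^2, dividing by |z| twice instead of
// squaring it, and rescales by the larger exponent first so that the
// squared modulus can neither overflow nor underflow prematurely.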

complex_64 cxexp (complex_64 z)
{
  complex_64 w;
  w.re = xmul (xexp (z.re), xcos (z.im));
  w.im = xmul (xexp (z.re), xsin (z.im));
  return w;
}

complex_64 cxlog (complex_64 z)
{
  real_32 mod;
  complex_64 w;
  mod = cxabs (z);
  if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog()")) {
    return X_0_0I;
  } else {
    w.re = xlog (mod);
    w.im = cxarg (z);
    return w;
  }
}

complex_64 cxlog10 (complex_64 z)
{
  real_32 mod;
  complex_64 w;
  mod = cxabs (z);
  if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog10()")) {
    return X_0_0I;
  } else {
    w.re = xlog10 (mod);
    w.im = xmul (cxarg (z), X_LOG10_E);
    return w;
  }
}

complex_64 cxlog2 (complex_64 z)
{
  real_32 mod;
  complex_64 w;
  mod = cxabs (z);
  if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog2()")) {
    return X_0_0I;
  } else {
    w.re = xlog2 (mod);
    w.im = xmul (cxarg (z), X_LOG2_E);
    return w;
  }
}

complex_64 cxlog_sqrt (complex_64 z)
{
  real_32 mod = cxabs (z);
  if (xsigerr (xsgn (&mod) <= 0, XEDOM, "cxlog_sqrt()")) {
    return X_0_0I;
  } else {
    complex_64 w;
    w.re = xreal_2 (xlog (mod), -1);
    w.im = xreal_2 (cxarg (z), -1);
    return w;
  }
}

complex_64 cxsinh (complex_64 z)
{
  complex_64 w = cxsub (cxexp (z), cxexp (cxneg (z)));
  w.re = xreal_2 (w.re, -1);
  w.im = xreal_2 (w.im, -1);
  return w;
}

complex_64 cxcosh (complex_64 z)
{
  complex_64 w = cxsum (cxexp (z), cxexp (cxneg (z)));
  w.re = xreal_2 (w.re, -1);
  w.im = xreal_2 (w.im, -1);
  return w;
}

complex_64 cxtanh (complex_64 z)
{
  if (xsigerr (xreal_cmp (&(z.re), &xEmax) > 0, XFPOFLOW, NO_TEXT)) {
    return X_1_0I;
  } else if (xsigerr (xreal_cmp (&(z.re), &xEmin) < 0, XFPOFLOW, NO_TEXT)) {
    return cxneg (X_1_0I);
  } else {
    complex_64 w;
    if (xsigerr (!cxrec (cxcosh (z), &w), XEDOM, "cxtanh()")) {
      return X_0_0I;
    } else {
      return cxmul (cxsinh (z), w);
    }
  }
}

complex_64 cxasinh (complex_64 z)
{
// This way cxasinh() also works fine with real numbers very near -oo.
  complex_64 w = cxsqrt (cxsum (X_1_0I, cxsqr (z)));
  real_32 ls = cxabs (cxsum (z, w));
  real_32 rs = xmul (X_VSV, cxabs (z));
  if (xreal_cmp (&ls, &rs) < 0) {
    return cxneg (cxlog (cxsub (w, z)));
  } else {
    return cxlog (cxsum (z, w));
  }
}

complex_64 cxacosh (complex_64 z)
{
  complex_64 w = cxsqrt (cxsub (cxsqr (z), X_1_0I));
  real_32 ls = cxabs (cxsum (z, w));
  real_32 rs = xmul (X_VSV, cxabs (z));
  if (xreal_cmp (&ls, &rs) < 0) {
    return cxneg (cxlog (cxsub (z, w)));
  } else {
    return cxlog (cxsum (z, w));
  }
}

complex_64 cxatanh (complex_64 z)
{
  real_32 t = xadd (xabs (z.re), X_1, 1);
  int_4 errcond = xsgn (&(z.im)) == 0 && xsgn (&t) == 0;
  if (xsigerr (errcond, XEDOM, "cxatanh()")) {
    return X_0_0I;
  } else {
    complex_64 w = cxdiv (cxsum (X_1_0I, z), cxsub (X_1_0I, z));
    w = cxlog_sqrt (w);
    return w;
  }
}

complex_64 cxgdiv (complex_64 z1, complex_64 z2)
{
  z1.re = xround (z1.re);
  z1.im = xround (z1.im);
  z2.re = xround (z2.re);
  z2.im = xround (z2.im);
  real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
  if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxgdiv()")) {
    return X_0_0I;
  } else {
    complex_64 w;
    w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
    w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
    w.re = xround (xdiv (w.re, mod2));
    w.im = xround (xdiv (w.im, mod2));
    return w;
  }
}

complex_64 cxidiv (complex_64 z1, complex_64 z2)
{
  z1.re = xround (z1.re);
  z1.im = xround (z1.im);
  z2.re = xround (z2.re);
  z2.im = xround (z2.im);
  real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
  if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxidiv()")) {
    return X_0_0I;
  } else {
    complex_64 w;
    w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
    w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
    w.re = xfix (xdiv (w.re, mod2));
    w.im = xfix (xdiv (w.im, mod2));
    return w;
  }
}

complex_64 cxgmod (complex_64 z1, complex_64 z2)
{
  z1.re = xround (z1.re);
  z1.im = xround (z1.im);
  z2.re = xround (z2.re);
  z2.im = xround (z2.im);
  real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
  if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxgmod()")) {
    return X_0_0I;
  } else {
    complex_64 w, z;
    w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
    w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
    w.re = xround (xdiv (w.re, mod2));
    w.im = xround (xdiv (w.im, mod2));
    z.re = xadd (xmul (w.re, z2.re), xmul (w.im, z2.im), 1);
    z.im = xadd (xmul (w.im, z2.re), xmul (w.re, z2.im), 0);
    w.re = xround (xadd (z1.re, z.re, 1));
    w.im = xround (xadd (z1.im, z.im, 1));
    return w;
  }
}

complex_64 cxmod (complex_64 z1, complex_64 z2)
{
  z1.re = xround (z1.re);
  z1.im = xround (z1.im);
  z2.re = xround (z2.re);
  z2.im = xround (z2.im);
  real_32 mod2 = xadd (xmul (z2.re, z2.re), xmul (z2.im, z2.im), 0);
  if (xsigerr (xreal_cmp (&mod2, &X_PLUS_INF) >= 0, XFPOFLOW, NO_TEXT) || xsigerr (xsgn (&mod2) <= 0, XEDIV, "cxmod()")) {
    return X_0_0I;
  } else {
    complex_64 w, z;
    w.re = xadd (xmul (z1.re, z2.re), xmul (z1.im, z2.im), 0);
    w.im = xadd (xmul (z2.re, z1.im), xmul (z2.im, z1.re), 1);
    w.re = xfix (xdiv (w.re, mod2));
    w.im = xfix (xdiv (w.im, mod2));
    z.re = xadd (xmul (w.re, z2.re), xmul (w.im, z2.im), 1);
    z.im = xadd (xmul (w.im, z2.re), xmul (w.re, z2.im), 0);
    w.re = xround (xadd (z1.re, z.re, 1));
    w.im = xround (xadd (z1.im, z.im, 1));
    return w;
  }
}

static void unit_root (int_4 i, int_4 n, real_32 * a, real_32 * b)
{
// We assume n != 0 
  i %= n;
  *a = xdiv (xmul (xreal_2 (inttox (i), 1), X_PI), inttox (n));
  *b = xsin (*a);
  *a = xcos (*a);
  if (xgetexp (b) < -80) {
    *b = X_0;
  }
  if (xgetexp (a) < -80) {
    *a = X_0;
  }
}

complex_64 cxpwr (complex_64 z, int_4 n)
{
  real_32 mod = cxabs (z);
  if (xsgn (&mod) <= 0) {
    (void) xsigerr (n <= 0, XEBADEXP, "cxpwr()");
    return X_0_0I;
  } else {
    real_32 arg = xmul (inttox (n), cxarg (z));
    mod = xpwr (mod, n);
    complex_64 w;
    w.re = xmul (mod, xcos (arg));
    w.im = xmul (mod, xsin (arg));
    return w;
  }
}

complex_64 cxroot (complex_64 z, int_4 i, int_4 n)
{
  if (xsigerr (n == 0, XEBADEXP, "cxroot()")) {
    return X_0_0I;
  } else {
    real_32 mod = cxabs (z);
    if (xsgn (&mod) <= 0) {
      (void) xsigerr (n < 0, XEBADEXP, "cxroot()");
      return X_0_0I;
    } else {                    // mod > 0 
      real_32 arg = xdiv (cxarg (z), inttox (n));
      real_32 e = xdiv (X_1, inttox (n));      // 1/n 
// x^e = exp(e*log(x)) for any x > 0 
      mod = xexp (xmul (e, xlog (mod)));
      complex_64 w, zz;
      w.re = xmul (mod, xcos (arg));
      w.im = xmul (mod, xsin (arg));
      real_32 a, b;
      unit_root (i, n, &a, &b);
      zz.re = xadd (xmul (w.re, a), xmul (w.im, b), 1);
      zz.im = xadd (xmul (w.im, a), xmul (w.re, b), 0);
      return zz;
    }
  }
}
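
// unit_root() supplies cos and sin of 2*pi*i/n, so cxroot (z, i, n) is the
// i-th of the n complex n-th roots of z: for example, cxroot (X_1_0I, i, 4)
// for i = 0..3 yields 1, i, -1 and -i.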
  1767  
  1768  complex_64 cxpow (complex_64 z1, complex_64 z2)
  1769  {
  1770    real_32 mod = cxabs (z1);
  1771    if (xsgn (&mod) <= 0) {
  1772      (void) xsigerr (xsgn (&z2.re) <= 0, XEBADEXP, "cxpow()");
  1773      return X_0_0I;
  1774    } else {
  1775      real_32 arg = cxarg (z1);
  1776      real_32 a = xadd (xmul (z2.re, xlog (mod)), xmul (z2.im, arg), 1);
  1777      real_32 b = xadd (xmul (z2.re, arg), xmul (z2.im, xlog (mod)), 0);
  1778      complex_64 w;
  1779      w.re = xmul (xexp (a), xcos (b));
  1780      w.im = xmul (xexp (a), xsin (b));
  1781      return w;
  1782    }
  1783  }
  1784  
  1785  logical_4 cxis0 (const complex_64 * z)
  1786  {
  1787    return (xis0 (&z->re) && xis0 (&z->im));
  1788  }
  1789  
  1790  logical_4 cxnot0 (const complex_64 * z)
  1791  {
  1792    return (xnot0 (&z->re) || xnot0 (&z->im));
  1793  }
  1794  
  1795  logical_4 cxeq (complex_64 z1, complex_64 z2)
  1796  {
  1797    return (xreal_cmp (&z1.re, &z2.re) == 0 && xreal_cmp (&z1.im, &z2.im) == 0);
  1798  }
  1799  
  1800  logical_4 cxneq (complex_64 z1, complex_64 z2)
  1801  {
  1802    return (xreal_cmp (&z1.re, &z2.re) != 0 || xreal_cmp (&z1.im, &z2.im) != 0);
  1803  }
  1804  
  1805  complex_64 cxsin (complex_64 z)
  1806  {
  1807    complex_64 w1, w2;
  1808    w1 = cxdrot (z);              // Now w1= i*z,  where i={0,1} 
  1809    w2 = cxrrot (z);              // Now w2= -i*z, where i={0,1} 
  1810    w1 = cxsub (cxexp (w1), cxexp (w2));
  1811    w2.re = xreal_2 (w1.im, -1);
  1812    w1.re.value[0] ^= X_SIGN_MASK;
  1813    w2.im = xreal_2 (w1.re, -1);     // Now w2= (exp(i*z)-exp(-i*z))/2i 
  1814    return w2;
  1815  }
  1816  
  1817  complex_64 cxcos (complex_64 z)
  1818  {
  1819    complex_64 w1, w2;
  1820    w1 = cxdrot (z);              // Now w1=  i*z,  where i={0,1} 
  1821    w2 = cxrrot (z);              // Now w2= -i*z, where i={0,1} 
  1822    w1 = cxsum (cxexp (w1), cxexp (w2));
  1823    w2.re = xreal_2 (w1.re, -1);
  1824    w2.im = xreal_2 (w1.im, -1);
  1825    return w2;
  1826  }
  1827  
  1828  complex_64 cxtan (complex_64 z)
  1829  {
  1830    if (xsigerr (xreal_cmp (&(z.im), &xEmax) > 0, XFPOFLOW, NO_TEXT)) {
  1831      return X_0_1I;
  1832    } else if (xsigerr (xreal_cmp (&(z.im), &xEmin) < 0, XFPOFLOW, NO_TEXT)) {
  1833      return cxneg (X_0_1I);
  1834    } else {
  1835      complex_64 w;
  1836      if (xsigerr (!cxrec (cxcos (z), &w), XEDOM, "cxtan()")) {
  1837        return X_0_0I;
  1838      } else {
  1839        return cxmul (cxsin (z), w);
  1840      }
  1841    }
  1842  }
  1843  
  1844  complex_64 cxasin (complex_64 z)
  1845  {
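        // asin (z) = -i*log (i*z + sqrt (1 - z^2)); when i*z and the square root
        // nearly cancel, the mirrored form i*log (sqrt (1 - z^2) - i*z) is used.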
  1846    complex_64 w = cxsqrt (cxsub (X_1_0I, cxsqr (z)));
  1847    real_32 ls = cxabs (cxsum (cxdrot (z), w));
  1848    real_32 rs = xmul (X_VSV, cxabs (z));
  1849    if (xreal_cmp (&ls, &rs) < 0) {
  1850      w = cxdrot (cxlog (cxsub (w, cxdrot (z))));
  1851    } else {
  1852      w = cxrrot (cxlog (cxsum (cxdrot (z), w)));
  1853    }
  1854    return w;
  1855  }
  1856  
  1857  complex_64 cxacos (complex_64 z)
  1858  {
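        // acos (z) = -i*log (z + i*sqrt (1 - z^2)), with the same cancellation
        // guard as in cxasin ().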
  1859    complex_64 w = cxsqrt (cxsub (X_1_0I, cxsqr (z)));
  1860    real_32 ls = cxabs (cxsum (z, cxdrot (w)));
  1861    real_32 rs = xmul (X_VSV, cxabs (z));
  1862    if (xreal_cmp (&ls, &rs) < 0) {
  1863      w = cxdrot (cxlog (cxsub (z, cxdrot (w))));
  1864    } else {
  1865      w = cxrrot (cxlog (cxsum (z, cxdrot (w))));
  1866    }
  1867    return w;
  1868  }
  1869  
  1870  complex_64 cxatan (complex_64 z)
  1871  {
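        // atan has branch points at z = +i and z = -i (z.re == 0, |z.im| == 1);
        // these are rejected below with XEDOM.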
  1872    real_32 mod = xadd (xabs (z.im), X_1, 1);
  1873    int_4 errcond = xsgn (&(z.re)) == 0 && xsgn (&mod) == 0;
  1874    if (xsigerr (errcond, XEDOM, "cxatan()")) {
  1875      return X_0_0I;
  1876    } else {
  1877  // This way, cxatan() works fine also with complex numbers very far from the origin.
  1878      complex_64 w;
  1879      mod = cxabs (z);
  1880      if (xreal_cmp (&mod, &X_VGV) > 0) {
  1881        w = cxsqrt (cxsum (X_1_0I, cxsqr (z)));
  1882        w = cxdiv (cxsum (X_1_0I, cxdrot (z)), w);
  1883        w = cxrrot (cxlog (w));
  1884      } else {
  1885        w = cxdiv (cxsum (X_1_0I, cxdrot (z)), cxsub (X_1_0I, cxdrot (z)));
  1886        w = cxrrot (cxlog_sqrt (w));
  1887      }
  1888      return w;
  1889    }
  1890  }
  1891  
  1892  complex_64 cxflt (complex_8 z)
  1893  {
  1894    complex_64 zz;
  1895    zz.re = flttox (creal (z));
  1896    zz.im = flttox (cimag (z));
  1897    return zz;
  1898  }
  1899  
  1900  complex_64 cxdbl (complex_16 z)
  1901  {
  1902    complex_64 zz;
  1903    zz.re = dbltox (creal (z));
  1904    zz.im = dbltox (cimag (z));
  1905    return zz;
  1906  }
  1907  
  1908  complex_64 cxquad (complex_32 z)
  1909  {
  1910    complex_64 zz;
  1911    zz.re = quadtox (crealq (z));
  1912    zz.im = quadtox (cimagq (z));
  1913    return zz;
  1914  }
  1915  
  1916  complex_32 qxcmplx (real_32 re, real_32 im)
  1917  {
  1918    complex_32 zz;
  1919    __real__ zz = xtoquad (re);
  1920    __imag__ zz = xtoquad (im);
  1921    return zz;
  1922  }
  1923  
  1924  complex_64 _cquadtop (complex_64 *zz, complex_32 z)
  1925  {
  1926    zz->re = quadtox (crealq (z));
  1927    zz->im = quadtox (cimagq (z));
  1928    return *zz;
  1929  }
  1930  
  1931  complex_64 cxreal32 (real_32 z)
  1932  {
  1933    complex_64 zz;
  1934    zz.re = z;
  1935    zz.im = X_0;
  1936    return zz;
  1937  }
  1938  
  1939  complex_64 _coctotop (complex_64 *zz, real_32 z)
  1940  {
  1941    zz->re = z;
  1942    zz->im = X_0;
  1943    return *zz;
  1944  }
  1945  
  1946  // VIF additions (REAL*32)
  1947  
  1948  real_32 xtenup (int_4 n)
  1949  {
  1950    if (n == 0) {
  1951      return X_1;
  1952    } else if (n == 1) {
  1953      return X_10;
  1954    } else if (n == -1) {
  1955      return X_1_OVER_10;
  1956    }
  1957    real_32 s = X_10, t;
  1958    unsigned k, m;
  1959    t = X_1;
  1960    if (n < 0) {
  1961      m = -n;
  1962      if ((xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xtenup()"))) {
  1963        return X_0;
  1964      }
  1965      s = xdiv (X_1, s);
  1966    } else {
  1967      m = n;
  1968    }
  1969    if (m != 0) {
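        // Square-and-multiply over the bits of m; e.g. m = 13 = 1101b
        // gives t = s * s^4 * s^8 = s^13.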
  1970      k = 1;
  1971      while (1) {
  1972        if (k & m) {
  1973          t = xmul (s, t);
  1974        }
  1975        if ((k <<= 1) <= m) {
  1976          s = xmul (s, s);
  1977        } else {
  1978          break;
  1979        }
  1980      }
  1981    } else {
  1982      xsigerr (xreal_cmp (&s, &X_0) == 0, XEBADEXP, "xtenup()");
  1983    }
  1984    return t;
  1985  }
  1986  
  1987  real_32 xcotan (real_32 x)
  1988  {
  1989  // Intrinsic function so arguments are not pointers.
  1990    return xdiv (X_1, xtan (x));
  1991  }
  1992  
  1993  real_32 xacotan (real_32 x)
  1994  {
  1995  // Intrinsic function so arguments are not pointers.
  1996    return xatan (xdiv (X_1, x));
  1997  }
  1998  
  1999  real_32 _xsgn (real_32 a, real_32 b)
  2000  {
  2001  // Intrinsic function so arguments are not pointers.
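        // Implements FORTRAN SIGN (a, b): |a| with the sign of b.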
  2002    real_32 x = (xgetsgn (&a) == 0 ? a : xneg (a));
  2003    return (xgetsgn (&b) == 0 ? x : xneg (x));
  2004  }
  2005  
  2006  real_32 _zabs_64 (real_32 re, real_32 im)
  2007  {
  2008  // Intrinsic function so arguments are not pointers.
  2009    return cxabs (CMPLXX (re, im));
  2010  }
  2011  
  2012  real_32 _xhypot (real_32 a, real_32 b)
  2013  {
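        // Scale by the larger magnitude so squaring the ratio cannot overflow.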
  2014    if (xis0 (&a) && xis0 (&b)) {
  2015      return X_0;
  2016    } else if (xgt (xabs (a), xabs (b))) {
  2017      real_32 q = xdiv (b, a);
  2018      real_32 z = xsqrt (xsum (X_1, xmul (q, q)));
  2019      return xmul (xabs (a), z);
  2020    } else {
  2021      real_32 q = xdiv (a, b);
  2022      real_32 z = xsqrt (xsum (X_1, xmul (q, q)));
  2023      return xmul (xabs (b), z);
  2024    }
  2025  }
  2026  
  2027  // Conversion.
  2028  
  2029  real_32 inttox (int_4 n)
  2030  {
  2031    REAL32 pe;
  2032    unt_2 *pc, e;
  2033    unt_4 k, h;
  2034    bzero (pe, sizeof (pe));
  2035    k = ABS (n);
  2036    pc = (unt_2 *) &k;
  2037    if (n == 0) {
  2038      return *(real_32 *) pe;
  2039    }
  2040  
  2041  #if __BYTE_ORDER == __LITTLE_ENDIAN
  2042    pe[1] = *(pc + 1);
  2043    pe[2] = *pc;
  2044  #else
  2045    pe[1] = *pc;
  2046    pe[2] = *(pc + 1);
  2047  #endif
  2048  
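        // Compute e = bit length of |n|; the mantissa is then left-justified.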
  2049    for (e = 0, h = 1; h <= k && e < ((8 * sizeof (unt_4)) - 1); h <<= 1, ++e);
  2050    if (h <= k) {
  2051      e += 1;
  2052    }
  2053    *pe = X_EXPO_BIAS + e - 1;
  2054    if (n < 0) {
  2055      *pe |= X_SIGN_MASK;
  2056    }
  2057    xlshift ((8 * sizeof (unt_4)) - e, pe + 1, FLT256_LEN);
  2058    return *(real_32 *) pe;
  2059  }
  2060  
  2061  real_4 xtoflt (real_32 s)
  2062  {
  2063  // An extended floating point number is represented as a combination of the
  2064  // following elements:
  2065  //
  2066  //   sign bit(s): 0 -> positive, 1 -> negative ;
  2067  //   exponent(e): 15-bit biased integer (bias=16383) ;
  2068  //   mantissa(m): 7 (or more) words of 16 bit length with the
  2069  //                leading 1 explicitly represented.
  2070  //
  2071  //   Thus  f = (-1)^s*2^[e-16383] *m ,  with 1 <= m < 2 .
  2072  //
  2073  // This format supports a dynamic range of:
  2074  //
  2075  //   2^16384 > f > 2^[-16383]  or
  2076  //   1.19*10^4932 > f > 1.68*10^-[4932].
  2077  //
  2078  // Special values of the exponent are:
  2079  //
  2080  //   all ones -> infinity (floating point overflow)
  2081  //   all zeros -> number = zero.
  2082  //
  2083  // Underflow in operations is handled by a flush to zero. Thus, a number with
  2084  // the exponent zero and nonzero mantissa is invalid (not-a-number).
  2085  //
  2086  //
  2087    union {unt_2 pe[2]; real_4 f;} v;
  2088    unt_2 *pc, u;
  2089    int_2 i, e;
  2090    pc = SEXP (s);
  2091    u = *pc & X_SIGN_MASK;
  2092    e = (*pc & X_EXPO_MASK) - xF_bias;
  2093  //
  2094  // u is the sign of the number s.
  2095  // e == (exponent of s) + 127 
  2096  //
  2097    if (e >= xF_max) {
  2098      return (!u ? FLT_MAX : -FLT_MAX);
  2099    }
  2100    if (e <= 0) {
  2101      return 0.;
  2102    }
  2103    for (i = 0; i < 2; v.pe[i] = *++pc, i++);
  2104  // In the IEEE 754 Standard the leading 1 is not represented.                    
  2105    v.pe[0] &= X_EXPO_MASK;
  2106  // Now pe[0],pe[1] hold 31 bits of mantissa,
  2107  // but only the leading 23 bits go into
  2108  // the final real_4 number.
  2109    xrshift (xF_lex - 1, v.pe, 2);
  2110  // We have just loaded the mantissa and now we 
  2111  // are going to load exponent and sign.        
  2112    v.pe[0] |= (e << (16 - xF_lex));
  2113    v.pe[0] |= u;
  2114  #if __BYTE_ORDER == __LITTLE_ENDIAN
  2115    u = v.pe[0];
  2116    v.pe[0] = v.pe[1];
  2117    v.pe[1] = u;
  2118  #endif
  2119    return v.f;
  2120  }
  2121  
  2122  real_32 flttox (real_4 y)
  2123  {
  2124    REAL32 pe; 
  2125    unt_2 *pc, u;
  2126    int_2 i, e;
  2127    if (y < FLT_MIN && y > -FLT_MIN) {
  2128      return X_0;
  2129    }
  2130    bzero (pe, sizeof (pe));
  2131    pc = (unt_2 *) &y;
  2132  #if __BYTE_ORDER == __LITTLE_ENDIAN
  2133    pc += 1;
  2134  #endif
  2135    u = *pc & X_SIGN_MASK;
  2136    e = xF_bias + ((*pc & X_EXPO_MASK) >> (16 - xF_lex));
  2137  // Now u is the sign of y and e is the
  2138  // biased exponent (exponent + bias).  
  2139  #if __BYTE_ORDER == __LITTLE_ENDIAN
  2140    for (i = 1; i < 3; pe[i] = *pc--, i++);
  2141  #else
  2142    for (i = 1; i < 3; pe[i] = *pc++, i++);
  2143  #endif
  2144    pc = pe + 1;
  2145    xlshift (xF_lex - 1, pc, 2);
  2146    *pc |= X_SIGN_MASK;
  2147  // We have just put in pe[1],pe[2] the whole 
  2148  // mantissa of y with a leading 1.           
  2149  // Now we have only to put exponent and sign 
  2150  // in pe[0].                                 
  2151    *pe = e;
  2152    *pe |= u;
  2153    return *(real_32 *) pe;
  2154  }
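
        // Roundtrip sketch (illustrative, not part of the runtime): real_4 ->
        // real_32 -> real_4 is exact, since flttox () keeps the full 24-bit
        // mantissa.
        static void demo_flt_roundtrip (void)
        {
          real_4 f = 1.5f;
          printf ("roundtrip %s\n", xtoflt (flttox (f)) == f ? "exact" : "lossy");
        }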
  2155  
  2156  real_8 xtodbl (real_32 s)
  2157  {
  2158    union {unt_2 pe[4]; real_8 d;} v;
  2159    unt_2 *pc, u;
  2160    int_2 i, e;
  2161    pc = SEXP (s);
  2162    u = *pc & X_SIGN_MASK;
  2163    e = (*pc & X_EXPO_MASK) - xD_bias;
  2164    if (e >= xD_max) {
  2165      return (!u ? DBL_MAX : -DBL_MAX);
  2166    }
  2167    if (e <= 0) {
  2168      return 0.;
  2169    }
  2170    for (i = 0; i < 4; v.pe[i] = *++pc, i++);
  2171    v.pe[0] &= X_EXPO_MASK;
  2172    xrshift (xD_lex - 1, v.pe, 4);
  2173    v.pe[0] |= (e << (16 - xD_lex));
  2174    v.pe[0] |= u;
  2175  #if __BYTE_ORDER == __LITTLE_ENDIAN
  2176    u = v.pe[3];
  2177    v.pe[3] = v.pe[0];
  2178    v.pe[0] = u;
  2179    u = v.pe[2];
  2180    v.pe[2] = v.pe[1];
  2181    v.pe[1] = u;
  2182  #endif
  2183    return v.d;
  2184  }
  2185  
  2186  real_32 dbltox (real_8 y)
  2187  {
  2188    REAL32 pe;
  2189    unt_2 *pc, u;
  2190    int_2 i, e;
  2191    if (y < DBL_MIN && y > -DBL_MIN) {
  2192      return X_0;
  2193    }
  2194    bzero (pe, sizeof (pe));
  2195    pc = (unt_2 *) &y;
  2196  #if __BYTE_ORDER == __LITTLE_ENDIAN
  2197    pc += 3;
  2198  #endif
  2199    u = *pc & X_SIGN_MASK;
  2200    e = xD_bias + ((*pc & X_EXPO_MASK) >> (16 - xD_lex));
  2201  #if __BYTE_ORDER == __LITTLE_ENDIAN
  2202    for (i = 1; i < 5; pe[i] = *pc--, i++);
  2203  #else
  2204    for (i = 1; i < 5; pe[i] = *pc++, i++);
  2205  #endif
  2206    pc = pe + 1;
  2207    xlshift (xD_lex - 1, pc, 4);
  2208    *pc |= X_SIGN_MASK;
  2209    *pe = e;
  2210    *pe |= u;
  2211    return *(real_32 *) pe;
  2212  }
  2213  
  2214  real_32 quadtox (real_16 x)
  2215  {
  2216  // Intrinsic function so arguments are not pointers.
  2217    REAL32 z;
  2218    unt_2 *y;
  2219    int_4 i;
  2220    if (x == 0.0q) {
  2221      return X_0;
  2222    }
  2223    int_4 sinf = isinfq (x);
  2224    if (sinf == 1) {
  2225      return X_PLUS_INF;
  2226    } else if (sinf == -1) {
  2227      return X_MINUS_INF;
  2228    }
  2229    if (isnanq (x)) {
  2230      return X_NAN;
  2231    }
  2232    bzero (z, sizeof (z));
  2233    y = (unt_2 *) &x;
  2234    for (i = 0; i <= 7; i++) {
  2235  #if __BYTE_ORDER == __LITTLE_ENDIAN
  2236      z[i] = y[7 - i];
  2237  #else
  2238      z[i] = y[i];
  2239  #endif
  2240    }
  2241  // real_16 has an implicit (hidden) leading mantissa bit; the HPA format stores it explicitly.
  2242    unt_2 cy = 0x8000;
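        // Seeding cy with 0x8000 inserts the explicit leading 1 into the top
        // mantissa word as the mantissa is shifted right one bit.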
  2243    for (i = 1; i < 8; i++) {
  2244      if (z[i] & 0x1) {
  2245        z[i] = (z[i] >> 1) | cy;
  2246        cy = 0x8000;
  2247      } else {
  2248        z[i] = (z[i] >> 1) | cy;
  2249        cy = 0x0;
  2250      }
  2251    }
  2252    z[8] |= cy;
  2253    return *(real_32 *) z;
  2254  }
  2255  
  2256  real_32 _quadtop (real_32 *z, real_16 x)
  2257  {
  2258    *z = quadtox (x);
  2259    return *z;
  2260  }
  2261  
  2262  real_16 xtoquad (real_32 x)
  2263  {
  2264  // Intrinsic function so arguments are not pointers.
  2265    REAL16 y;
  2266    REAL32 z;
  2267    int_4 i;
  2268    memcpy (z, x.value, sizeof (real_32));
  2269  // Catch NaN, +-Inf is handled correctly.
  2270    if (xis_nan (&x)) {
  2271      z[0] = 0x7fff;
  2272      z[1] = 0xffff;
  2273    }
  2274  // real_16 has an implicit (hidden) leading mantissa bit; the HPA format stores it explicitly.
  2275    unt_2 cy = (z[8] & 0x8000 ? 0x1 : 0x0);
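        // Shift the mantissa left one bit to drop the explicit leading 1; the
        // top bit of each word carries into the next more significant word.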
  2276    for (i = 7; i > 0; i--) {
  2277      if (z[i] & 0x8000) {
  2278        z[i] = (z[i] << 1) | cy;
  2279        cy = 0x1;
  2280      } else {
  2281        z[i] = (z[i] << 1) | cy;
  2282        cy = 0x0;
  2283      }
  2284    }
  2285    for (i = 0; i < 8; i++) {
  2286  #if __BYTE_ORDER == __LITTLE_ENDIAN
  2287      y[i] = z[7 - i];
  2288  #else
  2289      y[i] = z[i];
  2290  #endif
  2291    }
  2292  // Avoid 'dereferencing type-punned pointer will break strict-aliasing rules'
  2293    real_16 u;
  2294    memcpy (&u, &y[0], sizeof (real_16));
  2295    return u;
  2296  }
  2297  
  2298  real_16 cxrealq (complex_64 z)
  2299  {
  2300    return xtoquad (cxreal (z));
  2301  }
  2302  
  2303  real_16 cximagq (complex_64 z)
  2304  {
  2305    return xtoquad (cximag (z));
  2306  }
  2307  
  2308  real_32 strtox (char *s, char **end)
  2309  {
  2310  // This routine replaces the HPA solution - MvdV.
  2311  // Algol 68 Genie employs the same algorithm.
  2312  //
  2313  // Initialize.
  2314  #define N (FLT256_DIG + FLT256_GUARD)
  2315    errno = 0;
  2316    real_32 y[N];
  2317    if (end != NO_REF_TEXT && (*end) != NO_TEXT) {
  2318      (*end) = &(s[0]);
  2319    }
  2320    while (isspace (s[0])) {
  2321      s++;
  2322    }
  2323    real_32 W;
  2324    if (s[0] == '-') {
  2325      W = X_MINUS_1;
  2326      s++;
  2327    } else if (s[0] == '+') {
  2328      W = X_1;
  2329      s++;
  2330    } else {
  2331      W = X_1;
  2332    }
  2333  // Scan mantissa digits and put them into "y".
  2334    while (s[0] == '0') {
  2335      s++;
  2336    }
  2337    int dot = -1, pos = 0, pow = 0;
  2338    while (pow < N && (isdigit (s[pos]) || s[pos] == '.')) {
  2339      if (s[pos] == '.') {
  2340        dot = pos;
  2341      } else {
  2342        switch (s[pos]) {
  2343          case '0': y[pow] = X_0; break;
  2344          case '1': y[pow] = W; break;
  2345          case '2': y[pow] = xmul (X_2, W); break;
  2346          case '3': y[pow] = xmul (X_3, W); break;
  2347          case '4': y[pow] = xmul (X_4, W); break;
  2348          case '5': y[pow] = xmul (X_5, W); break;
  2349          case '6': y[pow] = xmul (X_6, W); break;
  2350          case '7': y[pow] = xmul (X_7, W); break;
  2351          case '8': y[pow] = xmul (X_8, W); break;
  2352          case '9': y[pow] = xmul (X_9, W); break;
  2353        }
  2354        W = xdiv (W, X_10);
  2355        pow++;
  2356      }
  2357      pos++;
  2358    }
  2359  // Skip trailing digits.
  2360    while (isdigit (s[pos])) {
  2361      pos++;
  2362    }
  2363    if (end != NO_REF_TEXT && (*end) != NO_TEXT) {
  2364      (*end) = &(s[pos]);
  2365    }
  2366  // Sum from low to high to preserve precision.
  2367    real_32 sum = X_0;
  2368    for (int k = pow - 1; k >= 0; k--) {
  2369      sum = xsum (sum, y[k]);
  2370    }
  2371  // Optional exponent.
  2372    int expo = 0;
  2373    while (s[pos] == ' ') {
  2374      pos++;
  2375    }
  2376    if (tolower (s[pos]) == 'e' || tolower (s[pos]) == 'q' || tolower (s[pos]) == 'x') {
  2377      pos++;
  2378      if (isdigit (s[pos]) || s[pos] == '-' || s[pos] == '+') {
  2379        expo = (int) strtol (&(s[pos]), end, 10);
  2380      }
  2381    }
  2382  // Standardize.
  2383    if (dot >= 0) {
  2384      expo += dot - 1;
  2385    } else {
  2386      expo += pow - 1;
  2387    }
  2388    while (xnot0 (&sum) && xlt1 (sum)) {
  2389      sum = xmul (sum, X_10);
  2390      expo--;
  2391    }
  2392    if (errno == 0) {
  2393      return xmul (sum, xtenup (expo));
  2394    } else {
  2395      return X_0;
  2396    }
  2397  #undef N
  2398  }
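
        // Usage sketch (illustrative, not part of the runtime): parse a
        // REAL*32 literal and report the value and the unparsed tail.
        static void demo_strtox (void)
        {
          char text[] = "-2.5e3 rest", *tail = text;
          real_32 x = strtox (text, &tail);
          printf ("value = %g, tail = \"%s\"\n", xtodbl (x), tail);
        }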
  2399  
  2400  real_32 atox (char *q)
  2401  {
  2402    return strtox (q, NO_REF_TEXT);
  2403  }
  2404  
  2405  real_32 _xdimx (real_32 x, real_32 y)
  2406  {
  2407    if (xgt (x, y)) {
  2408      return xsub (x, y);
  2409    } else {
  2410      return X_0;
  2411    }
  2412  }
  2413  
  2414  int_4 _xint4 (real_32 x_)
  2415  {
  2416    static real_16 y_;
  2417    y_ = xtoquad (x_);
  2418    return (int_4) (y_);   // INT truncates toward zero (cf. _xint8).
  2419  }
  2420  
  2421  int_4 _xnint4 (real_32 x_)
  2422  {
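        // NINT: round half away from zero by adding +-0.5 before truncation.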
  2423    static real_16 y_;
  2424    if (xgt (x_, X_0)) {
  2425      y_ = xtoquad (xsum (x_, X_1_OVER_2));
  2426    }
  2427    else {
  2428      y_ = xtoquad (xsub (x_, X_1_OVER_2));
  2429    }
  2430    return (int_4) (y_);
  2431  }
  2432  
  2433  int_8 _xint8 (real_32 x_)
  2434  {
  2435    static real_16 y_;
  2436    y_ = xtoquad (x_);
  2437    return (int_8) (y_);
  2438  }
  2439  
  2440  int_8 _xnint8 (real_32 x_)
  2441  {
  2442    static real_16 y_;
  2443    if (xgt (x_, X_0)) {
  2444      y_ = xtoquad (xsum (x_, X_1_OVER_2));
  2445    }
  2446    else {
  2447      y_ = xtoquad (xsub (x_, X_1_OVER_2));
  2448    }
  2449    return (int_8) (y_);
  2450  }
  2451  
  2452  void _xdump (real_32 *z)
  2453  {
  2454    printf ("{{");
  2455    for (int i = 0; i <= FLT256_LEN; i++) {
  2456      printf ("0x%04x", (z->value)[i]);
  2457      if (i < FLT256_LEN) {
  2458        printf (", ");
  2459      }
  2460    }
  2461    printf ("}}\n");
  2462    fflush (stdout);
  2463  }
  2464  
  2465  real_32 _x1mach (int_4 *i)
  2466  {
  2467    switch (*i) {
  2468  //    d1mach(1) = b**(emin-1), the smallest positive magnitude. 
  2469    case 1: return FLT256_MIN;
  2470  //    d1mach(2) = b**emax*(1 - b**(-t)), the largest magnitude. 
  2471    case 2: return FLT256_MAX;
  2472  //    d1mach(3) = b**(-t), the smallest relative spacing. 
  2473    case 3: return FLT256_EPSILON_HALF;
  2474  //    d1mach(4) = b**(1-t), the largest relative spacing. 
  2475    case 4: return FLT256_EPSILON;
  2476  //    d1mach(5) = log10(b) 
  2477    case 5: return X_LOG10_2;
  2478  //
  2479    default: return X_0;
  2480    }
  2481  }
  2482  

