rts-wapr.c

     
   1  //! @file rts-wapr.c
   2  //! @author J. Marcel van der Veer
   3  //
   4  //! @section Copyright
   5  //
   6  // This file is part of VIF - vintage FORTRAN compiler.
   7  // Copyright 2020-2024 J. Marcel van der Veer <algol68g@xs4all.nl>.
   8  //
   9  //! @section License
  10  //
  11  // This program is free software; you can redistribute it and/or modify it 
  12  // under the terms of the GNU General Public License as published by the 
  13  // Free Software Foundation; either version 3 of the License, or 
  14  // (at your option) any later version.
  15  //
  16  // This program is distributed in the hope that it will be useful, but 
  17  // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  18  // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
  19  // more details. You should have received a copy of the GNU General Public 
  20  // License along with this program. If not, see <http://www.gnu.org/licenses/>.
  21  
  22  //! @section Synopsis
  23  //!
  24  //! Runtime support implementing the W-function.
  25  
  26  #include <vif.h>
  27  
  28  // Original FORTRAN77 version by Andrew Barry, S. J. Barry, Patricia Culligan-Hensley.
  29  // Original C version by John Burkardt, distributed under the MIT license [2014].
  30  // Adapted for VIF by J.M. van der Veer [2024].
  31  //
  32  // Reference:
  33  //   Andrew Barry, S. J. Barry, Patricia Culligan-Hensley,
  34  //   Algorithm 743: WAPR - A Fortran routine for calculating real values of the W-function,
  35  //   ACM Transactions on Mathematical Software,
  36  //   Volume 21, Number 2, June 1995, pages 172-181.
  37  
  38  real_8 bisect (real_8 xx, int_4 nb, int_4 * ner, int_4 l);
  39  real_8 crude (real_8 xx, int_4 nb);
  40  int_4 nbits_compute ();
  41  real_8 wapr (real_8 x, int_4 nb, int_4 * nerror, int_4 l);
  42  real_8 _wapr (real_8 * x, int_4 * nb, int_4 * nerror, int_4 * l);
  43  
  44  real_8 bisect (real_8 xx, int_4 nb, int_4 *ner, int_4 l)
  45  {
  46  // BISECT approximates the W function using bisection.
  47  // After TOMS algorithm 743.
  48  //
  49  // Discussion:
  50  //
  51  //   The parameter TOL, which determines the accuracy of the bisection
  52  //   method, is calculated using NBITS (assuming the final bit is lost
  53  //   due to rounding error).
  54  //
  55  //   N0 is the maximum number of iterations used in the bisection
  56  //   method.
  57  //
  58  //   For XX close to 0 for Wp, the exponential approximation is used.
  59  //   The approximation is exact to O(XX^8) so, depending on the value
  60  //   of NBITS, the range of application of this formula varies. Outside
  61  //   this range, the usual bisection method is used.
  62  //
  63  // Parameters:
  64  //
  65  //   Input, real_8 XX, the argument.
  66  //
  67  //   Input, int_4 NB, indicates the branch of the W function.
  68  //   0, the upper branch;
  69  //   nonzero, the lower branch.
  70  //
  71  //   Output, int_4 *NER, the error flag.
  72  //   0, success;
  73  //   1, the routine did not converge.  Perhaps reduce NBITS and try again.
  74  //
  75  //   Input, int_4 L, the offset indicator.
  76  //   1, XX represents the offset of the argument from -exp(-1).
  77  //   not 1, XX is the actual argument.
  78  //
  79  //   Output, real_8 BISECT, the value of W(X), as determined
  80  
  81    const int_4 n0 = 500;
  82    int_4 i;
  83    real_8 d, f, fd, r, test, tol, u, x, value = 0.0;
  84    static int_4 nbits = 0;
  85    *ner = 0;
  86    if (nbits == 0) {
  87      nbits = nbits_compute ();
  88    }
  89    if (l == 1) {
  90      x = xx - exp (-1.0);
  91    } else {
  92      x = xx;
  93    }
  94    if (nb == 0) {
  95      test = 1.0 / pow (pow (2.0, nbits), (1.0 / 7.0));
  96      if (fabs (x) < test) {
  97        return x * exp (-x * exp (-x * exp (-x * exp (-x * exp (-x * exp (-x))))));
  98      } else {
  99        u = crude (x, nb) + 1.0e-3;
 100        tol = fabs (u) / pow (2.0, nbits);
 101        d = fmax (u - 2.0e-3, -1.0);
 102        for (i = 1; i <= n0; i++) {
 103      r = 0.5 * (u - d);
 104      value = d + r;
 105  // Find root using w*exp(w)-x to avoid ln(0) error.
 106      if (x < exp (1.0)) {
 107        f = value * exp (value) - x;
 108        fd = d * exp (d) - x;
 109      }
 110  // Find root using ln(w/x)+w to avoid overflow error.
 111      else {
 112        f = log (value / x) + value;
 113        fd = log (d / x) + d;
 114      }
 115      if (f == 0.0) {
 116        return value;
 117      }
 118      if (fabs (r) <= tol) {
 119        return value;
 120      }
 121      if (0.0 < fd * f) {
 122        d = value;
 123      } else {
 124        u = value;
 125      }
 126        }
 127      }
 128    } else {
 129      d = crude (x, nb) - 1.0e-3;
 130      u = fmin (d + 2.0e-3, -1.0);
 131      tol = fabs (u) / pow (2.0, nbits);
 132      for (i = 1; i <= n0; i++) {
 133        r = 0.5 * (u - d);
 134        value = d + r;
 135        f = value * exp (value) - x;
 136        if (f == 0.0) {
 137      return value;
 138        }
 139        if (fabs (r) <= tol) {
 140      return value;
 141        }
 142        fd = d * exp (d) - x;
 143        if (0.0 < fd * f) {
 144      d = value;
 145        } else {
 146      u = value;
 147        }
 148      }
 149    }
 150  // The iteration did not converge.
 151    *ner = 1;
 152    return value;
 153  }
 154  
 155  real_8 crude (real_8 xx, int_4 nb)
 156  {
 157  // CRUDE returns a crude approximation for the W function.
 158  //
 159  // Parameters:
 160  //
 161  //   Input, real_8 XX, the argument.
 162  //
 163  //   Input, int_4 NB, indicates the desired branch.
 164  //   * 0, the upper branch;
 165  //   * nonzero, the lower branch.
 166  //
 167  //   Output, real_8 CRUDE, the crude approximation to W at XX.
 168  
 169    real_8 an2, reta, t, ts, zl;
 170    static int_4 init = 0;
 171    static real_8 c13, em, em2, em9, eta, s2, s21, s22, s23;
 172  // Various mathematical constants.
 173    if (init == 0) {
 174      init = 1;
 175      em = -exp (-1.0);
 176      em9 = -exp (-9.0);
 177      c13 = 1.0 / 3.0;
 178      em2 = 2.0 / em;
 179      s2 = sqrt (2.0);
 180      s21 = 2.0 * s2 - 3.0;
 181      s22 = 4.0 - 3.0 * s2;
 182      s23 = s2 - 2.0;
 183    }
 184  // Crude Wp.
 185    if (nb == 0) {
 186      if (xx <= 20.0) {
 187        reta = s2 * sqrt (1.0 - xx / em);
 188        an2 = 4.612634277343749 * sqrt (sqrt (reta + 1.09556884765625));
 189        return reta / (1.0 + reta / (3.0 + (s21 * an2 + s22) * reta / (s23 * (an2 + reta)))) - 1.0;
 190      } else {
 191        zl = log (xx);
 192        return log (xx / log (xx / pow (zl, exp (-1.124491989777808 / (0.4225028202459761 + zl)))));
 193      }
 194    } else {
 195  // Crude Wm.
 196      if (xx <= em9) {
 197        zl = log (-xx);
 198        t = -1.0 - zl;
 199        ts = sqrt (t);
 200        return zl - (2.0 * ts) / (s2 + (c13 - t / (270.0 + ts * 127.0471381349219)) * ts);
 201      } else {
 202        zl = log (-xx);
 203        eta = 2.0 - em2 * xx;
 204        return log (xx / log (-xx / ((1.0 - 0.5043921323068457 * (zl + 1.0)) * (sqrt (eta) + eta / 3.0) + 1.0)));
 205      }
 206    }
 207  }
 208  
 209  int_4 nbits_compute ()
 210  {
 211  // NBITS_COMPUTE computes the mantissa length minus one.
 212  //
 213  // Discussion:
 214  //
 215  //   NBITS is the number of bits (less 1) in the mantissa of the
 216  //   floating point number number representation of your machine.
 217  //   It is used to determine the level of accuracy to which the W
 218  //   function should be calculated.
 219  //
 220  // Parameters:
 221  //
 222  //   Output, int_4 NBITS_COMPUTE, the mantissa length, in bits, minus one.
 223  //
 224    int m = 14;
 225    return _i1mach (&m) - 1;
 226  }
 227  
 228  real_8 wapr (real_8 x, int_4 nb, int_4 *nerror, int_4 l)
 229  {
 230  // WAPR approximates the W function.
 231  //
 232  // Discussion:
 233  //
 234  //   The call will fail if the input value X is out of range.
 235  //   The range requirement for the upper branch is:
 236  //     -exp(-1) <= X.
 237  //   The range requirement for the lower branch is:
 238  //     -exp(-1) < X < 0.
 239  //
 240  // Parameters:
 241  //
 242  //   Input, real_8 X, the argument.
 243  //
 244  //   Input, int_4 NB, indicates the desired branch.
 245  //   * 0, the upper branch;
 246  //   * nonzero, the lower branch.
 247  //
 248  //   Output, int_4 *NERROR, the error flag.
 249  //   * 0, successful call.
 250  //   * 1, failure, the input X is out of range.
 251  //
 252  //   Input, int_4 L, indicates the interpretation of X.
 253  //   * 1, X is actually the offset from -(exp-1), so compute W(X-exp(-1)).
 254  //   * not 1, X is the argument; compute W(X);
 255  //
 256  //   Output, real_8 WAPR, the approximate value of W(X).
 257  
 258    int_4 i;
 259    real_8 an2, delx, eta, reta, t, temp, temp2, ts, xx, zl, zn, value = 0.0;
 260    static int_4 init = 0, nbits, niter = 1;
 261    static real_8 an3, an4, an5, an6, c13, c23, d12, em, em2, em9;
 262    static real_8 s2, s21, s22, s23, tb, x0, x1;
 263    *nerror = 0;
 264    if (init == 0) {
 265      init = 1;
 266      nbits = nbits_compute ();
 267      if (56 <= nbits) {
 268        niter = 2;
 269      }
 270  // Various mathematical constants.
 271      em = -exp (-1.0);
 272      em9 = -exp (-9.0);
 273      c13 = 1.0 / 3.0;
 274      c23 = 2.0 * c13;
 275      em2 = 2.0 / em;
 276      d12 = -em2;
 277      tb = pow (0.5, nbits);
 278      x0 = pow (tb, 1.0 / 6.0) * 0.5;
 279      x1 = (1.0 - 17.0 * pow (tb, 2.0 / 7.0)) * em;
 280      an3 = 8.0 / 3.0;
 281      an4 = 135.0 / 83.0;
 282      an5 = 166.0 / 39.0;
 283      an6 = 3167.0 / 3549.0;
 284      s2 = sqrt (2.0);
 285      s21 = 2.0 * s2 - 3.0;
 286      s22 = 4.0 - 3.0 * s2;
 287      s23 = s2 - 2.0;
 288    }
 289    if (l == 1) {
 290      delx = x;
 291      if (delx < 0.0) {
 292        *nerror = 1;
 293        RTE ("wapr", "offset X must be non-negative");
 294      }
 295      xx = x + em;
 296    } else {
 297      if (x < em) {
 298        *nerror = 1;
 299        return value;
 300      } else if (x == em) {
 301        value = -1.0;
 302        return value;
 303      }
 304      xx = x;
 305      delx = xx - em;
 306    }
 307  // Calculations for Wp.
 308    if (nb == 0) {
 309      if (fabs (xx) <= x0) {
 310        value = xx / (1.0 + xx / (1.0 + xx / (2.0 + xx / (0.6 + 0.34 * xx))));
 311        return value;
 312      } else if (xx <= x1) {
 313        reta = sqrt (d12 * delx);
 314        value = reta / (1.0 + reta / (3.0 + reta / (reta / (an4 + reta / (reta * an6 + an5)) + an3))) - 1.0;
 315        return value;
 316      } else if (xx <= 20.0) {
 317        reta = s2 * sqrt (1.0 - xx / em);
 318        an2 = 4.612634277343749 * sqrt (sqrt (reta + 1.09556884765625));
 319        value = reta / (1.0 + reta / (3.0 + (s21 * an2 + s22) * reta / (s23 * (an2 + reta)))) - 1.0;
 320      } else {
 321        zl = log (xx);
 322        value = log (xx / log (xx / pow (zl, exp (-1.124491989777808 / (0.4225028202459761 + zl)))));
 323      }
 324    }
 325  // Calculations for Wm.
 326    else {
 327      if (0.0 <= xx) {
 328        *nerror = 1;
 329        return value;
 330      } else if (xx <= x1) {
 331        reta = sqrt (d12 * delx);
 332        value = reta / (reta / (3.0 + reta / (reta / (an4 + reta / (reta * an6 - an5)) - an3)) - 1.0) - 1.0;
 333        return value;
 334      } else if (xx <= em9) {
 335        zl = log (-xx);
 336        t = -1.0 - zl;
 337        ts = sqrt (t);
 338        value = zl - (2.0 * ts) / (s2 + (c13 - t / (270.0 + ts * 127.0471381349219)) * ts);
 339      } else {
 340        zl = log (-xx);
 341        eta = 2.0 - em2 * xx;
 342        value = log (xx / log (-xx / ((1.0 - 0.5043921323068457 * (zl + 1.0)) * (sqrt (eta) + eta / 3.0) + 1.0)));
 343      }
 344    }
 345    for (i = 1; i <= niter; i++) {
 346      zn = log (xx / value) - value;
 347      temp = 1.0 + value;
 348      temp2 = temp + c23 * zn;
 349      temp2 = 2.0 * temp * temp2;
 350      value = value * (1.0 + (zn / temp) * (temp2 - zn) / (temp2 - 2.0 * zn));
 351    }
 352    return value;
 353  }
 354  
 355  real_8 _wapr (real_8 *x, int_4 *nb, int_4 *nerror, int_4 *l)
 356  {
 357  // F77 API.
 358    return wapr (*x, *nb, nerror, *l);
 359  }
     


© 2002-2024 J.M. van der Veer (jmvdveer@xs4all.nl)