rts-wapr.c

     1  //! @file rts-wapr.c
     2  //! @author J. Marcel van der Veer
     3  //
     4  //! @section Copyright
     5  //
     6  // This file is part of VIF - vintage FORTRAN compiler.
     7  // Copyright 2020-2025 J. Marcel van der Veer <algol68g@xs4all.nl>.
     8  //
     9  //! @section License
    10  //
    11  // This program is free software; you can redistribute it and/or modify it 
    12  // under the terms of the GNU General Public License as published by the 
    13  // Free Software Foundation; either version 3 of the License, or 
    14  // (at your option) any later version.
    15  //
    16  // This program is distributed in the hope that it will be useful, but 
    17  // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
    18  // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
    19  // more details. You should have received a copy of the GNU General Public 
    20  // License along with this program. If not, see <http://www.gnu.org/licenses/>.
    21  
    22  //! @section Synopsis
    23  //!
    24  //! Runtime support implementing the W-function.
    25  
    26  #include <vif.h>
    27  
    28  // Original FORTRAN77 version by Andrew Barry, S. J. Barry, Patricia Culligan-Hensley.
    29  // Original C version by John Burkardt, distributed under the MIT license [2014].
    30  // Adapted for VIF by J.M. van der Veer [2024].
    31  //
    32  // Reference:
    33  //   Andrew Barry, S. J. Barry, Patricia Culligan-Hensley,
    34  //   Algorithm 743: WAPR - A Fortran routine for calculating real values of the W-function,
    35  //   ACM Transactions on Mathematical Software,
    36  //   Volume 21, Number 2, June 1995, pages 172-181.
    37  
    38  real_8 bisect (real_8 xx, int_4 nb, int_4 * ner, int_4 l);
    39  real_8 crude (real_8 xx, int_4 nb);
    40  int_4 nbits_compute ();
    41  real_8 wapr (real_8 x, int_4 nb, int_4 * nerror, int_4 l);
    42  real_8 _wapr (real_8 * x, int_4 * nb, int_4 * nerror, int_4 * l);
    43  
    44  real_8 bisect (real_8 xx, int_4 nb, int_4 *ner, int_4 l)
    45  {
    46  // BISECT approximates the W function using bisection.
    47  // After TOMS algorithm 743.
    48  //
    49  // Discussion:
    50  //
    51  //   The parameter TOL, which determines the accuracy of the bisection
    52  //   method, is calculated using NBITS (assuming the final bit is lost
    53  //   due to rounding error).
    54  //
    55  //   N0 is the maximum number of iterations used in the bisection
    56  //   method.
    57  //
    58  //   For XX close to 0 for Wp, the exponential approximation is used.
    59  //   The approximation is exact to O(XX^8) so, depending on the value
    60  //   of NBITS, the range of application of this formula varies. Outside
    61  //   this range, the usual bisection method is used.
    62  //
    63  // Parameters:
    64  //
    65  //   Input, real_8 XX, the argument.
    66  //
    67  //   Input, int_4 NB, indicates the branch of the W function.
    68  //   0, the upper branch;
    69  //   nonzero, the lower branch.
    70  //
    71  //   Output, int_4 *NER, the error flag.
    72  //   0, success;
    73  //   1, the routine did not converge.  Perhaps reduce NBITS and try again.
    74  //
    75  //   Input, int_4 L, the offset indicator.
    76  //   1, XX represents the offset of the argument from -exp(-1).
    77  //   not 1, XX is the actual argument.
    78  //
    79  //   Output, real_8 BISECT, the value of W(X), as determined
    80  
    81    const int_4 n0 = 500;
    82    int_4 i;
    83    real_8 d, f, fd, r, test, tol, u, x, value = 0.0;
    84    static int_4 nbits = 0;
    85    *ner = 0;
    86    if (nbits == 0) {
    87      nbits = nbits_compute ();
    88    }
    89    if (l == 1) {
    90      x = xx - exp (-1.0);
    91    } else {
    92      x = xx;
    93    }
    94    if (nb == 0) {
    95      test = 1.0 / pow (pow (2.0, nbits), (1.0 / 7.0));
    96      if (fabs (x) < test) {
    97        return x * exp (-x * exp (-x * exp (-x * exp (-x * exp (-x * exp (-x))))));
    98      } else {
    99        u = crude (x, nb) + 1.0e-3;
   100        tol = fabs (u) / pow (2.0, nbits);
   101        d = fmax (u - 2.0e-3, -1.0);
   102        for (i = 1; i <= n0; i++) {
   103    r = 0.5 * (u - d);
   104    value = d + r;
   105  // Find root using w*exp(w)-x to avoid ln(0) error.
   106    if (x < exp (1.0)) {
   107      f = value * exp (value) - x;
   108      fd = d * exp (d) - x;
   109    }
   110  // Find root using ln(w/x)+w to avoid overflow error.
   111    else {
   112      f = log (value / x) + value;
   113      fd = log (d / x) + d;
   114    }
   115    if (f == 0.0) {
   116      return value;
   117    }
   118    if (fabs (r) <= tol) {
   119      return value;
   120    }
   121    if (0.0 < fd * f) {
   122      d = value;
   123    } else {
   124      u = value;
   125    }
   126        }
   127      }
   128    } else {
   129      d = crude (x, nb) - 1.0e-3;
   130      u = fmin (d + 2.0e-3, -1.0);
   131      tol = fabs (u) / pow (2.0, nbits);
   132      for (i = 1; i <= n0; i++) {
   133        r = 0.5 * (u - d);
   134        value = d + r;
   135        f = value * exp (value) - x;
   136        if (f == 0.0) {
   137    return value;
   138        }
   139        if (fabs (r) <= tol) {
   140    return value;
   141        }
   142        fd = d * exp (d) - x;
   143        if (0.0 < fd * f) {
   144    d = value;
   145        } else {
   146    u = value;
   147        }
   148      }
   149    }
   150  // The iteration did not converge.
   151    *ner = 1;
   152    return value;
   153  }
   154  
   155  real_8 crude (real_8 xx, int_4 nb)
   156  {
   157  // CRUDE returns a crude approximation for the W function.
   158  //
   159  // Parameters:
   160  //
   161  //   Input, real_8 XX, the argument.
   162  //
   163  //   Input, int_4 NB, indicates the desired branch.
   164  //   * 0, the upper branch;
   165  //   * nonzero, the lower branch.
   166  //
   167  //   Output, real_8 CRUDE, the crude approximation to W at XX.
   168  
   169    real_8 an2, reta, t, ts, zl;
   170    static int_4 init = 0;
   171    static real_8 c13, em, em2, em9, eta, s2, s21, s22, s23;
   172  // Various mathematical constants.
   173    if (init == 0) {
   174      init = 1;
   175      em = -exp (-1.0);
   176      em9 = -exp (-9.0);
   177      c13 = 1.0 / 3.0;
   178      em2 = 2.0 / em;
   179      s2 = sqrt (2.0);
   180      s21 = 2.0 * s2 - 3.0;
   181      s22 = 4.0 - 3.0 * s2;
   182      s23 = s2 - 2.0;
   183    }
   184  // Crude Wp.
   185    if (nb == 0) {
   186      if (xx <= 20.0) {
   187        reta = s2 * sqrt (1.0 - xx / em);
   188        an2 = 4.612634277343749 * sqrt (sqrt (reta + 1.09556884765625));
   189        return reta / (1.0 + reta / (3.0 + (s21 * an2 + s22) * reta / (s23 * (an2 + reta)))) - 1.0;
   190      } else {
   191        zl = log (xx);
   192        return log (xx / log (xx / pow (zl, exp (-1.124491989777808 / (0.4225028202459761 + zl)))));
   193      }
   194    } else {
   195  // Crude Wm.
   196      if (xx <= em9) {
   197        zl = log (-xx);
   198        t = -1.0 - zl;
   199        ts = sqrt (t);
   200        return zl - (2.0 * ts) / (s2 + (c13 - t / (270.0 + ts * 127.0471381349219)) * ts);
   201      } else {
   202        zl = log (-xx);
   203        eta = 2.0 - em2 * xx;
   204        return log (xx / log (-xx / ((1.0 - 0.5043921323068457 * (zl + 1.0)) * (sqrt (eta) + eta / 3.0) + 1.0)));
   205      }
   206    }
   207  }
   208  
   209  int_4 nbits_compute ()
   210  {
   211  // NBITS_COMPUTE computes the mantissa length minus one.
   212  //
   213  // Discussion:
   214  //
   215  //   NBITS is the number of bits (less 1) in the mantissa of the
   216  //   floating point number number representation of your machine.
   217  //   It is used to determine the level of accuracy to which the W
   218  //   function should be calculated.
   219  //
   220  // Parameters:
   221  //
   222  //   Output, int_4 NBITS_COMPUTE, the mantissa length, in bits, minus one.
   223  //
   224    int m = 14;
   225    return _i1mach (&m) - 1;
   226  }
   227  
   228  real_8 wapr (real_8 x, int_4 nb, int_4 *nerror, int_4 l)
   229  {
   230  // WAPR approximates the W function.
   231  //
   232  // Discussion:
   233  //
   234  //   The call will fail if the input value X is out of range.
   235  //   The range requirement for the upper branch is:
   236  //     -exp(-1) <= X.
   237  //   The range requirement for the lower branch is:
   238  //     -exp(-1) < X < 0.
   239  //
   240  // Parameters:
   241  //
   242  //   Input, real_8 X, the argument.
   243  //
   244  //   Input, int_4 NB, indicates the desired branch.
   245  //   * 0, the upper branch;
   246  //   * nonzero, the lower branch.
   247  //
   248  //   Output, int_4 *NERROR, the error flag.
   249  //   * 0, successful call.
   250  //   * 1, failure, the input X is out of range.
   251  //
   252  //   Input, int_4 L, indicates the interpretation of X.
   253  //   * 1, X is actually the offset from -(exp-1), so compute W(X-exp(-1)).
   254  //   * not 1, X is the argument; compute W(X);
   255  //
   256  //   Output, real_8 WAPR, the approximate value of W(X).
   257  
   258    int_4 i;
   259    real_8 an2, delx, eta, reta, t, temp, temp2, ts, xx, zl, zn, value = 0.0;
   260    static int_4 init = 0, nbits, niter = 1;
   261    static real_8 an3, an4, an5, an6, c13, c23, d12, em, em2, em9;
   262    static real_8 s2, s21, s22, s23, tb, x0, x1;
   263    *nerror = 0;
   264    if (init == 0) {
   265      init = 1;
   266      nbits = nbits_compute ();
   267      if (56 <= nbits) {
   268        niter = 2;
   269      }
   270  // Various mathematical constants.
   271      em = -exp (-1.0);
   272      em9 = -exp (-9.0);
   273      c13 = 1.0 / 3.0;
   274      c23 = 2.0 * c13;
   275      em2 = 2.0 / em;
   276      d12 = -em2;
   277      tb = pow (0.5, nbits);
   278      x0 = pow (tb, 1.0 / 6.0) * 0.5;
   279      x1 = (1.0 - 17.0 * pow (tb, 2.0 / 7.0)) * em;
   280      an3 = 8.0 / 3.0;
   281      an4 = 135.0 / 83.0;
   282      an5 = 166.0 / 39.0;
   283      an6 = 3167.0 / 3549.0;
   284      s2 = sqrt (2.0);
   285      s21 = 2.0 * s2 - 3.0;
   286      s22 = 4.0 - 3.0 * s2;
   287      s23 = s2 - 2.0;
   288    }
   289    if (l == 1) {
   290      delx = x;
   291      if (delx < 0.0) {
   292        *nerror = 1;
   293        RTE ("wapr", "offset X must be non-negative");
   294      }
   295      xx = x + em;
   296    } else {
   297      if (x < em) {
   298        *nerror = 1;
   299        return value;
   300      } else if (x == em) {
   301        value = -1.0;
   302        return value;
   303      }
   304      xx = x;
   305      delx = xx - em;
   306    }
   307  // Calculations for Wp.
   308    if (nb == 0) {
   309      if (fabs (xx) <= x0) {
   310        value = xx / (1.0 + xx / (1.0 + xx / (2.0 + xx / (0.6 + 0.34 * xx))));
   311        return value;
   312      } else if (xx <= x1) {
   313        reta = sqrt (d12 * delx);
   314        value = reta / (1.0 + reta / (3.0 + reta / (reta / (an4 + reta / (reta * an6 + an5)) + an3))) - 1.0;
   315        return value;
   316      } else if (xx <= 20.0) {
   317        reta = s2 * sqrt (1.0 - xx / em);
   318        an2 = 4.612634277343749 * sqrt (sqrt (reta + 1.09556884765625));
   319        value = reta / (1.0 + reta / (3.0 + (s21 * an2 + s22) * reta / (s23 * (an2 + reta)))) - 1.0;
   320      } else {
   321        zl = log (xx);
   322        value = log (xx / log (xx / pow (zl, exp (-1.124491989777808 / (0.4225028202459761 + zl)))));
   323      }
   324    }
   325  // Calculations for Wm.
   326    else {
   327      if (0.0 <= xx) {
   328        *nerror = 1;
   329        return value;
   330      } else if (xx <= x1) {
   331        reta = sqrt (d12 * delx);
   332        value = reta / (reta / (3.0 + reta / (reta / (an4 + reta / (reta * an6 - an5)) - an3)) - 1.0) - 1.0;
   333        return value;
   334      } else if (xx <= em9) {
   335        zl = log (-xx);
   336        t = -1.0 - zl;
   337        ts = sqrt (t);
   338        value = zl - (2.0 * ts) / (s2 + (c13 - t / (270.0 + ts * 127.0471381349219)) * ts);
   339      } else {
   340        zl = log (-xx);
   341        eta = 2.0 - em2 * xx;
   342        value = log (xx / log (-xx / ((1.0 - 0.5043921323068457 * (zl + 1.0)) * (sqrt (eta) + eta / 3.0) + 1.0)));
   343      }
   344    }
   345    for (i = 1; i <= niter; i++) {
   346      zn = log (xx / value) - value;
   347      temp = 1.0 + value;
   348      temp2 = temp + c23 * zn;
   349      temp2 = 2.0 * temp * temp2;
   350      value = value * (1.0 + (zn / temp) * (temp2 - zn) / (temp2 - 2.0 * zn));
   351    }
   352    return value;
   353  }
   354  
   355  real_8 _wapr (real_8 *x, int_4 *nb, int_4 *nerror, int_4 *l)
   356  {
   357  // F77 API.
   358    return wapr (*x, *nb, nerror, *l);
   359  }


© 2002-2025 J.M. van der Veer (jmvdveer@xs4all.nl)