rts-wapr.c
1 //! @file rts-wapr.c
2 //! @author J. Marcel van der Veer
3 //
4 //! @section Copyright
5 //
6 // This file is part of VIF - vintage FORTRAN compiler.
7 // Copyright 2020-2024 J. Marcel van der Veer <algol68g@xs4all.nl>.
8 //
9 //! @section License
10 //
11 // This program is free software; you can redistribute it and/or modify it
12 // under the terms of the GNU General Public License as published by the
13 // Free Software Foundation; either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful, but
17 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 // more details. You should have received a copy of the GNU General Public
20 // License along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22 //! @section Synopsis
23 //!
24 //! Runtime support implementing the W-function.
25
26 #include <vif.h>
27
28 // Original FORTRAN77 version by Andrew Barry, S. J. Barry, Patricia Culligan-Hensley.
29 // Original C version by John Burkardt, distributed under the MIT license [2014].
30 // Adapted for VIF by J.M. van der Veer [2024].
31 //
32 // Reference:
33 // Andrew Barry, S. J. Barry, Patricia Culligan-Hensley,
34 // Algorithm 743: WAPR - A Fortran routine for calculating real values of the W-function,
35 // ACM Transactions on Mathematical Software,
36 // Volume 21, Number 2, June 1995, pages 172-181.
37
38 real_8 bisect (real_8 xx, int_4 nb, int_4 * ner, int_4 l);
39 real_8 crude (real_8 xx, int_4 nb);
40 int_4 nbits_compute ();
41 real_8 wapr (real_8 x, int_4 nb, int_4 * nerror, int_4 l);
42 real_8 _wapr (real_8 * x, int_4 * nb, int_4 * nerror, int_4 * l);
43
44 real_8 bisect (real_8 xx, int_4 nb, int_4 *ner, int_4 l)
45 {
46 // BISECT approximates the W function using bisection.
47 // After TOMS algorithm 743.
48 //
49 // Discussion:
50 //
51 // The parameter TOL, which determines the accuracy of the bisection
52 // method, is calculated using NBITS (assuming the final bit is lost
53 // due to rounding error).
54 //
55 // N0 is the maximum number of iterations used in the bisection
56 // method.
57 //
58 // For XX close to 0 for Wp, the exponential approximation is used.
59 // The approximation is exact to O(XX^8) so, depending on the value
60 // of NBITS, the range of application of this formula varies. Outside
61 // this range, the usual bisection method is used.
62 //
63 // Parameters:
64 //
65 // Input, real_8 XX, the argument.
66 //
67 // Input, int_4 NB, indicates the branch of the W function.
68 // 0, the upper branch;
69 // nonzero, the lower branch.
70 //
71 // Output, int_4 *NER, the error flag.
72 // 0, success;
73 // 1, the routine did not converge. Perhaps reduce NBITS and try again.
74 //
75 // Input, int_4 L, the offset indicator.
76 // 1, XX represents the offset of the argument from -exp(-1).
77 // not 1, XX is the actual argument.
78 //
79 // Output, real_8 BISECT, the value of W(X), as determined
80
81 const int_4 n0 = 500;
82 int_4 i;
83 real_8 d, f, fd, r, test, tol, u, x, value = 0.0;
84 static int_4 nbits = 0;
85 *ner = 0;
86 if (nbits == 0) {
87 nbits = nbits_compute ();
88 }
89 if (l == 1) {
90 x = xx - exp (-1.0);
91 } else {
92 x = xx;
93 }
94 if (nb == 0) {
95 test = 1.0 / pow (pow (2.0, nbits), (1.0 / 7.0));
96 if (fabs (x) < test) {
97 return x * exp (-x * exp (-x * exp (-x * exp (-x * exp (-x * exp (-x))))));
98 } else {
99 u = crude (x, nb) + 1.0e-3;
100 tol = fabs (u) / pow (2.0, nbits);
101 d = fmax (u - 2.0e-3, -1.0);
102 for (i = 1; i <= n0; i++) {
103 r = 0.5 * (u - d);
104 value = d + r;
105 // Find root using w*exp(w)-x to avoid ln(0) error.
106 if (x < exp (1.0)) {
107 f = value * exp (value) - x;
108 fd = d * exp (d) - x;
109 }
110 // Find root using ln(w/x)+w to avoid overflow error.
111 else {
112 f = log (value / x) + value;
113 fd = log (d / x) + d;
114 }
115 if (f == 0.0) {
116 return value;
117 }
118 if (fabs (r) <= tol) {
119 return value;
120 }
121 if (0.0 < fd * f) {
122 d = value;
123 } else {
124 u = value;
125 }
126 }
127 }
128 } else {
129 d = crude (x, nb) - 1.0e-3;
130 u = fmin (d + 2.0e-3, -1.0);
131 tol = fabs (u) / pow (2.0, nbits);
132 for (i = 1; i <= n0; i++) {
133 r = 0.5 * (u - d);
134 value = d + r;
135 f = value * exp (value) - x;
136 if (f == 0.0) {
137 return value;
138 }
139 if (fabs (r) <= tol) {
140 return value;
141 }
142 fd = d * exp (d) - x;
143 if (0.0 < fd * f) {
144 d = value;
145 } else {
146 u = value;
147 }
148 }
149 }
150 // The iteration did not converge.
151 *ner = 1;
152 return value;
153 }
154
155 real_8 crude (real_8 xx, int_4 nb)
156 {
157 // CRUDE returns a crude approximation for the W function.
158 //
159 // Parameters:
160 //
161 // Input, real_8 XX, the argument.
162 //
163 // Input, int_4 NB, indicates the desired branch.
164 // * 0, the upper branch;
165 // * nonzero, the lower branch.
166 //
167 // Output, real_8 CRUDE, the crude approximation to W at XX.
168
169 real_8 an2, reta, t, ts, zl;
170 static int_4 init = 0;
171 static real_8 c13, em, em2, em9, eta, s2, s21, s22, s23;
172 // Various mathematical constants.
173 if (init == 0) {
174 init = 1;
175 em = -exp (-1.0);
176 em9 = -exp (-9.0);
177 c13 = 1.0 / 3.0;
178 em2 = 2.0 / em;
179 s2 = sqrt (2.0);
180 s21 = 2.0 * s2 - 3.0;
181 s22 = 4.0 - 3.0 * s2;
182 s23 = s2 - 2.0;
183 }
184 // Crude Wp.
185 if (nb == 0) {
186 if (xx <= 20.0) {
187 reta = s2 * sqrt (1.0 - xx / em);
188 an2 = 4.612634277343749 * sqrt (sqrt (reta + 1.09556884765625));
189 return reta / (1.0 + reta / (3.0 + (s21 * an2 + s22) * reta / (s23 * (an2 + reta)))) - 1.0;
190 } else {
191 zl = log (xx);
192 return log (xx / log (xx / pow (zl, exp (-1.124491989777808 / (0.4225028202459761 + zl)))));
193 }
194 } else {
195 // Crude Wm.
196 if (xx <= em9) {
197 zl = log (-xx);
198 t = -1.0 - zl;
199 ts = sqrt (t);
200 return zl - (2.0 * ts) / (s2 + (c13 - t / (270.0 + ts * 127.0471381349219)) * ts);
201 } else {
202 zl = log (-xx);
203 eta = 2.0 - em2 * xx;
204 return log (xx / log (-xx / ((1.0 - 0.5043921323068457 * (zl + 1.0)) * (sqrt (eta) + eta / 3.0) + 1.0)));
205 }
206 }
207 }
208
209 int_4 nbits_compute ()
210 {
211 // NBITS_COMPUTE computes the mantissa length minus one.
212 //
213 // Discussion:
214 //
215 // NBITS is the number of bits (less 1) in the mantissa of the
216 // floating point number number representation of your machine.
217 // It is used to determine the level of accuracy to which the W
218 // function should be calculated.
219 //
220 // Parameters:
221 //
222 // Output, int_4 NBITS_COMPUTE, the mantissa length, in bits, minus one.
223 //
224 int m = 14;
225 return _i1mach (&m) - 1;
226 }
227
228 real_8 wapr (real_8 x, int_4 nb, int_4 *nerror, int_4 l)
229 {
230 // WAPR approximates the W function.
231 //
232 // Discussion:
233 //
234 // The call will fail if the input value X is out of range.
235 // The range requirement for the upper branch is:
236 // -exp(-1) <= X.
237 // The range requirement for the lower branch is:
238 // -exp(-1) < X < 0.
239 //
240 // Parameters:
241 //
242 // Input, real_8 X, the argument.
243 //
244 // Input, int_4 NB, indicates the desired branch.
245 // * 0, the upper branch;
246 // * nonzero, the lower branch.
247 //
248 // Output, int_4 *NERROR, the error flag.
249 // * 0, successful call.
250 // * 1, failure, the input X is out of range.
251 //
252 // Input, int_4 L, indicates the interpretation of X.
253 // * 1, X is actually the offset from -(exp-1), so compute W(X-exp(-1)).
254 // * not 1, X is the argument; compute W(X);
255 //
256 // Output, real_8 WAPR, the approximate value of W(X).
257
258 int_4 i;
259 real_8 an2, delx, eta, reta, t, temp, temp2, ts, xx, zl, zn, value = 0.0;
260 static int_4 init = 0, nbits, niter = 1;
261 static real_8 an3, an4, an5, an6, c13, c23, d12, em, em2, em9;
262 static real_8 s2, s21, s22, s23, tb, x0, x1;
263 *nerror = 0;
264 if (init == 0) {
265 init = 1;
266 nbits = nbits_compute ();
267 if (56 <= nbits) {
268 niter = 2;
269 }
270 // Various mathematical constants.
271 em = -exp (-1.0);
272 em9 = -exp (-9.0);
273 c13 = 1.0 / 3.0;
274 c23 = 2.0 * c13;
275 em2 = 2.0 / em;
276 d12 = -em2;
277 tb = pow (0.5, nbits);
278 x0 = pow (tb, 1.0 / 6.0) * 0.5;
279 x1 = (1.0 - 17.0 * pow (tb, 2.0 / 7.0)) * em;
280 an3 = 8.0 / 3.0;
281 an4 = 135.0 / 83.0;
282 an5 = 166.0 / 39.0;
283 an6 = 3167.0 / 3549.0;
284 s2 = sqrt (2.0);
285 s21 = 2.0 * s2 - 3.0;
286 s22 = 4.0 - 3.0 * s2;
287 s23 = s2 - 2.0;
288 }
289 if (l == 1) {
290 delx = x;
291 if (delx < 0.0) {
292 *nerror = 1;
293 RTE ("wapr", "offset X must be non-negative");
294 }
295 xx = x + em;
296 } else {
297 if (x < em) {
298 *nerror = 1;
299 return value;
300 } else if (x == em) {
301 value = -1.0;
302 return value;
303 }
304 xx = x;
305 delx = xx - em;
306 }
307 // Calculations for Wp.
308 if (nb == 0) {
309 if (fabs (xx) <= x0) {
310 value = xx / (1.0 + xx / (1.0 + xx / (2.0 + xx / (0.6 + 0.34 * xx))));
311 return value;
312 } else if (xx <= x1) {
313 reta = sqrt (d12 * delx);
314 value = reta / (1.0 + reta / (3.0 + reta / (reta / (an4 + reta / (reta * an6 + an5)) + an3))) - 1.0;
315 return value;
316 } else if (xx <= 20.0) {
317 reta = s2 * sqrt (1.0 - xx / em);
318 an2 = 4.612634277343749 * sqrt (sqrt (reta + 1.09556884765625));
319 value = reta / (1.0 + reta / (3.0 + (s21 * an2 + s22) * reta / (s23 * (an2 + reta)))) - 1.0;
320 } else {
321 zl = log (xx);
322 value = log (xx / log (xx / pow (zl, exp (-1.124491989777808 / (0.4225028202459761 + zl)))));
323 }
324 }
325 // Calculations for Wm.
326 else {
327 if (0.0 <= xx) {
328 *nerror = 1;
329 return value;
330 } else if (xx <= x1) {
331 reta = sqrt (d12 * delx);
332 value = reta / (reta / (3.0 + reta / (reta / (an4 + reta / (reta * an6 - an5)) - an3)) - 1.0) - 1.0;
333 return value;
334 } else if (xx <= em9) {
335 zl = log (-xx);
336 t = -1.0 - zl;
337 ts = sqrt (t);
338 value = zl - (2.0 * ts) / (s2 + (c13 - t / (270.0 + ts * 127.0471381349219)) * ts);
339 } else {
340 zl = log (-xx);
341 eta = 2.0 - em2 * xx;
342 value = log (xx / log (-xx / ((1.0 - 0.5043921323068457 * (zl + 1.0)) * (sqrt (eta) + eta / 3.0) + 1.0)));
343 }
344 }
345 for (i = 1; i <= niter; i++) {
346 zn = log (xx / value) - value;
347 temp = 1.0 + value;
348 temp2 = temp + c23 * zn;
349 temp2 = 2.0 * temp * temp2;
350 value = value * (1.0 + (zn / temp) * (temp2 - zn) / (temp2 - 2.0 * zn));
351 }
352 return value;
353 }
354
355 real_8 _wapr (real_8 *x, int_4 *nb, int_4 *nerror, int_4 *l)
356 {
357 // F77 API.
358 return wapr (*x, *nb, nerror, *l);
359 }
© 2002-2024 J.M. van der Veer (jmvdveer@xs4all.nl)
|