parser-scanner.c

     
   1  //! @file parser-scanner.c
   2  //! @author J. Marcel van der Veer
   3  //!
   4  //! @section Copyright
   5  //!
   6  //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
   7  //! Copyright 2001-2023 J. Marcel van der Veer [algol68g@xs4all.nl].
   8  //!
   9  //! @section License
  10  //!
  11  //! This program is free software; you can redistribute it and/or modify it 
  12  //! under the terms of the GNU General Public License as published by the 
  13  //! Free Software Foundation; either version 3 of the License, or 
  14  //! (at your option) any later version.
  15  //!
  16  //! This program is distributed in the hope that it will be useful, but 
  17  //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  18  //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
  19  //! more details. You should have received a copy of the GNU General Public 
  20  //! License along with this program. If not, see [http://www.gnu.org/licenses/].
  21  
  22  //! @section Synopsis 
  23  //!
  24  //! Context-dependent Algol 68 tokeniser.
  25  
  26  #include "a68g.h"
  27  #include "a68g-parser.h"
  28  #include "a68g-prelude.h"
  29  #include "a68g-options.h"
  30  #include "a68g-environ.h"
  31  #include "a68g-genie.h"
  32  
  33  // Macros.
  34  
  35  #define SCAN_DIGITS(c)\
  36    while (IS_DIGIT (c)) {\
  37      (sym++)[0] = (c);\
  38      (c) = next_char (ref_l, ref_s, A68_TRUE);\
  39    }
  40  
  41  #define SCAN_EXPONENT_PART(c)\
  42    (sym++)[0] = EXPONENT_CHAR;\
  43    (c) = next_char (ref_l, ref_s, A68_TRUE);\
  44    if ((c) == '+' || (c) == '-') {\
  45      (sym++)[0] = (c);\
  46      (c) = next_char (ref_l, ref_s, A68_TRUE);\
  47    }\
  48    SCAN_ERROR (!IS_DIGIT (c), *start_l, *start_c, ERROR_EXPONENT_DIGIT);\
  49    SCAN_DIGITS (c)
  50  
  51  //! @brief Save scanner state, for character look-ahead.
  52  
  53  void save_state (LINE_T * ref_l, char *ref_s, char ch)
  54  {
  55    SCAN_STATE_L (&A68_JOB) = ref_l;
  56    SCAN_STATE_S (&A68_JOB) = ref_s;
  57    SCAN_STATE_C (&A68_JOB) = ch;
  58  }
  59  
  60  //! @brief Restore scanner state, for character look-ahead.
  61  
  62  void restore_state (LINE_T ** ref_l, char **ref_s, char *ch)
  63  {
  64    *ref_l = SCAN_STATE_L (&A68_JOB);
  65    *ref_s = SCAN_STATE_S (&A68_JOB);
  66    *ch = SCAN_STATE_C (&A68_JOB);
  67  }
  68  
  69  //! @brief New_source_line.
  70  
  71  LINE_T *new_source_line (void)
  72  {
  73    LINE_T *z = (LINE_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (LINE_T));
  74    MARKER (z)[0] = NULL_CHAR;
  75    STRING (z) = NO_TEXT;
  76    FILENAME (z) = NO_TEXT;
  77    DIAGNOSTICS (z) = NO_DIAGNOSTIC;
  78    NUMBER (z) = 0;
  79    PRINT_STATUS (z) = 0;
  80    LIST (z) = A68_TRUE;
  81    NEXT (z) = NO_LINE;
  82    PREVIOUS (z) = NO_LINE;
  83    return z;
  84  }
  85  
  86  //! @brief Append a source line to the internal source file.
  87  
  88  void append_source_line (char *str, LINE_T ** ref_l, int *line_num, char *filename)
  89  {
  90    LINE_T *z = new_source_line ();
  91  // Allow shell command in first line, f.i. "#!/usr/share/bin/a68g".
  92    if (*line_num == 1) {
  93      if (strlen (str) >= 2 && strncmp (str, "#!", 2) == 0) {
  94        ABEND (strstr (str, "run-script") != NO_TEXT, ERROR_SHELL_SCRIPT, __func__);
  95        (*line_num)++;
  96        return;
  97      }
  98    }
  99  // Link line into the chain.
 100    STRING (z) = new_fixed_string (str);
 101    FILENAME (z) = filename;
 102    NUMBER (z) = (*line_num)++;
 103    PRINT_STATUS (z) = NOT_PRINTED;
 104    LIST (z) = A68_TRUE;
 105    DIAGNOSTICS (z) = NO_DIAGNOSTIC;
 106    NEXT (z) = NO_LINE;
 107    PREVIOUS (z) = *ref_l;
 108    if (TOP_LINE (&A68_JOB) == NO_LINE) {
 109      TOP_LINE (&A68_JOB) = z;
 110    }
 111    if (*ref_l != NO_LINE) {
 112      NEXT (*ref_l) = z;
 113    }
 114    *ref_l = z;
 115  }
 116  
 117  // Scanner, tokenises the source code.
 118  
 119  //! @brief Whether ch is unworthy.
 120  
 121  void unworthy (LINE_T * u, char *v, char ch)
 122  {
 123    if (IS_PRINT (ch)) {
 124      ASSERT (snprintf (A68 (edit_line), SNPRINTF_SIZE, "*%s", ERROR_UNWORTHY_CHARACTER) >= 0);
 125    } else {
 126      ASSERT (snprintf (A68 (edit_line), SNPRINTF_SIZE, "*%s %s", ERROR_UNWORTHY_CHARACTER, ctrl_char (ch)) >= 0);
 127    }
 128    scan_error (u, v, A68 (edit_line));
 129  }
 130  
 131  //! @brief Concatenate lines that terminate in '\' with next line.
 132  
 133  void concatenate_lines (LINE_T * top)
 134  {
 135    LINE_T *q;
 136  // Work from bottom backwards.
 137    for (q = top; q != NO_LINE && NEXT (q) != NO_LINE; q = NEXT (q)) {
 138      ;
 139    }
 140    for (; q != NO_LINE; BACKWARD (q)) {
 141      char *z = STRING (q);
 142      int len = (int) strlen (z);
 143      if (len >= 2 && z[len - 2] == BACKSLASH_CHAR && z[len - 1] == NEWLINE_CHAR && NEXT (q) != NO_LINE && STRING (NEXT (q)) != NO_TEXT) {
 144        z[len - 2] = NULL_CHAR;
 145        len += (int) strlen (STRING (NEXT (q)));
 146        z = (char *) get_fixed_heap_space ((size_t) (len + 1));
 147        bufcpy (z, STRING (q), len + 1);
 148        bufcat (z, STRING (NEXT (q)), len + 1);
 149        STRING (NEXT (q))[0] = NULL_CHAR;
 150        STRING (q) = z;
 151      }
 152    }
 153  }
 154  
 155  //! @brief Whether u is bold tag v, independent of stropping regime.
 156  
 157  BOOL_T is_bold (char *u, char *v)
 158  {
 159    unt len = (unt) strlen (v);
 160    if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
 161      if (u[0] == '\'') {
 162        return (BOOL_T) (strncmp (++u, v, len) == 0 && u[len] == '\'');
 163      } else {
 164        return A68_FALSE;
 165      }
 166    } else {
 167      return (BOOL_T) (strncmp (u, v, len) == 0 && !IS_UPPER (u[len]));
 168    }
 169  }
 170  
 171  //! @brief Skip string.
 172  
 173  BOOL_T skip_string (LINE_T ** top, char **ch)
 174  {
 175    LINE_T *u = *top;
 176    char *v = *ch;
 177    v++;
 178    while (u != NO_LINE) {
 179      while (v[0] != NULL_CHAR) {
 180        if (v[0] == QUOTE_CHAR && v[1] != QUOTE_CHAR) {
 181          *top = u;
 182          *ch = &v[1];
 183          return A68_TRUE;
 184        } else if (v[0] == QUOTE_CHAR && v[1] == QUOTE_CHAR) {
 185          v += 2;
 186        } else {
 187          v++;
 188        }
 189      }
 190      FORWARD (u);
 191      if (u != NO_LINE) {
 192        v = &(STRING (u)[0]);
 193      } else {
 194        v = NO_TEXT;
 195      }
 196    }
 197    return A68_FALSE;
 198  }
 199  
 200  //! @brief Skip comment.
 201  
 202  BOOL_T skip_comment (LINE_T ** top, char **ch, int delim)
 203  {
 204    LINE_T *u = *top;
 205    char *v = *ch;
 206    v++;
 207    while (u != NO_LINE) {
 208      while (v[0] != NULL_CHAR) {
 209        if (is_bold (v, "COMMENT") && delim == BOLD_COMMENT_SYMBOL) {
 210          *top = u;
 211          *ch = &v[1];
 212          return A68_TRUE;
 213        } else if (is_bold (v, "CO") && delim == STYLE_I_COMMENT_SYMBOL) {
 214          *top = u;
 215          *ch = &v[1];
 216          return A68_TRUE;
 217        } else if (v[0] == '#' && delim == STYLE_II_COMMENT_SYMBOL) {
 218          *top = u;
 219          *ch = &v[1];
 220          return A68_TRUE;
 221        } else {
 222          v++;
 223        }
 224      }
 225      FORWARD (u);
 226      if (u != NO_LINE) {
 227        v = &(STRING (u)[0]);
 228      } else {
 229        v = NO_TEXT;
 230      }
 231    }
 232    return A68_FALSE;
 233  }
 234  
 235  //! @brief Skip rest of pragmat.
 236  
 237  BOOL_T skip_pragmat (LINE_T ** top, char **ch, int delim, BOOL_T whitespace)
 238  {
 239    LINE_T *u = *top;
 240    char *v = *ch;
 241    while (u != NO_LINE) {
 242      while (v[0] != NULL_CHAR) {
 243        if (is_bold (v, "PRAGMAT") && delim == BOLD_PRAGMAT_SYMBOL) {
 244          *top = u;
 245          *ch = &v[1];
 246          return A68_TRUE;
 247        } else if (is_bold (v, "PR") && delim == STYLE_I_PRAGMAT_SYMBOL) {
 248          *top = u;
 249          *ch = &v[1];
 250          return A68_TRUE;
 251        } else {
 252          if (whitespace && !IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
 253            scan_error (u, v, ERROR_PRAGMENT);
 254          } else if (IS_UPPER (v[0])) {
 255  // Skip a bold word as you may trigger on REPR, for instance ...
 256            while (IS_UPPER (v[0])) {
 257              v++;
 258            }
 259          } else {
 260            v++;
 261          }
 262        }
 263      }
 264      FORWARD (u);
 265      if (u != NO_LINE) {
 266        v = &(STRING (u)[0]);
 267      } else {
 268        v = NO_TEXT;
 269      }
 270    }
 271    return A68_FALSE;
 272  }
 273  
 274  //! @brief Return pointer to next token within pragmat.
 275  
 276  char *get_pragmat_item (LINE_T ** top, char **ch)
 277  {
 278    LINE_T *u = *top;
 279    char *v = *ch;
 280    while (u != NO_LINE) {
 281      while (v[0] != NULL_CHAR) {
 282        if (!IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
 283          *top = u;
 284          *ch = v;
 285          return v;
 286        } else {
 287          v++;
 288        }
 289      }
 290      FORWARD (u);
 291      if (u != NO_LINE) {
 292        v = &(STRING (u)[0]);
 293      } else {
 294        v = NO_TEXT;
 295      }
 296    }
 297    return NO_TEXT;
 298  }
 299  
 300  //! @brief Case insensitive strncmp for at most the number of chars in 'v'.
 301  
 302  int streq (char *u, char *v)
 303  {
 304    int diff;
 305    for (diff = 0; diff == 0 && u[0] != NULL_CHAR && v[0] != NULL_CHAR; u++, v++) {
 306      diff = ((int) TO_LOWER (u[0])) - ((int) TO_LOWER (v[0]));
 307    }
 308    return diff;
 309  }
 310  
 311  //! @brief Scan for next pragmat and yield first pragmat item.
 312  
 313  char *next_preprocessor_item (LINE_T ** top, char **ch, int *delim)
 314  {
 315    LINE_T *u = *top;
 316    char *v = *ch;
 317    *delim = 0;
 318    while (u != NO_LINE) {
 319      while (v[0] != NULL_CHAR) {
 320        LINE_T *start_l = u;
 321        char *start_c = v;
 322  // STRINGs must be skipped.
 323        if (v[0] == QUOTE_CHAR) {
 324          SCAN_ERROR (!skip_string (&u, &v), start_l, start_c, ERROR_UNTERMINATED_STRING);
 325        }
 326  // COMMENTS must be skipped.
 327        else if (is_bold (v, "COMMENT")) {
 328          SCAN_ERROR (!skip_comment (&u, &v, BOLD_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
 329        } else if (is_bold (v, "CO")) {
 330          SCAN_ERROR (!skip_comment (&u, &v, STYLE_I_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
 331        } else if (v[0] == '#') {
 332          SCAN_ERROR (!skip_comment (&u, &v, STYLE_II_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
 333        } else if (is_bold (v, "PRAGMAT") || is_bold (v, "PR")) {
 334  // We caught a PRAGMAT.
 335          char *item;
 336          if (is_bold (v, "PRAGMAT")) {
 337            *delim = BOLD_PRAGMAT_SYMBOL;
 338            v = &v[strlen ("PRAGMAT")];
 339          } else if (is_bold (v, "PR")) {
 340            *delim = STYLE_I_PRAGMAT_SYMBOL;
 341            v = &v[strlen ("PR")];
 342          }
 343          item = get_pragmat_item (&u, &v);
 344          SCAN_ERROR (item == NO_TEXT, start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 345  // Item "preprocessor" restarts preprocessing if it is off.
 346          if (A68_PARSER (no_preprocessing) && streq (item, "PREPROCESSOR") == 0) {
 347            A68_PARSER (no_preprocessing) = A68_FALSE;
 348            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 349          }
 350  // If preprocessing is switched off, we idle to closing bracket.
 351          else if (A68_PARSER (no_preprocessing)) {
 352            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 353          }
 354  // Item "nopreprocessor" stops preprocessing if it is on.
 355          if (streq (item, "NOPREPROCESSOR") == 0) {
 356            A68_PARSER (no_preprocessing) = A68_TRUE;
 357            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 358          }
 359  // Item "INCLUDE" includes a file.
 360          else if (streq (item, "INCLUDE") == 0) {
 361            *top = u;
 362            *ch = v;
 363            return item;
 364          }
 365  // Item "READ" includes a file.
 366          else if (streq (item, "READ") == 0) {
 367            *top = u;
 368            *ch = v;
 369            return item;
 370          }
 371  // Unrecognised item - probably options handled later by the tokeniser.
 372          else {
 373            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 374          }
 375        } else if (IS_UPPER (v[0])) {
 376  // Skip a bold word as you may trigger on REPR, for instance ...
 377          while (IS_UPPER (v[0])) {
 378            v++;
 379          }
 380        } else {
 381          v++;
 382        }
 383      }
 384      FORWARD (u);
 385      if (u != NO_LINE) {
 386        v = &(STRING (u)[0]);
 387      } else {
 388        v = NO_TEXT;
 389      }
 390    }
 391    *top = u;
 392    *ch = v;
 393    return NO_TEXT;
 394  }
 395  
 396  //! @brief Include files.
 397  
 398  void include_files (LINE_T * top)
 399  {
 400  // include_files
 401  // 
 402  // syntax: PR read "filename" PR
 403  //         PR include "filename" PR
 404  // 
 405  // The file gets inserted before the line containing the pragmat. In this way
 406  // correct line numbers are preserved which helps diagnostics. A file that has
 407  // been included will not be included a second time - it will be ignored. 
 408  // A rigorous fail-safe, but there is no mechanism to prevent recursive includes 
 409  // in A68 source code. User reports do not indicate sophisticated use of INCLUDE, 
 410  // so this is fine for now.
 411  // TODO - some day we might need `app', analogous to `cpp'.
 412  //
 413    BOOL_T make_pass = A68_TRUE;
 414    while (make_pass) {
 415      LINE_T *s, *t, *u = top;
 416      char *v = &(STRING (u)[0]);
 417      make_pass = A68_FALSE;
 418      errno = 0;
 419      while (u != NO_LINE) {
 420        int pr_lim;
 421        char *item = next_preprocessor_item (&u, &v, &pr_lim);
 422        LINE_T *start_l = u;
 423        char *start_c = v;
 424  // Search for PR include "filename" PR.
 425        if (item != NO_TEXT && (streq (item, "INCLUDE") == 0 || streq (item, "READ") == 0)) {
 426          FILE_T fd;
 427          int n, linum, fsize, k, bytes_read, fnwid;
 428          char *fbuf, delim;
 429          BUFFER fnb;
 430          char *fn;
 431  // Skip to filename.
 432          while (IS_ALPHA (v[0])) {
 433            v++;
 434          }
 435          while (IS_SPACE (v[0])) {
 436            v++;
 437          }
 438  // Scan quoted filename.
 439          SCAN_ERROR ((v[0] != QUOTE_CHAR && v[0] != '\''), start_l, start_c, ERROR_INCORRECT_FILENAME);
 440          delim = (v++)[0];
 441          n = 0;
 442          fnb[0] = NULL_CHAR;
 443  // Scan Algol 68 string (note: "" denotes a ", while in C it concatenates).
 444          do {
 445            SCAN_ERROR (EOL (v[0]), start_l, start_c, ERROR_INCORRECT_FILENAME);
 446            SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
 447            if (v[0] == delim) {
 448              while (v[0] == delim && v[1] == delim) {
 449                SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
 450                fnb[n++] = delim;
 451                fnb[n] = NULL_CHAR;
 452                v += 2;
 453              }
 454            } else if (IS_PRINT (v[0])) {
 455              fnb[n++] = *(v++);
 456              fnb[n] = NULL_CHAR;
 457            } else {
 458              SCAN_ERROR (A68_TRUE, start_l, start_c, ERROR_INCORRECT_FILENAME);
 459            }
 460          } while (v[0] != delim);
 461  // Insist that the pragmat is closed properly.
 462          v = &v[1];
 463          SCAN_ERROR (!skip_pragmat (&u, &v, pr_lim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 464          SCAN_ERROR (n == 0, start_l, start_c, ERROR_INCORRECT_FILENAME);
 465  // Make the name relative to the position of the source file (C preprocessor standard).
 466          if (FILENAME (u) != NO_TEXT) {
 467            fn = a68_relpath (a68_dirname (FILENAME (u)), a68_dirname (fnb), a68_basename (fnb));
 468          } else {
 469            fn = a68_relpath (FILE_PATH (&A68_JOB), a68_dirname (fnb), a68_basename (fnb));
 470          }
 471  // Do not check errno, since errno may be undefined here after a successful call.
 472          if (fn != NO_TEXT) {
 473            bufcpy (fnb, fn, BUFFER_SIZE);
 474          } else {
 475            char err[PATH_MAX + 1];
 476            bufcpy (err, ERROR_SOURCE_FILE_OPEN, PATH_MAX);
 477            bufcat (err, " ", PATH_MAX);
 478            bufcat (err, fnb, PATH_MAX);
 479            SCAN_ERROR (A68_TRUE, NO_LINE, NO_TEXT, err);
 480          }
 481          fnwid = (int) strlen (fnb) + 1;
 482          fn = (char *) get_fixed_heap_space ((size_t) fnwid);
 483          bufcpy (fn, fnb, fnwid);
 484  // Ignore the file when included more than once.
 485          for (t = top; t != NO_LINE; t = NEXT (t)) {
 486            if (strcmp (FILENAME (t), fn) == 0) {
 487              goto search_next_pragmat;
 488            }
 489          }
 490          t = NO_LINE;
 491  // Access the file.
 492          errno = 0;
 493          fd = open (fn, O_RDONLY | O_BINARY);
 494          ASSERT (snprintf (A68 (edit_line), SNPRINTF_SIZE, "*%s \"%s\"", ERROR_SOURCE_FILE_OPEN, fn) >= 0);
 495          SCAN_ERROR (fd == -1, start_l, start_c, A68 (edit_line));
 496          errno = 0;
 497          fsize = (int) lseek (fd, 0, SEEK_END);
 498          ASSERT (fsize >= 0);
 499          SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
 500          fbuf = (char *) get_temp_heap_space ((unt) (8 + fsize));
 501          errno = 0;
 502          ASSERT (lseek (fd, 0, SEEK_SET) >= 0);
 503          SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
 504          errno = 0;
 505          bytes_read = (int) io_read (fd, fbuf, (size_t) fsize);
 506          SCAN_ERROR (errno != 0 || bytes_read != fsize, start_l, start_c, ERROR_FILE_READ);
 507  // Buffer still usable?.
 508          if (fsize > A68_PARSER (max_scan_buf_length)) {
 509            A68_PARSER (max_scan_buf_length) = fsize;
 510            A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (max_scan_buf_length)));
 511          }
 512  // Link all lines into the list.
 513          linum = 1;
 514          s = u;
 515          t = PREVIOUS (u);
 516          k = 0;
 517          if (fsize == 0) {
 518  // If file is empty, insert single empty line.
 519            A68_PARSER (scan_buf)[0] = NEWLINE_CHAR;
 520            A68_PARSER (scan_buf)[1] = NULL_CHAR;
 521            append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
 522          } else
 523            while (k < fsize) {
 524              n = 0;
 525              A68_PARSER (scan_buf)[0] = NULL_CHAR;
 526              while (k < fsize && fbuf[k] != NEWLINE_CHAR) {
 527                SCAN_ERROR ((IS_CNTRL (fbuf[k]) && !IS_SPACE (fbuf[k])) || fbuf[k] == STOP_CHAR, start_l, start_c, ERROR_FILE_INCLUDE_CTRL);
 528                A68_PARSER (scan_buf)[n++] = fbuf[k++];
 529                A68_PARSER (scan_buf)[n] = NULL_CHAR;
 530              }
 531              A68_PARSER (scan_buf)[n++] = NEWLINE_CHAR;
 532              A68_PARSER (scan_buf)[n] = NULL_CHAR;
 533              if (k < fsize) {
 534                k++;
 535              }
 536              append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
 537            }
 538  // Conclude and go find another include directive, if any.
 539          NEXT (t) = s;
 540          PREVIOUS (s) = t;
 541          concatenate_lines (top);
 542          ASSERT (close (fd) == 0);
 543          make_pass = A68_TRUE;
 544        }
 545      search_next_pragmat:_SKIP_;
 546      }
 547    }
 548  }
 549  
 550  //! @brief Size of source file.
 551  
 552  int get_source_size (void)
 553  {
 554    FILE_T f = FILE_SOURCE_FD (&A68_JOB);
 555  // This is why WIN32 must open as "read binary".
 556    return (int) lseek (f, 0, SEEK_END);
 557  }
 558  
 559  //! @brief Append environment source lines.
 560  
 561  void append_environ (char *str[], LINE_T ** ref_l, int *line_num, char *name)
 562  {
 563    int k;
 564    for (k = 0; str[k] != NO_TEXT; k++) {
 565      int zero_line_num = 0;
 566      (*line_num)++;
 567      append_source_line (str[k], ref_l, &zero_line_num, name);
 568    }
 569  }
 570  
 571  //! @brief Read script file and make internal copy.
 572  
 573  BOOL_T read_script_file (void)
 574  {
 575    LINE_T *ref_l = NO_LINE;
 576    int k, n, num;
 577    unt len;
 578    BOOL_T file_end = A68_FALSE;
 579    BUFFER filename, linenum;
 580    char ch, *fn, *line;
 581    char *buffer = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (source_file_size)));
 582    FILE_T source = FILE_SOURCE_FD (&A68_JOB);
 583    ABEND (source == -1, ERROR_ACTION, __func__);
 584    buffer[0] = NULL_CHAR;
 585    n = 0;
 586    len = (unt) (8 + A68_PARSER (source_file_size));
 587    buffer = (char *) get_temp_heap_space (len);
 588    ASSERT (lseek (source, 0, SEEK_SET) >= 0);
 589    while (!file_end) {
 590  // Read the original file name.
 591      filename[0] = NULL_CHAR;
 592      k = 0;
 593      if (io_read (source, &ch, 1) == 0) {
 594        file_end = A68_TRUE;
 595        continue;
 596      }
 597      while (ch != NEWLINE_CHAR) {
 598        filename[k++] = ch;
 599        ASSERT (io_read (source, &ch, 1) == 1);
 600      }
 601      filename[k] = NULL_CHAR;
 602      fn = TEXT (add_token (&A68 (top_token), filename));
 603  // Read the original file number.
 604      linenum[0] = NULL_CHAR;
 605      k = 0;
 606      ASSERT (io_read (source, &ch, 1) == 1);
 607      while (ch != NEWLINE_CHAR) {
 608        linenum[k++] = ch;
 609        ASSERT (io_read (source, &ch, 1) == 1);
 610      }
 611      linenum[k] = NULL_CHAR;
 612      num = (int) strtol (linenum, NO_VAR, 10);
 613      ABEND (errno == ERANGE, ERROR_INTERNAL_CONSISTENCY, __func__);
 614  // COPY original line into buffer.
 615      ASSERT (io_read (source, &ch, 1) == 1);
 616      line = &buffer[n];
 617      while (ch != NEWLINE_CHAR) {
 618        buffer[n++] = ch;
 619        ASSERT (io_read (source, &ch, 1) == 1);
 620        ABEND ((unt) n >= len, ERROR_ACTION, __func__);
 621      }
 622      buffer[n++] = NEWLINE_CHAR;
 623      buffer[n] = NULL_CHAR;
 624      append_source_line (line, &ref_l, &num, fn);
 625    }
 626    return A68_TRUE;
 627  }
 628  
 629  //! @brief Read source file and make internal copy.
 630  
 631  BOOL_T read_source_file (void)
 632  {
 633    LINE_T *ref_l = NO_LINE;
 634    int line_num = 0, k, bytes_read;
 635    ssize_t l;
 636    FILE_T f = FILE_SOURCE_FD (&A68_JOB);
 637    char **prelude_start, **postlude, *buffer;
 638  // Prelude.
 639    if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
 640      prelude_start = bold_prelude_start;
 641      postlude = bold_postlude;
 642    } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
 643      prelude_start = quote_prelude_start;
 644      postlude = quote_postlude;
 645    } else {
 646      prelude_start = postlude = NO_VAR;
 647    }
 648    append_environ (prelude_start, &ref_l, &line_num, "prelude");
 649  // Read the file into a single buffer, so we save on system calls.
 650    line_num = 1;
 651    errno = 0;
 652    buffer = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (source_file_size)));
 653    ABEND (errno != 0 || buffer == NO_TEXT, ERROR_ALLOCATION, __func__);
 654    ASSERT (lseek (f, 0, SEEK_SET) >= 0);
 655    ABEND (errno != 0, ERROR_ACTION, __func__);
 656    errno = 0;
 657    bytes_read = (int) io_read (f, buffer, (size_t) A68_PARSER (source_file_size));
 658    ABEND (errno != 0 || bytes_read != A68_PARSER (source_file_size), ERROR_ACTION, __func__);
 659  // Link all lines into the list.
 660    k = 0;
 661    while (k < A68_PARSER (source_file_size)) {
 662      l = 0;
 663      A68_PARSER (scan_buf)[0] = NULL_CHAR;
 664      while (k < A68_PARSER (source_file_size) && buffer[k] != NEWLINE_CHAR) {
 665        if (k < A68_PARSER (source_file_size) - 1 && buffer[k] == CR_CHAR && buffer[k + 1] == NEWLINE_CHAR) {
 666          k++;
 667        } else {
 668          A68_PARSER (scan_buf)[l++] = buffer[k++];
 669          A68_PARSER (scan_buf)[l] = NULL_CHAR;
 670        }
 671      }
 672      A68_PARSER (scan_buf)[l++] = NEWLINE_CHAR;
 673      A68_PARSER (scan_buf)[l] = NULL_CHAR;
 674      if (k < A68_PARSER (source_file_size)) {
 675        k++;
 676      }
 677      append_source_line (A68_PARSER (scan_buf), &ref_l, &line_num, FILE_SOURCE_NAME (&A68_JOB));
 678      SCAN_ERROR (l != (ssize_t) strlen (A68_PARSER (scan_buf)), NO_LINE, NO_TEXT, ERROR_FILE_SOURCE_CTRL);
 679    }
 680  // Postlude.
 681    append_environ (postlude, &ref_l, &line_num, "postlude");
 682  // Concatenate lines.
 683    concatenate_lines (TOP_LINE (&A68_JOB));
 684  // Include files.
 685    include_files (TOP_LINE (&A68_JOB));
 686    return A68_TRUE;
 687  }
 688  
 689  //! @brief Next_char get next character from internal copy of source file.
 690  
 691  char next_char (LINE_T ** ref_l, char **ref_s, BOOL_T allow_typo)
 692  {
 693    char ch;
 694  #if defined (NO_TYPO)
 695    allow_typo = A68_FALSE;
 696  #endif
 697    LOW_STACK_ALERT (NO_NODE);
 698  // Source empty?.
 699    if (*ref_l == NO_LINE) {
 700      return STOP_CHAR;
 701    } else {
 702      LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68_JOB) & SOURCE_MASK ? A68_TRUE : A68_FALSE);
 703  // Take new line?.
 704      if ((*ref_s)[0] == NEWLINE_CHAR || (*ref_s)[0] == NULL_CHAR) {
 705        *ref_l = NEXT (*ref_l);
 706        if (*ref_l == NO_LINE) {
 707          return STOP_CHAR;
 708        }
 709        *ref_s = STRING (*ref_l);
 710      } else {
 711        (*ref_s)++;
 712      }
 713  // Deliver next char.
 714      ch = (*ref_s)[0];
 715      if (allow_typo && (IS_SPACE (ch) || ch == FORMFEED_CHAR)) {
 716        ch = next_char (ref_l, ref_s, allow_typo);
 717      }
 718      return ch;
 719    }
 720  }
 721  
 722  //! @brief Find first character that can start a valid symbol.
 723  
 724  void get_good_char (char *ref_c, LINE_T ** ref_l, char **ref_s)
 725  {
 726    while (*ref_c != STOP_CHAR && (IS_SPACE (*ref_c) || (*ref_c == NULL_CHAR))) {
 727      if (*ref_l != NO_LINE) {
 728        LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68_JOB) & SOURCE_MASK ? A68_TRUE : A68_FALSE);
 729      }
 730      *ref_c = next_char (ref_l, ref_s, A68_FALSE);
 731    }
 732  }
 733  
 734  //! @brief Handle a pragment (pragmat or comment).
 735  
 736  char *pragment (int type, LINE_T ** ref_l, char **ref_c)
 737  {
 738  #define INIT_BUFFER {chars_in_buf = 0; A68_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
 739  #define ADD_ONE_CHAR(ch) {A68_PARSER (scan_buf)[chars_in_buf ++] = ch; A68_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
 740    char c = **ref_c, *term_s = NO_TEXT, *start_c = *ref_c;
 741    char *z = NO_TEXT;
 742    LINE_T *start_l = *ref_l;
 743    int term_s_length, chars_in_buf;
 744    BOOL_T stop, pragmat = A68_FALSE;
 745  // Set terminator.
 746    if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
 747      if (type == STYLE_I_COMMENT_SYMBOL) {
 748        term_s = "CO";
 749      } else if (type == STYLE_II_COMMENT_SYMBOL) {
 750        term_s = "#";
 751      } else if (type == BOLD_COMMENT_SYMBOL) {
 752        term_s = "COMMENT";
 753      } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
 754        term_s = "PR";
 755        pragmat = A68_TRUE;
 756      } else if (type == BOLD_PRAGMAT_SYMBOL) {
 757        term_s = "PRAGMAT";
 758        pragmat = A68_TRUE;
 759      }
 760    } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
 761      if (type == STYLE_I_COMMENT_SYMBOL) {
 762        term_s = "'CO'";
 763      } else if (type == STYLE_II_COMMENT_SYMBOL) {
 764        term_s = "#";
 765      } else if (type == BOLD_COMMENT_SYMBOL) {
 766        term_s = "'COMMENT'";
 767      } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
 768        term_s = "'PR'";
 769        pragmat = A68_TRUE;
 770      } else if (type == BOLD_PRAGMAT_SYMBOL) {
 771        term_s = "'PRAGMAT'";
 772        pragmat = A68_TRUE;
 773      }
 774    }
 775    term_s_length = (int) strlen (term_s);
 776  // Scan for terminator.
 777    INIT_BUFFER;
 778    stop = A68_FALSE;
 779    while (stop == A68_FALSE) {
 780      SCAN_ERROR (c == STOP_CHAR, start_l, start_c, ERROR_UNTERMINATED_PRAGMENT);
 781  // A ".." or '..' delimited string in a PRAGMAT.
 782      if (pragmat && (c == QUOTE_CHAR || (c == '\'' && OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING))) {
 783        char delim = c;
 784        BOOL_T eos = A68_FALSE;
 785        ADD_ONE_CHAR (c);
 786        c = next_char (ref_l, ref_c, A68_FALSE);
 787        while (!eos) {
 788          SCAN_ERROR (EOL (c), start_l, start_c, ERROR_LONG_STRING);
 789          if (c == delim) {
 790            ADD_ONE_CHAR (delim);
 791            save_state (*ref_l, *ref_c, c);
 792            c = next_char (ref_l, ref_c, A68_FALSE);
 793            if (c == delim) {
 794              c = next_char (ref_l, ref_c, A68_FALSE);
 795            } else {
 796              restore_state (ref_l, ref_c, &c);
 797              eos = A68_TRUE;
 798            }
 799          } else if (IS_PRINT (c)) {
 800            ADD_ONE_CHAR (c);
 801            c = next_char (ref_l, ref_c, A68_FALSE);
 802          } else {
 803            unworthy (start_l, start_c, c);
 804          }
 805        }
 806      } else if (EOL (c)) {
 807        ADD_ONE_CHAR (NEWLINE_CHAR);
 808      } else if (IS_PRINT (c) || IS_SPACE (c)) {
 809        ADD_ONE_CHAR (c);
 810      }
 811      if (chars_in_buf >= term_s_length) {
 812  // Check whether we encountered the terminator.
 813        stop = (BOOL_T) (strcmp (term_s, &(A68_PARSER (scan_buf)[chars_in_buf - term_s_length])) == 0);
 814      }
 815      c = next_char (ref_l, ref_c, A68_FALSE);
 816    }
 817    A68_PARSER (scan_buf)[chars_in_buf - term_s_length] = NULL_CHAR;
 818    z = new_string (term_s, A68_PARSER (scan_buf), term_s, NO_TEXT);
 819    if (type == STYLE_I_PRAGMAT_SYMBOL || type == BOLD_PRAGMAT_SYMBOL) {
 820      isolate_options (A68_PARSER (scan_buf), start_l);
 821    }
 822    return z;
 823  #undef ADD_ONE_CHAR
 824  #undef INIT_BUFFER
 825  }
 826  
 827  //! @brief Attribute for format item.
 828  
 829  int get_format_item (char ch)
 830  {
 831    switch (TO_LOWER (ch)) {
 832    case 'a':{
 833        return FORMAT_ITEM_A;
 834      }
 835    case 'b':{
 836        return FORMAT_ITEM_B;
 837      }
 838    case 'c':{
 839        return FORMAT_ITEM_C;
 840      }
 841    case 'd':{
 842        return FORMAT_ITEM_D;
 843      }
 844    case 'e':{
 845        return FORMAT_ITEM_E;
 846      }
 847    case 'f':{
 848        return FORMAT_ITEM_F;
 849      }
 850    case 'g':{
 851        return FORMAT_ITEM_G;
 852      }
 853    case 'h':{
 854        return FORMAT_ITEM_H;
 855      }
 856    case 'i':{
 857        return FORMAT_ITEM_I;
 858      }
 859    case 'j':{
 860        return FORMAT_ITEM_J;
 861      }
 862    case 'k':{
 863        return FORMAT_ITEM_K;
 864      }
 865    case 'l':
 866    case '/':{
 867        return FORMAT_ITEM_L;
 868      }
 869    case 'm':{
 870        return FORMAT_ITEM_M;
 871      }
 872    case 'n':{
 873        return FORMAT_ITEM_N;
 874      }
 875    case 'o':{
 876        return FORMAT_ITEM_O;
 877      }
 878    case 'p':{
 879        return FORMAT_ITEM_P;
 880      }
 881    case 'q':{
 882        return FORMAT_ITEM_Q;
 883      }
 884    case 'r':{
 885        return FORMAT_ITEM_R;
 886      }
 887    case 's':{
 888        return FORMAT_ITEM_S;
 889      }
 890    case 't':{
 891        return FORMAT_ITEM_T;
 892      }
 893    case 'u':{
 894        return FORMAT_ITEM_U;
 895      }
 896    case 'v':{
 897        return FORMAT_ITEM_V;
 898      }
 899    case 'w':{
 900        return FORMAT_ITEM_W;
 901      }
 902    case 'x':{
 903        return FORMAT_ITEM_X;
 904      }
 905    case 'y':{
 906        return FORMAT_ITEM_Y;
 907      }
 908    case 'z':{
 909        return FORMAT_ITEM_Z;
 910      }
 911    case '+':{
 912        return FORMAT_ITEM_PLUS;
 913      }
 914    case '-':{
 915        return FORMAT_ITEM_MINUS;
 916      }
 917    case POINT_CHAR:{
 918        return FORMAT_ITEM_POINT;
 919      }
 920    case '%':{
 921        return FORMAT_ITEM_ESCAPE;
 922      }
 923    default:{
 924        return 0;
 925      }
 926    }
 927  }
 928  
 929  //! @brief Whether input shows exponent character.
 930  
 931  BOOL_T is_exp_char (LINE_T ** ref_l, char **ref_s, char *ch)
 932  {
 933    BOOL_T ret = A68_FALSE;
 934    char exp_syms[3];
 935    if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
 936      exp_syms[0] = EXPONENT_CHAR;
 937      exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
 938      exp_syms[2] = NULL_CHAR;
 939    } else {
 940      exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
 941      exp_syms[1] = BACKSLASH_CHAR;
 942      exp_syms[2] = NULL_CHAR;
 943    }
 944    save_state (*ref_l, *ref_s, *ch);
 945    if (strchr (exp_syms, *ch) != NO_TEXT) {
 946      *ch = next_char (ref_l, ref_s, A68_TRUE);
 947      ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
 948    }
 949    restore_state (ref_l, ref_s, ch);
 950    return ret;
 951  }
 952  
 953  //! @brief Whether input shows radix character.
 954  
 955  BOOL_T is_radix_char (LINE_T ** ref_l, char **ref_s, char *ch)
 956  {
 957    BOOL_T ret = A68_FALSE;
 958    save_state (*ref_l, *ref_s, *ch);
 959    if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
 960      if (*ch == TO_UPPER (RADIX_CHAR)) {
 961        *ch = next_char (ref_l, ref_s, A68_TRUE);
 962        ret = (BOOL_T) (strchr ("0123456789ABCDEF", *ch) != NO_TEXT);
 963      }
 964    } else {
 965      if (*ch == RADIX_CHAR) {
 966        *ch = next_char (ref_l, ref_s, A68_TRUE);
 967        ret = (BOOL_T) (strchr ("0123456789abcdef", *ch) != NO_TEXT);
 968      }
 969    }
 970    restore_state (ref_l, ref_s, ch);
 971    return ret;
 972  }
 973  
 974  //! @brief Whether input shows decimal point.
 975  
 976  BOOL_T is_decimal_point (LINE_T ** ref_l, char **ref_s, char *ch)
 977  {
 978    BOOL_T ret = A68_FALSE;
 979    save_state (*ref_l, *ref_s, *ch);
 980    if (*ch == POINT_CHAR) {
 981      char exp_syms[3];
 982      if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
 983        exp_syms[0] = EXPONENT_CHAR;
 984        exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
 985        exp_syms[2] = NULL_CHAR;
 986      } else {
 987        exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
 988        exp_syms[1] = BACKSLASH_CHAR;
 989        exp_syms[2] = NULL_CHAR;
 990      }
 991      *ch = next_char (ref_l, ref_s, A68_TRUE);
 992      if (strchr (exp_syms, *ch) != NO_TEXT) {
 993        *ch = next_char (ref_l, ref_s, A68_TRUE);
 994        ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
 995      } else {
 996        ret = (BOOL_T) (strchr ("0123456789", *ch) != NO_TEXT);
 997      }
 998    }
 999    restore_state (ref_l, ref_s, ch);
1000    return ret;
1001  }
1002  
1003  //! @brief Get next token from internal copy of source file..
1004  
1005  void get_next_token (BOOL_T in_format, LINE_T ** ref_l, char **ref_s, LINE_T ** start_l, char **start_c, int *att)
1006  {
1007    char c = **ref_s, *sym = A68_PARSER (scan_buf);
1008    sym[0] = NULL_CHAR;
1009    get_good_char (&c, ref_l, ref_s);
1010    *start_l = *ref_l;
1011    *start_c = *ref_s;
1012    if (c == STOP_CHAR) {
1013  // We are at EOF.
1014      (sym++)[0] = STOP_CHAR;
1015      sym[0] = NULL_CHAR;
1016      return;
1017    }
1018  // In a format.
1019    if (in_format) {
1020      char *format_items;
1021      if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1022        format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1023      } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1024        format_items = "/%\\+-.ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1025      } else {
1026        format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1027      }
1028      if (strchr (format_items, c) != NO_TEXT) {
1029  // General format items.
1030        (sym++)[0] = c;
1031        sym[0] = NULL_CHAR;
1032        *att = get_format_item (c);
1033        (void) next_char (ref_l, ref_s, A68_FALSE);
1034        return;
1035      }
1036      if (IS_DIGIT (c)) {
1037  // INT denotation for static replicator.
1038        SCAN_DIGITS (c);
1039        sym[0] = NULL_CHAR;
1040        *att = STATIC_REPLICATOR;
1041        return;
1042      }
1043    }
1044  // Not in a format.
1045    if (IS_UPPER (c)) {
1046      if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1047  // Upper case word - bold tag.
1048        while (IS_UPPER (c) || c == '_') {
1049          (sym++)[0] = c;
1050          c = next_char (ref_l, ref_s, A68_FALSE);
1051        }
1052        sym[0] = NULL_CHAR;
1053        *att = BOLD_TAG;
1054      } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1055        while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1056          (sym++)[0] = c;
1057          c = next_char (ref_l, ref_s, A68_TRUE);
1058        }
1059        sym[0] = NULL_CHAR;
1060        *att = IDENTIFIER;
1061      }
1062    } else if (c == '\'') {
1063  // Quote, uppercase word, quote - bold tag.
1064      int k = 0;
1065      c = next_char (ref_l, ref_s, A68_FALSE);
1066      while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1067        (sym++)[0] = c;
1068        k++;
1069        c = next_char (ref_l, ref_s, A68_TRUE);
1070      }
1071      SCAN_ERROR (k == 0, *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1072      sym[0] = NULL_CHAR;
1073      *att = BOLD_TAG;
1074  // Skip terminating quote, or complain if it is not there.
1075      SCAN_ERROR (c != '\'', *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1076      c = next_char (ref_l, ref_s, A68_FALSE);
1077    } else if (IS_LOWER (c)) {
1078  // Lower case word - identifier.
1079      while (IS_LOWER (c) || IS_DIGIT (c) || c == '_') {
1080        (sym++)[0] = c;
1081        c = next_char (ref_l, ref_s, A68_TRUE);
1082      }
1083      sym[0] = NULL_CHAR;
1084      *att = IDENTIFIER;
1085    } else if (c == POINT_CHAR) {
1086  // Begins with a point symbol - point, dotdot, L REAL denotation.
1087      if (is_decimal_point (ref_l, ref_s, &c)) {
1088        (sym++)[0] = '0';
1089        (sym++)[0] = POINT_CHAR;
1090        c = next_char (ref_l, ref_s, A68_TRUE);
1091        SCAN_DIGITS (c);
1092        if (is_exp_char (ref_l, ref_s, &c)) {
1093          SCAN_EXPONENT_PART (c);
1094        }
1095        sym[0] = NULL_CHAR;
1096        *att = REAL_DENOTATION;
1097      } else {
1098        c = next_char (ref_l, ref_s, A68_TRUE);
1099        if (c == POINT_CHAR) {
1100          (sym++)[0] = POINT_CHAR;
1101          (sym++)[0] = POINT_CHAR;
1102          sym[0] = NULL_CHAR;
1103          *att = DOTDOT_SYMBOL;
1104          c = next_char (ref_l, ref_s, A68_FALSE);
1105        } else {
1106          (sym++)[0] = POINT_CHAR;
1107          sym[0] = NULL_CHAR;
1108          *att = POINT_SYMBOL;
1109        }
1110      }
1111    } else if (IS_DIGIT (c)) {
1112  // Something that begins with a digit - L INT denotation, L REAL denotation.
1113      SCAN_DIGITS (c);
1114      if (is_decimal_point (ref_l, ref_s, &c)) {
1115        c = next_char (ref_l, ref_s, A68_TRUE);
1116        if (is_exp_char (ref_l, ref_s, &c)) {
1117          (sym++)[0] = POINT_CHAR;
1118          (sym++)[0] = '0';
1119          SCAN_EXPONENT_PART (c);
1120          *att = REAL_DENOTATION;
1121        } else {
1122          (sym++)[0] = POINT_CHAR;
1123          SCAN_DIGITS (c);
1124          if (is_exp_char (ref_l, ref_s, &c)) {
1125            SCAN_EXPONENT_PART (c);
1126          }
1127          *att = REAL_DENOTATION;
1128        }
1129      } else if (is_exp_char (ref_l, ref_s, &c)) {
1130        SCAN_EXPONENT_PART (c);
1131        *att = REAL_DENOTATION;
1132      } else if (is_radix_char (ref_l, ref_s, &c)) {
1133        (sym++)[0] = c;
1134        c = next_char (ref_l, ref_s, A68_TRUE);
1135        if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1136          while (IS_DIGIT (c) || strchr ("abcdef", c) != NO_TEXT) {
1137            (sym++)[0] = c;
1138            c = next_char (ref_l, ref_s, A68_TRUE);
1139          }
1140        } else {
1141          while (IS_DIGIT (c) || strchr ("ABCDEF", c) != NO_TEXT) {
1142            (sym++)[0] = c;
1143            c = next_char (ref_l, ref_s, A68_TRUE);
1144          }
1145        }
1146        *att = BITS_DENOTATION;
1147      } else {
1148        *att = INT_DENOTATION;
1149      }
1150      sym[0] = NULL_CHAR;
1151    } else if (c == QUOTE_CHAR) {
1152  // STRING denotation.
1153      BOOL_T stop = A68_FALSE;
1154      while (!stop) {
1155        c = next_char (ref_l, ref_s, A68_FALSE);
1156        while (c != QUOTE_CHAR && c != STOP_CHAR) {
1157          SCAN_ERROR (EOL (c), *start_l, *start_c, ERROR_LONG_STRING);
1158          (sym++)[0] = c;
1159          c = next_char (ref_l, ref_s, A68_FALSE);
1160        }
1161        SCAN_ERROR (*ref_l == NO_LINE, *start_l, *start_c, ERROR_UNTERMINATED_STRING);
1162        c = next_char (ref_l, ref_s, A68_FALSE);
1163        if (c == QUOTE_CHAR) {
1164          (sym++)[0] = QUOTE_CHAR;
1165        } else {
1166          stop = A68_TRUE;
1167        }
1168      }
1169      sym[0] = NULL_CHAR;
1170      *att = (in_format ? LITERAL : ROW_CHAR_DENOTATION);
1171    } else if (strchr ("#$()[]{},;@", c) != NO_TEXT) {
1172  // Single character symbols.
1173      (sym++)[0] = c;
1174      (void) next_char (ref_l, ref_s, A68_FALSE);
1175      sym[0] = NULL_CHAR;
1176      *att = 0;
1177    } else if (c == '|') {
1178  // Bar.
1179      (sym++)[0] = c;
1180      c = next_char (ref_l, ref_s, A68_FALSE);
1181      if (c == ':') {
1182        (sym++)[0] = c;
1183        (void) next_char (ref_l, ref_s, A68_FALSE);
1184      }
1185      sym[0] = NULL_CHAR;
1186      *att = 0;
1187    } else if (c == '!' && OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1188  // Bar, will be replaced with modern variant.
1189  // For this reason ! is not a MONAD with quote-stropping.
1190      (sym++)[0] = '|';
1191      c = next_char (ref_l, ref_s, A68_FALSE);
1192      if (c == ':') {
1193        (sym++)[0] = c;
1194        (void) next_char (ref_l, ref_s, A68_FALSE);
1195      }
1196      sym[0] = NULL_CHAR;
1197      *att = 0;
1198    } else if (c == ':') {
1199  // Colon, semicolon, IS, ISNT.
1200      (sym++)[0] = c;
1201      c = next_char (ref_l, ref_s, A68_FALSE);
1202      if (c == '=') {
1203        (sym++)[0] = c;
1204        if ((c = next_char (ref_l, ref_s, A68_FALSE)) == ':') {
1205          (sym++)[0] = c;
1206          c = next_char (ref_l, ref_s, A68_FALSE);
1207        }
1208      } else if (c == '/') {
1209        (sym++)[0] = c;
1210        if ((c = next_char (ref_l, ref_s, A68_FALSE)) == '=') {
1211          (sym++)[0] = c;
1212          if ((c = next_char (ref_l, ref_s, A68_FALSE)) == ':') {
1213            (sym++)[0] = c;
1214            c = next_char (ref_l, ref_s, A68_FALSE);
1215          }
1216        }
1217      } else if (c == ':') {
1218        (sym++)[0] = c;
1219        if ((c = next_char (ref_l, ref_s, A68_FALSE)) == '=') {
1220          (sym++)[0] = c;
1221        }
1222      }
1223      sym[0] = NULL_CHAR;
1224      *att = 0;
1225    } else if (c == '=') {
1226  // Operator starting with "=".
1227      char *scanned = sym;
1228      (sym++)[0] = c;
1229      c = next_char (ref_l, ref_s, A68_FALSE);
1230      if (strchr (NOMADS, c) != NO_TEXT) {
1231        (sym++)[0] = c;
1232        c = next_char (ref_l, ref_s, A68_FALSE);
1233      }
1234      if (c == '=') {
1235        (sym++)[0] = c;
1236        if (next_char (ref_l, ref_s, A68_FALSE) == ':') {
1237          (sym++)[0] = ':';
1238          c = next_char (ref_l, ref_s, A68_FALSE);
1239          if (strlen (sym) < 4 && c == '=') {
1240            (sym++)[0] = '=';
1241            (void) next_char (ref_l, ref_s, A68_FALSE);
1242          }
1243        }
1244      } else if (c == ':') {
1245        (sym++)[0] = c;
1246        sym[0] = NULL_CHAR;
1247        if (next_char (ref_l, ref_s, A68_FALSE) == '=') {
1248          (sym++)[0] = '=';
1249          (void) next_char (ref_l, ref_s, A68_FALSE);
1250        } else {
1251          SCAN_ERROR (!(strcmp (scanned, "=:") == 0 || strcmp (scanned, "==:") == 0), *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1252        }
1253      }
1254      sym[0] = NULL_CHAR;
1255      if (strcmp (scanned, "=") == 0) {
1256        *att = EQUALS_SYMBOL;
1257      } else {
1258        *att = OPERATOR;
1259      }
1260    } else if (strchr (MONADS, c) != NO_TEXT || strchr (NOMADS, c) != NO_TEXT) {
1261  // Operator.
1262      char *scanned = sym;
1263      (sym++)[0] = c;
1264      c = next_char (ref_l, ref_s, A68_FALSE);
1265      if (strchr (NOMADS, c) != NO_TEXT) {
1266        (sym++)[0] = c;
1267        c = next_char (ref_l, ref_s, A68_FALSE);
1268      }
1269      if (c == '=') {
1270        (sym++)[0] = c;
1271        if (next_char (ref_l, ref_s, A68_FALSE) == ':') {
1272          (sym++)[0] = ':';
1273          c = next_char (ref_l, ref_s, A68_FALSE);
1274          if (strlen (scanned) < 4 && c == '=') {
1275            (sym++)[0] = '=';
1276            (void) next_char (ref_l, ref_s, A68_FALSE);
1277          }
1278        }
1279      } else if (c == ':') {
1280        (sym++)[0] = c;
1281        sym[0] = NULL_CHAR;
1282        if (next_char (ref_l, ref_s, A68_FALSE) == '=') {
1283          (sym++)[0] = '=';
1284          sym[0] = NULL_CHAR;
1285          (void) next_char (ref_l, ref_s, A68_FALSE);
1286        } else {
1287          SCAN_ERROR (strcmp (&(scanned[1]), "=:") != 0, *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1288        }
1289      }
1290      sym[0] = NULL_CHAR;
1291      *att = OPERATOR;
1292    } else {
1293  // Afuuus ... strange characters!.
1294      unworthy (*start_l, *start_c, (int) c);
1295    }
1296  }
1297  
1298  //! @brief Whether att opens an embedded clause.
1299  
1300  BOOL_T open_nested_clause (int att)
1301  {
1302    switch (att) {
1303    case OPEN_SYMBOL:
1304    case BEGIN_SYMBOL:
1305    case PAR_SYMBOL:
1306    case IF_SYMBOL:
1307    case CASE_SYMBOL:
1308    case FOR_SYMBOL:
1309    case FROM_SYMBOL:
1310    case BY_SYMBOL:
1311    case TO_SYMBOL:
1312    case DOWNTO_SYMBOL:
1313    case WHILE_SYMBOL:
1314    case DO_SYMBOL:
1315    case SUB_SYMBOL:
1316    case ACCO_SYMBOL:
1317      {
1318        return A68_TRUE;
1319      }
1320    }
1321    return A68_FALSE;
1322  }
1323  
1324  //! @brief Whether att closes an embedded clause.
1325  
1326  BOOL_T close_nested_clause (int att)
1327  {
1328    switch (att) {
1329    case CLOSE_SYMBOL:
1330    case END_SYMBOL:
1331    case FI_SYMBOL:
1332    case ESAC_SYMBOL:
1333    case OD_SYMBOL:
1334    case BUS_SYMBOL:
1335    case OCCA_SYMBOL:
1336      {
1337        return A68_TRUE;
1338      }
1339    }
1340    return A68_FALSE;
1341  }
1342  
1343  //! @brief Cast a string to lower case.
1344  
1345  void make_lower_case (char *p)
1346  {
1347    for (; p != NO_TEXT && p[0] != NULL_CHAR; p++) {
1348      p[0] = (char) TO_LOWER (p[0]);
1349    }
1350  }
1351  
1352  //! @brief Construct a linear list of tokens.
1353  
1354  void tokenise_source (NODE_T ** root, int level, BOOL_T in_format, LINE_T ** l, char **s, LINE_T ** start_l, char **start_c)
1355  {
1356    char *lpr = NO_TEXT;
1357    int lprt = 0;
1358    while (l != NO_VAR && !A68_PARSER (stop_scanner)) {
1359      int att = 0;
1360      get_next_token (in_format, l, s, start_l, start_c, &att);
1361      if (A68_PARSER (scan_buf)[0] == STOP_CHAR) {
1362        A68_PARSER (stop_scanner) = A68_TRUE;
1363      } else if (strlen (A68_PARSER (scan_buf)) > 0 || att == ROW_CHAR_DENOTATION || att == LITERAL) {
1364        KEYWORD_T *kw;
1365        char *c = NO_TEXT;
1366        BOOL_T make_node = A68_TRUE;
1367        char *trailing = NO_TEXT;
1368        if (att != IDENTIFIER) {
1369          kw = find_keyword (A68 (top_keyword), A68_PARSER (scan_buf));
1370        } else {
1371          kw = NO_KEYWORD;
1372        }
1373        if (!(kw != NO_KEYWORD && att != ROW_CHAR_DENOTATION)) {
1374          if (att == IDENTIFIER) {
1375            make_lower_case (A68_PARSER (scan_buf));
1376          }
1377          if (att != ROW_CHAR_DENOTATION && att != LITERAL) {
1378            int len = (int) strlen (A68_PARSER (scan_buf));
1379            while (len >= 1 && A68_PARSER (scan_buf)[len - 1] == '_') {
1380              trailing = "_";
1381              A68_PARSER (scan_buf)[len - 1] = NULL_CHAR;
1382              len--;
1383            }
1384          }
1385          c = TEXT (add_token (&A68 (top_token), A68_PARSER (scan_buf)));
1386        } else {
1387          if (IS (kw, TO_SYMBOL)) {
1388  // Merge GO and TO to GOTO.
1389            if (*root != NO_NODE && IS (*root, GO_SYMBOL)) {
1390              ATTRIBUTE (*root) = GOTO_SYMBOL;
1391              NSYMBOL (*root) = TEXT (find_keyword (A68 (top_keyword), "GOTO"));
1392              make_node = A68_FALSE;
1393            } else {
1394              att = ATTRIBUTE (kw);
1395              c = TEXT (kw);
1396            }
1397          } else {
1398            if (att == 0 || att == BOLD_TAG) {
1399              att = ATTRIBUTE (kw);
1400            }
1401            c = TEXT (kw);
1402  // Handle pragments.
1403            if (att == STYLE_II_COMMENT_SYMBOL || att == STYLE_I_COMMENT_SYMBOL || att == BOLD_COMMENT_SYMBOL) {
1404              char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1405              if (lpr == NO_TEXT || (int) strlen (lpr) == 0) {
1406                lpr = nlpr;
1407              } else {
1408                char *stale = lpr;
1409                lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1410                a68_free (nlpr);
1411                a68_free (stale);
1412              }
1413              lprt = att;
1414              make_node = A68_FALSE;
1415            } else if (att == STYLE_I_PRAGMAT_SYMBOL || att == BOLD_PRAGMAT_SYMBOL) {
1416              char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1417              if (lpr == NO_TEXT || (int) strlen (lpr) == 0) {
1418                lpr = nlpr;
1419              } else {
1420                char *stale = lpr;
1421                lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1422                a68_free (nlpr);
1423                a68_free (stale);
1424              }
1425              lprt = att;
1426              if (!A68_PARSER (stop_scanner)) {
1427                (void) set_options (OPTION_LIST (&A68_JOB), A68_FALSE);
1428                make_node = A68_FALSE;
1429              }
1430            }
1431          }
1432        }
1433  // Add token to the tree.
1434        if (make_node) {
1435          NODE_T *q = new_node ();
1436          INFO (q) = new_node_info ();
1437          switch (att) {
1438          case ASSIGN_SYMBOL:
1439          case END_SYMBOL:
1440          case ESAC_SYMBOL:
1441          case OD_SYMBOL:
1442          case OF_SYMBOL:
1443          case FI_SYMBOL:
1444          case CLOSE_SYMBOL:
1445          case BUS_SYMBOL:
1446          case COLON_SYMBOL:
1447          case COMMA_SYMBOL:
1448          case DOTDOT_SYMBOL:
1449          case SEMI_SYMBOL:
1450            {
1451              GINFO (q) = NO_GINFO;
1452              break;
1453            }
1454          default:{
1455              GINFO (q) = new_genie_info ();
1456              break;
1457            }
1458          }
1459          STATUS (q) = OPTION_NODEMASK (&A68_JOB);
1460          LINE (INFO (q)) = *start_l;
1461          CHAR_IN_LINE (INFO (q)) = *start_c;
1462          PRIO (INFO (q)) = 0;
1463          PROCEDURE_LEVEL (INFO (q)) = 0;
1464          ATTRIBUTE (q) = att;
1465          NSYMBOL (q) = c;
1466          PREVIOUS (q) = *root;
1467          SUB (q) = NEXT (q) = NO_NODE;
1468          TABLE (q) = NO_TABLE;
1469          MOID (q) = NO_MOID;
1470          TAX (q) = NO_TAG;
1471          if (lpr != NO_TEXT) {
1472            NPRAGMENT (q) = lpr;
1473            NPRAGMENT_TYPE (q) = lprt;
1474            lpr = NO_TEXT;
1475            lprt = 0;
1476          }
1477          if (*root != NO_NODE) {
1478            NEXT (*root) = q;
1479          }
1480          if (TOP_NODE (&A68_JOB) == NO_NODE) {
1481            TOP_NODE (&A68_JOB) = q;
1482          }
1483          *root = q;
1484          if (trailing != NO_TEXT) {
1485            diagnostic (A68_WARNING | A68_FORCE_DIAGNOSTICS, q, WARNING_TRAILING, trailing, att);
1486          }
1487        }
1488  // Redirection in tokenising formats. The scanner is a recursive-descent type as
1489  // to know when it scans a format text and when not. 
1490        if (in_format && att == FORMAT_DELIMITER_SYMBOL) {
1491          return;
1492        } else if (!in_format && att == FORMAT_DELIMITER_SYMBOL) {
1493          tokenise_source (root, level + 1, A68_TRUE, l, s, start_l, start_c);
1494        } else if (in_format && open_nested_clause (att)) {
1495          NODE_T *z = PREVIOUS (*root);
1496          if (z != NO_NODE && is_one_of (z, FORMAT_ITEM_N, FORMAT_ITEM_G, FORMAT_ITEM_H, FORMAT_ITEM_F, STOP)) {
1497            tokenise_source (root, level, A68_FALSE, l, s, start_l, start_c);
1498          } else if (att == OPEN_SYMBOL) {
1499            ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1500          } else if (OPTION_BRACKETS (&A68_JOB) && att == SUB_SYMBOL) {
1501            ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1502          } else if (OPTION_BRACKETS (&A68_JOB) && att == ACCO_SYMBOL) {
1503            ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1504          }
1505        } else if (!in_format && level > 0 && open_nested_clause (att)) {
1506          tokenise_source (root, level + 1, A68_FALSE, l, s, start_l, start_c);
1507        } else if (!in_format && level > 0 && close_nested_clause (att)) {
1508          return;
1509        } else if (in_format && att == CLOSE_SYMBOL) {
1510          ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1511        } else if (OPTION_BRACKETS (&A68_JOB) && in_format && att == BUS_SYMBOL) {
1512          ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1513        } else if (OPTION_BRACKETS (&A68_JOB) && in_format && att == OCCA_SYMBOL) {
1514          ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1515        }
1516      }
1517    }
1518  }
1519  
1520  //! @brief Tokenise source file, build initial syntax tree.
1521  
1522  BOOL_T lexical_analyser (void)
1523  {
1524    LINE_T *l = NO_LINE, *start_l = NO_LINE;
1525    char *s = NO_TEXT, *start_c = NO_TEXT;
1526    NODE_T *root = NO_NODE;
1527    A68_PARSER (scan_buf) = NO_TEXT;
1528    A68_PARSER (max_scan_buf_length) = A68_PARSER (source_file_size) = get_source_size ();
1529  // Errors in file?.
1530    if (A68_PARSER (max_scan_buf_length) == 0) {
1531      return A68_FALSE;
1532    }
1533    if (OPTION_RUN_SCRIPT (&A68_JOB)) {
1534      A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (max_scan_buf_length)));
1535      if (!read_script_file ()) {
1536        return A68_FALSE;
1537      }
1538    } else {
1539      A68_PARSER (max_scan_buf_length) += KILOBYTE;       // for the environ, more than enough
1540      A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) A68_PARSER (max_scan_buf_length));
1541  // Errors in file?.
1542      if (!read_source_file ()) {
1543        return A68_FALSE;
1544      }
1545    }
1546  // Start tokenising.
1547    A68_PARSER (read_error) = A68_FALSE;
1548    A68_PARSER (stop_scanner) = A68_FALSE;
1549    if ((l = TOP_LINE (&A68_JOB)) != NO_LINE) {
1550      s = STRING (l);
1551    }
1552    tokenise_source (&root, 0, A68_FALSE, &l, &s, &start_l, &start_c);
1553    return A68_TRUE;
1554  }