parser-scanner.c

     
   1  //! @file parser-scanner.c
   2  //! @author J. Marcel van der Veer
   3  
   4  //! @section Copyright
   5  //!
   6  //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
   7  //! Copyright 2001-2024 J. Marcel van der Veer [algol68g@xs4all.nl].
   8  
   9  //! @section License
  10  //!
  11  //! This program is free software; you can redistribute it and/or modify it 
  12  //! under the terms of the GNU General Public License as published by the 
  13  //! Free Software Foundation; either version 3 of the License, or 
  14  //! (at your option) any later version.
  15  //!
  16  //! This program is distributed in the hope that it will be useful, but 
  17  //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  18  //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
  19  //! more details. You should have received a copy of the GNU General Public 
  20  //! License along with this program. If not, see [http://www.gnu.org/licenses/].
  21  
  22  //! @section Synopsis 
  23  //!
  24  //! Context-dependent Algol 68 tokeniser.
  25  
  26  #include "a68g.h"
  27  #include "a68g-parser.h"
  28  #include "a68g-prelude.h"
  29  #include "a68g-options.h"
  30  #include "a68g-environ.h"
  31  
  32  // Macros.
  33  
  34  #define SCAN_DIGITS(c)\
  35    while (IS_DIGIT (c)) {\
  36      (sym++)[0] = (c);\
  37      (c) = next_char (ref_l, ref_s, A68_TRUE);\
  38    }
  39  
  40  #define SCAN_EXPONENT_PART(c)\
  41    (sym++)[0] = EXPONENT_CHAR;\
  42    (c) = next_char (ref_l, ref_s, A68_TRUE);\
  43    if ((c) == '+' || (c) == '-') {\
  44      (sym++)[0] = (c);\
  45      (c) = next_char (ref_l, ref_s, A68_TRUE);\
  46    }\
  47    SCAN_ERROR (!IS_DIGIT (c), *start_l, *start_c, ERROR_EXPONENT_DIGIT);\
  48    SCAN_DIGITS (c)
  49  
  50  //! @brief Save scanner state, for character look-ahead.
  51  
  52  void save_state (LINE_T * ref_l, char *ref_s, char ch)
  53  {
  54    SCAN_STATE_L (&A68_JOB) = ref_l;
  55    SCAN_STATE_S (&A68_JOB) = ref_s;
  56    SCAN_STATE_C (&A68_JOB) = ch;
  57  }
  58  
  59  //! @brief Restore scanner state, for character look-ahead.
  60  
  61  void restore_state (LINE_T ** ref_l, char **ref_s, char *ch)
  62  {
  63    *ref_l = SCAN_STATE_L (&A68_JOB);
  64    *ref_s = SCAN_STATE_S (&A68_JOB);
  65    *ch = SCAN_STATE_C (&A68_JOB);
  66  }
  67  
  68  //! @brief New_source_line.
  69  
  70  LINE_T *new_source_line (void)
  71  {
  72    LINE_T *z = (LINE_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (LINE_T));
  73    MARKER (z)[0] = NULL_CHAR;
  74    STRING (z) = NO_TEXT;
  75    FILENAME (z) = NO_TEXT;
  76    DIAGNOSTICS (z) = NO_DIAGNOSTIC;
  77    NUMBER (z) = 0;
  78    PRINT_STATUS (z) = 0;
  79    LIST (z) = A68_TRUE;
  80    NEXT (z) = NO_LINE;
  81    PREVIOUS (z) = NO_LINE;
  82    return z;
  83  }
  84  
  85  //! @brief Append a source line to the internal source file.
  86  
  87  void append_source_line (char *str, LINE_T ** ref_l, int *line_num, char *filename)
  88  {
  89    LINE_T *z = new_source_line ();
  90  // Allow shell command in first line, f.i. "#!/usr/share/bin/a68g".
  91    if (*line_num == 1) {
  92      if (strlen (str) >= 2 && strncmp (str, "#!", 2) == 0) {
  93        ABEND (strstr (str, "run-script") != NO_TEXT, ERROR_SHELL_SCRIPT, __func__);
  94        (*line_num)++;
  95        return;
  96      }
  97    }
  98  // Link line into the chain.
  99    STRING (z) = new_fixed_string (str);
 100    FILENAME (z) = filename;
 101    NUMBER (z) = (*line_num)++;
 102    PRINT_STATUS (z) = NOT_PRINTED;
 103    LIST (z) = A68_TRUE;
 104    DIAGNOSTICS (z) = NO_DIAGNOSTIC;
 105    NEXT (z) = NO_LINE;
 106    PREVIOUS (z) = *ref_l;
 107    if (TOP_LINE (&A68_JOB) == NO_LINE) {
 108      TOP_LINE (&A68_JOB) = z;
 109    }
 110    if (*ref_l != NO_LINE) {
 111      NEXT (*ref_l) = z;
 112    }
 113    *ref_l = z;
 114  }
 115  
 116  // Scanner, tokenises the source code.
 117  
 118  //! @brief Whether ch is unworthy.
 119  
 120  void unworthy (LINE_T * u, char *v, char ch)
 121  {
 122    if (IS_PRINT (ch)) {
 123      ASSERT (a68_bufprt (A68 (edit_line), SNPRINTF_SIZE, "*%s", ERROR_UNWORTHY_CHARACTER) >= 0);
 124    } else {
 125      ASSERT (a68_bufprt (A68 (edit_line), SNPRINTF_SIZE, "*%s %s", ERROR_UNWORTHY_CHARACTER, ctrl_char (ch)) >= 0);
 126    }
 127    scan_error (u, v, A68 (edit_line));
 128  }
 129  
 130  //! @brief Concatenate lines that terminate in '\' with next line.
 131  
 132  void concatenate_lines (LINE_T * top)
 133  {
 134    LINE_T *q;
 135  // Work from bottom backwards.
 136    for (q = top; q != NO_LINE && NEXT (q) != NO_LINE; q = NEXT (q)) {
 137      ;
 138    }
 139    for (; q != NO_LINE; BACKWARD (q)) {
 140      char *z = STRING (q);
 141      int len = (int) strlen (z);
 142      if (len >= 2 && z[len - 2] == BACKSLASH_CHAR && z[len - 1] == NEWLINE_CHAR && NEXT (q) != NO_LINE && STRING (NEXT (q)) != NO_TEXT) {
 143        z[len - 2] = NULL_CHAR;
 144        len += (int) strlen (STRING (NEXT (q)));
 145        z = (char *) get_fixed_heap_space ((size_t) (len + 1));
 146        a68_bufcpy (z, STRING (q), len + 1);
 147        a68_bufcat (z, STRING (NEXT (q)), len + 1);
 148        STRING (NEXT (q))[0] = NULL_CHAR;
 149        STRING (q) = z;
 150      }
 151    }
 152  }
 153  
 154  //! @brief Whether u is bold tag v, independent of stropping regime.
 155  
 156  BOOL_T is_bold (char *u, char *v)
 157  {
 158    unt len = (unt) strlen (v);
 159    if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
 160      if (u[0] == '\'') {
 161        return (BOOL_T) (strncmp (++u, v, len) == 0 && u[len] == '\'');
 162      } else {
 163        return A68_FALSE;
 164      }
 165    } else {
 166      return (BOOL_T) (strncmp (u, v, len) == 0 && !IS_UPPER (u[len]));
 167    }
 168  }
 169  
 170  //! @brief Skip string.
 171  
 172  BOOL_T skip_string (LINE_T ** top, char **ch)
 173  {
 174    LINE_T *u = *top;
 175    char *v = *ch;
 176    v++;
 177    while (u != NO_LINE) {
 178      while (v[0] != NULL_CHAR) {
 179        if (v[0] == QUOTE_CHAR && v[1] != QUOTE_CHAR) {
 180          *top = u;
 181          *ch = &v[1];
 182          return A68_TRUE;
 183        } else if (v[0] == QUOTE_CHAR && v[1] == QUOTE_CHAR) {
 184          v += 2;
 185        } else {
 186          v++;
 187        }
 188      }
 189      FORWARD (u);
 190      if (u != NO_LINE) {
 191        v = &(STRING (u)[0]);
 192      } else {
 193        v = NO_TEXT;
 194      }
 195    }
 196    return A68_FALSE;
 197  }
 198  
 199  //! @brief Skip comment.
 200  
 201  BOOL_T skip_comment (LINE_T ** top, char **ch, int delim)
 202  {
 203    LINE_T *u = *top;
 204    char *v = *ch;
 205    v++;
 206    while (u != NO_LINE) {
 207      while (v[0] != NULL_CHAR) {
 208        if (is_bold (v, "COMMENT") && delim == BOLD_COMMENT_SYMBOL) {
 209          *top = u;
 210          *ch = &v[1];
 211          return A68_TRUE;
 212        } else if (is_bold (v, "CO") && delim == STYLE_I_COMMENT_SYMBOL) {
 213          *top = u;
 214          *ch = &v[1];
 215          return A68_TRUE;
 216        } else if (v[0] == '#' && delim == STYLE_II_COMMENT_SYMBOL) {
 217          *top = u;
 218          *ch = &v[1];
 219          return A68_TRUE;
 220        } else {
 221          v++;
 222        }
 223      }
 224      FORWARD (u);
 225      if (u != NO_LINE) {
 226        v = &(STRING (u)[0]);
 227      } else {
 228        v = NO_TEXT;
 229      }
 230    }
 231    return A68_FALSE;
 232  }
 233  
 234  //! @brief Skip rest of pragmat.
 235  
 236  BOOL_T skip_pragmat (LINE_T ** top, char **ch, int delim, BOOL_T whitespace)
 237  {
 238    LINE_T *u = *top;
 239    char *v = *ch;
 240    while (u != NO_LINE) {
 241      while (v[0] != NULL_CHAR) {
 242        if (is_bold (v, "PRAGMAT") && delim == BOLD_PRAGMAT_SYMBOL) {
 243          *top = u;
 244          *ch = &v[1];
 245          return A68_TRUE;
 246        } else if (is_bold (v, "PR") && delim == STYLE_I_PRAGMAT_SYMBOL) {
 247          *top = u;
 248          *ch = &v[1];
 249          return A68_TRUE;
 250        } else {
 251          if (whitespace && !IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
 252            scan_error (u, v, ERROR_PRAGMENT);
 253          } else if (IS_UPPER (v[0])) {
 254  // Skip a bold word as you may trigger on REPR, for instance ...
 255            while (IS_UPPER (v[0])) {
 256              v++;
 257            }
 258          } else {
 259            v++;
 260          }
 261        }
 262      }
 263      FORWARD (u);
 264      if (u != NO_LINE) {
 265        v = &(STRING (u)[0]);
 266      } else {
 267        v = NO_TEXT;
 268      }
 269    }
 270    return A68_FALSE;
 271  }
 272  
 273  //! @brief Return pointer to next token within pragmat.
 274  
 275  char *get_pragmat_item (LINE_T ** top, char **ch)
 276  {
 277    LINE_T *u = *top;
 278    char *v = *ch;
 279    while (u != NO_LINE) {
 280      while (v[0] != NULL_CHAR) {
 281        if (!IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
 282          *top = u;
 283          *ch = v;
 284          return v;
 285        } else {
 286          v++;
 287        }
 288      }
 289      FORWARD (u);
 290      if (u != NO_LINE) {
 291        v = &(STRING (u)[0]);
 292      } else {
 293        v = NO_TEXT;
 294      }
 295    }
 296    return NO_TEXT;
 297  }
 298  
 299  //! @brief Case insensitive strncmp for at most the number of chars in 'v'.
 300  
 301  int streq (char *u, char *v)
 302  {
 303    int diff;
 304    for (diff = 0; diff == 0 && u[0] != NULL_CHAR && v[0] != NULL_CHAR; u++, v++) {
 305      diff = ((int) TO_LOWER (u[0])) - ((int) TO_LOWER (v[0]));
 306    }
 307    return diff;
 308  }
 309  
 310  //! @brief Scan for next pragmat and yield first pragmat item.
 311  
 312  char *next_preprocessor_item (LINE_T ** top, char **ch, int *delim)
 313  {
 314    LINE_T *u = *top;
 315    char *v = *ch;
 316    *delim = 0;
 317    while (u != NO_LINE) {
 318      while (v[0] != NULL_CHAR) {
 319        LINE_T *start_l = u;
 320        char *start_c = v;
 321  // STRINGs must be skipped.
 322        if (v[0] == QUOTE_CHAR) {
 323          SCAN_ERROR (!skip_string (&u, &v), start_l, start_c, ERROR_UNTERMINATED_STRING);
 324        }
 325  // COMMENTS must be skipped.
 326        else if (is_bold (v, "COMMENT")) {
 327          SCAN_ERROR (!skip_comment (&u, &v, BOLD_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
 328        } else if (is_bold (v, "CO")) {
 329          SCAN_ERROR (!skip_comment (&u, &v, STYLE_I_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
 330        } else if (v[0] == '#') {
 331          SCAN_ERROR (!skip_comment (&u, &v, STYLE_II_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
 332        } else if (is_bold (v, "PRAGMAT") || is_bold (v, "PR")) {
 333  // We caught a PRAGMAT.
 334          char *item;
 335          if (is_bold (v, "PRAGMAT")) {
 336            *delim = BOLD_PRAGMAT_SYMBOL;
 337            v = &v[strlen ("PRAGMAT")];
 338          } else if (is_bold (v, "PR")) {
 339            *delim = STYLE_I_PRAGMAT_SYMBOL;
 340            v = &v[strlen ("PR")];
 341          }
 342          item = get_pragmat_item (&u, &v);
 343          SCAN_ERROR (item == NO_TEXT, start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 344  // Item "preprocessor" restarts preprocessing if it is off.
 345          if (A68_PARSER (no_preprocessing) && streq (item, "PREPROCESSOR") == 0) {
 346            A68_PARSER (no_preprocessing) = A68_FALSE;
 347            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 348          }
 349  // If preprocessing is switched off, we idle to closing bracket.
 350          else if (A68_PARSER (no_preprocessing)) {
 351            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 352          }
 353  // Item "nopreprocessor" stops preprocessing if it is on.
 354          if (streq (item, "NOPREPROCESSOR") == 0) {
 355            A68_PARSER (no_preprocessing) = A68_TRUE;
 356            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 357          }
 358  // Item "INCLUDE" includes a file.
 359          else if (streq (item, "INCLUDE") == 0) {
 360            *top = u;
 361            *ch = v;
 362            return item;
 363          }
 364  // Item "READ" includes a file.
 365          else if (streq (item, "READ") == 0) {
 366            *top = u;
 367            *ch = v;
 368            return item;
 369          }
 370  // Unrecognised item - probably options handled later by the tokeniser.
 371          else {
 372            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 373          }
 374        } else if (IS_UPPER (v[0])) {
 375  // Skip a bold word as you may trigger on REPR, for instance ...
 376          while (IS_UPPER (v[0])) {
 377            v++;
 378          }
 379        } else {
 380          v++;
 381        }
 382      }
 383      FORWARD (u);
 384      if (u != NO_LINE) {
 385        v = &(STRING (u)[0]);
 386      } else {
 387        v = NO_TEXT;
 388      }
 389    }
 390    *top = u;
 391    *ch = v;
 392    return NO_TEXT;
 393  }
 394  
 395  //! @brief Include files.
 396  
 397  void include_files (LINE_T * top)
 398  {
 399  // include_files
 400  // 
 401  // syntax: PR read "filename" PR
 402  //         PR include "filename" PR
 403  // 
 404  // The file gets inserted before the line containing the pragmat. In this way
 405  // correct line numbers are preserved which helps diagnostics. A file that has
 406  // been included will not be included a second time - it will be ignored. 
 407  // A rigorous fail-safe, but there is no mechanism to prevent recursive includes 
 408  // in A68 source code. User reports do not indicate sophisticated use of INCLUDE, 
 409  // so this is fine for now.
 410  // TODO - some day we might need `app', analogous to `cpp'.
 411    BOOL_T make_pass = A68_TRUE;
 412    while (make_pass) {
 413      LINE_T *s, *t, *u = top;
 414      char *v = &(STRING (u)[0]);
 415      make_pass = A68_FALSE;
 416      errno = 0;
 417      while (u != NO_LINE) {
 418        int pr_lim;
 419        char *item = next_preprocessor_item (&u, &v, &pr_lim);
 420        LINE_T *start_l = u;
 421        char *start_c = v;
 422  // Search for PR include "filename" PR.
 423        if (item != NO_TEXT && (streq (item, "INCLUDE") == 0 || streq (item, "READ") == 0)) {
 424          FILE_T fd;
 425          int n, linum, fsize, k, bytes_read, fnwid;
 426          char *fbuf, delim;
 427          BUFFER fnb;
 428          char *fn;
 429  // Skip to filename.
 430          while (IS_ALPHA (v[0])) {
 431            v++;
 432          }
 433          while (IS_SPACE (v[0])) {
 434            v++;
 435          }
 436  // Scan quoted filename.
 437          SCAN_ERROR ((v[0] != QUOTE_CHAR && v[0] != '\''), start_l, start_c, ERROR_INCORRECT_FILENAME);
 438          delim = (v++)[0];
 439          n = 0;
 440          fnb[0] = NULL_CHAR;
 441  // Scan Algol 68 string (note: "" denotes a ", while in C it concatenates).
 442          do {
 443            SCAN_ERROR (EOL (v[0]), start_l, start_c, ERROR_INCORRECT_FILENAME);
 444            SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
 445            if (v[0] == delim) {
 446              while (v[0] == delim && v[1] == delim) {
 447                SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
 448                fnb[n++] = delim;
 449                fnb[n] = NULL_CHAR;
 450                v += 2;
 451              }
 452            } else if (IS_PRINT (v[0])) {
 453              fnb[n++] = *(v++);
 454              fnb[n] = NULL_CHAR;
 455            } else {
 456              SCAN_ERROR (A68_TRUE, start_l, start_c, ERROR_INCORRECT_FILENAME);
 457            }
 458          } while (v[0] != delim);
 459  // Insist that the pragmat is closed properly.
 460          v = &v[1];
 461          SCAN_ERROR (!skip_pragmat (&u, &v, pr_lim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 462          SCAN_ERROR (n == 0, start_l, start_c, ERROR_INCORRECT_FILENAME);
 463  // Make the name relative to the position of the source file (C preprocessor standard).
 464          if (FILENAME (u) != NO_TEXT) {
 465            fn = a68_relpath (a68_dirname (FILENAME (u)), a68_dirname (fnb), a68_basename (fnb));
 466          } else {
 467            fn = a68_relpath (FILE_PATH (&A68_JOB), a68_dirname (fnb), a68_basename (fnb));
 468          }
 469  // Do not check errno, since errno may be undefined here after a successful call.
 470          if (fn != NO_TEXT) {
 471            a68_bufcpy (fnb, fn, BUFFER_SIZE);
 472          } else {
 473            SCAN_ERROR (A68_TRUE, NO_LINE, NO_TEXT, ERROR_SOURCE_FILE_INCLUDE_OPEN);
 474          }
 475          fnwid = (int) strlen (fnb) + 1;
 476          fn = (char *) get_fixed_heap_space ((size_t) fnwid);
 477          a68_bufcpy (fn, fnb, fnwid);
 478  // Ignore the file when included more than once.
 479          for (t = top; t != NO_LINE; t = NEXT (t)) {
 480            if (strcmp (FILENAME (t), fn) == 0) {
 481              goto search_next_pragmat;
 482            }
 483          }
 484  // Access the file.
 485          errno = 0;
 486          fd = open (fn, O_RDONLY | O_BINARY);
 487          SCAN_ERROR (fd == -1, start_l, start_c, ERROR_SOURCE_FILE_INCLUDE_OPEN);
 488          errno = 0;
 489          fsize = (int) lseek (fd, 0, SEEK_END);
 490          ASSERT (fsize >= 0);
 491          SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
 492          fbuf = (char *) get_temp_heap_space ((unt) (8 + fsize));
 493          errno = 0;
 494          ASSERT (lseek (fd, 0, SEEK_SET) >= 0);
 495          SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
 496          errno = 0;
 497          bytes_read = (int) io_read (fd, fbuf, (size_t) fsize);
 498          SCAN_ERROR (errno != 0 || bytes_read != fsize, start_l, start_c, ERROR_FILE_READ);
 499  // Buffer still usable?.
 500          if (fsize > A68_PARSER (max_scan_buf_length)) {
 501            A68_PARSER (max_scan_buf_length) = fsize;
 502            A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (max_scan_buf_length)));
 503          }
 504  // Link all lines into the list.
 505          linum = 1;
 506          s = u;
 507          t = PREVIOUS (u);
 508          k = 0;
 509          if (fsize == 0) {
 510  // If file is empty, insert single empty line.
 511            A68_PARSER (scan_buf)[0] = NEWLINE_CHAR;
 512            A68_PARSER (scan_buf)[1] = NULL_CHAR;
 513            append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
 514          } else
 515            while (k < fsize) {
 516              n = 0;
 517              A68_PARSER (scan_buf)[0] = NULL_CHAR;
 518              while (k < fsize && fbuf[k] != NEWLINE_CHAR) {
 519                SCAN_ERROR ((IS_CNTRL (fbuf[k]) && !IS_SPACE (fbuf[k])) || fbuf[k] == STOP_CHAR, start_l, start_c, ERROR_FILE_INCLUDE_CTRL);
 520                A68_PARSER (scan_buf)[n++] = fbuf[k++];
 521                A68_PARSER (scan_buf)[n] = NULL_CHAR;
 522              }
 523              A68_PARSER (scan_buf)[n++] = NEWLINE_CHAR;
 524              A68_PARSER (scan_buf)[n] = NULL_CHAR;
 525              if (k < fsize) {
 526                k++;
 527              }
 528              append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
 529            }
 530  // Conclude and go find another include directive, if any.
 531          NEXT (t) = s;
 532          PREVIOUS (s) = t;
 533          concatenate_lines (top);
 534          ASSERT (close (fd) == 0);
 535          make_pass = A68_TRUE;
 536        }
 537      search_next_pragmat:_SKIP_;
 538      }
 539    }
 540  }
 541  
 542  //! @brief Size of source file.
 543  
 544  int get_source_size (void)
 545  {
 546    FILE_T f = FILE_SOURCE_FD (&A68_JOB);
 547  // This is why WIN32 must open as "read binary".
 548    return (int) lseek (f, 0, SEEK_END);
 549  }
 550  
 551  //! @brief Append environment source lines.
 552  
 553  void append_environ (char *str[], LINE_T ** ref_l, int *line_num, char *name)
 554  {
 555    for (int k = 0; str[k] != NO_TEXT; k++) {
 556      int zero_line_num = 0;
 557      (*line_num)++;
 558      append_source_line (str[k], ref_l, &zero_line_num, name);
 559    }
 560  }
 561  
 562  //! @brief Read script file and make internal copy.
 563  
 564  BOOL_T read_script_file (void)
 565  {
 566    LINE_T *ref_l = NO_LINE;
 567    int k, n, num;
 568    unt len;
 569    BOOL_T file_end = A68_FALSE;
 570    BUFFER filename, linenum;
 571    char ch, *fn, *line;
 572    char *buffer = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (source_file_size)));
 573    FILE_T source = FILE_SOURCE_FD (&A68_JOB);
 574    ABEND (source == -1, ERROR_ACTION, __func__);
 575    buffer[0] = NULL_CHAR;
 576    n = 0;
 577    len = (unt) (8 + A68_PARSER (source_file_size));
 578    buffer = (char *) get_temp_heap_space (len);
 579    ASSERT (lseek (source, 0, SEEK_SET) >= 0);
 580    while (!file_end) {
 581  // Read the original file name.
 582      filename[0] = NULL_CHAR;
 583      k = 0;
 584      if (io_read (source, &ch, 1) == 0) {
 585        file_end = A68_TRUE;
 586        continue;
 587      }
 588      while (ch != NEWLINE_CHAR) {
 589        filename[k++] = ch;
 590        ASSERT (io_read (source, &ch, 1) == 1);
 591      }
 592      filename[k] = NULL_CHAR;
 593      fn = TEXT (add_token (&A68 (top_token), filename));
 594  // Read the original file number.
 595      linenum[0] = NULL_CHAR;
 596      k = 0;
 597      ASSERT (io_read (source, &ch, 1) == 1);
 598      while (ch != NEWLINE_CHAR) {
 599        linenum[k++] = ch;
 600        ASSERT (io_read (source, &ch, 1) == 1);
 601      }
 602      linenum[k] = NULL_CHAR;
 603      num = (int) strtol (linenum, NO_VAR, 10);
 604      ABEND (errno == ERANGE, ERROR_INTERNAL_CONSISTENCY, __func__);
 605  // COPY original line into buffer.
 606      ASSERT (io_read (source, &ch, 1) == 1);
 607      line = &buffer[n];
 608      while (ch != NEWLINE_CHAR) {
 609        buffer[n++] = ch;
 610        ASSERT (io_read (source, &ch, 1) == 1);
 611        ABEND ((unt) n >= len, ERROR_ACTION, __func__);
 612      }
 613      buffer[n++] = NEWLINE_CHAR;
 614      buffer[n] = NULL_CHAR;
 615      append_source_line (line, &ref_l, &num, fn);
 616    }
 617    return A68_TRUE;
 618  }
 619  
 620  //! @brief Read source file and make internal copy.
 621  
 622  BOOL_T read_source_file (void)
 623  {
 624    LINE_T *ref_l = NO_LINE;
 625    int line_num = 0, k, bytes_read;
 626    ssize_t l;
 627    FILE_T f = FILE_SOURCE_FD (&A68_JOB);
 628    char **prelude_start, **postlude, *buffer;
 629  // Prelude.
 630    if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
 631      prelude_start = bold_prelude_start;
 632      postlude = bold_postlude;
 633    } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
 634      prelude_start = quote_prelude_start;
 635      postlude = quote_postlude;
 636    } else {
 637      prelude_start = postlude = NO_VAR;
 638    }
 639    append_environ (prelude_start, &ref_l, &line_num, "prelude");
 640  // Read the file into a single buffer, so we save on system calls.
 641    line_num = 1;
 642    errno = 0;
 643    buffer = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (source_file_size)));
 644    ABEND (errno != 0 || buffer == NO_TEXT, ERROR_ALLOCATION, __func__);
 645    ASSERT (lseek (f, 0, SEEK_SET) >= 0);
 646    ABEND (errno != 0, ERROR_ACTION, __func__);
 647    errno = 0;
 648    bytes_read = (int) io_read (f, buffer, (size_t) A68_PARSER (source_file_size));
 649    ABEND (errno != 0 || bytes_read != A68_PARSER (source_file_size), ERROR_ACTION, __func__);
 650  // Link all lines into the list.
 651    k = 0;
 652    while (k < A68_PARSER (source_file_size)) {
 653      l = 0;
 654      A68_PARSER (scan_buf)[0] = NULL_CHAR;
 655      while (k < A68_PARSER (source_file_size) && buffer[k] != NEWLINE_CHAR) {
 656        if (k < A68_PARSER (source_file_size) - 1 && buffer[k] == CR_CHAR && buffer[k + 1] == NEWLINE_CHAR) {
 657          k++;
 658        } else {
 659          A68_PARSER (scan_buf)[l++] = buffer[k++];
 660          A68_PARSER (scan_buf)[l] = NULL_CHAR;
 661        }
 662      }
 663      A68_PARSER (scan_buf)[l++] = NEWLINE_CHAR;
 664      A68_PARSER (scan_buf)[l] = NULL_CHAR;
 665      if (k < A68_PARSER (source_file_size)) {
 666        k++;
 667      }
 668      append_source_line (A68_PARSER (scan_buf), &ref_l, &line_num, FILE_SOURCE_NAME (&A68_JOB));
 669      SCAN_ERROR (l != (ssize_t) strlen (A68_PARSER (scan_buf)), NO_LINE, NO_TEXT, ERROR_FILE_SOURCE_CTRL);
 670    }
 671  // Postlude.
 672    append_environ (postlude, &ref_l, &line_num, "postlude");
 673  // Concatenate lines.
 674    concatenate_lines (TOP_LINE (&A68_JOB));
 675  // Include files.
 676    include_files (TOP_LINE (&A68_JOB));
 677    return A68_TRUE;
 678  }
 679  
 680  //! @brief Next_char get next character from internal copy of source file.
 681  
 682  char next_char (LINE_T ** ref_l, char **ref_s, BOOL_T allow_typo)
 683  {
 684    char ch;
 685  #if defined (NO_TYPO)
 686    allow_typo = A68_FALSE;
 687  #endif
 688    LOW_STACK_ALERT (NO_NODE);
 689  // Source empty?.
 690    if (*ref_l == NO_LINE) {
 691      return STOP_CHAR;
 692    } else {
 693      LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68_JOB) & SOURCE_MASK ? A68_TRUE : A68_FALSE);
 694  // Take new line?.
 695      if ((*ref_s)[0] == NEWLINE_CHAR || (*ref_s)[0] == NULL_CHAR) {
 696        *ref_l = NEXT (*ref_l);
 697        if (*ref_l == NO_LINE) {
 698          return STOP_CHAR;
 699        }
 700        *ref_s = STRING (*ref_l);
 701      } else {
 702        (*ref_s)++;
 703      }
 704  // Deliver next char.
 705      ch = (*ref_s)[0];
 706      if (allow_typo && (IS_SPACE (ch) || ch == FORMFEED_CHAR)) {
 707        ch = next_char (ref_l, ref_s, allow_typo);
 708      }
 709      return ch;
 710    }
 711  }
 712  
 713  //! @brief Find first character that can start a valid symbol.
 714  
 715  void get_good_char (char *ref_c, LINE_T ** ref_l, char **ref_s)
 716  {
 717    while (*ref_c != STOP_CHAR && (IS_SPACE (*ref_c) || (*ref_c == NULL_CHAR))) {
 718      if (*ref_l != NO_LINE) {
 719        LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68_JOB) & SOURCE_MASK ? A68_TRUE : A68_FALSE);
 720      }
 721      *ref_c = next_char (ref_l, ref_s, A68_FALSE);
 722    }
 723  }
 724  
 725  //! @brief Handle a pragment (pragmat or comment).
 726  
 727  char *pragment (int type, LINE_T ** ref_l, char **ref_c)
 728  {
 729  #define INIT_BUFFER {chars_in_buf = 0; A68_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
 730  #define ADD_ONE_CHAR(ch) {A68_PARSER (scan_buf)[chars_in_buf ++] = ch; A68_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
 731    char c = **ref_c, *term_s = NO_TEXT, *start_c = *ref_c;
 732    char *z = NO_TEXT;
 733    LINE_T *start_l = *ref_l;
 734    int term_s_length, chars_in_buf;
 735    BOOL_T stop, pragmat = A68_FALSE;
 736  // Set terminator.
 737    if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
 738      if (type == STYLE_I_COMMENT_SYMBOL) {
 739        term_s = "CO";
 740      } else if (type == STYLE_II_COMMENT_SYMBOL) {
 741        term_s = "#";
 742      } else if (type == BOLD_COMMENT_SYMBOL) {
 743        term_s = "COMMENT";
 744      } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
 745        term_s = "PR";
 746        pragmat = A68_TRUE;
 747      } else if (type == BOLD_PRAGMAT_SYMBOL) {
 748        term_s = "PRAGMAT";
 749        pragmat = A68_TRUE;
 750      }
 751    } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
 752      if (type == STYLE_I_COMMENT_SYMBOL) {
 753        term_s = "'CO'";
 754      } else if (type == STYLE_II_COMMENT_SYMBOL) {
 755        term_s = "#";
 756      } else if (type == BOLD_COMMENT_SYMBOL) {
 757        term_s = "'COMMENT'";
 758      } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
 759        term_s = "'PR'";
 760        pragmat = A68_TRUE;
 761      } else if (type == BOLD_PRAGMAT_SYMBOL) {
 762        term_s = "'PRAGMAT'";
 763        pragmat = A68_TRUE;
 764      }
 765    }
 766    term_s_length = (int) strlen (term_s);
 767  // Scan for terminator.
 768    INIT_BUFFER;
 769    stop = A68_FALSE;
 770    while (stop == A68_FALSE) {
 771      SCAN_ERROR (c == STOP_CHAR, start_l, start_c, ERROR_UNTERMINATED_PRAGMENT);
 772  // A ".." or '..' delimited string in a PRAGMAT.
 773      if (pragmat && (c == QUOTE_CHAR || (c == '\'' && OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING))) {
 774        char delim = c;
 775        BOOL_T eos = A68_FALSE;
 776        ADD_ONE_CHAR (c);
 777        c = next_char (ref_l, ref_c, A68_FALSE);
 778        while (!eos) {
 779          SCAN_ERROR (EOL (c), start_l, start_c, ERROR_LONG_STRING);
 780          if (c == delim) {
 781            ADD_ONE_CHAR (delim);
 782            save_state (*ref_l, *ref_c, c);
 783            c = next_char (ref_l, ref_c, A68_FALSE);
 784            if (c == delim) {
 785              c = next_char (ref_l, ref_c, A68_FALSE);
 786            } else {
 787              restore_state (ref_l, ref_c, &c);
 788              eos = A68_TRUE;
 789            }
 790          } else if (IS_PRINT (c)) {
 791            ADD_ONE_CHAR (c);
 792            c = next_char (ref_l, ref_c, A68_FALSE);
 793          } else {
 794            unworthy (start_l, start_c, c);
 795          }
 796        }
 797      } else if (EOL (c)) {
 798        ADD_ONE_CHAR (NEWLINE_CHAR);
 799      } else if (IS_PRINT (c) || IS_SPACE (c)) {
 800        ADD_ONE_CHAR (c);
 801      }
 802      if (chars_in_buf >= term_s_length) {
 803  // Check whether we encountered the terminator.
 804        stop = (BOOL_T) (strcmp (term_s, &(A68_PARSER (scan_buf)[chars_in_buf - term_s_length])) == 0);
 805      }
 806      c = next_char (ref_l, ref_c, A68_FALSE);
 807    }
 808    A68_PARSER (scan_buf)[chars_in_buf - term_s_length] = NULL_CHAR;
 809    z = new_string (term_s, A68_PARSER (scan_buf), term_s, NO_TEXT);
 810    if (type == STYLE_I_PRAGMAT_SYMBOL || type == BOLD_PRAGMAT_SYMBOL) {
 811      isolate_options (A68_PARSER (scan_buf), start_l);
 812    }
 813    return z;
 814  #undef ADD_ONE_CHAR
 815  #undef INIT_BUFFER
 816  }
 817  
 818  //! @brief Attribute for format item.
 819  
 820  int get_format_item (char ch)
 821  {
 822    switch (TO_LOWER (ch)) {
 823    case 'a': {
 824        return FORMAT_ITEM_A;
 825      }
 826    case 'b': {
 827        return FORMAT_ITEM_B;
 828      }
 829    case 'c': {
 830        return FORMAT_ITEM_C;
 831      }
 832    case 'd': {
 833        return FORMAT_ITEM_D;
 834      }
 835    case 'e': {
 836        return FORMAT_ITEM_E;
 837      }
 838    case 'f': {
 839        return FORMAT_ITEM_F;
 840      }
 841    case 'g': {
 842        return FORMAT_ITEM_G;
 843      }
 844    case 'h': {
 845        return FORMAT_ITEM_H;
 846      }
 847    case 'i': {
 848        return FORMAT_ITEM_I;
 849      }
 850    case 'j': {
 851        return FORMAT_ITEM_J;
 852      }
 853    case 'k': {
 854        return FORMAT_ITEM_K;
 855      }
 856    case 'l':
 857    case '/': {
 858        return FORMAT_ITEM_L;
 859      }
 860    case 'm': {
 861        return FORMAT_ITEM_M;
 862      }
 863    case 'n': {
 864        return FORMAT_ITEM_N;
 865      }
 866    case 'o': {
 867        return FORMAT_ITEM_O;
 868      }
 869    case 'p': {
 870        return FORMAT_ITEM_P;
 871      }
 872    case 'q': {
 873        return FORMAT_ITEM_Q;
 874      }
 875    case 'r': {
 876        return FORMAT_ITEM_R;
 877      }
 878    case 's': {
 879        return FORMAT_ITEM_S;
 880      }
 881    case 't': {
 882        return FORMAT_ITEM_T;
 883      }
 884    case 'u': {
 885        return FORMAT_ITEM_U;
 886      }
 887    case 'v': {
 888        return FORMAT_ITEM_V;
 889      }
 890    case 'w': {
 891        return FORMAT_ITEM_W;
 892      }
 893    case 'x': {
 894        return FORMAT_ITEM_X;
 895      }
 896    case 'y': {
 897        return FORMAT_ITEM_Y;
 898      }
 899    case 'z': {
 900        return FORMAT_ITEM_Z;
 901      }
 902    case '+': {
 903        return FORMAT_ITEM_PLUS;
 904      }
 905    case '-': {
 906        return FORMAT_ITEM_MINUS;
 907      }
 908    case POINT_CHAR: {
 909        return FORMAT_ITEM_POINT;
 910      }
 911    case '%': {
 912        return FORMAT_ITEM_ESCAPE;
 913      }
 914    default: {
 915        return 0;
 916      }
 917    }
 918  }
 919  
 920  //! @brief Whether input shows exponent character.
 921  
 922  BOOL_T is_exp_char (LINE_T ** ref_l, char **ref_s, char *ch)
 923  {
 924    BOOL_T ret = A68_FALSE;
 925    char exp_syms[3];
 926    if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
 927      exp_syms[0] = EXPONENT_CHAR;
 928      exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
 929      exp_syms[2] = NULL_CHAR;
 930    } else {
 931      exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
 932      exp_syms[1] = BACKSLASH_CHAR;
 933      exp_syms[2] = NULL_CHAR;
 934    }
 935    save_state (*ref_l, *ref_s, *ch);
 936    if (strchr (exp_syms, *ch) != NO_TEXT) {
 937      *ch = next_char (ref_l, ref_s, A68_TRUE);
 938      ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
 939    }
 940    restore_state (ref_l, ref_s, ch);
 941    return ret;
 942  }
 943  
 944  //! @brief Whether input shows radix character.
 945  
 946  BOOL_T is_radix_char (LINE_T ** ref_l, char **ref_s, char *ch)
 947  {
 948    BOOL_T ret = A68_FALSE;
 949    save_state (*ref_l, *ref_s, *ch);
 950    if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
 951      if (*ch == TO_UPPER (RADIX_CHAR)) {
 952        *ch = next_char (ref_l, ref_s, A68_TRUE);
 953        ret = (BOOL_T) (strchr ("0123456789ABCDEF", *ch) != NO_TEXT);
 954      }
 955    } else {
 956      if (*ch == RADIX_CHAR) {
 957        *ch = next_char (ref_l, ref_s, A68_TRUE);
 958        ret = (BOOL_T) (strchr ("0123456789abcdef", *ch) != NO_TEXT);
 959      }
 960    }
 961    restore_state (ref_l, ref_s, ch);
 962    return ret;
 963  }
 964  
 965  //! @brief Whether input shows decimal point.
 966  
 967  BOOL_T is_decimal_point (LINE_T ** ref_l, char **ref_s, char *ch)
 968  {
 969    BOOL_T ret = A68_FALSE;
 970    save_state (*ref_l, *ref_s, *ch);
 971    if (*ch == POINT_CHAR) {
 972      char exp_syms[3];
 973      if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
 974        exp_syms[0] = EXPONENT_CHAR;
 975        exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
 976        exp_syms[2] = NULL_CHAR;
 977      } else {
 978        exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
 979        exp_syms[1] = BACKSLASH_CHAR;
 980        exp_syms[2] = NULL_CHAR;
 981      }
 982      *ch = next_char (ref_l, ref_s, A68_TRUE);
 983      if (strchr (exp_syms, *ch) != NO_TEXT) {
 984        *ch = next_char (ref_l, ref_s, A68_TRUE);
 985        ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
 986      } else {
 987        ret = (BOOL_T) (strchr ("0123456789", *ch) != NO_TEXT);
 988      }
 989    }
 990    restore_state (ref_l, ref_s, ch);
 991    return ret;
 992  }
 993  
 994  //! @brief Get next token from internal copy of source file..
 995  
 996  void get_next_token (BOOL_T in_format, LINE_T ** ref_l, char **ref_s, LINE_T ** start_l, char **start_c, int *att)
 997  {
 998    char c = **ref_s, *sym = A68_PARSER (scan_buf);
 999    sym[0] = NULL_CHAR;
1000    get_good_char (&c, ref_l, ref_s);
1001    *start_l = *ref_l;
1002    *start_c = *ref_s;
1003    if (c == STOP_CHAR) {
1004  // We are at EOF.
1005      (sym++)[0] = STOP_CHAR;
1006      sym[0] = NULL_CHAR;
1007      return;
1008    }
1009  // In a format.
1010    if (in_format) {
1011      char *format_items;
1012      if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1013        format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1014      } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1015        format_items = "/%\\+-.ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1016      } else {
1017        format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1018      }
1019      if (strchr (format_items, c) != NO_TEXT) {
1020  // General format items.
1021        (sym++)[0] = c;
1022        sym[0] = NULL_CHAR;
1023        *att = get_format_item (c);
1024        (void) next_char (ref_l, ref_s, A68_FALSE);
1025        return;
1026      }
1027      if (IS_DIGIT (c)) {
1028  // INT denotation for static replicator.
1029        SCAN_DIGITS (c);
1030        sym[0] = NULL_CHAR;
1031        *att = STATIC_REPLICATOR;
1032        return;
1033      }
1034    }
1035  // Not in a format.
1036    if (IS_UPPER (c)) {
1037      if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1038  // Upper case word - bold tag.
1039        while (IS_UPPER (c) || c == '_') {
1040          (sym++)[0] = c;
1041          c = next_char (ref_l, ref_s, A68_FALSE);
1042        }
1043        sym[0] = NULL_CHAR;
1044        *att = BOLD_TAG;
1045      } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1046        while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1047          (sym++)[0] = c;
1048          c = next_char (ref_l, ref_s, A68_TRUE);
1049        }
1050        sym[0] = NULL_CHAR;
1051        *att = IDENTIFIER;
1052      }
1053    } else if (c == '\'') {
1054  // Quote, uppercase word, quote - bold tag.
1055      int k = 0;
1056      c = next_char (ref_l, ref_s, A68_FALSE);
1057      while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1058        (sym++)[0] = c;
1059        k++;
1060        c = next_char (ref_l, ref_s, A68_TRUE);
1061      }
1062      SCAN_ERROR (k == 0, *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1063      sym[0] = NULL_CHAR;
1064      *att = BOLD_TAG;
1065  // Skip terminating quote, or complain if it is not there.
1066      SCAN_ERROR (c != '\'', *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1067      c = next_char (ref_l, ref_s, A68_FALSE);
1068    } else if (IS_LOWER (c)) {
1069  // Lower case word - identifier.
1070      while (IS_LOWER (c) || IS_DIGIT (c) || c == '_') {
1071        (sym++)[0] = c;
1072        c = next_char (ref_l, ref_s, A68_TRUE);
1073      }
1074      sym[0] = NULL_CHAR;
1075      *att = IDENTIFIER;
1076    } else if (c == POINT_CHAR) {
1077  // Begins with a point symbol - point, dotdot, L REAL denotation.
1078      if (is_decimal_point (ref_l, ref_s, &c)) {
1079        (sym++)[0] = '0';
1080        (sym++)[0] = POINT_CHAR;
1081        c = next_char (ref_l, ref_s, A68_TRUE);
1082        SCAN_DIGITS (c);
1083        if (is_exp_char (ref_l, ref_s, &c)) {
1084          SCAN_EXPONENT_PART (c);
1085        }
1086        sym[0] = NULL_CHAR;
1087        *att = REAL_DENOTATION;
1088      } else {
1089        c = next_char (ref_l, ref_s, A68_TRUE);
1090        if (c == POINT_CHAR) {
1091          (sym++)[0] = POINT_CHAR;
1092          (sym++)[0] = POINT_CHAR;
1093          sym[0] = NULL_CHAR;
1094          *att = DOTDOT_SYMBOL;
1095          c = next_char (ref_l, ref_s, A68_FALSE);
1096        } else {
1097          (sym++)[0] = POINT_CHAR;
1098          sym[0] = NULL_CHAR;
1099          *att = POINT_SYMBOL;
1100        }
1101      }
1102    } else if (IS_DIGIT (c)) {
1103  // Something that begins with a digit - L INT denotation, L REAL denotation.
1104      SCAN_DIGITS (c);
1105      if (is_decimal_point (ref_l, ref_s, &c)) {
1106        c = next_char (ref_l, ref_s, A68_TRUE);
1107        if (is_exp_char (ref_l, ref_s, &c)) {
1108          (sym++)[0] = POINT_CHAR;
1109          (sym++)[0] = '0';
1110          SCAN_EXPONENT_PART (c);
1111          *att = REAL_DENOTATION;
1112        } else {
1113          (sym++)[0] = POINT_CHAR;
1114          SCAN_DIGITS (c);
1115          if (is_exp_char (ref_l, ref_s, &c)) {
1116            SCAN_EXPONENT_PART (c);
1117          }
1118          *att = REAL_DENOTATION;
1119        }
1120      } else if (is_exp_char (ref_l, ref_s, &c)) {
1121        SCAN_EXPONENT_PART (c);
1122        *att = REAL_DENOTATION;
1123      } else if (is_radix_char (ref_l, ref_s, &c)) {
1124        (sym++)[0] = c;
1125        c = next_char (ref_l, ref_s, A68_TRUE);
1126        if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1127          while (IS_DIGIT (c) || strchr ("abcdef", c) != NO_TEXT) {
1128            (sym++)[0] = c;
1129            c = next_char (ref_l, ref_s, A68_TRUE);
1130          }
1131        } else {
1132          while (IS_DIGIT (c) || strchr ("ABCDEF", c) != NO_TEXT) {
1133            (sym++)[0] = c;
1134            c = next_char (ref_l, ref_s, A68_TRUE);
1135          }
1136        }
1137        *att = BITS_DENOTATION;
1138      } else {
1139        *att = INT_DENOTATION;
1140      }
1141      sym[0] = NULL_CHAR;
1142    } else if (c == QUOTE_CHAR) {
1143  // STRING denotation.
1144      BOOL_T stop = A68_FALSE;
1145      while (!stop) {
1146        c = next_char (ref_l, ref_s, A68_FALSE);
1147        while (c != QUOTE_CHAR && c != STOP_CHAR) {
1148          SCAN_ERROR (EOL (c), *start_l, *start_c, ERROR_LONG_STRING);
1149          (sym++)[0] = c;
1150          c = next_char (ref_l, ref_s, A68_FALSE);
1151        }
1152        SCAN_ERROR (*ref_l == NO_LINE, *start_l, *start_c, ERROR_UNTERMINATED_STRING);
1153        c = next_char (ref_l, ref_s, A68_FALSE);
1154        if (c == QUOTE_CHAR) {
1155          (sym++)[0] = QUOTE_CHAR;
1156        } else {
1157          stop = A68_TRUE;
1158        }
1159      }
1160      sym[0] = NULL_CHAR;
1161      *att = (in_format ? LITERAL : ROW_CHAR_DENOTATION);
1162    } else if (strchr ("#$()[]{},;@", c) != NO_TEXT) {
1163  // Single character symbols.
1164      (sym++)[0] = c;
1165      (void) next_char (ref_l, ref_s, A68_FALSE);
1166      sym[0] = NULL_CHAR;
1167      *att = 0;
1168    } else if (c == '|') {
1169  // Bar.
1170      (sym++)[0] = c;
1171      c = next_char (ref_l, ref_s, A68_FALSE);
1172      if (c == ':') {
1173        (sym++)[0] = c;
1174        (void) next_char (ref_l, ref_s, A68_FALSE);
1175      }
1176      sym[0] = NULL_CHAR;
1177      *att = 0;
1178    } else if (c == '!' && OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1179  // Bar, will be replaced with modern variant.
1180  // For this reason ! is not a MONAD with quote-stropping.
1181      (sym++)[0] = '|';
1182      c = next_char (ref_l, ref_s, A68_FALSE);
1183      if (c == ':') {
1184        (sym++)[0] = c;
1185        (void) next_char (ref_l, ref_s, A68_FALSE);
1186      }
1187      sym[0] = NULL_CHAR;
1188      *att = 0;
1189    } else if (c == ':') {
1190  // Colon, semicolon, IS, ISNT.
1191      (sym++)[0] = c;
1192      c = next_char (ref_l, ref_s, A68_FALSE);
1193      if (c == '=') {
1194        (sym++)[0] = c;
1195        if ((c = next_char (ref_l, ref_s, A68_FALSE)) == ':') {
1196          (sym++)[0] = c;
1197          c = next_char (ref_l, ref_s, A68_FALSE);
1198        }
1199      } else if (c == '/') {
1200        (sym++)[0] = c;
1201        if ((c = next_char (ref_l, ref_s, A68_FALSE)) == '=') {
1202          (sym++)[0] = c;
1203          if ((c = next_char (ref_l, ref_s, A68_FALSE)) == ':') {
1204            (sym++)[0] = c;
1205            c = next_char (ref_l, ref_s, A68_FALSE);
1206          }
1207        }
1208      } else if (c == ':') {
1209        (sym++)[0] = c;
1210        if ((c = next_char (ref_l, ref_s, A68_FALSE)) == '=') {
1211          (sym++)[0] = c;
1212        }
1213      }
1214      sym[0] = NULL_CHAR;
1215      *att = 0;
1216    } else if (c == '=') {
1217  // Operator starting with "=".
1218      char *scanned = sym;
1219      (sym++)[0] = c;
1220      c = next_char (ref_l, ref_s, A68_FALSE);
1221      if (strchr (NOMADS, c) != NO_TEXT) {
1222        (sym++)[0] = c;
1223        c = next_char (ref_l, ref_s, A68_FALSE);
1224      }
1225      if (c == '=') {
1226        (sym++)[0] = c;
1227        if (next_char (ref_l, ref_s, A68_FALSE) == ':') {
1228          (sym++)[0] = ':';
1229          c = next_char (ref_l, ref_s, A68_FALSE);
1230          if (strlen (sym) < 4 && c == '=') {
1231            (sym++)[0] = '=';
1232            (void) next_char (ref_l, ref_s, A68_FALSE);
1233          }
1234        }
1235      } else if (c == ':') {
1236        (sym++)[0] = c;
1237        sym[0] = NULL_CHAR;
1238        if (next_char (ref_l, ref_s, A68_FALSE) == '=') {
1239          (sym++)[0] = '=';
1240          (void) next_char (ref_l, ref_s, A68_FALSE);
1241        } else {
1242          SCAN_ERROR (!(strcmp (scanned, "=:") == 0 || strcmp (scanned, "==:") == 0), *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1243        }
1244      }
1245      sym[0] = NULL_CHAR;
1246      if (strcmp (scanned, "=") == 0) {
1247        *att = EQUALS_SYMBOL;
1248      } else {
1249        *att = OPERATOR;
1250      }
1251    } else if (strchr (MONADS, c) != NO_TEXT || strchr (NOMADS, c) != NO_TEXT) {
1252  // Operator.
1253      char *scanned = sym;
1254      (sym++)[0] = c;
1255      c = next_char (ref_l, ref_s, A68_FALSE);
1256      if (strchr (NOMADS, c) != NO_TEXT) {
1257        (sym++)[0] = c;
1258        c = next_char (ref_l, ref_s, A68_FALSE);
1259      }
1260      if (c == '=') {
1261        (sym++)[0] = c;
1262        if (next_char (ref_l, ref_s, A68_FALSE) == ':') {
1263          (sym++)[0] = ':';
1264          c = next_char (ref_l, ref_s, A68_FALSE);
1265          if (strlen (scanned) < 4 && c == '=') {
1266            (sym++)[0] = '=';
1267            (void) next_char (ref_l, ref_s, A68_FALSE);
1268          }
1269        }
1270      } else if (c == ':') {
1271        (sym++)[0] = c;
1272        sym[0] = NULL_CHAR;
1273        if (next_char (ref_l, ref_s, A68_FALSE) == '=') {
1274          (sym++)[0] = '=';
1275          sym[0] = NULL_CHAR;
1276          (void) next_char (ref_l, ref_s, A68_FALSE);
1277        } else {
1278          SCAN_ERROR (strcmp (&(scanned[1]), "=:") != 0, *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1279        }
1280      }
1281      sym[0] = NULL_CHAR;
1282      *att = OPERATOR;
1283    } else {
1284  // Afuuus ... strange characters!.
1285      unworthy (*start_l, *start_c, (int) c);
1286    }
1287  }
1288  
1289  //! @brief Whether att opens an embedded clause.
1290  
1291  BOOL_T open_nested_clause (int att)
1292  {
1293    switch (att) {
1294    case OPEN_SYMBOL:
1295    case BEGIN_SYMBOL:
1296    case PAR_SYMBOL:
1297    case IF_SYMBOL:
1298    case CASE_SYMBOL:
1299    case FOR_SYMBOL:
1300    case FROM_SYMBOL:
1301    case BY_SYMBOL:
1302    case TO_SYMBOL:
1303    case DOWNTO_SYMBOL:
1304    case WHILE_SYMBOL:
1305    case DO_SYMBOL:
1306    case SUB_SYMBOL:
1307    case ACCO_SYMBOL: {
1308        return A68_TRUE;
1309      }
1310    }
1311    return A68_FALSE;
1312  }
1313  
1314  //! @brief Whether att closes an embedded clause.
1315  
1316  BOOL_T close_nested_clause (int att)
1317  {
1318    switch (att) {
1319    case CLOSE_SYMBOL:
1320    case END_SYMBOL:
1321    case FI_SYMBOL:
1322    case ESAC_SYMBOL:
1323    case OD_SYMBOL:
1324    case BUS_SYMBOL:
1325    case OCCA_SYMBOL: {
1326        return A68_TRUE;
1327      }
1328    }
1329    return A68_FALSE;
1330  }
1331  
1332  //! @brief Cast a string to lower case.
1333  
1334  void make_lower_case (char *p)
1335  {
1336    for (; p != NO_TEXT && p[0] != NULL_CHAR; p++) {
1337      p[0] = (char) TO_LOWER (p[0]);
1338    }
1339  }
1340  
1341  //! @brief Construct a linear list of tokens.
1342  
1343  void tokenise_source (NODE_T ** root, int level, BOOL_T in_format, LINE_T ** l, char **s, LINE_T ** start_l, char **start_c)
1344  {
1345    char *lpr = NO_TEXT;
1346    int lprt = 0;
1347    while (l != NO_VAR && !A68_PARSER (stop_scanner)) {
1348      int att = 0;
1349      get_next_token (in_format, l, s, start_l, start_c, &att);
1350      if (A68_PARSER (scan_buf)[0] == STOP_CHAR) {
1351        A68_PARSER (stop_scanner) = A68_TRUE;
1352      } else if (strlen (A68_PARSER (scan_buf)) > 0 || att == ROW_CHAR_DENOTATION || att == LITERAL) {
1353        KEYWORD_T *kw;
1354        char *c = NO_TEXT;
1355        BOOL_T make_node = A68_TRUE;
1356        char *trailing = NO_TEXT;
1357        if (att != IDENTIFIER) {
1358          kw = find_keyword (A68 (top_keyword), A68_PARSER (scan_buf));
1359        } else {
1360          kw = NO_KEYWORD;
1361        }
1362        if (!(kw != NO_KEYWORD && att != ROW_CHAR_DENOTATION)) {
1363          if (att == IDENTIFIER) {
1364            make_lower_case (A68_PARSER (scan_buf));
1365          }
1366          if (att != ROW_CHAR_DENOTATION && att != LITERAL) {
1367            int len = (int) strlen (A68_PARSER (scan_buf));
1368            while (len >= 1 && A68_PARSER (scan_buf)[len - 1] == '_') {
1369              trailing = "_";
1370              A68_PARSER (scan_buf)[len - 1] = NULL_CHAR;
1371              len--;
1372            }
1373          }
1374          c = TEXT (add_token (&A68 (top_token), A68_PARSER (scan_buf)));
1375        } else {
1376          if (IS (kw, TO_SYMBOL)) {
1377  // Merge GO and TO to GOTO.
1378            if (*root != NO_NODE && IS (*root, GO_SYMBOL)) {
1379              ATTRIBUTE (*root) = GOTO_SYMBOL;
1380              NSYMBOL (*root) = TEXT (find_keyword (A68 (top_keyword), "GOTO"));
1381              make_node = A68_FALSE;
1382            } else {
1383              att = ATTRIBUTE (kw);
1384              c = TEXT (kw);
1385            }
1386          } else {
1387            if (att == 0 || att == BOLD_TAG) {
1388              att = ATTRIBUTE (kw);
1389            }
1390            c = TEXT (kw);
1391  // Handle pragments.
1392            if (att == STYLE_II_COMMENT_SYMBOL || att == STYLE_I_COMMENT_SYMBOL || att == BOLD_COMMENT_SYMBOL) {
1393              char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1394              if (lpr == NO_TEXT || (int) strlen (lpr) == 0) {
1395                lpr = nlpr;
1396              } else {
1397                char *stale = lpr;
1398                lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1399                a68_free (nlpr);
1400                a68_free (stale);
1401              }
1402              lprt = att;
1403              make_node = A68_FALSE;
1404            } else if (att == STYLE_I_PRAGMAT_SYMBOL || att == BOLD_PRAGMAT_SYMBOL) {
1405              char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1406              if (lpr == NO_TEXT || (int) strlen (lpr) == 0) {
1407                lpr = nlpr;
1408              } else {
1409                char *stale = lpr;
1410                lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1411                a68_free (nlpr);
1412                a68_free (stale);
1413              }
1414              lprt = att;
1415              if (!A68_PARSER (stop_scanner)) {
1416                (void) set_options (OPTION_LIST (&A68_JOB), A68_FALSE);
1417                make_node = A68_FALSE;
1418              }
1419            }
1420          }
1421        }
1422  // Add token to the tree.
1423        if (make_node) {
1424          NODE_T *q = new_node ();
1425          INFO (q) = new_node_info ();
1426          switch (att) {
1427          case ASSIGN_SYMBOL:
1428          case END_SYMBOL:
1429          case ESAC_SYMBOL:
1430          case OD_SYMBOL:
1431          case OF_SYMBOL:
1432          case FI_SYMBOL:
1433          case CLOSE_SYMBOL:
1434          case BUS_SYMBOL:
1435          case COLON_SYMBOL:
1436          case COMMA_SYMBOL:
1437          case DOTDOT_SYMBOL:
1438          case SEMI_SYMBOL: {
1439              GINFO (q) = NO_GINFO;
1440              break;
1441            }
1442          default: {
1443              GINFO (q) = new_genie_info ();
1444              break;
1445            }
1446          }
1447          STATUS (q) = OPTION_NODEMASK (&A68_JOB);
1448          LINE (INFO (q)) = *start_l;
1449          CHAR_IN_LINE (INFO (q)) = *start_c;
1450          PRIO (INFO (q)) = 0;
1451          PROCEDURE_LEVEL (INFO (q)) = 0;
1452          ATTRIBUTE (q) = att;
1453          NSYMBOL (q) = c;
1454          PREVIOUS (q) = *root;
1455          SUB (q) = NEXT (q) = NO_NODE;
1456          TABLE (q) = NO_TABLE;
1457          MOID (q) = NO_MOID;
1458          TAX (q) = NO_TAG;
1459          if (lpr != NO_TEXT) {
1460            NPRAGMENT (q) = lpr;
1461            NPRAGMENT_TYPE (q) = lprt;
1462            lpr = NO_TEXT;
1463            lprt = 0;
1464          }
1465          if (*root != NO_NODE) {
1466            NEXT (*root) = q;
1467          }
1468          if (TOP_NODE (&A68_JOB) == NO_NODE) {
1469            TOP_NODE (&A68_JOB) = q;
1470          }
1471          *root = q;
1472          if (trailing != NO_TEXT) {
1473            diagnostic (A68_WARNING | A68_FORCE_DIAGNOSTICS, q, WARNING_TRAILING, trailing, att);
1474          }
1475        }
1476  // Redirection in tokenising formats. The scanner is a recursive-descent type as
1477  // to know when it scans a format text and when not. 
1478        if (in_format && att == FORMAT_DELIMITER_SYMBOL) {
1479          return;
1480        } else if (!in_format && att == FORMAT_DELIMITER_SYMBOL) {
1481          tokenise_source (root, level + 1, A68_TRUE, l, s, start_l, start_c);
1482        } else if (in_format && open_nested_clause (att)) {
1483          NODE_T *z = PREVIOUS (*root);
1484          if (z != NO_NODE && is_one_of (z, FORMAT_ITEM_N, FORMAT_ITEM_G, FORMAT_ITEM_H, FORMAT_ITEM_F, STOP)) {
1485            tokenise_source (root, level, A68_FALSE, l, s, start_l, start_c);
1486          } else if (att == OPEN_SYMBOL) {
1487            ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1488          } else if (OPTION_BRACKETS (&A68_JOB) && att == SUB_SYMBOL) {
1489            ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1490          } else if (OPTION_BRACKETS (&A68_JOB) && att == ACCO_SYMBOL) {
1491            ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1492          }
1493        } else if (!in_format && level > 0 && open_nested_clause (att)) {
1494          tokenise_source (root, level + 1, A68_FALSE, l, s, start_l, start_c);
1495        } else if (!in_format && level > 0 && close_nested_clause (att)) {
1496          return;
1497        } else if (in_format && att == CLOSE_SYMBOL) {
1498          ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1499        } else if (OPTION_BRACKETS (&A68_JOB) && in_format && att == BUS_SYMBOL) {
1500          ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1501        } else if (OPTION_BRACKETS (&A68_JOB) && in_format && att == OCCA_SYMBOL) {
1502          ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1503        }
1504      }
1505    }
1506  }
1507  
1508  //! @brief Tokenise source file, build initial syntax tree.
1509  
1510  BOOL_T lexical_analyser (void)
1511  {
1512    LINE_T *l = NO_LINE, *start_l = NO_LINE;
1513    char *s = NO_TEXT, *start_c = NO_TEXT;
1514    NODE_T *root = NO_NODE;
1515    A68_PARSER (scan_buf) = NO_TEXT;
1516    A68_PARSER (max_scan_buf_length) = A68_PARSER (source_file_size) = get_source_size ();
1517  // Errors in file?.
1518    if (A68_PARSER (max_scan_buf_length) == 0) {
1519      return A68_FALSE;
1520    }
1521    if (OPTION_RUN_SCRIPT (&A68_JOB)) {
1522      A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (max_scan_buf_length)));
1523      if (!read_script_file ()) {
1524        return A68_FALSE;
1525      }
1526    } else {
1527      A68_PARSER (max_scan_buf_length) += KILOBYTE;       // for the environ, more than enough
1528      A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) A68_PARSER (max_scan_buf_length));
1529  // Errors in file?.
1530      if (!read_source_file ()) {
1531        return A68_FALSE;
1532      }
1533    }
1534  // Start tokenising.
1535    A68_PARSER (read_error) = A68_FALSE;
1536    A68_PARSER (stop_scanner) = A68_FALSE;
1537    if ((l = TOP_LINE (&A68_JOB)) != NO_LINE) {
1538      s = STRING (l);
1539    }
1540    tokenise_source (&root, 0, A68_FALSE, &l, &s, &start_l, &start_c);
1541    return A68_TRUE;
1542  }