parser-scanner.c

     
   1  //! @file parser-scanner.c
   2  //! @author J. Marcel van der Veer
   3  
   4  //! @section Copyright
   5  //!
   6  //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
   7  //! Copyright 2001-2025 J. Marcel van der Veer [algol68g@xs4all.nl].
   8  
   9  //! @section License
  10  //!
  11  //! This program is free software; you can redistribute it and/or modify it 
  12  //! under the terms of the GNU General Public License as published by the 
  13  //! Free Software Foundation; either version 3 of the License, or 
  14  //! (at your option) any later version.
  15  //!
  16  //! This program is distributed in the hope that it will be useful, but 
  17  //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  18  //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
  19  //! more details. You should have received a copy of the GNU General Public 
  20  //! License along with this program. If not, see [http://www.gnu.org/licenses/].
  21  
  22  //! @section Synopsis 
  23  //!
  24  //! Context-dependent Algol 68 tokeniser.
  25  
  26  #include "a68g.h"
  27  #include "a68g-parser.h"
  28  #include "a68g-prelude.h"
  29  #include "a68g-options.h"
  30  #include "a68g-environ.h"
  31  
  32  // Macros.
  33  
  34  #define SCAN_DIGITS(c)\
  35    while (IS_DIGIT (c)) {\
  36      (sym++)[0] = (c);\
  37      (c) = next_char (ref_l, ref_s, A68G_TRUE);\
  38    }
  39  
  40  #define SCAN_EXPONENT_PART(c)\
  41    (sym++)[0] = EXPONENT_CHAR;\
  42    (c) = next_char (ref_l, ref_s, A68G_TRUE);\
  43    if ((c) == '+' || (c) == '-') {\
  44      (sym++)[0] = (c);\
  45      (c) = next_char (ref_l, ref_s, A68G_TRUE);\
  46    }\
  47    SCAN_ERROR (!IS_DIGIT (c), *start_l, *start_c, ERROR_EXPONENT_DIGIT);\
  48    SCAN_DIGITS (c)
  49  
  50  //! @brief Save scanner state, for character look-ahead.
  51  
  52  void save_state (LINE_T * ref_l, char *ref_s, char ch)
  53  {
  54    SCAN_STATE_L (&A68G_JOB) = ref_l;
  55    SCAN_STATE_S (&A68G_JOB) = ref_s;
  56    SCAN_STATE_C (&A68G_JOB) = ch;
  57  }
  58  
  59  //! @brief Restore scanner state, for character look-ahead.
  60  
  61  void restore_state (LINE_T ** ref_l, char **ref_s, char *ch)
  62  {
  63    *ref_l = SCAN_STATE_L (&A68G_JOB);
  64    *ref_s = SCAN_STATE_S (&A68G_JOB);
  65    *ch = SCAN_STATE_C (&A68G_JOB);
  66  }
  67  
  68  //! @brief New_source_line.
  69  
  70  LINE_T *new_source_line (void)
  71  {
  72    LINE_T *z = (LINE_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (LINE_T));
  73    MARKER (z)[0] = NULL_CHAR;
  74    STRING (z) = NO_TEXT;
  75    FILENAME (z) = NO_TEXT;
  76    DIAGNOSTICS (z) = NO_DIAGNOSTIC;
  77    NUMBER (z) = 0;
  78    PRINT_STATUS (z) = 0;
  79    LIST (z) = A68G_TRUE;
  80    NEXT (z) = NO_LINE;
  81    PREVIOUS (z) = NO_LINE;
  82    return z;
  83  }
  84  
  85  //! @brief Append a source line to the internal source file.
  86  
  87  void append_source_line (char *str, LINE_T ** ref_l, int *line_num, char *filename)
  88  {
  89    LINE_T *z = new_source_line ();
  90  // Allow shell command in first line, f.i. "#!/usr/share/bin/a68g".
  91    if (*line_num == 1) {
  92      if (strlen (str) >= 2 && strncmp (str, "#!", strlen ("#!")) == 0) {
  93        ABEND (strstr (str, "run-script") != NO_TEXT, ERROR_SHELL_SCRIPT, __func__);
  94        (*line_num)++;
  95        return;
  96      }
  97    }
  98  // Link line into the chain.
  99    STRING (z) = new_fixed_string (str);
 100    FILENAME (z) = filename;
 101    NUMBER (z) = (*line_num)++;
 102    PRINT_STATUS (z) = NOT_PRINTED;
 103    LIST (z) = A68G_TRUE;
 104    DIAGNOSTICS (z) = NO_DIAGNOSTIC;
 105    NEXT (z) = NO_LINE;
 106    PREVIOUS (z) = *ref_l;
 107    if (TOP_LINE (&A68G_JOB) == NO_LINE) {
 108      TOP_LINE (&A68G_JOB) = z;
 109    }
 110    if (*ref_l != NO_LINE) {
 111      NEXT (*ref_l) = z;
 112    }
 113    *ref_l = z;
 114  }
 115  
 116  // Scanner, tokenises the source code.
 117  
 118  //! @brief Whether ch is unworthy.
 119  
 120  void unworthy (LINE_T * u, char *v, char ch)
 121  {
 122    if (IS_PRINT (ch)) {
 123      ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "*%s", ERROR_UNWORTHY_CHARACTER) >= 0);
 124    } else {
 125      ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "*%s %s", ERROR_UNWORTHY_CHARACTER, ctrl_char (ch)) >= 0);
 126    }
 127    scan_error (u, v, A68G (edit_line));
 128  }
 129  
 130  //! @brief Concatenate lines that terminate in '\' with next line.
 131  
 132  void concatenate_lines (LINE_T * top)
 133  {
 134    LINE_T *q;
 135  // Work from bottom backwards.
 136    for (q = top; q != NO_LINE && NEXT (q) != NO_LINE; q = NEXT (q)) {
 137      ;
 138    }
 139    for (; q != NO_LINE; BACKWARD (q)) {
 140      char *z = STRING (q);
 141      size_t len = strlen (z);
 142      if (len >= 2 && z[len - 2] == BACKSLASH_CHAR && z[len - 1] == NEWLINE_CHAR && NEXT (q) != NO_LINE && STRING (NEXT (q)) != NO_TEXT) {
 143        z[len - 2] = NULL_CHAR;
 144        len += strlen (STRING (NEXT (q)));
 145        z = (char *) get_fixed_heap_space ((size_t) (len + 1));
 146        a68g_bufcpy (z, STRING (q), len + 1);
 147        a68g_bufcat (z, STRING (NEXT (q)), len + 1);
 148        STRING (NEXT (q))[0] = NULL_CHAR;
 149        STRING (q) = z;
 150      }
 151    }
 152  }
 153  
 154  //! @brief Whether u is bold tag v, independent of stropping regime.
 155  
 156  BOOL_T is_bold (char *u, char *v)
 157  {
 158    size_t len = strlen (v);
 159    if (OPTION_STROPPING (&A68G_JOB) == QUOTE_STROPPING) {
 160      if (u[0] == '\'') {
 161        return (BOOL_T) (strncmp (++u, v, len) == 0 && u[len] == '\'');
 162      } else {
 163        return A68G_FALSE;
 164      }
 165    } else {
 166      return (BOOL_T) (strncmp (u, v, len) == 0 && !IS_UPPER (u[len]));
 167    }
 168  }
 169  
 170  //! @brief Skip string.
 171  
 172  BOOL_T skip_string (LINE_T ** top, char **ch)
 173  {
 174    LINE_T *u = *top;
 175    char *v = *ch;
 176    v++;
 177    while (u != NO_LINE) {
 178      while (v[0] != NULL_CHAR) {
 179        if (v[0] == QUOTE_CHAR && v[1] != QUOTE_CHAR) {
 180          *top = u;
 181          *ch = &v[1];
 182          return A68G_TRUE;
 183        } else if (v[0] == QUOTE_CHAR && v[1] == QUOTE_CHAR) {
 184          v += 2;
 185        } else {
 186          v++;
 187        }
 188      }
 189      FORWARD (u);
 190      if (u != NO_LINE) {
 191        v = &(STRING (u)[0]);
 192      } else {
 193        v = NO_TEXT;
 194      }
 195    }
 196    return A68G_FALSE;
 197  }
 198  
 199  //! @brief Skip comment.
 200  
 201  BOOL_T skip_comment (LINE_T ** top, char **ch, int delim)
 202  {
 203    LINE_T *u = *top;
 204    char *v = *ch;
 205    BOOL_T qstrop = OPTION_STROPPING (&A68G_JOB) == QUOTE_STROPPING;
 206    v++;
 207    while (u != NO_LINE) {
 208      while (v[0] != NULL_CHAR) {
 209        if (is_bold (v, "COMMENT") && delim == BOLD_COMMENT_SYMBOL) {
 210          char *w = &v[strlen(qstrop ? "'COMMENT'" : "COMMENT")];
 211          if (!IS_UPPER(w[0])) {
 212            *top = u;
 213            *ch = w;
 214            return A68G_TRUE;
 215          }
 216        } else if (is_bold (v, "CO") && delim == STYLE_I_COMMENT_SYMBOL) {
 217          char *w = &v[strlen(qstrop ? "'CO'" : "CO")];
 218          if (!IS_UPPER(w[0])) {
 219            *top = u;
 220            *ch = w;
 221            return A68G_TRUE;
 222          }
 223        } else if (v[0] == '#' && delim == STYLE_II_COMMENT_SYMBOL) {
 224          *top = u;
 225          *ch = &v[1];
 226          return A68G_TRUE;
 227        } else {
 228          v++;
 229        }
 230      }
 231      FORWARD (u);
 232      if (u != NO_LINE) {
 233        v = &(STRING (u)[0]);
 234      } else {
 235        v = NO_TEXT;
 236      }
 237    }
 238    return A68G_FALSE;
 239  }
 240  
 241  //! @brief Skip rest of pragmat.
 242  
 243  BOOL_T skip_pragmat (LINE_T ** top, char **ch, int delim, BOOL_T whitespace)
 244  {
 245    LINE_T *u = *top;
 246    char *v = *ch;
 247    BOOL_T qstrop = OPTION_STROPPING (&A68G_JOB) == QUOTE_STROPPING;
 248    while (u != NO_LINE) {
 249      while (v[0] != NULL_CHAR) {
 250        if (is_bold (v, "PRAGMAT") && delim == BOLD_PRAGMAT_SYMBOL) {
 251          char *w = &v[strlen(qstrop ? "'PRAGMAT'" : "PRAGMAT")];
 252          if (!IS_UPPER(w[0])) {
 253            *top = u;
 254            *ch = w;
 255            return A68G_TRUE;
 256          }
 257        } else if (is_bold (v, "PR") && delim == STYLE_I_PRAGMAT_SYMBOL) {
 258          char *w = &v[strlen(qstrop ? "'PR'" : "PR")];
 259          if (!IS_UPPER(w[0])) {
 260            *top = u;
 261            *ch = w;
 262            return A68G_TRUE;
 263          }
 264        } else {
 265          if (whitespace && !IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
 266            scan_error (u, v, ERROR_PRAGMENT);
 267          } else if (IS_UPPER (v[0])) {
 268  // Skip a bold word as you may trigger on REPR, for instance ...
 269            while (IS_UPPER (v[0])) {
 270              v++;
 271            }
 272          } else {
 273            v++;
 274          }
 275        }
 276      }
 277      FORWARD (u);
 278      if (u != NO_LINE) {
 279        v = &(STRING (u)[0]);
 280      } else {
 281        v = NO_TEXT;
 282      }
 283    }
 284    return A68G_FALSE;
 285  }
 286  
 287  //! @brief Return pointer to next token within pragmat.
 288  
 289  char *get_pragmat_item (LINE_T ** top, char **ch)
 290  {
 291    LINE_T *u = *top;
 292    char *v = *ch;
 293    while (u != NO_LINE) {
 294      while (v[0] != NULL_CHAR) {
 295        if (!IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
 296          *top = u;
 297          *ch = v;
 298          return v;
 299        } else {
 300          v++;
 301        }
 302      }
 303      FORWARD (u);
 304      if (u != NO_LINE) {
 305        v = &(STRING (u)[0]);
 306      } else {
 307        v = NO_TEXT;
 308      }
 309    }
 310    return NO_TEXT;
 311  }
 312  
 313  //! @brief Case insensitive strncmp for at most the number of chars in 'v'.
 314  
 315  int streq (char *u, char *v)
 316  {
 317    int diff;
 318    for (diff = 0; diff == 0 && u[0] != NULL_CHAR && v[0] != NULL_CHAR; u++, v++) {
 319      diff = ((int) TO_LOWER (u[0])) - ((int) TO_LOWER (v[0]));
 320    }
 321    return diff;
 322  }
 323  
 324  //! @brief Scan for next pragmat and yield first pragmat item.
 325  
 326  char *next_preprocessor_item (LINE_T ** top, char **ch, int *delim)
 327  {
 328    LINE_T *u = *top;
 329    char *v = *ch;
 330    *delim = 0;
 331    while (u != NO_LINE) {
 332      while (v[0] != NULL_CHAR) {
 333        LINE_T *start_l = u;
 334        char *start_c = v;
 335  // STRINGs must be skipped.
 336        if (v[0] == QUOTE_CHAR) {
 337          SCAN_ERROR (!skip_string (&u, &v), start_l, start_c, ERROR_UNTERMINATED_STRING);
 338        }
 339  // COMMENTS must be skipped.
 340        else if (is_bold (v, "COMMENT")) {
 341          SCAN_ERROR (!skip_comment (&u, &v, BOLD_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
 342        } else if (is_bold (v, "CO")) {
 343          SCAN_ERROR (!skip_comment (&u, &v, STYLE_I_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
 344        } else if (v[0] == '#') {
 345          SCAN_ERROR (!skip_comment (&u, &v, STYLE_II_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
 346        } else if (is_bold (v, "PRAGMAT") || is_bold (v, "PR")) {
 347  // We caught a PRAGMAT.
 348          char *item;
 349          if (is_bold (v, "PRAGMAT")) {
 350            *delim = BOLD_PRAGMAT_SYMBOL;
 351            v = &v[strlen ("PRAGMAT")];
 352          } else if (is_bold (v, "PR")) {
 353            *delim = STYLE_I_PRAGMAT_SYMBOL;
 354            v = &v[strlen ("PR")];
 355          }
 356          item = get_pragmat_item (&u, &v);
 357          SCAN_ERROR (item == NO_TEXT, start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 358  // Item "preprocessor" restarts preprocessing if it is off.
 359          if (A68G_PARSER (no_preprocessing) && streq (item, "PREPROCESSOR") == 0) {
 360            A68G_PARSER (no_preprocessing) = A68G_FALSE;
 361            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68G_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 362          }
 363  // If preprocessing is switched off, we idle to closing bracket.
 364          else if (A68G_PARSER (no_preprocessing)) {
 365            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68G_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 366          }
 367  // Item "nopreprocessor" stops preprocessing if it is on.
 368          if (streq (item, "NOPREPROCESSOR") == 0) {
 369            A68G_PARSER (no_preprocessing) = A68G_TRUE;
 370            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68G_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 371          }
 372  // Item "INCLUDE" includes a file.
 373          else if (streq (item, "INCLUDE") == 0) {
 374            *top = u;
 375            *ch = v;
 376            return item;
 377          }
 378  // Item "READ" includes a file.
 379          else if (streq (item, "READ") == 0) {
 380            *top = u;
 381            *ch = v;
 382            return item;
 383          }
 384  // Unrecognised item - probably options handled later by the tokeniser.
 385          else {
 386            SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68G_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 387          }
 388        } else if (IS_UPPER (v[0])) {
 389  // Skip a bold word as you may trigger on REPR, for instance ...
 390          while (IS_UPPER (v[0])) {
 391            v++;
 392          }
 393        } else {
 394          v++;
 395        }
 396      }
 397      FORWARD (u);
 398      if (u != NO_LINE) {
 399        v = &(STRING (u)[0]);
 400      } else {
 401        v = NO_TEXT;
 402      }
 403    }
 404    *top = u;
 405    *ch = v;
 406    return NO_TEXT;
 407  }
 408  
 409  //! @brief Include files.
 410  
 411  void include_files (LINE_T * top)
 412  {
 413  // include_files
 414  // 
 415  // syntax: PR read "filename" PR
 416  //         PR include "filename" PR
 417  // 
 418  // The file gets inserted before the line containing the pragmat. In this way
 419  // correct line numbers are preserved which helps diagnostics. A file that has
 420  // been included will not be included a second time - it will be ignored. 
 421  // A rigorous fail-safe, but there is no mechanism to prevent recursive includes 
 422  // in A68 source code. User reports do not indicate sophisticated use of INCLUDE, 
 423  // so this is fine for now.
 424  // TODO - some day we might need `app', analogous to `cpp'.
 425    BOOL_T make_pass = A68G_TRUE;
 426    while (make_pass) {
 427      LINE_T *s, *t, *u = top;
 428      char *v = &(STRING (u)[0]);
 429      make_pass = A68G_FALSE;
 430      errno = 0;
 431      while (u != NO_LINE) {
 432        int pr_lim;
 433        char *item = next_preprocessor_item (&u, &v, &pr_lim);
 434        LINE_T *start_l = u;
 435        char *start_c = v;
 436  // Search for PR include "filename" PR.
 437        if (item != NO_TEXT && (streq (item, "INCLUDE") == 0 || streq (item, "READ") == 0)) {
 438          FILE_T fd;
 439          int n, line_num, fsize, k, bytes_read;
 440          char *fbuf, delim;
 441          BUFFER fnb;
 442          char *fn;
 443  // Skip to filename.
 444          while (IS_ALPHA (v[0])) {
 445            v++;
 446          }
 447          while (IS_SPACE (v[0])) {
 448            v++;
 449          }
 450  // Scan quoted filename.
 451          SCAN_ERROR ((v[0] != QUOTE_CHAR && v[0] != '\''), start_l, start_c, ERROR_INCORRECT_FILENAME);
 452          delim = (v++)[0];
 453          n = 0;
 454          fnb[0] = NULL_CHAR;
 455  // Scan Algol 68 string (note: "" denotes a ", while in C it concatenates).
 456          do {
 457            SCAN_ERROR (EOL (v[0]), start_l, start_c, ERROR_INCORRECT_FILENAME);
 458            SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
 459            if (v[0] == delim) {
 460              while (v[0] == delim && v[1] == delim) {
 461                SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
 462                fnb[n++] = delim;
 463                fnb[n] = NULL_CHAR;
 464                v += 2;
 465              }
 466            } else if (IS_PRINT (v[0])) {
 467              fnb[n++] = *(v++);
 468              fnb[n] = NULL_CHAR;
 469            } else {
 470              SCAN_ERROR (A68G_TRUE, start_l, start_c, ERROR_INCORRECT_FILENAME);
 471            }
 472          } while (v[0] != delim);
 473  // Insist that the pragmat is closed properly.
 474          v = &v[1];
 475          SCAN_ERROR (!skip_pragmat (&u, &v, pr_lim, A68G_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
 476          SCAN_ERROR (n == 0, start_l, start_c, ERROR_INCORRECT_FILENAME);
 477  // Make the name relative to the position of the source file (C preprocessor standard).
 478          if (FILENAME (u) != NO_TEXT) {
 479            fn = a68g_relpath (a68g_dirname (FILENAME (u)), a68g_dirname (fnb), a68g_basename (fnb));
 480          } else {
 481            fn = a68g_relpath (FILE_PATH (&A68G_JOB), a68g_dirname (fnb), a68g_basename (fnb));
 482          }
 483  // Do not check errno, since errno may be undefined here after a successful call.
 484          if (fn != NO_TEXT) {
 485            a68g_bufcpy (fnb, fn, BUFFER_SIZE);
 486          } else {
 487            SCAN_ERROR_INFO (A68G_TRUE, NO_LINE, NO_TEXT, ERROR_SOURCE_FILE_INCLUDE_OPEN, fnb);
 488          }
 489          size_t fnwid = strlen (fnb) + 1;
 490          fn = (char *) get_fixed_heap_space ((size_t) fnwid);
 491          a68g_bufcpy (fn, fnb, fnwid);
 492  // Ignore the file when included more than once.
 493          for (t = top; t != NO_LINE; t = NEXT (t)) {
 494            if (strcmp (FILENAME (t), fn) == 0) {
 495              goto search_next_pragmat;
 496            }
 497          }
 498  // Access the file.
 499          errno = 0;
 500          fd = open (fn, O_RDONLY | O_BINARY);
 501          SCAN_ERROR_INFO (fd == -1, start_l, start_c, ERROR_SOURCE_FILE_INCLUDE_OPEN, fnb);
 502          errno = 0;
 503          fsize = (int) lseek (fd, 0, SEEK_END);
 504          ASSERT (fsize >= 0);
 505          SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
 506          fbuf = (char *) get_temp_heap_space ((unt) (8 + fsize));
 507          errno = 0;
 508          ASSERT (lseek (fd, 0, SEEK_SET) >= 0);
 509          SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
 510          errno = 0;
 511          bytes_read = (int) io_read (fd, fbuf, (size_t) fsize);
 512          SCAN_ERROR (errno != 0 || bytes_read != fsize, start_l, start_c, ERROR_FILE_READ);
 513  // Buffer still usable?.
 514          if (fsize > A68G_PARSER (max_scan_buf_length)) {
 515            A68G_PARSER (max_scan_buf_length) = fsize;
 516            A68G_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68G_PARSER (max_scan_buf_length)));
 517          }
 518  // Link all lines into the list.
 519          line_num = 1;
 520          s = u;
 521          t = PREVIOUS (u);
 522          k = 0;
 523          if (fsize == 0) {
 524  // If file is empty, insert single empty line.
 525            A68G_PARSER (scan_buf)[0] = NEWLINE_CHAR;
 526            A68G_PARSER (scan_buf)[1] = NULL_CHAR;
 527            append_source_line (A68G_PARSER (scan_buf), &t, &line_num, fn);
 528          } else
 529            while (k < fsize) {
 530              n = 0;
 531              A68G_PARSER (scan_buf)[0] = NULL_CHAR;
 532              while (k < fsize && fbuf[k] != NEWLINE_CHAR) {
 533                SCAN_ERROR ((IS_CNTRL (fbuf[k]) && !IS_SPACE (fbuf[k])) || fbuf[k] == STOP_CHAR, start_l, start_c, ERROR_FILE_INCLUDE_CTRL);
 534                A68G_PARSER (scan_buf)[n++] = fbuf[k++];
 535                A68G_PARSER (scan_buf)[n] = NULL_CHAR;
 536              }
 537              A68G_PARSER (scan_buf)[n++] = NEWLINE_CHAR;
 538              A68G_PARSER (scan_buf)[n] = NULL_CHAR;
 539              if (k < fsize) {
 540                k++;
 541              }
 542              append_source_line (A68G_PARSER (scan_buf), &t, &line_num, fn);
 543            }
 544  // Conclude and go find another include directive, if any.
 545          NEXT (t) = s;
 546          PREVIOUS (s) = t;
 547          concatenate_lines (top);
 548          ASSERT (close (fd) == 0);
 549          make_pass = A68G_TRUE;
 550        }
 551      search_next_pragmat:_SKIP_;
 552      }
 553    }
 554  }
 555  
 556  //! @brief Size of source file.
 557  
 558  int get_source_size (void)
 559  {
 560    FILE_T f = FILE_SOURCE_FD (&A68G_JOB);
 561  // This is why WIN32 must open as "read binary".
 562    return (int) lseek (f, 0, SEEK_END);
 563  }
 564  
 565  //! @brief Append environment source lines.
 566  
 567  void append_environ (char *str[], LINE_T ** ref_l, int *line_num, char *name)
 568  {
 569    for (int k = 0; str[k] != NO_TEXT; k++) {
 570      int zero_line_num = 0;
 571      (void) line_num;
 572      append_source_line (str[k], ref_l, &zero_line_num, name);
 573    }
 574  }
 575  
 576  //! @brief Read script file and make internal copy.
 577  
 578  BOOL_T read_script_file (void)
 579  {
 580    LINE_T *ref_l = NO_LINE;
 581    int k, n, num;
 582    unt len;
 583    BOOL_T file_end = A68G_FALSE;
 584    BUFFER filename, linenum;
 585    char ch, *fn, *line;
 586    char *buffer = (char *) get_temp_heap_space ((unt) (8 + A68G_PARSER (source_file_size)));
 587    FILE_T source = FILE_SOURCE_FD (&A68G_JOB);
 588    ABEND (source == -1, ERROR_ACTION, __func__);
 589    buffer[0] = NULL_CHAR;
 590    n = 0;
 591    len = (unt) (8 + A68G_PARSER (source_file_size));
 592    buffer = (char *) get_temp_heap_space (len);
 593    ASSERT (lseek (source, 0, SEEK_SET) >= 0);
 594    while (!file_end) {
 595  // Read the original file name.
 596      filename[0] = NULL_CHAR;
 597      k = 0;
 598      if (io_read (source, &ch, 1) == 0) {
 599        file_end = A68G_TRUE;
 600        continue;
 601      }
 602      while (ch != NEWLINE_CHAR) {
 603        filename[k++] = ch;
 604        ASSERT (io_read (source, &ch, 1) == 1);
 605      }
 606      filename[k] = NULL_CHAR;
 607      fn = TEXT (add_token (&A68G (top_token), filename));
 608  // Read the original file number.
 609      linenum[0] = NULL_CHAR;
 610      k = 0;
 611      ASSERT (io_read (source, &ch, 1) == 1);
 612      while (ch != NEWLINE_CHAR) {
 613        linenum[k++] = ch;
 614        ASSERT (io_read (source, &ch, 1) == 1);
 615      }
 616      linenum[k] = NULL_CHAR;
 617      num = (int) strtol (linenum, NO_REF, 10);
 618      ABEND (errno == ERANGE, ERROR_INTERNAL_CONSISTENCY, __func__);
 619  // COPY original line into buffer.
 620      ASSERT (io_read (source, &ch, 1) == 1);
 621      line = &buffer[n];
 622      while (ch != NEWLINE_CHAR) {
 623        buffer[n++] = ch;
 624        ASSERT (io_read (source, &ch, 1) == 1);
 625        ABEND ((unt) n >= len, ERROR_ACTION, __func__);
 626      }
 627      buffer[n++] = NEWLINE_CHAR;
 628      buffer[n] = NULL_CHAR;
 629      append_source_line (line, &ref_l, &num, fn);
 630    }
 631    return A68G_TRUE;
 632  }
 633  
 634  //! @brief match first non-white characters in string.
 635  
 636  BOOL_T a68g_start(char *u, char *v, char **end)
 637  {
 638    *end = NO_TEXT;
 639    while (v[0] != NULL_CHAR) {
 640      if (u[0] == NULL_CHAR) {
 641        return A68G_FALSE;
 642      } else if (IS_SPACE (u[0])) {
 643        u++;
 644      } else {
 645        if (u[0] == v[0]) {
 646          u++;
 647          v++;
 648          if (end != NULL) {
 649            *end = u;
 650          }
 651        } else {
 652          return A68G_FALSE;
 653        }
 654      }
 655    }
 656    return A68G_TRUE;
 657  }
 658  
 659  //! @brief Read source file and make internal copy.
 660  
 661  BOOL_T read_source_file (void)
 662  {
 663    LINE_T *ref_l = NO_LINE;
 664    int line_num = 0, k, bytes_read;
 665    ssize_t l;
 666    FILE_T f = FILE_SOURCE_FD (&A68G_JOB);
 667    char **prelude_start, **postlude, *buffer, *text;
 668  // Read the file into a single buffer, so we save on system calls.
 669    line_num = 1;
 670    errno = 0;
 671    text = (char *) get_temp_heap_space ((unt) (8 + A68G_PARSER (source_file_size)));
 672    ABEND (errno != 0 || text == NO_TEXT, ERROR_ALLOCATION, __func__);
 673    ASSERT (lseek (f, 0, SEEK_SET) >= 0);
 674    ABEND (errno != 0, ERROR_ACTION, __func__);
 675    errno = 0;
 676    bytes_read = (int) io_read (f, text, (size_t) A68G_PARSER (source_file_size));
 677    ABEND (errno != 0 || bytes_read != A68G_PARSER (source_file_size), ERROR_ACTION, __func__);
 678  // Little test on stropping.
 679    char *pr1 = "'PR'QUOTESTROPPING'PR'";
 680    char *pr2 = "'PRAGMAT'QUOTESTROPPING'PRAGMAT'";
 681    char *end = NO_TEXT;
 682    if (a68g_start (text, pr1, &end)) {
 683      OPTION_STROPPING (&A68G_JOB) = QUOTE_STROPPING;
 684      buffer = end;
 685      A68G_PARSER (source_file_size) = strlen (buffer);
 686    } else if (a68g_start (text, pr2, &end)) {
 687      OPTION_STROPPING (&A68G_JOB) = QUOTE_STROPPING;
 688      buffer = end;
 689      A68G_PARSER (source_file_size) = strlen (buffer);
 690    } else {
 691      buffer = text;
 692    }
 693  // Prelude.
 694    if (OPTION_STROPPING (&A68G_JOB) == UPPER_STROPPING) {
 695      prelude_start = bold_prelude_start;
 696      postlude = bold_postlude;
 697    } else if (OPTION_STROPPING (&A68G_JOB) == QUOTE_STROPPING) {
 698      prelude_start = quote_prelude_start;
 699      postlude = quote_postlude;
 700    } else {
 701      prelude_start = postlude = NO_REF;
 702    }
 703    append_environ (prelude_start, &ref_l, &line_num, "prelude");
 704  // Link all lines into the list.
 705    k = 0;
 706    while (k < A68G_PARSER (source_file_size)) {
 707      l = 0;
 708      A68G_PARSER (scan_buf)[0] = NULL_CHAR;
 709      while (k < A68G_PARSER (source_file_size) && buffer[k] != NEWLINE_CHAR) {
 710        if (k < A68G_PARSER (source_file_size) - 1 && buffer[k] == CR_CHAR && buffer[k + 1] == NEWLINE_CHAR) {
 711          k++;
 712        } else {
 713          A68G_PARSER (scan_buf)[l++] = buffer[k++];
 714          A68G_PARSER (scan_buf)[l] = NULL_CHAR;
 715        }
 716      }
 717      A68G_PARSER (scan_buf)[l++] = NEWLINE_CHAR;
 718      A68G_PARSER (scan_buf)[l] = NULL_CHAR;
 719      if (k < A68G_PARSER (source_file_size)) {
 720        k++;
 721      }
 722      append_source_line (A68G_PARSER (scan_buf), &ref_l, &line_num, FILE_SOURCE_NAME (&A68G_JOB));
 723      SCAN_ERROR (l != strlen (A68G_PARSER (scan_buf)), NO_LINE, NO_TEXT, ERROR_FILE_SOURCE_CTRL);
 724    }
 725  // Postlude.
 726    append_environ (postlude, &ref_l, &line_num, "postlude");
 727  // Concatenate lines.
 728    concatenate_lines (TOP_LINE (&A68G_JOB));
 729  // Include files.
 730    include_files (TOP_LINE (&A68G_JOB));
 731    return A68G_TRUE;
 732  }
 733  
 734  //! @brief Next_char get next character from internal copy of source file.
 735  
 736  char next_char (LINE_T ** ref_l, char **ref_s, BOOL_T allow_typo)
 737  {
 738    char ch;
 739  #if defined (NO_TYPO)
 740    allow_typo = A68G_FALSE;
 741  #endif
 742    LOW_STACK_ALERT (NO_NODE);
 743  // Source empty?.
 744    if (*ref_l == NO_LINE) {
 745      return STOP_CHAR;
 746    } else {
 747      LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68G_JOB) & SOURCE_MASK ? A68G_TRUE : A68G_FALSE);
 748  // Take new line?.
 749      if ((*ref_s)[0] == NEWLINE_CHAR || (*ref_s)[0] == NULL_CHAR) {
 750        *ref_l = NEXT (*ref_l);
 751        if (*ref_l == NO_LINE) {
 752          return STOP_CHAR;
 753        }
 754        *ref_s = STRING (*ref_l);
 755      } else {
 756        (*ref_s)++;
 757      }
 758  // Deliver next char.
 759      ch = (*ref_s)[0];
 760      if (allow_typo && (IS_SPACE (ch) || ch == FORMFEED_CHAR)) {
 761        ch = next_char (ref_l, ref_s, allow_typo);
 762      }
 763      return ch;
 764    }
 765  }
 766  
 767  //! @brief Find first character that can start a valid symbol.
 768  
 769  void get_good_char (char *ref_c, LINE_T ** ref_l, char **ref_s)
 770  {
 771    while (*ref_c != STOP_CHAR && (IS_SPACE (*ref_c) || (*ref_c == NULL_CHAR))) {
 772      if (*ref_l != NO_LINE) {
 773        LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68G_JOB) & SOURCE_MASK ? A68G_TRUE : A68G_FALSE);
 774      }
 775      *ref_c = next_char (ref_l, ref_s, A68G_FALSE);
 776    }
 777  }
 778  
 779  //! @brief Handle a pragment (pragmat or comment).
 780  
 781  char *pragment (int type, LINE_T ** ref_l, char **ref_c)
 782  {
 783    size_t chars_in_buf;
 784  #define INIT_BUFFER {chars_in_buf = 0; A68G_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
 785  #define ADD_ONE_CHAR(ch) {A68G_PARSER (scan_buf)[chars_in_buf ++] = ch; A68G_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
 786  //
 787    char c = **ref_c, *term_s = NO_TEXT, *start_c = *ref_c;
 788    char *z = NO_TEXT;
 789    LINE_T *start_l = *ref_l;
 790    BOOL_T stop, pragmat = A68G_FALSE;
 791  // Set terminator.
 792    if (OPTION_STROPPING (&A68G_JOB) == UPPER_STROPPING) {
 793      if (type == STYLE_I_COMMENT_SYMBOL) {
 794        term_s = "CO";
 795      } else if (type == STYLE_II_COMMENT_SYMBOL) {
 796        term_s = "#";
 797      } else if (type == BOLD_COMMENT_SYMBOL) {
 798        term_s = "COMMENT";
 799      } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
 800        term_s = "PR";
 801        pragmat = A68G_TRUE;
 802      } else if (type == BOLD_PRAGMAT_SYMBOL) {
 803        term_s = "PRAGMAT";
 804        pragmat = A68G_TRUE;
 805      }
 806    } else if (OPTION_STROPPING (&A68G_JOB) == QUOTE_STROPPING) {
 807      if (type == STYLE_I_COMMENT_SYMBOL) {
 808        term_s = "'CO'";
 809      } else if (type == STYLE_II_COMMENT_SYMBOL) {
 810        term_s = "#";
 811      } else if (type == BOLD_COMMENT_SYMBOL) {
 812        term_s = "'COMMENT'";
 813      } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
 814        term_s = "'PR'";
 815        pragmat = A68G_TRUE;
 816      } else if (type == BOLD_PRAGMAT_SYMBOL) {
 817        term_s = "'PRAGMAT'";
 818        pragmat = A68G_TRUE;
 819      }
 820    }
 821    size_t term_s_length = strlen (term_s);
 822  // Scan for terminator.
 823    INIT_BUFFER;
 824    stop = A68G_FALSE;
 825    while (stop == A68G_FALSE) {
 826      BOOL_T scan_next = A68G_TRUE;
 827      SCAN_ERROR (c == STOP_CHAR, start_l, start_c, ERROR_UNTERMINATED_PRAGMENT);
 828  // A ".." or '..' delimited string in a PRAGMAT.
 829      if (pragmat && (c == QUOTE_CHAR || (c == '\'' && OPTION_STROPPING (&A68G_JOB) == UPPER_STROPPING))) {
 830        char delim = c;
 831        BOOL_T eos = A68G_FALSE;
 832        ADD_ONE_CHAR (c);
 833        c = next_char (ref_l, ref_c, A68G_FALSE);
 834        while (!eos) {
 835          SCAN_ERROR (EOL (c), start_l, start_c, ERROR_LONG_STRING);
 836          if (c == delim) {
 837            ADD_ONE_CHAR (delim);
 838            save_state (*ref_l, *ref_c, c);
 839            c = next_char (ref_l, ref_c, A68G_FALSE);
 840            if (c == delim) {
 841              c = next_char (ref_l, ref_c, A68G_FALSE);
 842            } else {
 843              restore_state (ref_l, ref_c, &c);
 844              eos = A68G_TRUE;
 845            }
 846          } else if (IS_PRINT (c)) {
 847            ADD_ONE_CHAR (c);
 848            c = next_char (ref_l, ref_c, A68G_FALSE);
 849          } else {
 850            unworthy (start_l, start_c, c);
 851          }
 852        }
 853      } else if (EOL (c)) {
 854        ADD_ONE_CHAR (NEWLINE_CHAR);
 855      } else if (IS_UPPER (c)) {
 856        while (IS_UPPER (c)) {
 857          ADD_ONE_CHAR (c);
 858          c = next_char (ref_l, ref_c, A68G_FALSE);
 859        }
 860        scan_next = A68G_FALSE;
 861      } else if (IS_PRINT (c) || IS_SPACE (c)) {
 862        ADD_ONE_CHAR (c);
 863      }
 864      if (chars_in_buf >= term_s_length) {
 865  // Check whether we encountered the terminator.
 866        char *tok = &(A68G_PARSER (scan_buf)[chars_in_buf - term_s_length]);
 867        stop = (BOOL_T) (strcmp (term_s, tok) == 0);
 868      }
 869      if (scan_next) {
 870        c = next_char (ref_l, ref_c, A68G_FALSE);
 871      }
 872    }
 873    A68G_PARSER (scan_buf)[chars_in_buf - term_s_length] = NULL_CHAR;
 874    z = new_string (term_s, A68G_PARSER (scan_buf), term_s, NO_TEXT);
 875    if (type == STYLE_I_PRAGMAT_SYMBOL || type == BOLD_PRAGMAT_SYMBOL) {
 876      isolate_options (A68G_PARSER (scan_buf), start_l);
 877    }
 878    return z;
 879  #undef ADD_ONE_CHAR
 880  #undef INIT_BUFFER
 881  }
 882  
 883  //! @brief Attribute for format item.
 884  
 885  int get_format_item (char ch)
 886  {
 887    switch (TO_LOWER (ch)) {
 888    case 'a': {
 889        return FORMAT_ITEM_A;
 890      }
 891    case 'b': {
 892        return FORMAT_ITEM_B;
 893      }
 894    case 'c': {
 895        return FORMAT_ITEM_C;
 896      }
 897    case 'd': {
 898        return FORMAT_ITEM_D;
 899      }
 900    case 'e': {
 901        return FORMAT_ITEM_E;
 902      }
 903    case 'f': {
 904        return FORMAT_ITEM_F;
 905      }
 906    case 'g': {
 907        return FORMAT_ITEM_G;
 908      }
 909    case 'h': {
 910        return FORMAT_ITEM_H;
 911      }
 912    case 'i': {
 913        return FORMAT_ITEM_I;
 914      }
 915    case 'j': {
 916        return FORMAT_ITEM_J;
 917      }
 918    case 'k': {
 919        return FORMAT_ITEM_K;
 920      }
 921    case 'l':
 922    case '/': {
 923        return FORMAT_ITEM_L;
 924      }
 925    case 'm': {
 926        return FORMAT_ITEM_M;
 927      }
 928    case 'n': {
 929        return FORMAT_ITEM_N;
 930      }
 931    case 'o': {
 932        return FORMAT_ITEM_O;
 933      }
 934    case 'p': {
 935        return FORMAT_ITEM_P;
 936      }
 937    case 'q': {
 938        return FORMAT_ITEM_Q;
 939      }
 940    case 'r': {
 941        return FORMAT_ITEM_R;
 942      }
 943    case 's': {
 944        return FORMAT_ITEM_S;
 945      }
 946    case 't': {
 947        return FORMAT_ITEM_T;
 948      }
 949    case 'u': {
 950        return FORMAT_ITEM_U;
 951      }
 952    case 'v': {
 953        return FORMAT_ITEM_V;
 954      }
 955    case 'w': {
 956        return FORMAT_ITEM_W;
 957      }
 958    case 'x': {
 959        return FORMAT_ITEM_X;
 960      }
 961    case 'y': {
 962        return FORMAT_ITEM_Y;
 963      }
 964    case 'z': {
 965        return FORMAT_ITEM_Z;
 966      }
 967    case '+': {
 968        return FORMAT_ITEM_PLUS;
 969      }
 970    case '-': {
 971        return FORMAT_ITEM_MINUS;
 972      }
 973    case POINT_CHAR: {
 974        return FORMAT_ITEM_POINT;
 975      }
 976    case '%': {
 977        return FORMAT_ITEM_ESCAPE;
 978      }
 979    default: {
 980        return 0;
 981      }
 982    }
 983  }
 984  
 985  //! @brief Whether input shows exponent character.
 986  
 987  BOOL_T is_exp_char (LINE_T ** ref_l, char **ref_s, char *ch)
 988  {
 989    BOOL_T ret = A68G_FALSE;
 990    char exp_syms[3];
 991    if (OPTION_STROPPING (&A68G_JOB) == UPPER_STROPPING) {
 992      exp_syms[0] = EXPONENT_CHAR;
 993      exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
 994      exp_syms[2] = NULL_CHAR;
 995    } else {
 996      exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
 997      exp_syms[1] = BACKSLASH_CHAR;
 998      exp_syms[2] = NULL_CHAR;
 999    }
1000    save_state (*ref_l, *ref_s, *ch);
1001    if (strchr (exp_syms, *ch) != NO_TEXT) {
1002      *ch = next_char (ref_l, ref_s, A68G_TRUE);
1003      ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
1004    }
1005    restore_state (ref_l, ref_s, ch);
1006    return ret;
1007  }
1008  
1009  //! @brief Whether input shows radix character.
1010  
1011  BOOL_T is_radix_char (LINE_T ** ref_l, char **ref_s, char *ch)
1012  {
1013    BOOL_T ret = A68G_FALSE;
1014    save_state (*ref_l, *ref_s, *ch);
1015    if (OPTION_STROPPING (&A68G_JOB) == QUOTE_STROPPING) {
1016      if (*ch == TO_UPPER (RADIX_CHAR)) {
1017        *ch = next_char (ref_l, ref_s, A68G_TRUE);
1018        ret = (BOOL_T) (strchr ("0123456789ABCDEF", *ch) != NO_TEXT);
1019      }
1020    } else {
1021      if (*ch == RADIX_CHAR) {
1022        *ch = next_char (ref_l, ref_s, A68G_TRUE);
1023        ret = (BOOL_T) (strchr ("0123456789abcdef", *ch) != NO_TEXT);
1024      }
1025    }
1026    restore_state (ref_l, ref_s, ch);
1027    return ret;
1028  }
1029  
1030  //! @brief Whether input shows decimal point.
1031  
1032  BOOL_T is_decimal_point (LINE_T ** ref_l, char **ref_s, char *ch)
1033  {
1034    BOOL_T ret = A68G_FALSE;
1035    save_state (*ref_l, *ref_s, *ch);
1036    if (*ch == POINT_CHAR) {
1037      char exp_syms[3];
1038      if (OPTION_STROPPING (&A68G_JOB) == UPPER_STROPPING) {
1039        exp_syms[0] = EXPONENT_CHAR;
1040        exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
1041        exp_syms[2] = NULL_CHAR;
1042      } else {
1043        exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
1044        exp_syms[1] = BACKSLASH_CHAR;
1045        exp_syms[2] = NULL_CHAR;
1046      }
1047      *ch = next_char (ref_l, ref_s, A68G_TRUE);
1048      if (strchr (exp_syms, *ch) != NO_TEXT) {
1049        *ch = next_char (ref_l, ref_s, A68G_TRUE);
1050        ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
1051      } else {
1052        ret = (BOOL_T) (strchr ("0123456789", *ch) != NO_TEXT);
1053      }
1054    }
1055    restore_state (ref_l, ref_s, ch);
1056    return ret;
1057  }
1058  
1059  //! @brief Get next token from internal copy of source file..
1060  
1061  void get_next_token (BOOL_T in_format, LINE_T ** ref_l, char **ref_s, LINE_T ** start_l, char **start_c, int *att)
1062  {
1063    char c = **ref_s, *sym = A68G_PARSER (scan_buf);
1064    sym[0] = NULL_CHAR;
1065    get_good_char (&c, ref_l, ref_s);
1066    *start_l = *ref_l;
1067    *start_c = *ref_s;
1068    if (c == STOP_CHAR) {
1069  // We are at EOF.
1070      (sym++)[0] = STOP_CHAR;
1071      sym[0] = NULL_CHAR;
1072      return;
1073    }
1074  // In a format.
1075    if (in_format) {
1076      char *format_items;
1077      if (OPTION_STROPPING (&A68G_JOB) == UPPER_STROPPING) {
1078        format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1079      } else if (OPTION_STROPPING (&A68G_JOB) == QUOTE_STROPPING) {
1080        format_items = "/%\\+-.ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1081      } else {
1082        format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1083      }
1084      if (strchr (format_items, c) != NO_TEXT) {
1085  // General format items.
1086        (sym++)[0] = c;
1087        sym[0] = NULL_CHAR;
1088        *att = get_format_item (c);
1089        (void) next_char (ref_l, ref_s, A68G_FALSE);
1090        return;
1091      }
1092      if (IS_DIGIT (c)) {
1093  // INT denotation for static replicator.
1094        SCAN_DIGITS (c);
1095        sym[0] = NULL_CHAR;
1096        *att = STATIC_REPLICATOR;
1097        return;
1098      }
1099    }
1100  // Not in a format.
1101    if (IS_UPPER (c)) {
1102      if (OPTION_STROPPING (&A68G_JOB) == UPPER_STROPPING) {
1103  // Upper case word - bold tag.
1104        while (IS_UPPER (c) || c == '_') {
1105          (sym++)[0] = c;
1106          c = next_char (ref_l, ref_s, A68G_FALSE);
1107        }
1108        sym[0] = NULL_CHAR;
1109        *att = BOLD_TAG;
1110      } else if (OPTION_STROPPING (&A68G_JOB) == QUOTE_STROPPING) {
1111        while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1112          (sym++)[0] = c;
1113          c = next_char (ref_l, ref_s, A68G_TRUE);
1114        }
1115        sym[0] = NULL_CHAR;
1116        *att = IDENTIFIER;
1117      }
1118    } else if (c == '\'') {
1119  // Quote, uppercase word, quote - bold tag.
1120      int k = 0;
1121      c = next_char (ref_l, ref_s, A68G_FALSE);
1122      while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1123        (sym++)[0] = c;
1124        k++;
1125        c = next_char (ref_l, ref_s, A68G_TRUE);
1126      }
1127      SCAN_ERROR (k == 0, *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1128      sym[0] = NULL_CHAR;
1129      *att = BOLD_TAG;
1130  // Skip terminating quote, or complain if it is not there.
1131      SCAN_ERROR (c != '\'', *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1132      c = next_char (ref_l, ref_s, A68G_FALSE);
1133    } else if (IS_LOWER (c)) {
1134  // Lower case word - identifier.
1135      while (IS_LOWER (c) || IS_DIGIT (c) || c == '_') {
1136        (sym++)[0] = c;
1137        c = next_char (ref_l, ref_s, A68G_TRUE);
1138      }
1139      sym[0] = NULL_CHAR;
1140      *att = IDENTIFIER;
1141    } else if (c == POINT_CHAR) {
1142  // Begins with a point symbol - point, dotdot, L REAL denotation.
1143      if (is_decimal_point (ref_l, ref_s, &c)) {
1144        (sym++)[0] = '0';
1145        (sym++)[0] = POINT_CHAR;
1146        c = next_char (ref_l, ref_s, A68G_TRUE);
1147        SCAN_DIGITS (c);
1148        if (is_exp_char (ref_l, ref_s, &c)) {
1149          SCAN_EXPONENT_PART (c);
1150        }
1151        sym[0] = NULL_CHAR;
1152        *att = REAL_DENOTATION;
1153      } else {
1154        c = next_char (ref_l, ref_s, A68G_TRUE);
1155        if (c == POINT_CHAR) {
1156          (sym++)[0] = POINT_CHAR;
1157          (sym++)[0] = POINT_CHAR;
1158          sym[0] = NULL_CHAR;
1159          *att = DOTDOT_SYMBOL;
1160          c = next_char (ref_l, ref_s, A68G_FALSE);
1161        } else {
1162          (sym++)[0] = POINT_CHAR;
1163          sym[0] = NULL_CHAR;
1164          *att = POINT_SYMBOL;
1165        }
1166      }
1167    } else if (IS_DIGIT (c)) {
1168  // Something that begins with a digit - L INT denotation, L REAL denotation.
1169      SCAN_DIGITS (c);
1170      if (is_decimal_point (ref_l, ref_s, &c)) {
1171        c = next_char (ref_l, ref_s, A68G_TRUE);
1172        if (is_exp_char (ref_l, ref_s, &c)) {
1173          (sym++)[0] = POINT_CHAR;
1174          (sym++)[0] = '0';
1175          SCAN_EXPONENT_PART (c);
1176          *att = REAL_DENOTATION;
1177        } else {
1178          (sym++)[0] = POINT_CHAR;
1179          SCAN_DIGITS (c);
1180          if (is_exp_char (ref_l, ref_s, &c)) {
1181            SCAN_EXPONENT_PART (c);
1182          }
1183          *att = REAL_DENOTATION;
1184        }
1185      } else if (is_exp_char (ref_l, ref_s, &c)) {
1186        SCAN_EXPONENT_PART (c);
1187        *att = REAL_DENOTATION;
1188      } else if (is_radix_char (ref_l, ref_s, &c)) {
1189        (sym++)[0] = c;
1190        c = next_char (ref_l, ref_s, A68G_TRUE);
1191        if (OPTION_STROPPING (&A68G_JOB) == UPPER_STROPPING) {
1192          while (IS_DIGIT (c) || strchr ("abcdef", c) != NO_TEXT) {
1193            (sym++)[0] = c;
1194            c = next_char (ref_l, ref_s, A68G_TRUE);
1195          }
1196        } else {
1197          while (IS_DIGIT (c) || strchr ("ABCDEF", c) != NO_TEXT) {
1198            (sym++)[0] = c;
1199            c = next_char (ref_l, ref_s, A68G_TRUE);
1200          }
1201        }
1202        *att = BITS_DENOTATION;
1203      } else {
1204        *att = INT_DENOTATION;
1205      }
1206      sym[0] = NULL_CHAR;
1207    } else if (c == QUOTE_CHAR) {
1208  // STRING denotation.
1209      BOOL_T stop = A68G_FALSE;
1210      while (!stop) {
1211        c = next_char (ref_l, ref_s, A68G_FALSE);
1212        while (c != QUOTE_CHAR && c != STOP_CHAR) {
1213          SCAN_ERROR (EOL (c), *start_l, *start_c, ERROR_LONG_STRING);
1214          (sym++)[0] = c;
1215          c = next_char (ref_l, ref_s, A68G_FALSE);
1216        }
1217        SCAN_ERROR (*ref_l == NO_LINE, *start_l, *start_c, ERROR_UNTERMINATED_STRING);
1218        c = next_char (ref_l, ref_s, A68G_FALSE);
1219        if (c == QUOTE_CHAR) {
1220          (sym++)[0] = QUOTE_CHAR;
1221        } else {
1222          stop = A68G_TRUE;
1223        }
1224      }
1225      sym[0] = NULL_CHAR;
1226      *att = (in_format ? LITERAL : ROW_CHAR_DENOTATION);
1227    } else if (strchr ("#$()[]{},;@", c) != NO_TEXT) {
1228  // Single character symbols.
1229      (sym++)[0] = c;
1230      (void) next_char (ref_l, ref_s, A68G_FALSE);
1231      sym[0] = NULL_CHAR;
1232      *att = 0;
1233    } else if (c == '|') {
1234  // Bar.
1235      (sym++)[0] = c;
1236      c = next_char (ref_l, ref_s, A68G_FALSE);
1237      if (c == ':') {
1238        (sym++)[0] = c;
1239        (void) next_char (ref_l, ref_s, A68G_FALSE);
1240      }
1241      sym[0] = NULL_CHAR;
1242      *att = 0;
1243    } else if (c == '!' && OPTION_STROPPING (&A68G_JOB) == QUOTE_STROPPING) {
1244  // Bar, will be replaced with modern variant.
1245  // For this reason ! is not a MONAD with quote-stropping.
1246      (sym++)[0] = '|';
1247      c = next_char (ref_l, ref_s, A68G_FALSE);
1248      if (c == ':') {
1249        (sym++)[0] = c;
1250        (void) next_char (ref_l, ref_s, A68G_FALSE);
1251      }
1252      sym[0] = NULL_CHAR;
1253      *att = 0;
1254    } else if (c == ':') {
1255  // Colon, semicolon, IS, ISNT.
1256      (sym++)[0] = c;
1257      c = next_char (ref_l, ref_s, A68G_FALSE);
1258      if (c == '=') {
1259        (sym++)[0] = c;
1260        if ((c = next_char (ref_l, ref_s, A68G_FALSE)) == ':') {
1261          (sym++)[0] = c;
1262          c = next_char (ref_l, ref_s, A68G_FALSE);
1263        }
1264      } else if (c == '/') {
1265        (sym++)[0] = c;
1266        if ((c = next_char (ref_l, ref_s, A68G_FALSE)) == '=') {
1267          (sym++)[0] = c;
1268          if ((c = next_char (ref_l, ref_s, A68G_FALSE)) == ':') {
1269            (sym++)[0] = c;
1270            c = next_char (ref_l, ref_s, A68G_FALSE);
1271          }
1272        }
1273      } else if (c == ':') {
1274        (sym++)[0] = c;
1275        if ((c = next_char (ref_l, ref_s, A68G_FALSE)) == '=') {
1276          (sym++)[0] = c;
1277        }
1278      }
1279      sym[0] = NULL_CHAR;
1280      *att = 0;
1281    } else if (c == '=') {
1282  // Operator starting with "=".
1283      char *scanned = sym;
1284      (sym++)[0] = c;
1285      c = next_char (ref_l, ref_s, A68G_FALSE);
1286      if (strchr (NOMADS, c) != NO_TEXT) {
1287        (sym++)[0] = c;
1288        c = next_char (ref_l, ref_s, A68G_FALSE);
1289      }
1290      if (c == '=') {
1291        (sym++)[0] = c;
1292        if (next_char (ref_l, ref_s, A68G_FALSE) == ':') {
1293          (sym++)[0] = ':';
1294          c = next_char (ref_l, ref_s, A68G_FALSE);
1295          if (strlen (sym) < 4 && c == '=') {
1296            (sym++)[0] = '=';
1297            (void) next_char (ref_l, ref_s, A68G_FALSE);
1298          }
1299        }
1300      } else if (c == ':') {
1301        (sym++)[0] = c;
1302        sym[0] = NULL_CHAR;
1303        if (next_char (ref_l, ref_s, A68G_FALSE) == '=') {
1304          (sym++)[0] = '=';
1305          (void) next_char (ref_l, ref_s, A68G_FALSE);
1306        } else {
1307          SCAN_ERROR (!(strcmp (scanned, "=:") == 0 || strcmp (scanned, "==:") == 0), *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1308        }
1309      }
1310      sym[0] = NULL_CHAR;
1311      if (strcmp (scanned, "=") == 0) {
1312        *att = EQUALS_SYMBOL;
1313      } else {
1314        *att = OPERATOR;
1315      }
1316    } else if (strchr (MONADS, c) != NO_TEXT || strchr (NOMADS, c) != NO_TEXT) {
1317  // Operator.
1318      char *scanned = sym;
1319      (sym++)[0] = c;
1320      c = next_char (ref_l, ref_s, A68G_FALSE);
1321      if (strchr (NOMADS, c) != NO_TEXT) {
1322        (sym++)[0] = c;
1323        c = next_char (ref_l, ref_s, A68G_FALSE);
1324      }
1325      if (c == '=') {
1326        (sym++)[0] = c;
1327        if (next_char (ref_l, ref_s, A68G_FALSE) == ':') {
1328          (sym++)[0] = ':';
1329          c = next_char (ref_l, ref_s, A68G_FALSE);
1330          if (strlen (scanned) < 4 && c == '=') {
1331            (sym++)[0] = '=';
1332            (void) next_char (ref_l, ref_s, A68G_FALSE);
1333          }
1334        }
1335      } else if (c == ':') {
1336        (sym++)[0] = c;
1337        sym[0] = NULL_CHAR;
1338        if (next_char (ref_l, ref_s, A68G_FALSE) == '=') {
1339          (sym++)[0] = '=';
1340          sym[0] = NULL_CHAR;
1341          (void) next_char (ref_l, ref_s, A68G_FALSE);
1342        } else {
1343          SCAN_ERROR (strcmp (&(scanned[1]), "=:") != 0, *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1344        }
1345      }
1346      sym[0] = NULL_CHAR;
1347      *att = OPERATOR;
1348    } else {
1349  // Afuuus ... strange characters!.
1350      unworthy (*start_l, *start_c, (int) c);
1351    }
1352  }
1353  
1354  //! @brief Whether att opens an embedded clause.
1355  
1356  BOOL_T open_nested_clause (int att)
1357  {
1358    switch (att) {
1359    case OPEN_SYMBOL:
1360    case BEGIN_SYMBOL:
1361    case PAR_SYMBOL:
1362    case IF_SYMBOL:
1363    case CASE_SYMBOL:
1364    case FOR_SYMBOL:
1365    case FROM_SYMBOL:
1366    case BY_SYMBOL:
1367    case TO_SYMBOL:
1368    case DOWNTO_SYMBOL:
1369    case WHILE_SYMBOL:
1370    case DO_SYMBOL:
1371    case SUB_SYMBOL:
1372    case ACCO_SYMBOL: {
1373        return A68G_TRUE;
1374      }
1375    }
1376    return A68G_FALSE;
1377  }
1378  
1379  //! @brief Whether att closes an embedded clause.
1380  
1381  BOOL_T close_nested_clause (int att)
1382  {
1383    switch (att) {
1384    case CLOSE_SYMBOL:
1385    case END_SYMBOL:
1386    case FI_SYMBOL:
1387    case ESAC_SYMBOL:
1388    case OD_SYMBOL:
1389    case BUS_SYMBOL:
1390    case OCCA_SYMBOL: {
1391        return A68G_TRUE;
1392      }
1393    }
1394    return A68G_FALSE;
1395  }
1396  
1397  //! @brief Cast a string to lower case.
1398  
1399  void make_lower_case (char *p)
1400  {
1401    for (; p != NO_TEXT && p[0] != NULL_CHAR; p++) {
1402      p[0] = (char) TO_LOWER (p[0]);
1403    }
1404  }
1405  
1406  //! @brief Construct a linear list of tokens.
1407  
1408  void tokenise_source (NODE_T ** root, int level, BOOL_T in_format, LINE_T ** l, char **s, LINE_T ** start_l, char **start_c)
1409  {
1410    char *lpr = NO_TEXT;
1411    int lprt = 0;
1412    while (l != NO_REF && !A68G_PARSER (stop_scanner)) {
1413      int att = 0;
1414      get_next_token (in_format, l, s, start_l, start_c, &att);
1415      if (A68G_PARSER (scan_buf)[0] == STOP_CHAR) {
1416        A68G_PARSER (stop_scanner) = A68G_TRUE;
1417      } else if (strlen (A68G_PARSER (scan_buf)) > 0 || att == ROW_CHAR_DENOTATION || att == LITERAL) {
1418        KEYWORD_T *kw;
1419        char *c = NO_TEXT;
1420        BOOL_T make_node = A68G_TRUE;
1421        char *trailing = NO_TEXT;
1422        if (att != IDENTIFIER) {
1423          kw = find_keyword (A68G (top_keyword), A68G_PARSER (scan_buf));
1424        } else {
1425          kw = NO_KEYWORD;
1426        }
1427        if (!(kw != NO_KEYWORD && att != ROW_CHAR_DENOTATION)) {
1428          if (att == IDENTIFIER) {
1429            make_lower_case (A68G_PARSER (scan_buf));
1430          }
1431          if (att != ROW_CHAR_DENOTATION && att != LITERAL) {
1432            size_t len = strlen (A68G_PARSER (scan_buf));
1433            while (len >= 1 && A68G_PARSER (scan_buf)[len - 1] == '_') {
1434              trailing = "_";
1435              A68G_PARSER (scan_buf)[len - 1] = NULL_CHAR;
1436              len--;
1437            }
1438          }
1439          c = TEXT (add_token (&A68G (top_token), A68G_PARSER (scan_buf)));
1440        } else {
1441          if (IS (kw, TO_SYMBOL)) {
1442  // Merge GO and TO to GOTO.
1443            if (*root != NO_NODE && IS (*root, GO_SYMBOL)) {
1444              ATTRIBUTE (*root) = GOTO_SYMBOL;
1445              NSYMBOL (*root) = TEXT (find_keyword (A68G (top_keyword), "GOTO"));
1446              make_node = A68G_FALSE;
1447            } else {
1448              att = ATTRIBUTE (kw);
1449              c = TEXT (kw);
1450            }
1451          } else {
1452            if (att == 0 || att == BOLD_TAG) {
1453              att = ATTRIBUTE (kw);
1454            }
1455            c = TEXT (kw);
1456  // Handle pragments.
1457            if (att == STYLE_II_COMMENT_SYMBOL || att == STYLE_I_COMMENT_SYMBOL || att == BOLD_COMMENT_SYMBOL) {
1458              char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1459              if (lpr == NO_TEXT || strlen (lpr) == 0) {
1460                lpr = nlpr;
1461              } else {
1462                char *stale = lpr;
1463                lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1464                a68g_free (nlpr);
1465                a68g_free (stale);
1466              }
1467              lprt = att;
1468              make_node = A68G_FALSE;
1469            } else if (att == STYLE_I_PRAGMAT_SYMBOL || att == BOLD_PRAGMAT_SYMBOL) {
1470              char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1471              if (lpr == NO_TEXT || strlen (lpr) == 0) {
1472                lpr = nlpr;
1473              } else {
1474                char *stale = lpr;
1475                lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1476                a68g_free (nlpr);
1477                a68g_free (stale);
1478              }
1479              lprt = att;
1480              if (!A68G_PARSER (stop_scanner)) {
1481                (void) set_options (OPTION_LIST (&A68G_JOB), A68G_FALSE);
1482                make_node = A68G_FALSE;
1483              }
1484            }
1485          }
1486        }
1487  // Add token to the tree.
1488        if (make_node) {
1489          NODE_T *q = new_node ();
1490          INFO (q) = new_node_info ();
1491          switch (att) {
1492          case ASSIGN_SYMBOL:
1493          case END_SYMBOL:
1494          case ESAC_SYMBOL:
1495          case OD_SYMBOL:
1496          case OF_SYMBOL:
1497          case FI_SYMBOL:
1498          case CLOSE_SYMBOL:
1499          case BUS_SYMBOL:
1500          case COLON_SYMBOL:
1501          case COMMA_SYMBOL:
1502          case DOTDOT_SYMBOL:
1503          case SEMI_SYMBOL: {
1504              GINFO (q) = NO_GINFO;
1505              break;
1506            }
1507          default: {
1508              GINFO (q) = new_genie_info ();
1509              break;
1510            }
1511          }
1512          STATUS (q) = OPTION_NODEMASK (&A68G_JOB);
1513          LINE (INFO (q)) = *start_l;
1514          CHAR_IN_LINE (INFO (q)) = *start_c;
1515          PRIO (INFO (q)) = 0;
1516          PROCEDURE_LEVEL (INFO (q)) = 0;
1517          ATTRIBUTE (q) = att;
1518          NSYMBOL (q) = c;
1519          PREVIOUS (q) = *root;
1520          SUB (q) = NEXT (q) = NO_NODE;
1521          TABLE (q) = NO_TABLE;
1522          MOID (q) = NO_MOID;
1523          TAX (q) = NO_TAG;
1524          if (lpr != NO_TEXT) {
1525            NPRAGMENT (q) = lpr;
1526            NPRAGMENT_TYPE (q) = lprt;
1527            lpr = NO_TEXT;
1528            lprt = 0;
1529          }
1530          if (*root != NO_NODE) {
1531            NEXT (*root) = q;
1532          }
1533          if (TOP_NODE (&A68G_JOB) == NO_NODE) {
1534            TOP_NODE (&A68G_JOB) = q;
1535          }
1536          *root = q;
1537          if (trailing != NO_TEXT) {
1538            diagnostic (A68G_WARNING, q, WARNING_TRAILING, trailing, att);
1539          }
1540        }
1541  // Redirection in tokenising formats. The scanner is a recursive-descent type as
1542  // to know when it scans a format text and when not. 
1543        if (in_format && att == FORMAT_DELIMITER_SYMBOL) {
1544          return;
1545        } else if (!in_format && att == FORMAT_DELIMITER_SYMBOL) {
1546          tokenise_source (root, level + 1, A68G_TRUE, l, s, start_l, start_c);
1547        } else if (in_format && open_nested_clause (att)) {
1548          NODE_T *z = PREVIOUS (*root);
1549          if (z != NO_NODE && is_one_of (z, FORMAT_ITEM_N, FORMAT_ITEM_G, FORMAT_ITEM_H, FORMAT_ITEM_F, STOP)) {
1550            tokenise_source (root, level, A68G_FALSE, l, s, start_l, start_c);
1551          } else if (att == OPEN_SYMBOL) {
1552            ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1553          } else if (OPTION_BRACKETS (&A68G_JOB) && att == SUB_SYMBOL) {
1554            ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1555          } else if (OPTION_BRACKETS (&A68G_JOB) && att == ACCO_SYMBOL) {
1556            ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1557          }
1558        } else if (!in_format && level > 0 && open_nested_clause (att)) {
1559          tokenise_source (root, level + 1, A68G_FALSE, l, s, start_l, start_c);
1560        } else if (!in_format && level > 0 && close_nested_clause (att)) {
1561          return;
1562        } else if (in_format && att == CLOSE_SYMBOL) {
1563          ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1564        } else if (OPTION_BRACKETS (&A68G_JOB) && in_format && att == BUS_SYMBOL) {
1565          ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1566        } else if (OPTION_BRACKETS (&A68G_JOB) && in_format && att == OCCA_SYMBOL) {
1567          ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1568        }
1569      }
1570    }
1571  }
1572  
1573  //! @brief Tokenise source file, build initial syntax tree.
1574  
1575  BOOL_T lexical_analyser (void)
1576  {
1577    LINE_T *l = NO_LINE, *start_l = NO_LINE;
1578    char *s = NO_TEXT, *start_c = NO_TEXT;
1579    NODE_T *root = NO_NODE;
1580    A68G_PARSER (scan_buf) = NO_TEXT;
1581    A68G_PARSER (max_scan_buf_length) = A68G_PARSER (source_file_size) = get_source_size ();
1582  // Errors in file?.
1583    if (A68G_PARSER (max_scan_buf_length) == 0) {
1584      return A68G_FALSE;
1585    }
1586    if (OPTION_RUN_SCRIPT (&A68G_JOB)) {
1587      A68G_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68G_PARSER (max_scan_buf_length)));
1588      if (!read_script_file ()) {
1589        return A68G_FALSE;
1590      }
1591    } else {
1592      A68G_PARSER (max_scan_buf_length) += KILOBYTE;       // for the environ, more than enough
1593      A68G_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) A68G_PARSER (max_scan_buf_length));
1594  // Errors in file?.
1595      if (!read_source_file ()) {
1596        return A68G_FALSE;
1597      }
1598    }
1599  // Start tokenising.
1600    A68G_PARSER (read_error) = A68G_FALSE;
1601    A68G_PARSER (stop_scanner) = A68G_FALSE;
1602    if ((l = TOP_LINE (&A68G_JOB)) != NO_LINE) {
1603      s = STRING (l);
1604    }
1605    tokenise_source (&root, 0, A68G_FALSE, &l, &s, &start_l, &start_c);
1606    return A68G_TRUE;
1607  }
     


© 2002-2025 J.M. van der Veer (jmvdveer@xs4all.nl)