parser.c

     
   1  //! @file parser.c
   2  //! @author J. Marcel van der Veer
   3  
   4  //! @section Copyright
   5  //!
   6  //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
   7  //! Copyright 2001-2025 J. Marcel van der Veer [algol68g@xs4all.nl].
   8  
   9  //! @section License
  10  //!
  11  //! This program is free software; you can redistribute it and/or modify it 
  12  //! under the terms of the GNU General Public License as published by the 
  13  //! Free Software Foundation; either version 3 of the License, or 
  14  //! (at your option) any later version.
  15  //!
  16  //! This program is distributed in the hope that it will be useful, but 
  17  //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  18  //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
  19  //! more details. You should have received a copy of the GNU General Public 
  20  //! License along with this program. If not, see [http://www.gnu.org/licenses/].
  21  
  22  //! @section Synopsis 
  23  //!
  24  //! Mailloux-type Algol 68 parser driver.
  25  
  26  // The Algol 68 grammar is a two level (Van Wijngaarden, "VW") grammar 
  27  // that incorporates, as syntactical rules, the semantical rules in 
  28  // other languages. Examples are correct use of symbols, modes and scope.
  29  // 
  30  // This code constitutes an effective "VW Algol 68 parser". A pragmatic
  31  // approach was chosen since in the early days of Algol 68, many "ab initio" 
  32  // implementations failed, probably because techniques to parse a language
  33  // like Algol 68 had yet to be invented.
  34  // 
  35  // This is a Mailloux-type parser, in the sense that it scans a "phrase" for
  36  // definitions needed for parsing. Algol 68 allows for tags to be used
  37  // before they are defined, which gives freedom in top-down programming.
  38  // 
  39  //    B. J. Mailloux. On the implementation of Algol 68.
  40  //    Thesis, Universiteit van Amsterdam (Mathematisch Centrum) [1968].
  41  // 
  42  // Technically, Mailloux's approach renders the two-level grammar LALR.
  43  // 
  44  // First part of the parser is the scanner. The source file is read,
  45  // is tokenised, and if needed a refinement preprocessor elaborates a stepwise
  46  // refined program. The result is a linear list of tokens that is input for the
  47  // parser, that will transform the linear list into a syntax tree.
  48  // 
  49  // Algol68G tokenises all symbols before the bottom-up parser is invoked. 
  50  // This means that scanning does not use information from the parser.
  51  // The scanner does of course some rudimentary parsing. Format texts can have
  52  // enclosed clauses in them, so we record information in a stack as to know
  53  // what is being scanned. Also, the refinement preprocessor implements a
  54  // (trivial) grammar.
  55  // 
  56  // The scanner supports two stropping regimes: "bold" (or "upper") and "quote". 
  57  // Examples of both:
  58  // 
  59  //    bold stropping: BEGIN INT i = 1, j = 1; print (i + j) END
  60  // 
  61  //    quote stropping: 'BEGIN' 'INT' I = 1, J = 1; PRINT (I + J) 'END'
  62  // 
  63  // Quote stropping was used frequently in the (excusez-le-mot) punch-card age.
  64  // Hence, bold stropping is the default. There also existed point stropping, 
  65  // but that has not been implemented here.
  66  // 
  67  // Next part of the parser is a recursive-descent type to check parenthesis.
  68  // Also a first set-up is made of symbol tables, needed by the bottom-up parser.
  69  // Next part is the bottom-up parser, that parses without knowing modes while
  70  // parsing and reducing. It can therefore not exchange "[]" with "()" as was
  71  // blessed by the Revised Report. This is solved by treating CALL and SLICE as
  72  // equivalent for the moment and letting the mode checker sort it out later.
  73  // 
  74  // Parsing progresses in various phases to avoid spurious diagnostics from a
  75  // recovering parser. Every phase "tightens" the grammar more.
  76  // An error in any phase makes the parser quit when that phase ends.
  77  // The parser is forgiving in case of superfluous semicolons.
  78  // 
  79  // These are the parser phases:
  80  // 
  81  //  (1) Parenthesis are checked to see whether they match. Then, a top-down 
  82  //      parser determines the basic-block structure of the program
  83  //      so symbol tables can be set up that the bottom-up parser will consult
  84  //      as you can define things before they are applied.
  85  // 
  86  //  (2) A bottom-up parser resolves the structure of the program.
  87  // 
  88  //  (3) After the symbol tables have been finalised, a small rearrangement of the
  89  //      tree may be required where JUMPs have no GOTO. This leads to the
  90  //      non-standard situation that JUMPs without GOTO can have the syntactic
  91  //      position of a PRIMARY, SECONDARY or TERTIARY. The bottom-up parser also
  92  //      does not check VICTAL correctness of declarers. This is done separately. 
  93  //      Also structure of format texts is checked separately.
  94  // 
  95  // The parser sets up symbol tables and populates them as far as needed to parse
  96  // the source. After the bottom-up parser terminates succesfully, the symbol tables
  97  // are completed.
  98  // 
  99  //  (4) Next, modes are collected and rules for well-formedness and structural 
 100  //      equivalence are applied. Then the symbol-table is completed now moids are 
 101  //      all known.
 102  // 
 103  //  (5) Next phases are the mode checker and coercion inserter. The syntax tree is 
 104  //      traversed to determine and check all modes, and to select operators. Then 
 105  //      the tree is traversed again to insert coercions.
 106  // 
 107  //  (6) A static scope checker detects where objects are transported out of scope.
 108  //      At run time, a dynamic scope checker will check that what the static scope 
 109  //      checker cannot see.
 110  
 111  #include "a68g.h"
 112  #include "a68g-parser.h"
 113  #include "a68g-mp.h"
 114  #include "a68g-postulates.h"
 115  #include "a68g-prelude.h"
 116  
 117  //! @brief First initialisations.
 118  
 119  void init_before_tokeniser (void)
 120  {
 121  // Heap management set-up.
 122    errno = 0;
 123    init_heap ();
 124    A68G (top_keyword) = NO_KEYWORD;
 125    A68G (top_token) = NO_TOKEN;
 126    TOP_NODE (&A68G_JOB) = NO_NODE;
 127    TOP_MOID (&A68G_JOB) = NO_MOID;
 128    TOP_LINE (&A68G_JOB) = NO_LINE;
 129    STANDENV_MOID (&A68G_JOB) = NO_MOID;
 130    set_up_tables ();
 131  // Various initialisations.
 132    ERROR_COUNT (&A68G_JOB) = WARNING_COUNT (&A68G_JOB) = 0;
 133    ABEND (errno != 0, ERROR_ALLOCATION, __func__);
 134    errno = 0;
 135  }
 136  
 137  void init_parser (void)
 138  {
 139    A68G_PARSER (stop_scanner) = A68G_FALSE;
 140    A68G_PARSER (read_error) = A68G_FALSE;
 141    A68G_PARSER (no_preprocessing) = A68G_FALSE;
 142  }
 143  
 144  //! @brief Is_ref_refety_flex.
 145  
 146  BOOL_T is_ref_refety_flex (MOID_T * m)
 147  {
 148    if (IS_REF_FLEX (m)) {
 149      return A68G_TRUE;
 150    } else if (IS_REF (m)) {
 151      return is_ref_refety_flex (SUB (m));
 152    } else {
 153      return A68G_FALSE;
 154    }
 155  }
 156  
 157  //! @brief Count number of operands in operator parameter list.
 158  
 159  int count_operands (NODE_T * p)
 160  {
 161    if (p != NO_NODE) {
 162      if (IS (p, DECLARER)) {
 163        return count_operands (NEXT (p));
 164      } else if (IS (p, COMMA_SYMBOL)) {
 165        return 1 + count_operands (NEXT (p));
 166      } else {
 167        return count_operands (NEXT (p)) + count_operands (SUB (p));
 168      }
 169    } else {
 170      return 0;
 171    }
 172  }
 173  
 174  //! @brief Count formal bounds in declarer in tree.
 175  
 176  int count_formal_bounds (NODE_T * p)
 177  {
 178    if (p == NO_NODE) {
 179      return 0;
 180    } else {
 181      if (IS (p, COMMA_SYMBOL)) {
 182        return 1;
 183      } else {
 184        return count_formal_bounds (NEXT (p)) + count_formal_bounds (SUB (p));
 185      }
 186    }
 187  }
 188  
 189  //! @brief Count pictures.
 190  
 191  void count_pictures (NODE_T * p, int *k)
 192  {
 193    for (; p != NO_NODE; FORWARD (p)) {
 194      if (IS (p, PICTURE)) {
 195        (*k)++;
 196      }
 197      count_pictures (SUB (p), k);
 198    }
 199  }
 200  
 201  //! @brief Whether token cannot follow semicolon or EXIT.
 202  
 203  BOOL_T is_semicolon_less (NODE_T * p)
 204  {
 205    switch (ATTRIBUTE (p)) {
 206    case BUS_SYMBOL:
 207    case CLOSE_SYMBOL:
 208    case END_SYMBOL:
 209    case SEMI_SYMBOL:
 210    case EXIT_SYMBOL:
 211    case THEN_BAR_SYMBOL:
 212    case ELSE_BAR_SYMBOL:
 213    case THEN_SYMBOL:
 214    case ELIF_SYMBOL:
 215    case ELSE_SYMBOL:
 216    case FI_SYMBOL:
 217    case IN_SYMBOL:
 218    case OUT_SYMBOL:
 219    case OUSE_SYMBOL:
 220    case ESAC_SYMBOL:
 221    case EDOC_SYMBOL:
 222    case OCCA_SYMBOL:
 223    case OD_SYMBOL:
 224    case UNTIL_SYMBOL: {
 225        return A68G_TRUE;
 226      }
 227    default: {
 228        return A68G_FALSE;
 229      }
 230    }
 231  }
 232  
 233  //! @brief Whether formal bounds.
 234  
 235  BOOL_T is_formal_bounds (NODE_T * p)
 236  {
 237    if (p == NO_NODE) {
 238      return A68G_TRUE;
 239    } else {
 240      switch (ATTRIBUTE (p)) {
 241      case OPEN_SYMBOL:
 242      case CLOSE_SYMBOL:
 243      case SUB_SYMBOL:
 244      case BUS_SYMBOL:
 245      case COMMA_SYMBOL:
 246      case COLON_SYMBOL:
 247      case DOTDOT_SYMBOL:
 248      case INT_DENOTATION:
 249      case IDENTIFIER:
 250      case OPERATOR: {
 251          return (BOOL_T) (is_formal_bounds (SUB (p)) && is_formal_bounds (NEXT (p)));
 252        }
 253      default: {
 254          return A68G_FALSE;
 255        }
 256      }
 257    }
 258  }
 259  
 260  //! @brief Whether token terminates a unit.
 261  
 262  BOOL_T is_unit_terminator (NODE_T * p)
 263  {
 264    switch (ATTRIBUTE (p)) {
 265    case BUS_SYMBOL:
 266    case CLOSE_SYMBOL:
 267    case END_SYMBOL:
 268    case SEMI_SYMBOL:
 269    case EXIT_SYMBOL:
 270    case COMMA_SYMBOL:
 271    case THEN_BAR_SYMBOL:
 272    case ELSE_BAR_SYMBOL:
 273    case THEN_SYMBOL:
 274    case ELIF_SYMBOL:
 275    case ELSE_SYMBOL:
 276    case FI_SYMBOL:
 277    case IN_SYMBOL:
 278    case OUT_SYMBOL:
 279    case OUSE_SYMBOL:
 280    case ESAC_SYMBOL:
 281    case EDOC_SYMBOL:
 282    case OCCA_SYMBOL: {
 283        return A68G_TRUE;
 284      }
 285    }
 286    return A68G_FALSE;
 287  }
 288  
 289  //! @brief Whether token is a unit-terminator in a loop clause.
 290  
 291  BOOL_T is_loop_keyword (NODE_T * p)
 292  {
 293    switch (ATTRIBUTE (p)) {
 294    case FOR_SYMBOL:
 295    case FROM_SYMBOL:
 296    case BY_SYMBOL:
 297    case TO_SYMBOL:
 298    case DOWNTO_SYMBOL:
 299    case WHILE_SYMBOL:
 300    case DO_SYMBOL: {
 301        return A68G_TRUE;
 302      }
 303    }
 304    return A68G_FALSE;
 305  }
 306  
 307  //! @brief Get good attribute.
 308  
 309  int get_good_attribute (NODE_T * p)
 310  {
 311    switch (ATTRIBUTE (p)) {
 312    case UNIT:
 313    case TERTIARY:
 314    case SECONDARY:
 315    case PRIMARY:
 316    case ENCLOSED_CLAUSE: {
 317        return get_good_attribute (SUB (p));
 318      }
 319    case DECLARER: {
 320      if (IS (SUB (p), INDICANT)) {
 321        if (SUB_SUB (p) != NO_NODE) {
 322          return ATTRIBUTE (SUB_SUB (p));
 323        } else {
 324          return INDICANT;
 325        }
 326      } else {
 327        return DECLARER;
 328      }
 329    }
 330    case DECLARATION_LIST: {
 331      if (SUB (p) != NO_NODE) {
 332        return ATTRIBUTE (SUB (p));
 333      } else {
 334        return ATTRIBUTE (p);
 335      }
 336    }
 337    default: {
 338        return ATTRIBUTE (p);
 339      }
 340    }
 341  }
 342  
 343  //! @brief Intelligible diagnostic from syntax tree branch.
 344  
 345  char *phrase_to_text (NODE_T * p, NODE_T ** w)
 346  {
 347  #define MAX_TERMINALS 10
 348    int count = 0;
 349    BOOL_T put_space = A68G_FALSE;
 350    static BUFFER buffer;
 351    buffer[0] = NULL_CHAR;
 352    while (p != NO_NODE && count < MAX_TERMINALS) {
 353      if (LINE_NUMBER (p) > 0) {
 354        int gatt = get_good_attribute (p);
 355        char *z = non_terminal_string (A68G (input_line), gatt);
 356  // Where to put the error message? 
 357  // The actual content of a  diagnostic is not as important 
 358  // as accurately indicating *were* the problem is! 
 359        if (w != NO_REF) {
 360          if (count == 0 || (*w) == NO_NODE) {
 361            *w = p;
 362          } else if (dont_mark_here (*w)) {
 363            *w = p;
 364          }
 365        }
 366        switch (gatt) {
 367          case SEMI_SYMBOL:
 368          case COMMA_SYMBOL:
 369          case CLOSE_SYMBOL:
 370          case BUS_SYMBOL: {
 371            // Next symbol may have leading space.
 372            put_space = A68G_TRUE;
 373            break;
 374          }
 375          case OPEN_SYMBOL:
 376          case SUB_SYMBOL: {
 377            if (put_space) {
 378              a68g_bufcat (buffer, " ", BUFFER_SIZE);
 379            }
 380            // Next symbol has no leading space.
 381            put_space = A68G_FALSE;
 382            break;              
 383          }
 384          default: {
 385            if (put_space) {
 386              a68g_bufcat (buffer, " ", BUFFER_SIZE);
 387            }
 388            // Next symbol may have leading space.
 389            put_space = A68G_TRUE;
 390            break;
 391          }
 392        }
 393  // Attribute or symbol.
 394        if (z != NO_TEXT) {
 395          switch (gatt) {
 396            case DENOTATION:
 397            case IDENTIFIER: {
 398              ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s '%s'", z, NSYMBOL (p)) >= 0);
 399              break;
 400            }
 401            case OPERATOR: {
 402              ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s", NSYMBOL (p)) >= 0);
 403              break;
 404            }
 405            case DECLARER:
 406            case COLON_SYMBOL: {
 407              ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s", z) >= 0);
 408              break;
 409            }
 410            default: {
 411              if (SUB (p) == NO_NODE && NSYMBOL (p) != NO_TEXT) {
 412                ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s", NSYMBOL (p)) >= 0);
 413              } else {
 414                ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s", z) >= 0);
 415              }
 416              break;
 417            }
 418          }
 419        } else if (NSYMBOL (p) != NO_TEXT) {
 420          ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s", NSYMBOL (p)) >= 0);
 421        } else {
 422          ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "...") >= 0);
 423        }
 424        a68g_bufcat (buffer, A68G (edit_line), BUFFER_SIZE);
 425        count++;
 426      }
 427      FORWARD (p);
 428    }
 429    if (p != NO_NODE && count == MAX_TERMINALS) {
 430      a68g_bufcat (buffer, " ... ", BUFFER_SIZE);
 431    }
 432    return buffer;
 433  }
 434  
 435  //! @brief Preferably don't put intelligible diagnostic here.
 436  
 437  BOOL_T dont_mark_here (NODE_T * p)
 438  {
 439    switch (ATTRIBUTE (p)) {
 440    case ACCO_SYMBOL:
 441    case ALT_DO_SYMBOL:
 442    case ALT_EQUALS_SYMBOL:
 443    case ANDF_SYMBOL:
 444    case ASSERT_SYMBOL:
 445    case ASSIGN_SYMBOL:
 446    case ASSIGN_TO_SYMBOL:
 447    case AT_SYMBOL:
 448    case BEGIN_SYMBOL:
 449    case BITS_SYMBOL:
 450    case BOLD_COMMENT_SYMBOL:
 451    case BOLD_PRAGMAT_SYMBOL:
 452    case BOOL_SYMBOL:
 453    case BUS_SYMBOL:
 454    case BY_SYMBOL:
 455    case BYTES_SYMBOL:
 456    case CASE_SYMBOL:
 457    case CHANNEL_SYMBOL:
 458    case CHAR_SYMBOL:
 459    case CLOSE_SYMBOL:
 460    case CODE_SYMBOL:
 461    case COLON_SYMBOL:
 462    case COLUMN_SYMBOL:
 463    case COMMA_SYMBOL:
 464    case COMPLEX_SYMBOL:
 465    case COMPL_SYMBOL:
 466    case DIAGONAL_SYMBOL:
 467    case DO_SYMBOL:
 468    case DOTDOT_SYMBOL:
 469    case DOWNTO_SYMBOL:
 470    case EDOC_SYMBOL:
 471    case ELIF_SYMBOL:
 472    case ELSE_BAR_SYMBOL:
 473    case ELSE_SYMBOL:
 474    case EMPTY_SYMBOL:
 475    case END_SYMBOL:
 476    case ENVIRON_SYMBOL:
 477    case EQUALS_SYMBOL:
 478    case ESAC_SYMBOL:
 479    case EXIT_SYMBOL:
 480    case FALSE_SYMBOL:
 481    case FILE_SYMBOL:
 482    case FI_SYMBOL:
 483    case FLEX_SYMBOL:
 484    case FORMAT_DELIMITER_SYMBOL:
 485    case FORMAT_SYMBOL:
 486    case FOR_SYMBOL:
 487    case FROM_SYMBOL:
 488    case GO_SYMBOL:
 489    case GOTO_SYMBOL:
 490    case HEAP_SYMBOL:
 491    case IF_SYMBOL:
 492    case IN_SYMBOL:
 493    case INT_SYMBOL:
 494    case ISNT_SYMBOL:
 495    case IS_SYMBOL:
 496    case LOC_SYMBOL:
 497    case LONG_SYMBOL:
 498    case MAIN_SYMBOL:
 499    case MODE_SYMBOL:
 500    case NIL_SYMBOL:
 501    case OCCA_SYMBOL:
 502    case OD_SYMBOL:
 503    case OF_SYMBOL:
 504    case OPEN_SYMBOL:
 505    case OP_SYMBOL:
 506    case ORF_SYMBOL:
 507    case OUSE_SYMBOL:
 508    case OUT_SYMBOL:
 509    case PAR_SYMBOL:
 510    case PIPE_SYMBOL:
 511    case POINT_SYMBOL:
 512    case PRIO_SYMBOL:
 513    case PROC_SYMBOL:
 514    case REAL_SYMBOL:
 515    case REF_SYMBOL:
 516    case ROWS_SYMBOL:
 517    case ROW_SYMBOL:
 518    case SEMA_SYMBOL:
 519    case SEMI_SYMBOL:
 520    case SHORT_SYMBOL:
 521    case SKIP_SYMBOL:
 522    case SOUND_SYMBOL:
 523    case STRING_SYMBOL:
 524    case STRUCT_SYMBOL:
 525    case STYLE_I_COMMENT_SYMBOL:
 526    case STYLE_II_COMMENT_SYMBOL:
 527    case STYLE_I_PRAGMAT_SYMBOL:
 528    case SUB_SYMBOL:
 529    case THEN_BAR_SYMBOL:
 530    case THEN_SYMBOL:
 531    case TO_SYMBOL:
 532    case TRANSPOSE_SYMBOL:
 533    case TRUE_SYMBOL:
 534    case UNION_SYMBOL:
 535    case UNTIL_SYMBOL:
 536    case VOID_SYMBOL:
 537    case WHILE_SYMBOL:
 538    case SERIAL_CLAUSE:
 539    case ENQUIRY_CLAUSE:
 540    case INITIALISER_SERIES:
 541    case DECLARATION_LIST: {
 542        return A68G_TRUE;
 543      }
 544    }
 545    return A68G_FALSE;
 546  }
 547  
 548  void a68g_parser (void)
 549  {
 550  // Tokeniser.
 551    FILE_SOURCE_OPENED (&A68G_JOB) = A68G_TRUE;
 552    announce_phase ("initialiser");
 553    A68G_PARSER (error_tag) = (TAG_T *) new_tag ();
 554    init_parser ();
 555    if (ERROR_COUNT (&A68G_JOB) == 0) {
 556      int frame_stack_size_2 = A68G (frame_stack_size);
 557      int expr_stack_size_2 = A68G (expr_stack_size);
 558      int heap_size_2 = A68G (heap_size);
 559      int handle_pool_size_2 = A68G (handle_pool_size);
 560      BOOL_T ok;
 561      announce_phase ("tokeniser");
 562      ok = lexical_analyser ();
 563      if (!ok || errno != 0) {
 564        diagnostics_to_terminal (TOP_LINE (&A68G_JOB), A68G_ALL_DIAGNOSTICS);
 565        return;
 566      }
 567  // Maybe the program asks for more memory through a PRAGMAT. We restart.
 568      if (frame_stack_size_2 != A68G (frame_stack_size) || expr_stack_size_2 != A68G (expr_stack_size) || heap_size_2 != A68G (heap_size) || handle_pool_size_2 != A68G (handle_pool_size)) {
 569        announce_phase ("tokeniser");
 570        free_syntax_tree (TOP_NODE (&A68G_JOB));
 571        discard_heap ();
 572        init_before_tokeniser ();
 573        SOURCE_SCAN (&A68G_JOB)++;
 574        ok = lexical_analyser ();
 575        verbosity ();
 576      }
 577      if (!ok || errno != 0) {
 578        diagnostics_to_terminal (TOP_LINE (&A68G_JOB), A68G_ALL_DIAGNOSTICS);
 579        return;
 580      }
 581      ASSERT (close (FILE_SOURCE_FD (&A68G_JOB)) == 0);
 582      FILE_SOURCE_OPENED (&A68G_JOB) = A68G_FALSE;
 583      prune_echoes (OPTION_LIST (&A68G_JOB));
 584      TREE_LISTING_SAFE (&A68G_JOB) = A68G_TRUE;
 585      int renum = 0;
 586      renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
 587    }
 588  // Now the default precision of LONG LONG modes is fixed.
 589    if (long_mp_digits () == 0) {
 590      set_long_mp_digits (LONG_LONG_MP_DIGITS);
 591    }
 592  // Final initialisations.
 593    if (ERROR_COUNT (&A68G_JOB) == 0) {
 594      if (OPTION_REGRESSION_TEST (&A68G_JOB)) {
 595        a68g_bufcpy (A68G (a68g_cmd_name), "a68g", BUFFER_SIZE);
 596        io_close_tty_line ();
 597        WRITE (A68G_STDERR, "[");
 598        WRITE (A68G_STDERR, FILE_INITIAL_NAME (&A68G_JOB));
 599        WRITE (A68G_STDERR, "]\n");
 600      }
 601      A68G_STANDENV = NO_TABLE;
 602      init_postulates ();
 603      A68G (mode_count) = 0;
 604      make_special_mode (&M_HIP, A68G (mode_count)++);
 605      make_special_mode (&M_UNDEFINED, A68G (mode_count)++);
 606      make_special_mode (&M_ERROR, A68G (mode_count)++);
 607      make_special_mode (&M_VACUUM, A68G (mode_count)++);
 608      make_special_mode (&M_C_STRING, A68G (mode_count)++);
 609      make_special_mode (&M_COLLITEM, A68G (mode_count)++);
 610      make_special_mode (&M_SOUND_DATA, A68G (mode_count)++);
 611    }
 612  // Refinement preprocessor.
 613    if (ERROR_COUNT (&A68G_JOB) == 0) {
 614      announce_phase ("preprocessor");
 615      get_refinements ();
 616      if (ERROR_COUNT (&A68G_JOB) == 0) {
 617        put_refinements ();
 618      }
 619      int renum = 0;
 620      renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
 621      verbosity ();
 622    }
 623  // Top-down parser.
 624    if (ERROR_COUNT (&A68G_JOB) == 0) {
 625      announce_phase ("parser phase 1");
 626      check_parenthesis (TOP_NODE (&A68G_JOB));
 627      if (ERROR_COUNT (&A68G_JOB) == 0) {
 628        if (OPTION_BRACKETS (&A68G_JOB)) {
 629          substitute_brackets (TOP_NODE (&A68G_JOB));
 630        }
 631        A68G (symbol_table_count) = 0;
 632        A68G_STANDENV = new_symbol_table (NO_TABLE);
 633        LEVEL (A68G_STANDENV) = 0;
 634        top_down_parser (TOP_NODE (&A68G_JOB));
 635      }
 636      int renum = 0;
 637      renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
 638      verbosity ();
 639    }
 640  // Standard environment builder.
 641    if (ERROR_COUNT (&A68G_JOB) == 0) {
 642      announce_phase ("standard environ builder");
 643      TABLE (TOP_NODE (&A68G_JOB)) = new_symbol_table (A68G_STANDENV);
 644      make_standard_environ ();
 645      STANDENV_MOID (&A68G_JOB) = TOP_MOID (&A68G_JOB);
 646      verbosity ();
 647    }
 648  // Bottom-up parser.
 649    if (ERROR_COUNT (&A68G_JOB) == 0) {
 650      announce_phase ("parser phase 2");
 651      preliminary_symbol_table_setup (TOP_NODE (&A68G_JOB));
 652      bottom_up_parser (TOP_NODE (&A68G_JOB));
 653      int renum = 0;
 654      renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
 655      verbosity ();
 656    }
 657    if (ERROR_COUNT (&A68G_JOB) == 0) {
 658      announce_phase ("parser phase 3");
 659      bottom_up_error_check (TOP_NODE (&A68G_JOB));
 660      victal_checker (TOP_NODE (&A68G_JOB));
 661      if (ERROR_COUNT (&A68G_JOB) == 0) {
 662        finalise_symbol_table_setup (TOP_NODE (&A68G_JOB), 2);
 663        NEST (TABLE (TOP_NODE (&A68G_JOB))) = A68G (symbol_table_count) = 3;
 664        reset_symbol_table_nest_count (TOP_NODE (&A68G_JOB));
 665        fill_symbol_table_outer (TOP_NODE (&A68G_JOB), TABLE (TOP_NODE (&A68G_JOB)));
 666        set_nest (TOP_NODE (&A68G_JOB), NO_NODE);
 667        set_proc_level (TOP_NODE (&A68G_JOB), 1);
 668      }
 669      int renum = 0;
 670      renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
 671      verbosity ();
 672    }
 673  // Mode table builder.
 674    if (ERROR_COUNT (&A68G_JOB) == 0) {
 675      announce_phase ("mode table builder");
 676      make_moid_list (&A68G_JOB);
 677      verbosity ();
 678    }
 679    CROSS_REFERENCE_SAFE (&A68G_JOB) = A68G_TRUE;
 680  // Symbol table builder.
 681    if (ERROR_COUNT (&A68G_JOB) == 0) {
 682      announce_phase ("symbol table builder");
 683      collect_taxes (TOP_NODE (&A68G_JOB));
 684      verbosity ();
 685    }
 686  // Post parser.
 687    if (ERROR_COUNT (&A68G_JOB) == 0) {
 688      announce_phase ("parser phase 4");
 689      rearrange_goto_less_jumps (TOP_NODE (&A68G_JOB));
 690      verbosity ();
 691    }
 692  // Mode checker.
 693    if (ERROR_COUNT (&A68G_JOB) == 0) {
 694      announce_phase ("mode checker");
 695      mode_checker (TOP_NODE (&A68G_JOB));
 696      verbosity ();
 697    }
 698  // Coercion inserter.
 699    if (ERROR_COUNT (&A68G_JOB) == 0) {
 700      announce_phase ("coercion enforcer");
 701      coercion_inserter (TOP_NODE (&A68G_JOB));
 702      widen_denotation (TOP_NODE (&A68G_JOB));
 703      get_max_simplout_size (TOP_NODE (&A68G_JOB));
 704      set_moid_sizes (TOP_MOID (&A68G_JOB));
 705      verbosity ();
 706    }
 707  // Finalize tree.
 708    if (ERROR_COUNT (&A68G_JOB) == 0) {
 709      announce_phase ("finalize tree");
 710      if (OPTION_CONSERVATIVE_GC (&A68G_JOB) == A68G_FALSE) {
 711        annotate_gc (TOP_NODE (&A68G_JOB));
 712      }
 713      assign_offsets_table (A68G_STANDENV);
 714      assign_offsets (TOP_NODE (&A68G_JOB));
 715      assign_offsets_packs (TOP_MOID (&A68G_JOB));
 716      int renum = 0;
 717      renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
 718      verbosity ();
 719    }
 720  // Application checker.
 721    if (ERROR_COUNT (&A68G_JOB) == 0) {
 722      announce_phase ("application checker");
 723      mark_moids (TOP_NODE (&A68G_JOB));
 724      mark_auxilliary (TOP_NODE (&A68G_JOB));
 725      jumps_from_procs (TOP_NODE (&A68G_JOB));
 726      warn_for_unused_tags (TOP_NODE (&A68G_JOB));
 727      verbosity ();
 728    }
 729  // Scope checker.
 730    if (ERROR_COUNT (&A68G_JOB) == 0) {
 731      announce_phase ("static scope checker");
 732      tie_label_to_serial (TOP_NODE (&A68G_JOB));
 733      tie_label_to_unit (TOP_NODE (&A68G_JOB));
 734      bind_routine_tags_to_tree (TOP_NODE (&A68G_JOB));
 735      bind_format_tags_to_tree (TOP_NODE (&A68G_JOB));
 736      scope_checker (TOP_NODE (&A68G_JOB));
 737      verbosity ();
 738    }
 739  }
 740  
 741  //! @brief Renumber nodes.
 742  
 743  void renumber_nodes (NODE_T * p, int *n)
 744  {
 745    for (; p != NO_NODE; FORWARD (p)) {
 746      NUMBER (p) = (*n)++;
 747      renumber_nodes (SUB (p), n);
 748    }
 749  }
 750  
 751  //! @brief Register nodes.
 752  
 753  void register_nodes (NODE_T * p)
 754  {
 755    for (; p != NO_NODE; FORWARD (p)) {
 756      A68G (node_register)[NUMBER (p)] = p;
 757      register_nodes (SUB (p));
 758    }
 759  }
 760  
 761  //! @brief New_node_info.
 762  
 763  NODE_INFO_T *new_node_info (void)
 764  {
 765    NODE_INFO_T *z = (NODE_INFO_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (NODE_INFO_T));
 766    A68G (new_node_infos)++;
 767    PROCEDURE_LEVEL (z) = 0;
 768    CHAR_IN_LINE (z) = NO_TEXT;
 769    SYMBOL (z) = NO_TEXT;
 770    PRAGMENT (z) = NO_TEXT;
 771    PRAGMENT_TYPE (z) = 0;
 772    LINE (z) = NO_LINE;
 773    return z;
 774  }
 775  
 776  //! @brief New_genie_info.
 777  
 778  GINFO_T *new_genie_info (void)
 779  {
 780    GINFO_T *z = (GINFO_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (GINFO_T));
 781    A68G (new_genie_infos)++;
 782    UNIT (&PROP (z)) = NO_PPROC;
 783    SOURCE (&PROP (z)) = NO_NODE;
 784    PARTIAL_PROC (z) = NO_MOID;
 785    PARTIAL_LOCALE (z) = NO_MOID;
 786    IS_COERCION (z) = A68G_FALSE;
 787    IS_NEW_LEXICAL_LEVEL (z) = A68G_FALSE;
 788    NEED_DNS (z) = A68G_FALSE;
 789    PARENT (z) = NO_NODE;
 790    OFFSET (z) = NO_BYTE;
 791    CONSTANT (z) = NO_CONSTANT;
 792    LEVEL (z) = 0;
 793    ARGSIZE (z) = 0;
 794    SIZE (z) = 0;
 795    COMPILE_NAME (z) = NO_TEXT;
 796    COMPILE_NODE (z) = 0;
 797    return z;
 798  }
 799  
 800  //! @brief New_node.
 801  
 802  NODE_T *new_node (void)
 803  {
 804    NODE_T *z = (NODE_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (NODE_T));
 805    A68G (new_nodes)++;
 806    STATUS (z) = NULL_MASK;
 807    CODEX (z) = NULL_MASK;
 808    TABLE (z) = NO_TABLE;
 809    INFO (z) = NO_NINFO;
 810    GINFO (z) = NO_GINFO;
 811    ATTRIBUTE (z) = 0;
 812    ANNOTATION (z) = 0;
 813    MOID (z) = NO_MOID;
 814    NEXT (z) = NO_NODE;
 815    PREVIOUS (z) = NO_NODE;
 816    SUB (z) = NO_NODE;
 817    NEST (z) = NO_NODE;
 818    NON_LOCAL (z) = NO_TABLE;
 819    TAX (z) = NO_TAG;
 820    TAX_GC (z) = NO_TAG;
 821    SEQUENCE (z) = NO_NODE;
 822    PACK (z) = NO_PACK;
 823    return z;
 824  }
 825  
 826  //! @brief New_symbol_table.
 827  
 828  TABLE_T *new_symbol_table (TABLE_T * p)
 829  {
 830    TABLE_T *z = (TABLE_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (TABLE_T));
 831    NUM (z) = A68G (symbol_table_count);
 832    LEVEL (z) = A68G (symbol_table_count)++;
 833    NEST (z) = A68G (symbol_table_count);
 834    ATTRIBUTE (z) = 0;
 835    AP_INCREMENT (z) = 0;
 836    INITIALISE_FRAME (z) = A68G_TRUE;
 837    PROC_OPS (z) = A68G_TRUE;
 838    INITIALISE_ANON (z) = A68G_TRUE;
 839    PREVIOUS (z) = p;
 840    OUTER (z) = NO_TABLE;
 841    IDENTIFIERS (z) = NO_TAG;
 842    OPERATORS (z) = NO_TAG;
 843    PRIO (z) = NO_TAG;
 844    INDICANTS (z) = NO_TAG;
 845    LABELS (z) = NO_TAG;
 846    ANONYMOUS (z) = NO_TAG;
 847    JUMP_TO (z) = NO_NODE;
 848    SEQUENCE (z) = NO_NODE;
 849    return z;
 850  }
 851  
 852  //! @brief New_moid.
 853  
 854  MOID_T *new_moid (void)
 855  {
 856    MOID_T *z = (MOID_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (MOID_T));
 857    A68G (new_modes)++;
 858    ATTRIBUTE (z) = 0;
 859    NUMBER (z) = 0;
 860    DIM (z) = 0;
 861    USE (z) = A68G_FALSE;
 862    HAS_ROWS (z) = A68G_FALSE;
 863    SIZE (z) = 0;
 864    DIGITS (z) = 0;
 865    SIZE_COMPL (z) = 0;
 866    DIGITS_COMPL (z) = 0;
 867    PORTABLE (z) = A68G_TRUE;
 868    DERIVATE (z) = A68G_FALSE;
 869    NODE (z) = NO_NODE;
 870    PACK (z) = NO_PACK;
 871    SUB (z) = NO_MOID;
 872    EQUIVALENT_MODE (z) = NO_MOID;
 873    SLICE (z) = NO_MOID;
 874    TRIM (z) = NO_MOID;
 875    DEFLEXED (z) = NO_MOID;
 876    NAME (z) = NO_MOID;
 877    MULTIPLE_MODE (z) = NO_MOID;
 878    NEXT (z) = NO_MOID;
 879    return z;
 880  }
 881  
 882  //! @brief New_pack.
 883  
 884  PACK_T *new_pack (void)
 885  {
 886    PACK_T *z = (PACK_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (PACK_T));
 887    MOID (z) = NO_MOID;
 888    TEXT (z) = NO_TEXT;
 889    NODE (z) = NO_NODE;
 890    NEXT (z) = NO_PACK;
 891    PREVIOUS (z) = NO_PACK;
 892    SIZE (z) = 0;
 893    OFFSET (z) = 0;
 894    return z;
 895  }
 896  
 897  //! @brief New_tag.
 898  
 899  TAG_T *new_tag (void)
 900  {
 901    TAG_T *z = (TAG_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (TAG_T));
 902    STATUS (z) = NULL_MASK;
 903    CODEX (z) = NULL_MASK;
 904    TAG_TABLE (z) = NO_TABLE;
 905    MOID (z) = NO_MOID;
 906    NODE (z) = NO_NODE;
 907    UNIT (z) = NO_NODE;
 908    VALUE (z) = NO_TEXT;
 909    A68G_STANDENV_PROC (z) = 0;
 910    PROCEDURE (z) = NO_GPROC;
 911    SCOPE (z) = PRIMAL_SCOPE;
 912    SCOPE_ASSIGNED (z) = A68G_FALSE;
 913    PRIO (z) = 0;
 914    USE (z) = A68G_FALSE;
 915    IN_PROC (z) = A68G_FALSE;
 916    HEAP (z) = A68G_FALSE;
 917    SIZE (z) = 0;
 918    OFFSET (z) = 0;
 919    YOUNGEST_ENVIRON (z) = PRIMAL_SCOPE;
 920    LOC_ASSIGNED (z) = A68G_FALSE;
 921    NEXT (z) = NO_TAG;
 922    BODY (z) = NO_TAG;
 923    PORTABLE (z) = A68G_TRUE;
 924    NUMBER (z) = ++A68G_PARSER (tag_number);
 925    return z;
 926  }
 927  
 928  //! @brief Make special, internal mode.
 929  
 930  void make_special_mode (MOID_T ** n, int m)
 931  {
 932    (*n) = new_moid ();
 933    ATTRIBUTE (*n) = 0;
 934    NUMBER (*n) = m;
 935    PACK (*n) = NO_PACK;
 936    SUB (*n) = NO_MOID;
 937    EQUIVALENT (*n) = NO_MOID;
 938    DEFLEXED (*n) = NO_MOID;
 939    NAME (*n) = NO_MOID;
 940    SLICE (*n) = NO_MOID;
 941    TRIM (*n) = NO_MOID;
 942    ROWED (*n) = NO_MOID;
 943  }
 944  
 945  //! @brief Whether attributes match in subsequent nodes.
 946  
 947  BOOL_T whether (NODE_T * p, ...)
 948  {
 949    va_list vl;
 950    va_start (vl, p);
 951    int a;
 952    while ((a = va_arg (vl, int)) != STOP)
 953    {
 954      if (p != NO_NODE && a == WILDCARD) {
 955        FORWARD (p);
 956      } else if (p != NO_NODE && (a == KEYWORD)) {
 957        if (find_keyword_from_attribute (A68G (top_keyword), ATTRIBUTE (p)) != NO_KEYWORD) {
 958          FORWARD (p);
 959        } else {
 960          va_end (vl);
 961          return A68G_FALSE;
 962        }
 963      } else if (p != NO_NODE && (a >= 0 ? a == ATTRIBUTE (p) : (-a) != ATTRIBUTE (p))) {
 964        FORWARD (p);
 965      } else {
 966        va_end (vl);
 967        return A68G_FALSE;
 968      }
 969    }
 970    va_end (vl);
 971    return A68G_TRUE;
 972  }
 973  
 974  //! @brief Whether one of a series of attributes matches a node.
 975  
 976  BOOL_T is_one_of (NODE_T * p, ...)
 977  {
 978    if (p != NO_NODE) {
 979      va_list vl;
 980      va_start (vl, p);
 981      BOOL_T match = A68G_FALSE;
 982      int a;
 983      while ((a = va_arg (vl, int)) != STOP)
 984      {
 985        match = (BOOL_T) (match | (BOOL_T) (IS (p, a)));
 986      }
 987      va_end (vl);
 988      return match;
 989    } else {
 990      return A68G_FALSE;
 991    }
 992  }
 993  
 994  //! @brief Isolate nodes p-q making p a branch to p-q.
 995  
 996  void make_sub (NODE_T * p, NODE_T * q, int t)
 997  {
 998    NODE_T *z = new_node ();
 999    ABEND (p == NO_NODE || q == NO_NODE, ERROR_INTERNAL_CONSISTENCY, __func__);
1000    *z = *p;
1001    if (GINFO (p) != NO_GINFO) {
1002      GINFO (z) = new_genie_info ();
1003    }
1004    PREVIOUS (z) = NO_NODE;
1005    if (p == q) {
1006      NEXT (z) = NO_NODE;
1007    } else {
1008      if (NEXT (p) != NO_NODE) {
1009        PREVIOUS (NEXT (p)) = z;
1010      }
1011      NEXT (p) = NEXT (q);
1012      if (NEXT (p) != NO_NODE) {
1013        PREVIOUS (NEXT (p)) = p;
1014      }
1015      NEXT (q) = NO_NODE;
1016    }
1017    SUB (p) = z;
1018    ATTRIBUTE (p) = t;
1019  }
1020  
1021  //! @brief Find symbol table at level 'i'.
1022  
1023  TABLE_T *find_level (NODE_T * n, int i)
1024  {
1025    if (n == NO_NODE) {
1026      return NO_TABLE;
1027    } else {
1028      TABLE_T *s = TABLE (n);
1029      if (s != NO_TABLE && LEVEL (s) == i) {
1030        return s;
1031      } else if ((s = find_level (SUB (n), i)) != NO_TABLE) {
1032        return s;
1033      } else if ((s = find_level (NEXT (n), i)) != NO_TABLE) {
1034        return s;
1035      } else {
1036        return NO_TABLE;
1037      }
1038    }
1039  }
1040  
1041  //! @brief Whether 'p' is top of lexical level.
1042  
1043  BOOL_T is_new_lexical_level (NODE_T * p)
1044  {
1045    switch (ATTRIBUTE (p)) {
1046    case ALT_DO_PART:
1047    case BRIEF_ELIF_PART:
1048    case BRIEF_OUSE_PART:
1049    case BRIEF_CONFORMITY_OUSE_PART:
1050    case CHOICE:
1051    case CLOSED_CLAUSE:
1052    case CONDITIONAL_CLAUSE:
1053    case DO_PART:
1054    case ELIF_PART:
1055    case ELSE_PART:
1056    case FORMAT_TEXT:
1057    case CASE_CLAUSE:
1058    case CASE_CHOICE_CLAUSE:
1059    case CASE_IN_PART:
1060    case CASE_OUSE_PART:
1061    case OUT_PART:
1062    case ROUTINE_TEXT:
1063    case SPECIFIED_UNIT:
1064    case THEN_PART:
1065    case UNTIL_PART:
1066    case CONFORMITY_CLAUSE:
1067    case CONFORMITY_CHOICE:
1068    case CONFORMITY_IN_PART:
1069    case CONFORMITY_OUSE_PART:
1070    case WHILE_PART: {
1071        return A68G_TRUE;
1072      }
1073    default: {
1074        return A68G_FALSE;
1075      }
1076    }
1077  }
1078  
1079  //! @brief Some_node.
1080  
1081  NODE_T *some_node (char *t)
1082  {
1083    NODE_T *z = new_node ();
1084    INFO (z) = new_node_info ();
1085    GINFO (z) = new_genie_info ();
1086    NSYMBOL (z) = t;
1087    return z;
1088  }
     


© 2002-2025 J.M. van der Veer (jmvdveer@xs4all.nl)