parser.c
1 //! @file parser.c
2 //! @author J. Marcel van der Veer
3
4 //! @section Copyright
5 //!
6 //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
7 //! Copyright 2001-2025 J. Marcel van der Veer [algol68g@xs4all.nl].
8
9 //! @section License
10 //!
11 //! This program is free software; you can redistribute it and/or modify it
12 //! under the terms of the GNU General Public License as published by the
13 //! Free Software Foundation; either version 3 of the License, or
14 //! (at your option) any later version.
15 //!
16 //! This program is distributed in the hope that it will be useful, but
17 //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 //! more details. You should have received a copy of the GNU General Public
20 //! License along with this program. If not, see [http://www.gnu.org/licenses/].
21
22 //! @section Synopsis
23 //!
24 //! Mailloux-type Algol 68 parser driver.
25
26 // The Algol 68 grammar is a two level (Van Wijngaarden, "VW") grammar
27 // that incorporates, as syntactical rules, the semantical rules in
28 // other languages. Examples are correct use of symbols, modes and scope.
29 //
30 // This code constitutes an effective "VW Algol 68 parser". A pragmatic
31 // approach was chosen since in the early days of Algol 68, many "ab initio"
32 // implementations failed, probably because techniques to parse a language
33 // like Algol 68 had yet to be invented.
34 //
35 // This is a Mailloux-type parser, in the sense that it scans a "phrase" for
36 // definitions needed for parsing. Algol 68 allows for tags to be used
37 // before they are defined, which gives freedom in top-down programming.
38 //
39 // B. J. Mailloux. On the implementation of Algol 68.
40 // Thesis, Universiteit van Amsterdam (Mathematisch Centrum) [1968].
41 //
42 // Technically, Mailloux's approach renders the two-level grammar LALR.
43 //
44 // First part of the parser is the scanner. The source file is read,
45 // is tokenised, and if needed a refinement preprocessor elaborates a stepwise
46 // refined program. The result is a linear list of tokens that is input for the
47 // parser, that will transform the linear list into a syntax tree.
48 //
49 // Algol68G tokenises all symbols before the bottom-up parser is invoked.
50 // This means that scanning does not use information from the parser.
51 // The scanner does of course some rudimentary parsing. Format texts can have
52 // enclosed clauses in them, so we record information in a stack as to know
53 // what is being scanned. Also, the refinement preprocessor implements a
54 // (trivial) grammar.
55 //
56 // The scanner supports two stropping regimes: "bold" (or "upper") and "quote".
57 // Examples of both:
58 //
59 // bold stropping: BEGIN INT i = 1, j = 1; print (i + j) END
60 //
61 // quote stropping: 'BEGIN' 'INT' I = 1, J = 1; PRINT (I + J) 'END'
62 //
63 // Quote stropping was used frequently in the (excusez-le-mot) punch-card age.
64 // Hence, bold stropping is the default. There also existed point stropping,
65 // but that has not been implemented here.
66 //
67 // Next part of the parser is a recursive-descent type to check parenthesis.
68 // Also a first set-up is made of symbol tables, needed by the bottom-up parser.
69 // Next part is the bottom-up parser, that parses without knowing modes while
70 // parsing and reducing. It can therefore not exchange "[]" with "()" as was
71 // blessed by the Revised Report. This is solved by treating CALL and SLICE as
72 // equivalent for the moment and letting the mode checker sort it out later.
73 //
74 // Parsing progresses in various phases to avoid spurious diagnostics from a
75 // recovering parser. Every phase "tightens" the grammar more.
76 // An error in any phase makes the parser quit when that phase ends.
77 // The parser is forgiving in case of superfluous semicolons.
78 //
79 // These are the parser phases:
80 //
81 // (1) Parenthesis are checked to see whether they match. Then, a top-down
82 // parser determines the basic-block structure of the program
83 // so symbol tables can be set up that the bottom-up parser will consult
84 // as you can define things before they are applied.
85 //
86 // (2) A bottom-up parser resolves the structure of the program.
87 //
88 // (3) After the symbol tables have been finalised, a small rearrangement of the
89 // tree may be required where JUMPs have no GOTO. This leads to the
90 // non-standard situation that JUMPs without GOTO can have the syntactic
91 // position of a PRIMARY, SECONDARY or TERTIARY. The bottom-up parser also
92 // does not check VICTAL correctness of declarers. This is done separately.
93 // Also structure of format texts is checked separately.
94 //
95 // The parser sets up symbol tables and populates them as far as needed to parse
96 // the source. After the bottom-up parser terminates succesfully, the symbol tables
97 // are completed.
98 //
99 // (4) Next, modes are collected and rules for well-formedness and structural
100 // equivalence are applied. Then the symbol-table is completed now moids are
101 // all known.
102 //
103 // (5) Next phases are the mode checker and coercion inserter. The syntax tree is
104 // traversed to determine and check all modes, and to select operators. Then
105 // the tree is traversed again to insert coercions.
106 //
107 // (6) A static scope checker detects where objects are transported out of scope.
108 // At run time, a dynamic scope checker will check that what the static scope
109 // checker cannot see.
110
111 #include "a68g.h"
112 #include "a68g-parser.h"
113 #include "a68g-mp.h"
114 #include "a68g-postulates.h"
115 #include "a68g-prelude.h"
116
117 //! @brief First initialisations.
118
119 void init_before_tokeniser (void)
120 {
121 // Heap management set-up.
122 errno = 0;
123 init_heap ();
124 A68G (top_keyword) = NO_KEYWORD;
125 A68G (top_token) = NO_TOKEN;
126 TOP_NODE (&A68G_JOB) = NO_NODE;
127 TOP_MOID (&A68G_JOB) = NO_MOID;
128 TOP_LINE (&A68G_JOB) = NO_LINE;
129 STANDENV_MOID (&A68G_JOB) = NO_MOID;
130 set_up_tables ();
131 // Various initialisations.
132 ERROR_COUNT (&A68G_JOB) = WARNING_COUNT (&A68G_JOB) = 0;
133 ABEND (errno != 0, ERROR_ALLOCATION, __func__);
134 errno = 0;
135 }
136
137 void init_parser (void)
138 {
139 A68G_PARSER (stop_scanner) = A68G_FALSE;
140 A68G_PARSER (read_error) = A68G_FALSE;
141 A68G_PARSER (no_preprocessing) = A68G_FALSE;
142 }
143
144 //! @brief Is_ref_refety_flex.
145
146 BOOL_T is_ref_refety_flex (MOID_T * m)
147 {
148 if (IS_REF_FLEX (m)) {
149 return A68G_TRUE;
150 } else if (IS_REF (m)) {
151 return is_ref_refety_flex (SUB (m));
152 } else {
153 return A68G_FALSE;
154 }
155 }
156
157 //! @brief Count number of operands in operator parameter list.
158
159 int count_operands (NODE_T * p)
160 {
161 if (p != NO_NODE) {
162 if (IS (p, DECLARER)) {
163 return count_operands (NEXT (p));
164 } else if (IS (p, COMMA_SYMBOL)) {
165 return 1 + count_operands (NEXT (p));
166 } else {
167 return count_operands (NEXT (p)) + count_operands (SUB (p));
168 }
169 } else {
170 return 0;
171 }
172 }
173
174 //! @brief Count formal bounds in declarer in tree.
175
176 int count_formal_bounds (NODE_T * p)
177 {
178 if (p == NO_NODE) {
179 return 0;
180 } else {
181 if (IS (p, COMMA_SYMBOL)) {
182 return 1;
183 } else {
184 return count_formal_bounds (NEXT (p)) + count_formal_bounds (SUB (p));
185 }
186 }
187 }
188
189 //! @brief Count pictures.
190
191 void count_pictures (NODE_T * p, int *k)
192 {
193 for (; p != NO_NODE; FORWARD (p)) {
194 if (IS (p, PICTURE)) {
195 (*k)++;
196 }
197 count_pictures (SUB (p), k);
198 }
199 }
200
201 //! @brief Whether token cannot follow semicolon or EXIT.
202
203 BOOL_T is_semicolon_less (NODE_T * p)
204 {
205 switch (ATTRIBUTE (p)) {
206 case BUS_SYMBOL:
207 case CLOSE_SYMBOL:
208 case END_SYMBOL:
209 case SEMI_SYMBOL:
210 case EXIT_SYMBOL:
211 case THEN_BAR_SYMBOL:
212 case ELSE_BAR_SYMBOL:
213 case THEN_SYMBOL:
214 case ELIF_SYMBOL:
215 case ELSE_SYMBOL:
216 case FI_SYMBOL:
217 case IN_SYMBOL:
218 case OUT_SYMBOL:
219 case OUSE_SYMBOL:
220 case ESAC_SYMBOL:
221 case EDOC_SYMBOL:
222 case OCCA_SYMBOL:
223 case OD_SYMBOL:
224 case UNTIL_SYMBOL: {
225 return A68G_TRUE;
226 }
227 default: {
228 return A68G_FALSE;
229 }
230 }
231 }
232
233 //! @brief Whether formal bounds.
234
235 BOOL_T is_formal_bounds (NODE_T * p)
236 {
237 if (p == NO_NODE) {
238 return A68G_TRUE;
239 } else {
240 switch (ATTRIBUTE (p)) {
241 case OPEN_SYMBOL:
242 case CLOSE_SYMBOL:
243 case SUB_SYMBOL:
244 case BUS_SYMBOL:
245 case COMMA_SYMBOL:
246 case COLON_SYMBOL:
247 case DOTDOT_SYMBOL:
248 case INT_DENOTATION:
249 case IDENTIFIER:
250 case OPERATOR: {
251 return (BOOL_T) (is_formal_bounds (SUB (p)) && is_formal_bounds (NEXT (p)));
252 }
253 default: {
254 return A68G_FALSE;
255 }
256 }
257 }
258 }
259
260 //! @brief Whether token terminates a unit.
261
262 BOOL_T is_unit_terminator (NODE_T * p)
263 {
264 switch (ATTRIBUTE (p)) {
265 case BUS_SYMBOL:
266 case CLOSE_SYMBOL:
267 case END_SYMBOL:
268 case SEMI_SYMBOL:
269 case EXIT_SYMBOL:
270 case COMMA_SYMBOL:
271 case THEN_BAR_SYMBOL:
272 case ELSE_BAR_SYMBOL:
273 case THEN_SYMBOL:
274 case ELIF_SYMBOL:
275 case ELSE_SYMBOL:
276 case FI_SYMBOL:
277 case IN_SYMBOL:
278 case OUT_SYMBOL:
279 case OUSE_SYMBOL:
280 case ESAC_SYMBOL:
281 case EDOC_SYMBOL:
282 case OCCA_SYMBOL: {
283 return A68G_TRUE;
284 }
285 }
286 return A68G_FALSE;
287 }
288
289 //! @brief Whether token is a unit-terminator in a loop clause.
290
291 BOOL_T is_loop_keyword (NODE_T * p)
292 {
293 switch (ATTRIBUTE (p)) {
294 case FOR_SYMBOL:
295 case FROM_SYMBOL:
296 case BY_SYMBOL:
297 case TO_SYMBOL:
298 case DOWNTO_SYMBOL:
299 case WHILE_SYMBOL:
300 case DO_SYMBOL: {
301 return A68G_TRUE;
302 }
303 }
304 return A68G_FALSE;
305 }
306
307 //! @brief Get good attribute.
308
309 int get_good_attribute (NODE_T * p)
310 {
311 switch (ATTRIBUTE (p)) {
312 case UNIT:
313 case TERTIARY:
314 case SECONDARY:
315 case PRIMARY:
316 case ENCLOSED_CLAUSE: {
317 return get_good_attribute (SUB (p));
318 }
319 case DECLARER: {
320 if (IS (SUB (p), INDICANT)) {
321 if (SUB_SUB (p) != NO_NODE) {
322 return ATTRIBUTE (SUB_SUB (p));
323 } else {
324 return INDICANT;
325 }
326 } else {
327 return DECLARER;
328 }
329 }
330 case DECLARATION_LIST: {
331 if (SUB (p) != NO_NODE) {
332 return ATTRIBUTE (SUB (p));
333 } else {
334 return ATTRIBUTE (p);
335 }
336 }
337 default: {
338 return ATTRIBUTE (p);
339 }
340 }
341 }
342
343 //! @brief Intelligible diagnostic from syntax tree branch.
344
345 char *phrase_to_text (NODE_T * p, NODE_T ** w)
346 {
347 #define MAX_TERMINALS 10
348 int count = 0;
349 BOOL_T put_space = A68G_FALSE;
350 static BUFFER buffer;
351 buffer[0] = NULL_CHAR;
352 while (p != NO_NODE && count < MAX_TERMINALS) {
353 if (LINE_NUMBER (p) > 0) {
354 int gatt = get_good_attribute (p);
355 char *z = non_terminal_string (A68G (input_line), gatt);
356 // Where to put the error message?
357 // The actual content of a diagnostic is not as important
358 // as accurately indicating *were* the problem is!
359 if (w != NO_REF) {
360 if (count == 0 || (*w) == NO_NODE) {
361 *w = p;
362 } else if (dont_mark_here (*w)) {
363 *w = p;
364 }
365 }
366 switch (gatt) {
367 case SEMI_SYMBOL:
368 case COMMA_SYMBOL:
369 case CLOSE_SYMBOL:
370 case BUS_SYMBOL: {
371 // Next symbol may have leading space.
372 put_space = A68G_TRUE;
373 break;
374 }
375 case OPEN_SYMBOL:
376 case SUB_SYMBOL: {
377 if (put_space) {
378 a68g_bufcat (buffer, " ", BUFFER_SIZE);
379 }
380 // Next symbol has no leading space.
381 put_space = A68G_FALSE;
382 break;
383 }
384 default: {
385 if (put_space) {
386 a68g_bufcat (buffer, " ", BUFFER_SIZE);
387 }
388 // Next symbol may have leading space.
389 put_space = A68G_TRUE;
390 break;
391 }
392 }
393 // Attribute or symbol.
394 if (z != NO_TEXT) {
395 switch (gatt) {
396 case DENOTATION:
397 case IDENTIFIER: {
398 ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s '%s'", z, NSYMBOL (p)) >= 0);
399 break;
400 }
401 case OPERATOR: {
402 ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s", NSYMBOL (p)) >= 0);
403 break;
404 }
405 case DECLARER:
406 case COLON_SYMBOL: {
407 ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s", z) >= 0);
408 break;
409 }
410 default: {
411 if (SUB (p) == NO_NODE && NSYMBOL (p) != NO_TEXT) {
412 ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s", NSYMBOL (p)) >= 0);
413 } else {
414 ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s", z) >= 0);
415 }
416 break;
417 }
418 }
419 } else if (NSYMBOL (p) != NO_TEXT) {
420 ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "%s", NSYMBOL (p)) >= 0);
421 } else {
422 ASSERT (a68g_bufprt (A68G (edit_line), SNPRINTF_SIZE, "...") >= 0);
423 }
424 a68g_bufcat (buffer, A68G (edit_line), BUFFER_SIZE);
425 count++;
426 }
427 FORWARD (p);
428 }
429 if (p != NO_NODE && count == MAX_TERMINALS) {
430 a68g_bufcat (buffer, " ... ", BUFFER_SIZE);
431 }
432 return buffer;
433 }
434
435 //! @brief Preferably don't put intelligible diagnostic here.
436
437 BOOL_T dont_mark_here (NODE_T * p)
438 {
439 switch (ATTRIBUTE (p)) {
440 case ACCO_SYMBOL:
441 case ALT_DO_SYMBOL:
442 case ALT_EQUALS_SYMBOL:
443 case ANDF_SYMBOL:
444 case ASSERT_SYMBOL:
445 case ASSIGN_SYMBOL:
446 case ASSIGN_TO_SYMBOL:
447 case AT_SYMBOL:
448 case BEGIN_SYMBOL:
449 case BITS_SYMBOL:
450 case BOLD_COMMENT_SYMBOL:
451 case BOLD_PRAGMAT_SYMBOL:
452 case BOOL_SYMBOL:
453 case BUS_SYMBOL:
454 case BY_SYMBOL:
455 case BYTES_SYMBOL:
456 case CASE_SYMBOL:
457 case CHANNEL_SYMBOL:
458 case CHAR_SYMBOL:
459 case CLOSE_SYMBOL:
460 case CODE_SYMBOL:
461 case COLON_SYMBOL:
462 case COLUMN_SYMBOL:
463 case COMMA_SYMBOL:
464 case COMPLEX_SYMBOL:
465 case COMPL_SYMBOL:
466 case DIAGONAL_SYMBOL:
467 case DO_SYMBOL:
468 case DOTDOT_SYMBOL:
469 case DOWNTO_SYMBOL:
470 case EDOC_SYMBOL:
471 case ELIF_SYMBOL:
472 case ELSE_BAR_SYMBOL:
473 case ELSE_SYMBOL:
474 case EMPTY_SYMBOL:
475 case END_SYMBOL:
476 case ENVIRON_SYMBOL:
477 case EQUALS_SYMBOL:
478 case ESAC_SYMBOL:
479 case EXIT_SYMBOL:
480 case FALSE_SYMBOL:
481 case FILE_SYMBOL:
482 case FI_SYMBOL:
483 case FLEX_SYMBOL:
484 case FORMAT_DELIMITER_SYMBOL:
485 case FORMAT_SYMBOL:
486 case FOR_SYMBOL:
487 case FROM_SYMBOL:
488 case GO_SYMBOL:
489 case GOTO_SYMBOL:
490 case HEAP_SYMBOL:
491 case IF_SYMBOL:
492 case IN_SYMBOL:
493 case INT_SYMBOL:
494 case ISNT_SYMBOL:
495 case IS_SYMBOL:
496 case LOC_SYMBOL:
497 case LONG_SYMBOL:
498 case MAIN_SYMBOL:
499 case MODE_SYMBOL:
500 case NIL_SYMBOL:
501 case OCCA_SYMBOL:
502 case OD_SYMBOL:
503 case OF_SYMBOL:
504 case OPEN_SYMBOL:
505 case OP_SYMBOL:
506 case ORF_SYMBOL:
507 case OUSE_SYMBOL:
508 case OUT_SYMBOL:
509 case PAR_SYMBOL:
510 case PIPE_SYMBOL:
511 case POINT_SYMBOL:
512 case PRIO_SYMBOL:
513 case PROC_SYMBOL:
514 case REAL_SYMBOL:
515 case REF_SYMBOL:
516 case ROWS_SYMBOL:
517 case ROW_SYMBOL:
518 case SEMA_SYMBOL:
519 case SEMI_SYMBOL:
520 case SHORT_SYMBOL:
521 case SKIP_SYMBOL:
522 case SOUND_SYMBOL:
523 case STRING_SYMBOL:
524 case STRUCT_SYMBOL:
525 case STYLE_I_COMMENT_SYMBOL:
526 case STYLE_II_COMMENT_SYMBOL:
527 case STYLE_I_PRAGMAT_SYMBOL:
528 case SUB_SYMBOL:
529 case THEN_BAR_SYMBOL:
530 case THEN_SYMBOL:
531 case TO_SYMBOL:
532 case TRANSPOSE_SYMBOL:
533 case TRUE_SYMBOL:
534 case UNION_SYMBOL:
535 case UNTIL_SYMBOL:
536 case VOID_SYMBOL:
537 case WHILE_SYMBOL:
538 case SERIAL_CLAUSE:
539 case ENQUIRY_CLAUSE:
540 case INITIALISER_SERIES:
541 case DECLARATION_LIST: {
542 return A68G_TRUE;
543 }
544 }
545 return A68G_FALSE;
546 }
547
548 void a68g_parser (void)
549 {
550 // Tokeniser.
551 FILE_SOURCE_OPENED (&A68G_JOB) = A68G_TRUE;
552 announce_phase ("initialiser");
553 A68G_PARSER (error_tag) = (TAG_T *) new_tag ();
554 init_parser ();
555 if (ERROR_COUNT (&A68G_JOB) == 0) {
556 int frame_stack_size_2 = A68G (frame_stack_size);
557 int expr_stack_size_2 = A68G (expr_stack_size);
558 int heap_size_2 = A68G (heap_size);
559 int handle_pool_size_2 = A68G (handle_pool_size);
560 BOOL_T ok;
561 announce_phase ("tokeniser");
562 ok = lexical_analyser ();
563 if (!ok || errno != 0) {
564 diagnostics_to_terminal (TOP_LINE (&A68G_JOB), A68G_ALL_DIAGNOSTICS);
565 return;
566 }
567 // Maybe the program asks for more memory through a PRAGMAT. We restart.
568 if (frame_stack_size_2 != A68G (frame_stack_size) || expr_stack_size_2 != A68G (expr_stack_size) || heap_size_2 != A68G (heap_size) || handle_pool_size_2 != A68G (handle_pool_size)) {
569 announce_phase ("tokeniser");
570 free_syntax_tree (TOP_NODE (&A68G_JOB));
571 discard_heap ();
572 init_before_tokeniser ();
573 SOURCE_SCAN (&A68G_JOB)++;
574 ok = lexical_analyser ();
575 verbosity ();
576 }
577 if (!ok || errno != 0) {
578 diagnostics_to_terminal (TOP_LINE (&A68G_JOB), A68G_ALL_DIAGNOSTICS);
579 return;
580 }
581 ASSERT (close (FILE_SOURCE_FD (&A68G_JOB)) == 0);
582 FILE_SOURCE_OPENED (&A68G_JOB) = A68G_FALSE;
583 prune_echoes (OPTION_LIST (&A68G_JOB));
584 TREE_LISTING_SAFE (&A68G_JOB) = A68G_TRUE;
585 int renum = 0;
586 renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
587 }
588 // Now the default precision of LONG LONG modes is fixed.
589 if (long_mp_digits () == 0) {
590 set_long_mp_digits (LONG_LONG_MP_DIGITS);
591 }
592 // Final initialisations.
593 if (ERROR_COUNT (&A68G_JOB) == 0) {
594 if (OPTION_REGRESSION_TEST (&A68G_JOB)) {
595 a68g_bufcpy (A68G (a68g_cmd_name), "a68g", BUFFER_SIZE);
596 io_close_tty_line ();
597 WRITE (A68G_STDERR, "[");
598 WRITE (A68G_STDERR, FILE_INITIAL_NAME (&A68G_JOB));
599 WRITE (A68G_STDERR, "]\n");
600 }
601 A68G_STANDENV = NO_TABLE;
602 init_postulates ();
603 A68G (mode_count) = 0;
604 make_special_mode (&M_HIP, A68G (mode_count)++);
605 make_special_mode (&M_UNDEFINED, A68G (mode_count)++);
606 make_special_mode (&M_ERROR, A68G (mode_count)++);
607 make_special_mode (&M_VACUUM, A68G (mode_count)++);
608 make_special_mode (&M_C_STRING, A68G (mode_count)++);
609 make_special_mode (&M_COLLITEM, A68G (mode_count)++);
610 make_special_mode (&M_SOUND_DATA, A68G (mode_count)++);
611 }
612 // Refinement preprocessor.
613 if (ERROR_COUNT (&A68G_JOB) == 0) {
614 announce_phase ("preprocessor");
615 get_refinements ();
616 if (ERROR_COUNT (&A68G_JOB) == 0) {
617 put_refinements ();
618 }
619 int renum = 0;
620 renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
621 verbosity ();
622 }
623 // Top-down parser.
624 if (ERROR_COUNT (&A68G_JOB) == 0) {
625 announce_phase ("parser phase 1");
626 check_parenthesis (TOP_NODE (&A68G_JOB));
627 if (ERROR_COUNT (&A68G_JOB) == 0) {
628 if (OPTION_BRACKETS (&A68G_JOB)) {
629 substitute_brackets (TOP_NODE (&A68G_JOB));
630 }
631 A68G (symbol_table_count) = 0;
632 A68G_STANDENV = new_symbol_table (NO_TABLE);
633 LEVEL (A68G_STANDENV) = 0;
634 top_down_parser (TOP_NODE (&A68G_JOB));
635 }
636 int renum = 0;
637 renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
638 verbosity ();
639 }
640 // Standard environment builder.
641 if (ERROR_COUNT (&A68G_JOB) == 0) {
642 announce_phase ("standard environ builder");
643 TABLE (TOP_NODE (&A68G_JOB)) = new_symbol_table (A68G_STANDENV);
644 make_standard_environ ();
645 STANDENV_MOID (&A68G_JOB) = TOP_MOID (&A68G_JOB);
646 verbosity ();
647 }
648 // Bottom-up parser.
649 if (ERROR_COUNT (&A68G_JOB) == 0) {
650 announce_phase ("parser phase 2");
651 preliminary_symbol_table_setup (TOP_NODE (&A68G_JOB));
652 bottom_up_parser (TOP_NODE (&A68G_JOB));
653 int renum = 0;
654 renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
655 verbosity ();
656 }
657 if (ERROR_COUNT (&A68G_JOB) == 0) {
658 announce_phase ("parser phase 3");
659 bottom_up_error_check (TOP_NODE (&A68G_JOB));
660 victal_checker (TOP_NODE (&A68G_JOB));
661 if (ERROR_COUNT (&A68G_JOB) == 0) {
662 finalise_symbol_table_setup (TOP_NODE (&A68G_JOB), 2);
663 NEST (TABLE (TOP_NODE (&A68G_JOB))) = A68G (symbol_table_count) = 3;
664 reset_symbol_table_nest_count (TOP_NODE (&A68G_JOB));
665 fill_symbol_table_outer (TOP_NODE (&A68G_JOB), TABLE (TOP_NODE (&A68G_JOB)));
666 set_nest (TOP_NODE (&A68G_JOB), NO_NODE);
667 set_proc_level (TOP_NODE (&A68G_JOB), 1);
668 }
669 int renum = 0;
670 renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
671 verbosity ();
672 }
673 // Mode table builder.
674 if (ERROR_COUNT (&A68G_JOB) == 0) {
675 announce_phase ("mode table builder");
676 make_moid_list (&A68G_JOB);
677 verbosity ();
678 }
679 CROSS_REFERENCE_SAFE (&A68G_JOB) = A68G_TRUE;
680 // Symbol table builder.
681 if (ERROR_COUNT (&A68G_JOB) == 0) {
682 announce_phase ("symbol table builder");
683 collect_taxes (TOP_NODE (&A68G_JOB));
684 verbosity ();
685 }
686 // Post parser.
687 if (ERROR_COUNT (&A68G_JOB) == 0) {
688 announce_phase ("parser phase 4");
689 rearrange_goto_less_jumps (TOP_NODE (&A68G_JOB));
690 verbosity ();
691 }
692 // Mode checker.
693 if (ERROR_COUNT (&A68G_JOB) == 0) {
694 announce_phase ("mode checker");
695 mode_checker (TOP_NODE (&A68G_JOB));
696 verbosity ();
697 }
698 // Coercion inserter.
699 if (ERROR_COUNT (&A68G_JOB) == 0) {
700 announce_phase ("coercion enforcer");
701 coercion_inserter (TOP_NODE (&A68G_JOB));
702 widen_denotation (TOP_NODE (&A68G_JOB));
703 get_max_simplout_size (TOP_NODE (&A68G_JOB));
704 set_moid_sizes (TOP_MOID (&A68G_JOB));
705 verbosity ();
706 }
707 // Finalize tree.
708 if (ERROR_COUNT (&A68G_JOB) == 0) {
709 announce_phase ("finalize tree");
710 if (OPTION_CONSERVATIVE_GC (&A68G_JOB) == A68G_FALSE) {
711 annotate_gc (TOP_NODE (&A68G_JOB));
712 }
713 assign_offsets_table (A68G_STANDENV);
714 assign_offsets (TOP_NODE (&A68G_JOB));
715 assign_offsets_packs (TOP_MOID (&A68G_JOB));
716 int renum = 0;
717 renumber_nodes (TOP_NODE (&A68G_JOB), &renum);
718 verbosity ();
719 }
720 // Application checker.
721 if (ERROR_COUNT (&A68G_JOB) == 0) {
722 announce_phase ("application checker");
723 mark_moids (TOP_NODE (&A68G_JOB));
724 mark_auxilliary (TOP_NODE (&A68G_JOB));
725 jumps_from_procs (TOP_NODE (&A68G_JOB));
726 warn_for_unused_tags (TOP_NODE (&A68G_JOB));
727 verbosity ();
728 }
729 // Scope checker.
730 if (ERROR_COUNT (&A68G_JOB) == 0) {
731 announce_phase ("static scope checker");
732 tie_label_to_serial (TOP_NODE (&A68G_JOB));
733 tie_label_to_unit (TOP_NODE (&A68G_JOB));
734 bind_routine_tags_to_tree (TOP_NODE (&A68G_JOB));
735 bind_format_tags_to_tree (TOP_NODE (&A68G_JOB));
736 scope_checker (TOP_NODE (&A68G_JOB));
737 verbosity ();
738 }
739 }
740
741 //! @brief Renumber nodes.
742
743 void renumber_nodes (NODE_T * p, int *n)
744 {
745 for (; p != NO_NODE; FORWARD (p)) {
746 NUMBER (p) = (*n)++;
747 renumber_nodes (SUB (p), n);
748 }
749 }
750
751 //! @brief Register nodes.
752
753 void register_nodes (NODE_T * p)
754 {
755 for (; p != NO_NODE; FORWARD (p)) {
756 A68G (node_register)[NUMBER (p)] = p;
757 register_nodes (SUB (p));
758 }
759 }
760
761 //! @brief New_node_info.
762
763 NODE_INFO_T *new_node_info (void)
764 {
765 NODE_INFO_T *z = (NODE_INFO_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (NODE_INFO_T));
766 A68G (new_node_infos)++;
767 PROCEDURE_LEVEL (z) = 0;
768 CHAR_IN_LINE (z) = NO_TEXT;
769 SYMBOL (z) = NO_TEXT;
770 PRAGMENT (z) = NO_TEXT;
771 PRAGMENT_TYPE (z) = 0;
772 LINE (z) = NO_LINE;
773 return z;
774 }
775
776 //! @brief New_genie_info.
777
778 GINFO_T *new_genie_info (void)
779 {
780 GINFO_T *z = (GINFO_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (GINFO_T));
781 A68G (new_genie_infos)++;
782 UNIT (&PROP (z)) = NO_PPROC;
783 SOURCE (&PROP (z)) = NO_NODE;
784 PARTIAL_PROC (z) = NO_MOID;
785 PARTIAL_LOCALE (z) = NO_MOID;
786 IS_COERCION (z) = A68G_FALSE;
787 IS_NEW_LEXICAL_LEVEL (z) = A68G_FALSE;
788 NEED_DNS (z) = A68G_FALSE;
789 PARENT (z) = NO_NODE;
790 OFFSET (z) = NO_BYTE;
791 CONSTANT (z) = NO_CONSTANT;
792 LEVEL (z) = 0;
793 ARGSIZE (z) = 0;
794 SIZE (z) = 0;
795 COMPILE_NAME (z) = NO_TEXT;
796 COMPILE_NODE (z) = 0;
797 return z;
798 }
799
800 //! @brief New_node.
801
802 NODE_T *new_node (void)
803 {
804 NODE_T *z = (NODE_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (NODE_T));
805 A68G (new_nodes)++;
806 STATUS (z) = NULL_MASK;
807 CODEX (z) = NULL_MASK;
808 TABLE (z) = NO_TABLE;
809 INFO (z) = NO_NINFO;
810 GINFO (z) = NO_GINFO;
811 ATTRIBUTE (z) = 0;
812 ANNOTATION (z) = 0;
813 MOID (z) = NO_MOID;
814 NEXT (z) = NO_NODE;
815 PREVIOUS (z) = NO_NODE;
816 SUB (z) = NO_NODE;
817 NEST (z) = NO_NODE;
818 NON_LOCAL (z) = NO_TABLE;
819 TAX (z) = NO_TAG;
820 TAX_GC (z) = NO_TAG;
821 SEQUENCE (z) = NO_NODE;
822 PACK (z) = NO_PACK;
823 return z;
824 }
825
826 //! @brief New_symbol_table.
827
828 TABLE_T *new_symbol_table (TABLE_T * p)
829 {
830 TABLE_T *z = (TABLE_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (TABLE_T));
831 NUM (z) = A68G (symbol_table_count);
832 LEVEL (z) = A68G (symbol_table_count)++;
833 NEST (z) = A68G (symbol_table_count);
834 ATTRIBUTE (z) = 0;
835 AP_INCREMENT (z) = 0;
836 INITIALISE_FRAME (z) = A68G_TRUE;
837 PROC_OPS (z) = A68G_TRUE;
838 INITIALISE_ANON (z) = A68G_TRUE;
839 PREVIOUS (z) = p;
840 OUTER (z) = NO_TABLE;
841 IDENTIFIERS (z) = NO_TAG;
842 OPERATORS (z) = NO_TAG;
843 PRIO (z) = NO_TAG;
844 INDICANTS (z) = NO_TAG;
845 LABELS (z) = NO_TAG;
846 ANONYMOUS (z) = NO_TAG;
847 JUMP_TO (z) = NO_NODE;
848 SEQUENCE (z) = NO_NODE;
849 return z;
850 }
851
852 //! @brief New_moid.
853
854 MOID_T *new_moid (void)
855 {
856 MOID_T *z = (MOID_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (MOID_T));
857 A68G (new_modes)++;
858 ATTRIBUTE (z) = 0;
859 NUMBER (z) = 0;
860 DIM (z) = 0;
861 USE (z) = A68G_FALSE;
862 HAS_ROWS (z) = A68G_FALSE;
863 SIZE (z) = 0;
864 DIGITS (z) = 0;
865 SIZE_COMPL (z) = 0;
866 DIGITS_COMPL (z) = 0;
867 PORTABLE (z) = A68G_TRUE;
868 DERIVATE (z) = A68G_FALSE;
869 NODE (z) = NO_NODE;
870 PACK (z) = NO_PACK;
871 SUB (z) = NO_MOID;
872 EQUIVALENT_MODE (z) = NO_MOID;
873 SLICE (z) = NO_MOID;
874 TRIM (z) = NO_MOID;
875 DEFLEXED (z) = NO_MOID;
876 NAME (z) = NO_MOID;
877 MULTIPLE_MODE (z) = NO_MOID;
878 NEXT (z) = NO_MOID;
879 return z;
880 }
881
882 //! @brief New_pack.
883
884 PACK_T *new_pack (void)
885 {
886 PACK_T *z = (PACK_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (PACK_T));
887 MOID (z) = NO_MOID;
888 TEXT (z) = NO_TEXT;
889 NODE (z) = NO_NODE;
890 NEXT (z) = NO_PACK;
891 PREVIOUS (z) = NO_PACK;
892 SIZE (z) = 0;
893 OFFSET (z) = 0;
894 return z;
895 }
896
897 //! @brief New_tag.
898
899 TAG_T *new_tag (void)
900 {
901 TAG_T *z = (TAG_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (TAG_T));
902 STATUS (z) = NULL_MASK;
903 CODEX (z) = NULL_MASK;
904 TAG_TABLE (z) = NO_TABLE;
905 MOID (z) = NO_MOID;
906 NODE (z) = NO_NODE;
907 UNIT (z) = NO_NODE;
908 VALUE (z) = NO_TEXT;
909 A68G_STANDENV_PROC (z) = 0;
910 PROCEDURE (z) = NO_GPROC;
911 SCOPE (z) = PRIMAL_SCOPE;
912 SCOPE_ASSIGNED (z) = A68G_FALSE;
913 PRIO (z) = 0;
914 USE (z) = A68G_FALSE;
915 IN_PROC (z) = A68G_FALSE;
916 HEAP (z) = A68G_FALSE;
917 SIZE (z) = 0;
918 OFFSET (z) = 0;
919 YOUNGEST_ENVIRON (z) = PRIMAL_SCOPE;
920 LOC_ASSIGNED (z) = A68G_FALSE;
921 NEXT (z) = NO_TAG;
922 BODY (z) = NO_TAG;
923 PORTABLE (z) = A68G_TRUE;
924 NUMBER (z) = ++A68G_PARSER (tag_number);
925 return z;
926 }
927
928 //! @brief Make special, internal mode.
929
930 void make_special_mode (MOID_T ** n, int m)
931 {
932 (*n) = new_moid ();
933 ATTRIBUTE (*n) = 0;
934 NUMBER (*n) = m;
935 PACK (*n) = NO_PACK;
936 SUB (*n) = NO_MOID;
937 EQUIVALENT (*n) = NO_MOID;
938 DEFLEXED (*n) = NO_MOID;
939 NAME (*n) = NO_MOID;
940 SLICE (*n) = NO_MOID;
941 TRIM (*n) = NO_MOID;
942 ROWED (*n) = NO_MOID;
943 }
944
945 //! @brief Whether attributes match in subsequent nodes.
946
947 BOOL_T whether (NODE_T * p, ...)
948 {
949 va_list vl;
950 va_start (vl, p);
951 int a;
952 while ((a = va_arg (vl, int)) != STOP)
953 {
954 if (p != NO_NODE && a == WILDCARD) {
955 FORWARD (p);
956 } else if (p != NO_NODE && (a == KEYWORD)) {
957 if (find_keyword_from_attribute (A68G (top_keyword), ATTRIBUTE (p)) != NO_KEYWORD) {
958 FORWARD (p);
959 } else {
960 va_end (vl);
961 return A68G_FALSE;
962 }
963 } else if (p != NO_NODE && (a >= 0 ? a == ATTRIBUTE (p) : (-a) != ATTRIBUTE (p))) {
964 FORWARD (p);
965 } else {
966 va_end (vl);
967 return A68G_FALSE;
968 }
969 }
970 va_end (vl);
971 return A68G_TRUE;
972 }
973
974 //! @brief Whether one of a series of attributes matches a node.
975
976 BOOL_T is_one_of (NODE_T * p, ...)
977 {
978 if (p != NO_NODE) {
979 va_list vl;
980 va_start (vl, p);
981 BOOL_T match = A68G_FALSE;
982 int a;
983 while ((a = va_arg (vl, int)) != STOP)
984 {
985 match = (BOOL_T) (match | (BOOL_T) (IS (p, a)));
986 }
987 va_end (vl);
988 return match;
989 } else {
990 return A68G_FALSE;
991 }
992 }
993
994 //! @brief Isolate nodes p-q making p a branch to p-q.
995
996 void make_sub (NODE_T * p, NODE_T * q, int t)
997 {
998 NODE_T *z = new_node ();
999 ABEND (p == NO_NODE || q == NO_NODE, ERROR_INTERNAL_CONSISTENCY, __func__);
1000 *z = *p;
1001 if (GINFO (p) != NO_GINFO) {
1002 GINFO (z) = new_genie_info ();
1003 }
1004 PREVIOUS (z) = NO_NODE;
1005 if (p == q) {
1006 NEXT (z) = NO_NODE;
1007 } else {
1008 if (NEXT (p) != NO_NODE) {
1009 PREVIOUS (NEXT (p)) = z;
1010 }
1011 NEXT (p) = NEXT (q);
1012 if (NEXT (p) != NO_NODE) {
1013 PREVIOUS (NEXT (p)) = p;
1014 }
1015 NEXT (q) = NO_NODE;
1016 }
1017 SUB (p) = z;
1018 ATTRIBUTE (p) = t;
1019 }
1020
1021 //! @brief Find symbol table at level 'i'.
1022
1023 TABLE_T *find_level (NODE_T * n, int i)
1024 {
1025 if (n == NO_NODE) {
1026 return NO_TABLE;
1027 } else {
1028 TABLE_T *s = TABLE (n);
1029 if (s != NO_TABLE && LEVEL (s) == i) {
1030 return s;
1031 } else if ((s = find_level (SUB (n), i)) != NO_TABLE) {
1032 return s;
1033 } else if ((s = find_level (NEXT (n), i)) != NO_TABLE) {
1034 return s;
1035 } else {
1036 return NO_TABLE;
1037 }
1038 }
1039 }
1040
1041 //! @brief Whether 'p' is top of lexical level.
1042
1043 BOOL_T is_new_lexical_level (NODE_T * p)
1044 {
1045 switch (ATTRIBUTE (p)) {
1046 case ALT_DO_PART:
1047 case BRIEF_ELIF_PART:
1048 case BRIEF_OUSE_PART:
1049 case BRIEF_CONFORMITY_OUSE_PART:
1050 case CHOICE:
1051 case CLOSED_CLAUSE:
1052 case CONDITIONAL_CLAUSE:
1053 case DO_PART:
1054 case ELIF_PART:
1055 case ELSE_PART:
1056 case FORMAT_TEXT:
1057 case CASE_CLAUSE:
1058 case CASE_CHOICE_CLAUSE:
1059 case CASE_IN_PART:
1060 case CASE_OUSE_PART:
1061 case OUT_PART:
1062 case ROUTINE_TEXT:
1063 case SPECIFIED_UNIT:
1064 case THEN_PART:
1065 case UNTIL_PART:
1066 case CONFORMITY_CLAUSE:
1067 case CONFORMITY_CHOICE:
1068 case CONFORMITY_IN_PART:
1069 case CONFORMITY_OUSE_PART:
1070 case WHILE_PART: {
1071 return A68G_TRUE;
1072 }
1073 default: {
1074 return A68G_FALSE;
1075 }
1076 }
1077 }
1078
1079 //! @brief Some_node.
1080
1081 NODE_T *some_node (char *t)
1082 {
1083 NODE_T *z = new_node ();
1084 INFO (z) = new_node_info ();
1085 GINFO (z) = new_genie_info ();
1086 NSYMBOL (z) = t;
1087 return z;
1088 }
© 2002-2025 J.M. van der Veer (jmvdveer@xs4all.nl)
|