parser-scanner.c
1 //! @file parser-scanner.c
2 //! @author J. Marcel van der Veer
3
4 //! @section Copyright
5 //!
6 //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
7 //! Copyright 2001-2023 J. Marcel van der Veer [algol68g@xs4all.nl].
8
9 //! @section License
10 //!
11 //! This program is free software; you can redistribute it and/or modify it
12 //! under the terms of the GNU General Public License as published by the
13 //! Free Software Foundation; either version 3 of the License, or
14 //! (at your option) any later version.
15 //!
16 //! This program is distributed in the hope that it will be useful, but
17 //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 //! more details. You should have received a copy of the GNU General Public
20 //! License along with this program. If not, see [http://www.gnu.org/licenses/].
21
22 //! @section Synopsis
23 //!
24 //! Context-dependent Algol 68 tokeniser.
25
26 #include "a68g.h"
27 #include "a68g-parser.h"
28 #include "a68g-prelude.h"
29 #include "a68g-options.h"
30 #include "a68g-environ.h"
31
32 // Macros.
33
34 #define SCAN_DIGITS(c)\
35 while (IS_DIGIT (c)) {\
36 (sym++)[0] = (c);\
37 (c) = next_char (ref_l, ref_s, A68_TRUE);\
38 }
39
40 #define SCAN_EXPONENT_PART(c)\
41 (sym++)[0] = EXPONENT_CHAR;\
42 (c) = next_char (ref_l, ref_s, A68_TRUE);\
43 if ((c) == '+' || (c) == '-') {\
44 (sym++)[0] = (c);\
45 (c) = next_char (ref_l, ref_s, A68_TRUE);\
46 }\
47 SCAN_ERROR (!IS_DIGIT (c), *start_l, *start_c, ERROR_EXPONENT_DIGIT);\
48 SCAN_DIGITS (c)
49
50 //! @brief Save scanner state, for character look-ahead.
51
52 void save_state (LINE_T * ref_l, char *ref_s, char ch)
53 {
54 SCAN_STATE_L (&A68_JOB) = ref_l;
55 SCAN_STATE_S (&A68_JOB) = ref_s;
56 SCAN_STATE_C (&A68_JOB) = ch;
57 }
58
59 //! @brief Restore scanner state, for character look-ahead.
60
61 void restore_state (LINE_T ** ref_l, char **ref_s, char *ch)
62 {
63 *ref_l = SCAN_STATE_L (&A68_JOB);
64 *ref_s = SCAN_STATE_S (&A68_JOB);
65 *ch = SCAN_STATE_C (&A68_JOB);
66 }
67
68 //! @brief New_source_line.
69
70 LINE_T *new_source_line (void)
71 {
72 LINE_T *z = (LINE_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (LINE_T));
73 MARKER (z)[0] = NULL_CHAR;
74 STRING (z) = NO_TEXT;
75 FILENAME (z) = NO_TEXT;
76 DIAGNOSTICS (z) = NO_DIAGNOSTIC;
77 NUMBER (z) = 0;
78 PRINT_STATUS (z) = 0;
79 LIST (z) = A68_TRUE;
80 NEXT (z) = NO_LINE;
81 PREVIOUS (z) = NO_LINE;
82 return z;
83 }
84
85 //! @brief Append a source line to the internal source file.
86
87 void append_source_line (char *str, LINE_T ** ref_l, int *line_num, char *filename)
88 {
89 LINE_T *z = new_source_line ();
90 // Allow shell command in first line, f.i. "#!/usr/share/bin/a68g".
91 if (*line_num == 1) {
92 if (strlen (str) >= 2 && strncmp (str, "#!", 2) == 0) {
93 ABEND (strstr (str, "run-script") != NO_TEXT, ERROR_SHELL_SCRIPT, __func__);
94 (*line_num)++;
95 return;
96 }
97 }
98 // Link line into the chain.
99 STRING (z) = new_fixed_string (str);
100 FILENAME (z) = filename;
101 NUMBER (z) = (*line_num)++;
102 PRINT_STATUS (z) = NOT_PRINTED;
103 LIST (z) = A68_TRUE;
104 DIAGNOSTICS (z) = NO_DIAGNOSTIC;
105 NEXT (z) = NO_LINE;
106 PREVIOUS (z) = *ref_l;
107 if (TOP_LINE (&A68_JOB) == NO_LINE) {
108 TOP_LINE (&A68_JOB) = z;
109 }
110 if (*ref_l != NO_LINE) {
111 NEXT (*ref_l) = z;
112 }
113 *ref_l = z;
114 }
115
116 // Scanner, tokenises the source code.
117
118 //! @brief Whether ch is unworthy.
119
120 void unworthy (LINE_T * u, char *v, char ch)
121 {
122 if (IS_PRINT (ch)) {
123 ASSERT (snprintf (A68 (edit_line), SNPRINTF_SIZE, "*%s", ERROR_UNWORTHY_CHARACTER) >= 0);
124 } else {
125 ASSERT (snprintf (A68 (edit_line), SNPRINTF_SIZE, "*%s %s", ERROR_UNWORTHY_CHARACTER, ctrl_char (ch)) >= 0);
126 }
127 scan_error (u, v, A68 (edit_line));
128 }
129
130 //! @brief Concatenate lines that terminate in '\' with next line.
131
132 void concatenate_lines (LINE_T * top)
133 {
134 LINE_T *q;
135 // Work from bottom backwards.
136 for (q = top; q != NO_LINE && NEXT (q) != NO_LINE; q = NEXT (q)) {
137 ;
138 }
139 for (; q != NO_LINE; BACKWARD (q)) {
140 char *z = STRING (q);
141 int len = (int) strlen (z);
142 if (len >= 2 && z[len - 2] == BACKSLASH_CHAR && z[len - 1] == NEWLINE_CHAR && NEXT (q) != NO_LINE && STRING (NEXT (q)) != NO_TEXT) {
143 z[len - 2] = NULL_CHAR;
144 len += (int) strlen (STRING (NEXT (q)));
145 z = (char *) get_fixed_heap_space ((size_t) (len + 1));
146 bufcpy (z, STRING (q), len + 1);
147 bufcat (z, STRING (NEXT (q)), len + 1);
148 STRING (NEXT (q))[0] = NULL_CHAR;
149 STRING (q) = z;
150 }
151 }
152 }
153
154 //! @brief Whether u is bold tag v, independent of stropping regime.
155
156 BOOL_T is_bold (char *u, char *v)
157 {
158 unt len = (unt) strlen (v);
159 if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
160 if (u[0] == '\'') {
161 return (BOOL_T) (strncmp (++u, v, len) == 0 && u[len] == '\'');
162 } else {
163 return A68_FALSE;
164 }
165 } else {
166 return (BOOL_T) (strncmp (u, v, len) == 0 && !IS_UPPER (u[len]));
167 }
168 }
169
170 //! @brief Skip string.
171
172 BOOL_T skip_string (LINE_T ** top, char **ch)
173 {
174 LINE_T *u = *top;
175 char *v = *ch;
176 v++;
177 while (u != NO_LINE) {
178 while (v[0] != NULL_CHAR) {
179 if (v[0] == QUOTE_CHAR && v[1] != QUOTE_CHAR) {
180 *top = u;
181 *ch = &v[1];
182 return A68_TRUE;
183 } else if (v[0] == QUOTE_CHAR && v[1] == QUOTE_CHAR) {
184 v += 2;
185 } else {
186 v++;
187 }
188 }
189 FORWARD (u);
190 if (u != NO_LINE) {
191 v = &(STRING (u)[0]);
192 } else {
193 v = NO_TEXT;
194 }
195 }
196 return A68_FALSE;
197 }
198
199 //! @brief Skip comment.
200
201 BOOL_T skip_comment (LINE_T ** top, char **ch, int delim)
202 {
203 LINE_T *u = *top;
204 char *v = *ch;
205 v++;
206 while (u != NO_LINE) {
207 while (v[0] != NULL_CHAR) {
208 if (is_bold (v, "COMMENT") && delim == BOLD_COMMENT_SYMBOL) {
209 *top = u;
210 *ch = &v[1];
211 return A68_TRUE;
212 } else if (is_bold (v, "CO") && delim == STYLE_I_COMMENT_SYMBOL) {
213 *top = u;
214 *ch = &v[1];
215 return A68_TRUE;
216 } else if (v[0] == '#' && delim == STYLE_II_COMMENT_SYMBOL) {
217 *top = u;
218 *ch = &v[1];
219 return A68_TRUE;
220 } else {
221 v++;
222 }
223 }
224 FORWARD (u);
225 if (u != NO_LINE) {
226 v = &(STRING (u)[0]);
227 } else {
228 v = NO_TEXT;
229 }
230 }
231 return A68_FALSE;
232 }
233
234 //! @brief Skip rest of pragmat.
235
236 BOOL_T skip_pragmat (LINE_T ** top, char **ch, int delim, BOOL_T whitespace)
237 {
238 LINE_T *u = *top;
239 char *v = *ch;
240 while (u != NO_LINE) {
241 while (v[0] != NULL_CHAR) {
242 if (is_bold (v, "PRAGMAT") && delim == BOLD_PRAGMAT_SYMBOL) {
243 *top = u;
244 *ch = &v[1];
245 return A68_TRUE;
246 } else if (is_bold (v, "PR") && delim == STYLE_I_PRAGMAT_SYMBOL) {
247 *top = u;
248 *ch = &v[1];
249 return A68_TRUE;
250 } else {
251 if (whitespace && !IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
252 scan_error (u, v, ERROR_PRAGMENT);
253 } else if (IS_UPPER (v[0])) {
254 // Skip a bold word as you may trigger on REPR, for instance ...
255 while (IS_UPPER (v[0])) {
256 v++;
257 }
258 } else {
259 v++;
260 }
261 }
262 }
263 FORWARD (u);
264 if (u != NO_LINE) {
265 v = &(STRING (u)[0]);
266 } else {
267 v = NO_TEXT;
268 }
269 }
270 return A68_FALSE;
271 }
272
273 //! @brief Return pointer to next token within pragmat.
274
275 char *get_pragmat_item (LINE_T ** top, char **ch)
276 {
277 LINE_T *u = *top;
278 char *v = *ch;
279 while (u != NO_LINE) {
280 while (v[0] != NULL_CHAR) {
281 if (!IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
282 *top = u;
283 *ch = v;
284 return v;
285 } else {
286 v++;
287 }
288 }
289 FORWARD (u);
290 if (u != NO_LINE) {
291 v = &(STRING (u)[0]);
292 } else {
293 v = NO_TEXT;
294 }
295 }
296 return NO_TEXT;
297 }
298
299 //! @brief Case insensitive strncmp for at most the number of chars in 'v'.
300
301 int streq (char *u, char *v)
302 {
303 int diff;
304 for (diff = 0; diff == 0 && u[0] != NULL_CHAR && v[0] != NULL_CHAR; u++, v++) {
305 diff = ((int) TO_LOWER (u[0])) - ((int) TO_LOWER (v[0]));
306 }
307 return diff;
308 }
309
310 //! @brief Scan for next pragmat and yield first pragmat item.
311
312 char *next_preprocessor_item (LINE_T ** top, char **ch, int *delim)
313 {
314 LINE_T *u = *top;
315 char *v = *ch;
316 *delim = 0;
317 while (u != NO_LINE) {
318 while (v[0] != NULL_CHAR) {
319 LINE_T *start_l = u;
320 char *start_c = v;
321 // STRINGs must be skipped.
322 if (v[0] == QUOTE_CHAR) {
323 SCAN_ERROR (!skip_string (&u, &v), start_l, start_c, ERROR_UNTERMINATED_STRING);
324 }
325 // COMMENTS must be skipped.
326 else if (is_bold (v, "COMMENT")) {
327 SCAN_ERROR (!skip_comment (&u, &v, BOLD_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
328 } else if (is_bold (v, "CO")) {
329 SCAN_ERROR (!skip_comment (&u, &v, STYLE_I_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
330 } else if (v[0] == '#') {
331 SCAN_ERROR (!skip_comment (&u, &v, STYLE_II_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
332 } else if (is_bold (v, "PRAGMAT") || is_bold (v, "PR")) {
333 // We caught a PRAGMAT.
334 char *item;
335 if (is_bold (v, "PRAGMAT")) {
336 *delim = BOLD_PRAGMAT_SYMBOL;
337 v = &v[strlen ("PRAGMAT")];
338 } else if (is_bold (v, "PR")) {
339 *delim = STYLE_I_PRAGMAT_SYMBOL;
340 v = &v[strlen ("PR")];
341 }
342 item = get_pragmat_item (&u, &v);
343 SCAN_ERROR (item == NO_TEXT, start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
344 // Item "preprocessor" restarts preprocessing if it is off.
345 if (A68_PARSER (no_preprocessing) && streq (item, "PREPROCESSOR") == 0) {
346 A68_PARSER (no_preprocessing) = A68_FALSE;
347 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
348 }
349 // If preprocessing is switched off, we idle to closing bracket.
350 else if (A68_PARSER (no_preprocessing)) {
351 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
352 }
353 // Item "nopreprocessor" stops preprocessing if it is on.
354 if (streq (item, "NOPREPROCESSOR") == 0) {
355 A68_PARSER (no_preprocessing) = A68_TRUE;
356 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
357 }
358 // Item "INCLUDE" includes a file.
359 else if (streq (item, "INCLUDE") == 0) {
360 *top = u;
361 *ch = v;
362 return item;
363 }
364 // Item "READ" includes a file.
365 else if (streq (item, "READ") == 0) {
366 *top = u;
367 *ch = v;
368 return item;
369 }
370 // Unrecognised item - probably options handled later by the tokeniser.
371 else {
372 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
373 }
374 } else if (IS_UPPER (v[0])) {
375 // Skip a bold word as you may trigger on REPR, for instance ...
376 while (IS_UPPER (v[0])) {
377 v++;
378 }
379 } else {
380 v++;
381 }
382 }
383 FORWARD (u);
384 if (u != NO_LINE) {
385 v = &(STRING (u)[0]);
386 } else {
387 v = NO_TEXT;
388 }
389 }
390 *top = u;
391 *ch = v;
392 return NO_TEXT;
393 }
394
395 //! @brief Include files.
396
397 void include_files (LINE_T * top)
398 {
399 // include_files
400 //
401 // syntax: PR read "filename" PR
402 // PR include "filename" PR
403 //
404 // The file gets inserted before the line containing the pragmat. In this way
405 // correct line numbers are preserved which helps diagnostics. A file that has
406 // been included will not be included a second time - it will be ignored.
407 // A rigorous fail-safe, but there is no mechanism to prevent recursive includes
408 // in A68 source code. User reports do not indicate sophisticated use of INCLUDE,
409 // so this is fine for now.
410 // TODO - some day we might need `app', analogous to `cpp'.
411 BOOL_T make_pass = A68_TRUE;
412 while (make_pass) {
413 LINE_T *s, *t, *u = top;
414 char *v = &(STRING (u)[0]);
415 make_pass = A68_FALSE;
416 errno = 0;
417 while (u != NO_LINE) {
418 int pr_lim;
419 char *item = next_preprocessor_item (&u, &v, &pr_lim);
420 LINE_T *start_l = u;
421 char *start_c = v;
422 // Search for PR include "filename" PR.
423 if (item != NO_TEXT && (streq (item, "INCLUDE") == 0 || streq (item, "READ") == 0)) {
424 FILE_T fd;
425 int n, linum, fsize, k, bytes_read, fnwid;
426 char *fbuf, delim;
427 BUFFER fnb;
428 char *fn;
429 // Skip to filename.
430 while (IS_ALPHA (v[0])) {
431 v++;
432 }
433 while (IS_SPACE (v[0])) {
434 v++;
435 }
436 // Scan quoted filename.
437 SCAN_ERROR ((v[0] != QUOTE_CHAR && v[0] != '\''), start_l, start_c, ERROR_INCORRECT_FILENAME);
438 delim = (v++)[0];
439 n = 0;
440 fnb[0] = NULL_CHAR;
441 // Scan Algol 68 string (note: "" denotes a ", while in C it concatenates).
442 do {
443 SCAN_ERROR (EOL (v[0]), start_l, start_c, ERROR_INCORRECT_FILENAME);
444 SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
445 if (v[0] == delim) {
446 while (v[0] == delim && v[1] == delim) {
447 SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
448 fnb[n++] = delim;
449 fnb[n] = NULL_CHAR;
450 v += 2;
451 }
452 } else if (IS_PRINT (v[0])) {
453 fnb[n++] = *(v++);
454 fnb[n] = NULL_CHAR;
455 } else {
456 SCAN_ERROR (A68_TRUE, start_l, start_c, ERROR_INCORRECT_FILENAME);
457 }
458 } while (v[0] != delim);
459 // Insist that the pragmat is closed properly.
460 v = &v[1];
461 SCAN_ERROR (!skip_pragmat (&u, &v, pr_lim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
462 SCAN_ERROR (n == 0, start_l, start_c, ERROR_INCORRECT_FILENAME);
463 // Make the name relative to the position of the source file (C preprocessor standard).
464 if (FILENAME (u) != NO_TEXT) {
465 fn = a68_relpath (a68_dirname (FILENAME (u)), a68_dirname (fnb), a68_basename (fnb));
466 } else {
467 fn = a68_relpath (FILE_PATH (&A68_JOB), a68_dirname (fnb), a68_basename (fnb));
468 }
469 // Do not check errno, since errno may be undefined here after a successful call.
470 if (fn != NO_TEXT) {
471 bufcpy (fnb, fn, BUFFER_SIZE);
472 } else {
473 SCAN_ERROR (A68_TRUE, NO_LINE, NO_TEXT, ERROR_SOURCE_FILE_INCLUDE_OPEN);
474 }
475 fnwid = (int) strlen (fnb) + 1;
476 fn = (char *) get_fixed_heap_space ((size_t) fnwid);
477 bufcpy (fn, fnb, fnwid);
478 // Ignore the file when included more than once.
479 for (t = top; t != NO_LINE; t = NEXT (t)) {
480 if (strcmp (FILENAME (t), fn) == 0) {
481 goto search_next_pragmat;
482 }
483 }
484 t = NO_LINE;
485 // Access the file.
486 errno = 0;
487 fd = open (fn, O_RDONLY | O_BINARY);
488 SCAN_ERROR (fd == -1, start_l, start_c, ERROR_SOURCE_FILE_INCLUDE_OPEN);
489 errno = 0;
490 fsize = (int) lseek (fd, 0, SEEK_END);
491 ASSERT (fsize >= 0);
492 SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
493 fbuf = (char *) get_temp_heap_space ((unt) (8 + fsize));
494 errno = 0;
495 ASSERT (lseek (fd, 0, SEEK_SET) >= 0);
496 SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
497 errno = 0;
498 bytes_read = (int) io_read (fd, fbuf, (size_t) fsize);
499 SCAN_ERROR (errno != 0 || bytes_read != fsize, start_l, start_c, ERROR_FILE_READ);
500 // Buffer still usable?.
501 if (fsize > A68_PARSER (max_scan_buf_length)) {
502 A68_PARSER (max_scan_buf_length) = fsize;
503 A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (max_scan_buf_length)));
504 }
505 // Link all lines into the list.
506 linum = 1;
507 s = u;
508 t = PREVIOUS (u);
509 k = 0;
510 if (fsize == 0) {
511 // If file is empty, insert single empty line.
512 A68_PARSER (scan_buf)[0] = NEWLINE_CHAR;
513 A68_PARSER (scan_buf)[1] = NULL_CHAR;
514 append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
515 } else
516 while (k < fsize) {
517 n = 0;
518 A68_PARSER (scan_buf)[0] = NULL_CHAR;
519 while (k < fsize && fbuf[k] != NEWLINE_CHAR) {
520 SCAN_ERROR ((IS_CNTRL (fbuf[k]) && !IS_SPACE (fbuf[k])) || fbuf[k] == STOP_CHAR, start_l, start_c, ERROR_FILE_INCLUDE_CTRL);
521 A68_PARSER (scan_buf)[n++] = fbuf[k++];
522 A68_PARSER (scan_buf)[n] = NULL_CHAR;
523 }
524 A68_PARSER (scan_buf)[n++] = NEWLINE_CHAR;
525 A68_PARSER (scan_buf)[n] = NULL_CHAR;
526 if (k < fsize) {
527 k++;
528 }
529 append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
530 }
531 // Conclude and go find another include directive, if any.
532 NEXT (t) = s;
533 PREVIOUS (s) = t;
534 concatenate_lines (top);
535 ASSERT (close (fd) == 0);
536 make_pass = A68_TRUE;
537 }
538 search_next_pragmat:_SKIP_;
539 }
540 }
541 }
542
543 //! @brief Size of source file.
544
545 int get_source_size (void)
546 {
547 FILE_T f = FILE_SOURCE_FD (&A68_JOB);
548 // This is why WIN32 must open as "read binary".
549 return (int) lseek (f, 0, SEEK_END);
550 }
551
552 //! @brief Append environment source lines.
553
554 void append_environ (char *str[], LINE_T ** ref_l, int *line_num, char *name)
555 {
556 for (int k = 0; str[k] != NO_TEXT; k++) {
557 int zero_line_num = 0;
558 (*line_num)++;
559 append_source_line (str[k], ref_l, &zero_line_num, name);
560 }
561 }
562
563 //! @brief Read script file and make internal copy.
564
565 BOOL_T read_script_file (void)
566 {
567 LINE_T *ref_l = NO_LINE;
568 int k, n, num;
569 unt len;
570 BOOL_T file_end = A68_FALSE;
571 BUFFER filename, linenum;
572 char ch, *fn, *line;
573 char *buffer = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (source_file_size)));
574 FILE_T source = FILE_SOURCE_FD (&A68_JOB);
575 ABEND (source == -1, ERROR_ACTION, __func__);
576 buffer[0] = NULL_CHAR;
577 n = 0;
578 len = (unt) (8 + A68_PARSER (source_file_size));
579 buffer = (char *) get_temp_heap_space (len);
580 ASSERT (lseek (source, 0, SEEK_SET) >= 0);
581 while (!file_end) {
582 // Read the original file name.
583 filename[0] = NULL_CHAR;
584 k = 0;
585 if (io_read (source, &ch, 1) == 0) {
586 file_end = A68_TRUE;
587 continue;
588 }
589 while (ch != NEWLINE_CHAR) {
590 filename[k++] = ch;
591 ASSERT (io_read (source, &ch, 1) == 1);
592 }
593 filename[k] = NULL_CHAR;
594 fn = TEXT (add_token (&A68 (top_token), filename));
595 // Read the original file number.
596 linenum[0] = NULL_CHAR;
597 k = 0;
598 ASSERT (io_read (source, &ch, 1) == 1);
599 while (ch != NEWLINE_CHAR) {
600 linenum[k++] = ch;
601 ASSERT (io_read (source, &ch, 1) == 1);
602 }
603 linenum[k] = NULL_CHAR;
604 num = (int) strtol (linenum, NO_VAR, 10);
605 ABEND (errno == ERANGE, ERROR_INTERNAL_CONSISTENCY, __func__);
606 // COPY original line into buffer.
607 ASSERT (io_read (source, &ch, 1) == 1);
608 line = &buffer[n];
609 while (ch != NEWLINE_CHAR) {
610 buffer[n++] = ch;
611 ASSERT (io_read (source, &ch, 1) == 1);
612 ABEND ((unt) n >= len, ERROR_ACTION, __func__);
613 }
614 buffer[n++] = NEWLINE_CHAR;
615 buffer[n] = NULL_CHAR;
616 append_source_line (line, &ref_l, &num, fn);
617 }
618 return A68_TRUE;
619 }
620
621 //! @brief Read source file and make internal copy.
622
623 BOOL_T read_source_file (void)
624 {
625 LINE_T *ref_l = NO_LINE;
626 int line_num = 0, k, bytes_read;
627 ssize_t l;
628 FILE_T f = FILE_SOURCE_FD (&A68_JOB);
629 char **prelude_start, **postlude, *buffer;
630 // Prelude.
631 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
632 prelude_start = bold_prelude_start;
633 postlude = bold_postlude;
634 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
635 prelude_start = quote_prelude_start;
636 postlude = quote_postlude;
637 } else {
638 prelude_start = postlude = NO_VAR;
639 }
640 append_environ (prelude_start, &ref_l, &line_num, "prelude");
641 // Read the file into a single buffer, so we save on system calls.
642 line_num = 1;
643 errno = 0;
644 buffer = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (source_file_size)));
645 ABEND (errno != 0 || buffer == NO_TEXT, ERROR_ALLOCATION, __func__);
646 ASSERT (lseek (f, 0, SEEK_SET) >= 0);
647 ABEND (errno != 0, ERROR_ACTION, __func__);
648 errno = 0;
649 bytes_read = (int) io_read (f, buffer, (size_t) A68_PARSER (source_file_size));
650 ABEND (errno != 0 || bytes_read != A68_PARSER (source_file_size), ERROR_ACTION, __func__);
651 // Link all lines into the list.
652 k = 0;
653 while (k < A68_PARSER (source_file_size)) {
654 l = 0;
655 A68_PARSER (scan_buf)[0] = NULL_CHAR;
656 while (k < A68_PARSER (source_file_size) && buffer[k] != NEWLINE_CHAR) {
657 if (k < A68_PARSER (source_file_size) - 1 && buffer[k] == CR_CHAR && buffer[k + 1] == NEWLINE_CHAR) {
658 k++;
659 } else {
660 A68_PARSER (scan_buf)[l++] = buffer[k++];
661 A68_PARSER (scan_buf)[l] = NULL_CHAR;
662 }
663 }
664 A68_PARSER (scan_buf)[l++] = NEWLINE_CHAR;
665 A68_PARSER (scan_buf)[l] = NULL_CHAR;
666 if (k < A68_PARSER (source_file_size)) {
667 k++;
668 }
669 append_source_line (A68_PARSER (scan_buf), &ref_l, &line_num, FILE_SOURCE_NAME (&A68_JOB));
670 SCAN_ERROR (l != (ssize_t) strlen (A68_PARSER (scan_buf)), NO_LINE, NO_TEXT, ERROR_FILE_SOURCE_CTRL);
671 }
672 // Postlude.
673 append_environ (postlude, &ref_l, &line_num, "postlude");
674 // Concatenate lines.
675 concatenate_lines (TOP_LINE (&A68_JOB));
676 // Include files.
677 include_files (TOP_LINE (&A68_JOB));
678 return A68_TRUE;
679 }
680
681 //! @brief Next_char get next character from internal copy of source file.
682
683 char next_char (LINE_T ** ref_l, char **ref_s, BOOL_T allow_typo)
684 {
685 char ch;
686 #if defined (NO_TYPO)
687 allow_typo = A68_FALSE;
688 #endif
689 LOW_STACK_ALERT (NO_NODE);
690 // Source empty?.
691 if (*ref_l == NO_LINE) {
692 return STOP_CHAR;
693 } else {
694 LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68_JOB) & SOURCE_MASK ? A68_TRUE : A68_FALSE);
695 // Take new line?.
696 if ((*ref_s)[0] == NEWLINE_CHAR || (*ref_s)[0] == NULL_CHAR) {
697 *ref_l = NEXT (*ref_l);
698 if (*ref_l == NO_LINE) {
699 return STOP_CHAR;
700 }
701 *ref_s = STRING (*ref_l);
702 } else {
703 (*ref_s)++;
704 }
705 // Deliver next char.
706 ch = (*ref_s)[0];
707 if (allow_typo && (IS_SPACE (ch) || ch == FORMFEED_CHAR)) {
708 ch = next_char (ref_l, ref_s, allow_typo);
709 }
710 return ch;
711 }
712 }
713
714 //! @brief Find first character that can start a valid symbol.
715
716 void get_good_char (char *ref_c, LINE_T ** ref_l, char **ref_s)
717 {
718 while (*ref_c != STOP_CHAR && (IS_SPACE (*ref_c) || (*ref_c == NULL_CHAR))) {
719 if (*ref_l != NO_LINE) {
720 LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68_JOB) & SOURCE_MASK ? A68_TRUE : A68_FALSE);
721 }
722 *ref_c = next_char (ref_l, ref_s, A68_FALSE);
723 }
724 }
725
726 //! @brief Handle a pragment (pragmat or comment).
727
728 char *pragment (int type, LINE_T ** ref_l, char **ref_c)
729 {
730 #define INIT_BUFFER {chars_in_buf = 0; A68_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
731 #define ADD_ONE_CHAR(ch) {A68_PARSER (scan_buf)[chars_in_buf ++] = ch; A68_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
732 char c = **ref_c, *term_s = NO_TEXT, *start_c = *ref_c;
733 char *z = NO_TEXT;
734 LINE_T *start_l = *ref_l;
735 int term_s_length, chars_in_buf;
736 BOOL_T stop, pragmat = A68_FALSE;
737 // Set terminator.
738 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
739 if (type == STYLE_I_COMMENT_SYMBOL) {
740 term_s = "CO";
741 } else if (type == STYLE_II_COMMENT_SYMBOL) {
742 term_s = "#";
743 } else if (type == BOLD_COMMENT_SYMBOL) {
744 term_s = "COMMENT";
745 } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
746 term_s = "PR";
747 pragmat = A68_TRUE;
748 } else if (type == BOLD_PRAGMAT_SYMBOL) {
749 term_s = "PRAGMAT";
750 pragmat = A68_TRUE;
751 }
752 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
753 if (type == STYLE_I_COMMENT_SYMBOL) {
754 term_s = "'CO'";
755 } else if (type == STYLE_II_COMMENT_SYMBOL) {
756 term_s = "#";
757 } else if (type == BOLD_COMMENT_SYMBOL) {
758 term_s = "'COMMENT'";
759 } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
760 term_s = "'PR'";
761 pragmat = A68_TRUE;
762 } else if (type == BOLD_PRAGMAT_SYMBOL) {
763 term_s = "'PRAGMAT'";
764 pragmat = A68_TRUE;
765 }
766 }
767 term_s_length = (int) strlen (term_s);
768 // Scan for terminator.
769 INIT_BUFFER;
770 stop = A68_FALSE;
771 while (stop == A68_FALSE) {
772 SCAN_ERROR (c == STOP_CHAR, start_l, start_c, ERROR_UNTERMINATED_PRAGMENT);
773 // A ".." or '..' delimited string in a PRAGMAT.
774 if (pragmat && (c == QUOTE_CHAR || (c == '\'' && OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING))) {
775 char delim = c;
776 BOOL_T eos = A68_FALSE;
777 ADD_ONE_CHAR (c);
778 c = next_char (ref_l, ref_c, A68_FALSE);
779 while (!eos) {
780 SCAN_ERROR (EOL (c), start_l, start_c, ERROR_LONG_STRING);
781 if (c == delim) {
782 ADD_ONE_CHAR (delim);
783 save_state (*ref_l, *ref_c, c);
784 c = next_char (ref_l, ref_c, A68_FALSE);
785 if (c == delim) {
786 c = next_char (ref_l, ref_c, A68_FALSE);
787 } else {
788 restore_state (ref_l, ref_c, &c);
789 eos = A68_TRUE;
790 }
791 } else if (IS_PRINT (c)) {
792 ADD_ONE_CHAR (c);
793 c = next_char (ref_l, ref_c, A68_FALSE);
794 } else {
795 unworthy (start_l, start_c, c);
796 }
797 }
798 } else if (EOL (c)) {
799 ADD_ONE_CHAR (NEWLINE_CHAR);
800 } else if (IS_PRINT (c) || IS_SPACE (c)) {
801 ADD_ONE_CHAR (c);
802 }
803 if (chars_in_buf >= term_s_length) {
804 // Check whether we encountered the terminator.
805 stop = (BOOL_T) (strcmp (term_s, &(A68_PARSER (scan_buf)[chars_in_buf - term_s_length])) == 0);
806 }
807 c = next_char (ref_l, ref_c, A68_FALSE);
808 }
809 A68_PARSER (scan_buf)[chars_in_buf - term_s_length] = NULL_CHAR;
810 z = new_string (term_s, A68_PARSER (scan_buf), term_s, NO_TEXT);
811 if (type == STYLE_I_PRAGMAT_SYMBOL || type == BOLD_PRAGMAT_SYMBOL) {
812 isolate_options (A68_PARSER (scan_buf), start_l);
813 }
814 return z;
815 #undef ADD_ONE_CHAR
816 #undef INIT_BUFFER
817 }
818
819 //! @brief Attribute for format item.
820
821 int get_format_item (char ch)
822 {
823 switch (TO_LOWER (ch)) {
824 case 'a': {
825 return FORMAT_ITEM_A;
826 }
827 case 'b': {
828 return FORMAT_ITEM_B;
829 }
830 case 'c': {
831 return FORMAT_ITEM_C;
832 }
833 case 'd': {
834 return FORMAT_ITEM_D;
835 }
836 case 'e': {
837 return FORMAT_ITEM_E;
838 }
839 case 'f': {
840 return FORMAT_ITEM_F;
841 }
842 case 'g': {
843 return FORMAT_ITEM_G;
844 }
845 case 'h': {
846 return FORMAT_ITEM_H;
847 }
848 case 'i': {
849 return FORMAT_ITEM_I;
850 }
851 case 'j': {
852 return FORMAT_ITEM_J;
853 }
854 case 'k': {
855 return FORMAT_ITEM_K;
856 }
857 case 'l':
858 case '/': {
859 return FORMAT_ITEM_L;
860 }
861 case 'm': {
862 return FORMAT_ITEM_M;
863 }
864 case 'n': {
865 return FORMAT_ITEM_N;
866 }
867 case 'o': {
868 return FORMAT_ITEM_O;
869 }
870 case 'p': {
871 return FORMAT_ITEM_P;
872 }
873 case 'q': {
874 return FORMAT_ITEM_Q;
875 }
876 case 'r': {
877 return FORMAT_ITEM_R;
878 }
879 case 's': {
880 return FORMAT_ITEM_S;
881 }
882 case 't': {
883 return FORMAT_ITEM_T;
884 }
885 case 'u': {
886 return FORMAT_ITEM_U;
887 }
888 case 'v': {
889 return FORMAT_ITEM_V;
890 }
891 case 'w': {
892 return FORMAT_ITEM_W;
893 }
894 case 'x': {
895 return FORMAT_ITEM_X;
896 }
897 case 'y': {
898 return FORMAT_ITEM_Y;
899 }
900 case 'z': {
901 return FORMAT_ITEM_Z;
902 }
903 case '+': {
904 return FORMAT_ITEM_PLUS;
905 }
906 case '-': {
907 return FORMAT_ITEM_MINUS;
908 }
909 case POINT_CHAR: {
910 return FORMAT_ITEM_POINT;
911 }
912 case '%': {
913 return FORMAT_ITEM_ESCAPE;
914 }
915 default: {
916 return 0;
917 }
918 }
919 }
920
921 //! @brief Whether input shows exponent character.
922
923 BOOL_T is_exp_char (LINE_T ** ref_l, char **ref_s, char *ch)
924 {
925 BOOL_T ret = A68_FALSE;
926 char exp_syms[3];
927 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
928 exp_syms[0] = EXPONENT_CHAR;
929 exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
930 exp_syms[2] = NULL_CHAR;
931 } else {
932 exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
933 exp_syms[1] = BACKSLASH_CHAR;
934 exp_syms[2] = NULL_CHAR;
935 }
936 save_state (*ref_l, *ref_s, *ch);
937 if (strchr (exp_syms, *ch) != NO_TEXT) {
938 *ch = next_char (ref_l, ref_s, A68_TRUE);
939 ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
940 }
941 restore_state (ref_l, ref_s, ch);
942 return ret;
943 }
944
945 //! @brief Whether input shows radix character.
946
947 BOOL_T is_radix_char (LINE_T ** ref_l, char **ref_s, char *ch)
948 {
949 BOOL_T ret = A68_FALSE;
950 save_state (*ref_l, *ref_s, *ch);
951 if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
952 if (*ch == TO_UPPER (RADIX_CHAR)) {
953 *ch = next_char (ref_l, ref_s, A68_TRUE);
954 ret = (BOOL_T) (strchr ("0123456789ABCDEF", *ch) != NO_TEXT);
955 }
956 } else {
957 if (*ch == RADIX_CHAR) {
958 *ch = next_char (ref_l, ref_s, A68_TRUE);
959 ret = (BOOL_T) (strchr ("0123456789abcdef", *ch) != NO_TEXT);
960 }
961 }
962 restore_state (ref_l, ref_s, ch);
963 return ret;
964 }
965
966 //! @brief Whether input shows decimal point.
967
968 BOOL_T is_decimal_point (LINE_T ** ref_l, char **ref_s, char *ch)
969 {
970 BOOL_T ret = A68_FALSE;
971 save_state (*ref_l, *ref_s, *ch);
972 if (*ch == POINT_CHAR) {
973 char exp_syms[3];
974 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
975 exp_syms[0] = EXPONENT_CHAR;
976 exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
977 exp_syms[2] = NULL_CHAR;
978 } else {
979 exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
980 exp_syms[1] = BACKSLASH_CHAR;
981 exp_syms[2] = NULL_CHAR;
982 }
983 *ch = next_char (ref_l, ref_s, A68_TRUE);
984 if (strchr (exp_syms, *ch) != NO_TEXT) {
985 *ch = next_char (ref_l, ref_s, A68_TRUE);
986 ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
987 } else {
988 ret = (BOOL_T) (strchr ("0123456789", *ch) != NO_TEXT);
989 }
990 }
991 restore_state (ref_l, ref_s, ch);
992 return ret;
993 }
994
995 //! @brief Get next token from internal copy of source file..
996
997 void get_next_token (BOOL_T in_format, LINE_T ** ref_l, char **ref_s, LINE_T ** start_l, char **start_c, int *att)
998 {
999 char c = **ref_s, *sym = A68_PARSER (scan_buf);
1000 sym[0] = NULL_CHAR;
1001 get_good_char (&c, ref_l, ref_s);
1002 *start_l = *ref_l;
1003 *start_c = *ref_s;
1004 if (c == STOP_CHAR) {
1005 // We are at EOF.
1006 (sym++)[0] = STOP_CHAR;
1007 sym[0] = NULL_CHAR;
1008 return;
1009 }
1010 // In a format.
1011 if (in_format) {
1012 char *format_items;
1013 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1014 format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1015 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1016 format_items = "/%\\+-.ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1017 } else {
1018 format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1019 }
1020 if (strchr (format_items, c) != NO_TEXT) {
1021 // General format items.
1022 (sym++)[0] = c;
1023 sym[0] = NULL_CHAR;
1024 *att = get_format_item (c);
1025 (void) next_char (ref_l, ref_s, A68_FALSE);
1026 return;
1027 }
1028 if (IS_DIGIT (c)) {
1029 // INT denotation for static replicator.
1030 SCAN_DIGITS (c);
1031 sym[0] = NULL_CHAR;
1032 *att = STATIC_REPLICATOR;
1033 return;
1034 }
1035 }
1036 // Not in a format.
1037 if (IS_UPPER (c)) {
1038 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1039 // Upper case word - bold tag.
1040 while (IS_UPPER (c) || c == '_') {
1041 (sym++)[0] = c;
1042 c = next_char (ref_l, ref_s, A68_FALSE);
1043 }
1044 sym[0] = NULL_CHAR;
1045 *att = BOLD_TAG;
1046 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1047 while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1048 (sym++)[0] = c;
1049 c = next_char (ref_l, ref_s, A68_TRUE);
1050 }
1051 sym[0] = NULL_CHAR;
1052 *att = IDENTIFIER;
1053 }
1054 } else if (c == '\'') {
1055 // Quote, uppercase word, quote - bold tag.
1056 int k = 0;
1057 c = next_char (ref_l, ref_s, A68_FALSE);
1058 while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1059 (sym++)[0] = c;
1060 k++;
1061 c = next_char (ref_l, ref_s, A68_TRUE);
1062 }
1063 SCAN_ERROR (k == 0, *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1064 sym[0] = NULL_CHAR;
1065 *att = BOLD_TAG;
1066 // Skip terminating quote, or complain if it is not there.
1067 SCAN_ERROR (c != '\'', *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1068 c = next_char (ref_l, ref_s, A68_FALSE);
1069 } else if (IS_LOWER (c)) {
1070 // Lower case word - identifier.
1071 while (IS_LOWER (c) || IS_DIGIT (c) || c == '_') {
1072 (sym++)[0] = c;
1073 c = next_char (ref_l, ref_s, A68_TRUE);
1074 }
1075 sym[0] = NULL_CHAR;
1076 *att = IDENTIFIER;
1077 } else if (c == POINT_CHAR) {
1078 // Begins with a point symbol - point, dotdot, L REAL denotation.
1079 if (is_decimal_point (ref_l, ref_s, &c)) {
1080 (sym++)[0] = '0';
1081 (sym++)[0] = POINT_CHAR;
1082 c = next_char (ref_l, ref_s, A68_TRUE);
1083 SCAN_DIGITS (c);
1084 if (is_exp_char (ref_l, ref_s, &c)) {
1085 SCAN_EXPONENT_PART (c);
1086 }
1087 sym[0] = NULL_CHAR;
1088 *att = REAL_DENOTATION;
1089 } else {
1090 c = next_char (ref_l, ref_s, A68_TRUE);
1091 if (c == POINT_CHAR) {
1092 (sym++)[0] = POINT_CHAR;
1093 (sym++)[0] = POINT_CHAR;
1094 sym[0] = NULL_CHAR;
1095 *att = DOTDOT_SYMBOL;
1096 c = next_char (ref_l, ref_s, A68_FALSE);
1097 } else {
1098 (sym++)[0] = POINT_CHAR;
1099 sym[0] = NULL_CHAR;
1100 *att = POINT_SYMBOL;
1101 }
1102 }
1103 } else if (IS_DIGIT (c)) {
1104 // Something that begins with a digit - L INT denotation, L REAL denotation.
1105 SCAN_DIGITS (c);
1106 if (is_decimal_point (ref_l, ref_s, &c)) {
1107 c = next_char (ref_l, ref_s, A68_TRUE);
1108 if (is_exp_char (ref_l, ref_s, &c)) {
1109 (sym++)[0] = POINT_CHAR;
1110 (sym++)[0] = '0';
1111 SCAN_EXPONENT_PART (c);
1112 *att = REAL_DENOTATION;
1113 } else {
1114 (sym++)[0] = POINT_CHAR;
1115 SCAN_DIGITS (c);
1116 if (is_exp_char (ref_l, ref_s, &c)) {
1117 SCAN_EXPONENT_PART (c);
1118 }
1119 *att = REAL_DENOTATION;
1120 }
1121 } else if (is_exp_char (ref_l, ref_s, &c)) {
1122 SCAN_EXPONENT_PART (c);
1123 *att = REAL_DENOTATION;
1124 } else if (is_radix_char (ref_l, ref_s, &c)) {
1125 (sym++)[0] = c;
1126 c = next_char (ref_l, ref_s, A68_TRUE);
1127 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1128 while (IS_DIGIT (c) || strchr ("abcdef", c) != NO_TEXT) {
1129 (sym++)[0] = c;
1130 c = next_char (ref_l, ref_s, A68_TRUE);
1131 }
1132 } else {
1133 while (IS_DIGIT (c) || strchr ("ABCDEF", c) != NO_TEXT) {
1134 (sym++)[0] = c;
1135 c = next_char (ref_l, ref_s, A68_TRUE);
1136 }
1137 }
1138 *att = BITS_DENOTATION;
1139 } else {
1140 *att = INT_DENOTATION;
1141 }
1142 sym[0] = NULL_CHAR;
1143 } else if (c == QUOTE_CHAR) {
1144 // STRING denotation.
1145 BOOL_T stop = A68_FALSE;
1146 while (!stop) {
1147 c = next_char (ref_l, ref_s, A68_FALSE);
1148 while (c != QUOTE_CHAR && c != STOP_CHAR) {
1149 SCAN_ERROR (EOL (c), *start_l, *start_c, ERROR_LONG_STRING);
1150 (sym++)[0] = c;
1151 c = next_char (ref_l, ref_s, A68_FALSE);
1152 }
1153 SCAN_ERROR (*ref_l == NO_LINE, *start_l, *start_c, ERROR_UNTERMINATED_STRING);
1154 c = next_char (ref_l, ref_s, A68_FALSE);
1155 if (c == QUOTE_CHAR) {
1156 (sym++)[0] = QUOTE_CHAR;
1157 } else {
1158 stop = A68_TRUE;
1159 }
1160 }
1161 sym[0] = NULL_CHAR;
1162 *att = (in_format ? LITERAL : ROW_CHAR_DENOTATION);
1163 } else if (strchr ("#$()[]{},;@", c) != NO_TEXT) {
1164 // Single character symbols.
1165 (sym++)[0] = c;
1166 (void) next_char (ref_l, ref_s, A68_FALSE);
1167 sym[0] = NULL_CHAR;
1168 *att = 0;
1169 } else if (c == '|') {
1170 // Bar.
1171 (sym++)[0] = c;
1172 c = next_char (ref_l, ref_s, A68_FALSE);
1173 if (c == ':') {
1174 (sym++)[0] = c;
1175 (void) next_char (ref_l, ref_s, A68_FALSE);
1176 }
1177 sym[0] = NULL_CHAR;
1178 *att = 0;
1179 } else if (c == '!' && OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1180 // Bar, will be replaced with modern variant.
1181 // For this reason ! is not a MONAD with quote-stropping.
1182 (sym++)[0] = '|';
1183 c = next_char (ref_l, ref_s, A68_FALSE);
1184 if (c == ':') {
1185 (sym++)[0] = c;
1186 (void) next_char (ref_l, ref_s, A68_FALSE);
1187 }
1188 sym[0] = NULL_CHAR;
1189 *att = 0;
1190 } else if (c == ':') {
1191 // Colon, semicolon, IS, ISNT.
1192 (sym++)[0] = c;
1193 c = next_char (ref_l, ref_s, A68_FALSE);
1194 if (c == '=') {
1195 (sym++)[0] = c;
1196 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == ':') {
1197 (sym++)[0] = c;
1198 c = next_char (ref_l, ref_s, A68_FALSE);
1199 }
1200 } else if (c == '/') {
1201 (sym++)[0] = c;
1202 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == '=') {
1203 (sym++)[0] = c;
1204 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == ':') {
1205 (sym++)[0] = c;
1206 c = next_char (ref_l, ref_s, A68_FALSE);
1207 }
1208 }
1209 } else if (c == ':') {
1210 (sym++)[0] = c;
1211 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == '=') {
1212 (sym++)[0] = c;
1213 }
1214 }
1215 sym[0] = NULL_CHAR;
1216 *att = 0;
1217 } else if (c == '=') {
1218 // Operator starting with "=".
1219 char *scanned = sym;
1220 (sym++)[0] = c;
1221 c = next_char (ref_l, ref_s, A68_FALSE);
1222 if (strchr (NOMADS, c) != NO_TEXT) {
1223 (sym++)[0] = c;
1224 c = next_char (ref_l, ref_s, A68_FALSE);
1225 }
1226 if (c == '=') {
1227 (sym++)[0] = c;
1228 if (next_char (ref_l, ref_s, A68_FALSE) == ':') {
1229 (sym++)[0] = ':';
1230 c = next_char (ref_l, ref_s, A68_FALSE);
1231 if (strlen (sym) < 4 && c == '=') {
1232 (sym++)[0] = '=';
1233 (void) next_char (ref_l, ref_s, A68_FALSE);
1234 }
1235 }
1236 } else if (c == ':') {
1237 (sym++)[0] = c;
1238 sym[0] = NULL_CHAR;
1239 if (next_char (ref_l, ref_s, A68_FALSE) == '=') {
1240 (sym++)[0] = '=';
1241 (void) next_char (ref_l, ref_s, A68_FALSE);
1242 } else {
1243 SCAN_ERROR (!(strcmp (scanned, "=:") == 0 || strcmp (scanned, "==:") == 0), *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1244 }
1245 }
1246 sym[0] = NULL_CHAR;
1247 if (strcmp (scanned, "=") == 0) {
1248 *att = EQUALS_SYMBOL;
1249 } else {
1250 *att = OPERATOR;
1251 }
1252 } else if (strchr (MONADS, c) != NO_TEXT || strchr (NOMADS, c) != NO_TEXT) {
1253 // Operator.
1254 char *scanned = sym;
1255 (sym++)[0] = c;
1256 c = next_char (ref_l, ref_s, A68_FALSE);
1257 if (strchr (NOMADS, c) != NO_TEXT) {
1258 (sym++)[0] = c;
1259 c = next_char (ref_l, ref_s, A68_FALSE);
1260 }
1261 if (c == '=') {
1262 (sym++)[0] = c;
1263 if (next_char (ref_l, ref_s, A68_FALSE) == ':') {
1264 (sym++)[0] = ':';
1265 c = next_char (ref_l, ref_s, A68_FALSE);
1266 if (strlen (scanned) < 4 && c == '=') {
1267 (sym++)[0] = '=';
1268 (void) next_char (ref_l, ref_s, A68_FALSE);
1269 }
1270 }
1271 } else if (c == ':') {
1272 (sym++)[0] = c;
1273 sym[0] = NULL_CHAR;
1274 if (next_char (ref_l, ref_s, A68_FALSE) == '=') {
1275 (sym++)[0] = '=';
1276 sym[0] = NULL_CHAR;
1277 (void) next_char (ref_l, ref_s, A68_FALSE);
1278 } else {
1279 SCAN_ERROR (strcmp (&(scanned[1]), "=:") != 0, *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1280 }
1281 }
1282 sym[0] = NULL_CHAR;
1283 *att = OPERATOR;
1284 } else {
1285 // Afuuus ... strange characters!.
1286 unworthy (*start_l, *start_c, (int) c);
1287 }
1288 }
1289
1290 //! @brief Whether att opens an embedded clause.
1291
1292 BOOL_T open_nested_clause (int att)
1293 {
1294 switch (att) {
1295 case OPEN_SYMBOL:
1296 case BEGIN_SYMBOL:
1297 case PAR_SYMBOL:
1298 case IF_SYMBOL:
1299 case CASE_SYMBOL:
1300 case FOR_SYMBOL:
1301 case FROM_SYMBOL:
1302 case BY_SYMBOL:
1303 case TO_SYMBOL:
1304 case DOWNTO_SYMBOL:
1305 case WHILE_SYMBOL:
1306 case DO_SYMBOL:
1307 case SUB_SYMBOL:
1308 case ACCO_SYMBOL: {
1309 return A68_TRUE;
1310 }
1311 }
1312 return A68_FALSE;
1313 }
1314
1315 //! @brief Whether att closes an embedded clause.
1316
1317 BOOL_T close_nested_clause (int att)
1318 {
1319 switch (att) {
1320 case CLOSE_SYMBOL:
1321 case END_SYMBOL:
1322 case FI_SYMBOL:
1323 case ESAC_SYMBOL:
1324 case OD_SYMBOL:
1325 case BUS_SYMBOL:
1326 case OCCA_SYMBOL: {
1327 return A68_TRUE;
1328 }
1329 }
1330 return A68_FALSE;
1331 }
1332
1333 //! @brief Cast a string to lower case.
1334
1335 void make_lower_case (char *p)
1336 {
1337 for (; p != NO_TEXT && p[0] != NULL_CHAR; p++) {
1338 p[0] = (char) TO_LOWER (p[0]);
1339 }
1340 }
1341
1342 //! @brief Construct a linear list of tokens.
1343
1344 void tokenise_source (NODE_T ** root, int level, BOOL_T in_format, LINE_T ** l, char **s, LINE_T ** start_l, char **start_c)
1345 {
1346 char *lpr = NO_TEXT;
1347 int lprt = 0;
1348 while (l != NO_VAR && !A68_PARSER (stop_scanner)) {
1349 int att = 0;
1350 get_next_token (in_format, l, s, start_l, start_c, &att);
1351 if (A68_PARSER (scan_buf)[0] == STOP_CHAR) {
1352 A68_PARSER (stop_scanner) = A68_TRUE;
1353 } else if (strlen (A68_PARSER (scan_buf)) > 0 || att == ROW_CHAR_DENOTATION || att == LITERAL) {
1354 KEYWORD_T *kw;
1355 char *c = NO_TEXT;
1356 BOOL_T make_node = A68_TRUE;
1357 char *trailing = NO_TEXT;
1358 if (att != IDENTIFIER) {
1359 kw = find_keyword (A68 (top_keyword), A68_PARSER (scan_buf));
1360 } else {
1361 kw = NO_KEYWORD;
1362 }
1363 if (!(kw != NO_KEYWORD && att != ROW_CHAR_DENOTATION)) {
1364 if (att == IDENTIFIER) {
1365 make_lower_case (A68_PARSER (scan_buf));
1366 }
1367 if (att != ROW_CHAR_DENOTATION && att != LITERAL) {
1368 int len = (int) strlen (A68_PARSER (scan_buf));
1369 while (len >= 1 && A68_PARSER (scan_buf)[len - 1] == '_') {
1370 trailing = "_";
1371 A68_PARSER (scan_buf)[len - 1] = NULL_CHAR;
1372 len--;
1373 }
1374 }
1375 c = TEXT (add_token (&A68 (top_token), A68_PARSER (scan_buf)));
1376 } else {
1377 if (IS (kw, TO_SYMBOL)) {
1378 // Merge GO and TO to GOTO.
1379 if (*root != NO_NODE && IS (*root, GO_SYMBOL)) {
1380 ATTRIBUTE (*root) = GOTO_SYMBOL;
1381 NSYMBOL (*root) = TEXT (find_keyword (A68 (top_keyword), "GOTO"));
1382 make_node = A68_FALSE;
1383 } else {
1384 att = ATTRIBUTE (kw);
1385 c = TEXT (kw);
1386 }
1387 } else {
1388 if (att == 0 || att == BOLD_TAG) {
1389 att = ATTRIBUTE (kw);
1390 }
1391 c = TEXT (kw);
1392 // Handle pragments.
1393 if (att == STYLE_II_COMMENT_SYMBOL || att == STYLE_I_COMMENT_SYMBOL || att == BOLD_COMMENT_SYMBOL) {
1394 char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1395 if (lpr == NO_TEXT || (int) strlen (lpr) == 0) {
1396 lpr = nlpr;
1397 } else {
1398 char *stale = lpr;
1399 lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1400 a68_free (nlpr);
1401 a68_free (stale);
1402 }
1403 lprt = att;
1404 make_node = A68_FALSE;
1405 } else if (att == STYLE_I_PRAGMAT_SYMBOL || att == BOLD_PRAGMAT_SYMBOL) {
1406 char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1407 if (lpr == NO_TEXT || (int) strlen (lpr) == 0) {
1408 lpr = nlpr;
1409 } else {
1410 char *stale = lpr;
1411 lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1412 a68_free (nlpr);
1413 a68_free (stale);
1414 }
1415 lprt = att;
1416 if (!A68_PARSER (stop_scanner)) {
1417 (void) set_options (OPTION_LIST (&A68_JOB), A68_FALSE);
1418 make_node = A68_FALSE;
1419 }
1420 }
1421 }
1422 }
1423 // Add token to the tree.
1424 if (make_node) {
1425 NODE_T *q = new_node ();
1426 INFO (q) = new_node_info ();
1427 switch (att) {
1428 case ASSIGN_SYMBOL:
1429 case END_SYMBOL:
1430 case ESAC_SYMBOL:
1431 case OD_SYMBOL:
1432 case OF_SYMBOL:
1433 case FI_SYMBOL:
1434 case CLOSE_SYMBOL:
1435 case BUS_SYMBOL:
1436 case COLON_SYMBOL:
1437 case COMMA_SYMBOL:
1438 case DOTDOT_SYMBOL:
1439 case SEMI_SYMBOL: {
1440 GINFO (q) = NO_GINFO;
1441 break;
1442 }
1443 default: {
1444 GINFO (q) = new_genie_info ();
1445 break;
1446 }
1447 }
1448 STATUS (q) = OPTION_NODEMASK (&A68_JOB);
1449 LINE (INFO (q)) = *start_l;
1450 CHAR_IN_LINE (INFO (q)) = *start_c;
1451 PRIO (INFO (q)) = 0;
1452 PROCEDURE_LEVEL (INFO (q)) = 0;
1453 ATTRIBUTE (q) = att;
1454 NSYMBOL (q) = c;
1455 PREVIOUS (q) = *root;
1456 SUB (q) = NEXT (q) = NO_NODE;
1457 TABLE (q) = NO_TABLE;
1458 MOID (q) = NO_MOID;
1459 TAX (q) = NO_TAG;
1460 if (lpr != NO_TEXT) {
1461 NPRAGMENT (q) = lpr;
1462 NPRAGMENT_TYPE (q) = lprt;
1463 lpr = NO_TEXT;
1464 lprt = 0;
1465 }
1466 if (*root != NO_NODE) {
1467 NEXT (*root) = q;
1468 }
1469 if (TOP_NODE (&A68_JOB) == NO_NODE) {
1470 TOP_NODE (&A68_JOB) = q;
1471 }
1472 *root = q;
1473 if (trailing != NO_TEXT) {
1474 diagnostic (A68_WARNING | A68_FORCE_DIAGNOSTICS, q, WARNING_TRAILING, trailing, att);
1475 }
1476 }
1477 // Redirection in tokenising formats. The scanner is a recursive-descent type as
1478 // to know when it scans a format text and when not.
1479 if (in_format && att == FORMAT_DELIMITER_SYMBOL) {
1480 return;
1481 } else if (!in_format && att == FORMAT_DELIMITER_SYMBOL) {
1482 tokenise_source (root, level + 1, A68_TRUE, l, s, start_l, start_c);
1483 } else if (in_format && open_nested_clause (att)) {
1484 NODE_T *z = PREVIOUS (*root);
1485 if (z != NO_NODE && is_one_of (z, FORMAT_ITEM_N, FORMAT_ITEM_G, FORMAT_ITEM_H, FORMAT_ITEM_F, STOP)) {
1486 tokenise_source (root, level, A68_FALSE, l, s, start_l, start_c);
1487 } else if (att == OPEN_SYMBOL) {
1488 ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1489 } else if (OPTION_BRACKETS (&A68_JOB) && att == SUB_SYMBOL) {
1490 ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1491 } else if (OPTION_BRACKETS (&A68_JOB) && att == ACCO_SYMBOL) {
1492 ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1493 }
1494 } else if (!in_format && level > 0 && open_nested_clause (att)) {
1495 tokenise_source (root, level + 1, A68_FALSE, l, s, start_l, start_c);
1496 } else if (!in_format && level > 0 && close_nested_clause (att)) {
1497 return;
1498 } else if (in_format && att == CLOSE_SYMBOL) {
1499 ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1500 } else if (OPTION_BRACKETS (&A68_JOB) && in_format && att == BUS_SYMBOL) {
1501 ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1502 } else if (OPTION_BRACKETS (&A68_JOB) && in_format && att == OCCA_SYMBOL) {
1503 ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1504 }
1505 }
1506 }
1507 }
1508
1509 //! @brief Tokenise source file, build initial syntax tree.
1510
1511 BOOL_T lexical_analyser (void)
1512 {
1513 LINE_T *l = NO_LINE, *start_l = NO_LINE;
1514 char *s = NO_TEXT, *start_c = NO_TEXT;
1515 NODE_T *root = NO_NODE;
1516 A68_PARSER (scan_buf) = NO_TEXT;
1517 A68_PARSER (max_scan_buf_length) = A68_PARSER (source_file_size) = get_source_size ();
1518 // Errors in file?.
1519 if (A68_PARSER (max_scan_buf_length) == 0) {
1520 return A68_FALSE;
1521 }
1522 if (OPTION_RUN_SCRIPT (&A68_JOB)) {
1523 A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (max_scan_buf_length)));
1524 if (!read_script_file ()) {
1525 return A68_FALSE;
1526 }
1527 } else {
1528 A68_PARSER (max_scan_buf_length) += KILOBYTE; // for the environ, more than enough
1529 A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) A68_PARSER (max_scan_buf_length));
1530 // Errors in file?.
1531 if (!read_source_file ()) {
1532 return A68_FALSE;
1533 }
1534 }
1535 // Start tokenising.
1536 A68_PARSER (read_error) = A68_FALSE;
1537 A68_PARSER (stop_scanner) = A68_FALSE;
1538 if ((l = TOP_LINE (&A68_JOB)) != NO_LINE) {
1539 s = STRING (l);
1540 }
1541 tokenise_source (&root, 0, A68_FALSE, &l, &s, &start_l, &start_c);
1542 return A68_TRUE;
1543 }