parser-scanner.c
1 //! @file parser-scanner.c
2 //! @author J. Marcel van der Veer
3 //!
4 //! @section Copyright
5 //!
6 //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
7 //! Copyright 2001-2023 J. Marcel van der Veer [algol68g@xs4all.nl].
8 //!
9 //! @section License
10 //!
11 //! This program is free software; you can redistribute it and/or modify it
12 //! under the terms of the GNU General Public License as published by the
13 //! Free Software Foundation; either version 3 of the License, or
14 //! (at your option) any later version.
15 //!
16 //! This program is distributed in the hope that it will be useful, but
17 //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 //! more details. You should have received a copy of the GNU General Public
20 //! License along with this program. If not, see [http://www.gnu.org/licenses/].
21
22 //! @section Synopsis
23 //!
24 //! Context-dependent Algol 68 tokeniser.
25
26 #include "a68g.h"
27 #include "a68g-parser.h"
28 #include "a68g-prelude.h"
29 #include "a68g-options.h"
30 #include "a68g-environ.h"
31 #include "a68g-genie.h"
32
33 // Macros.
34
35 #define SCAN_DIGITS(c)\
36 while (IS_DIGIT (c)) {\
37 (sym++)[0] = (c);\
38 (c) = next_char (ref_l, ref_s, A68_TRUE);\
39 }
40
41 #define SCAN_EXPONENT_PART(c)\
42 (sym++)[0] = EXPONENT_CHAR;\
43 (c) = next_char (ref_l, ref_s, A68_TRUE);\
44 if ((c) == '+' || (c) == '-') {\
45 (sym++)[0] = (c);\
46 (c) = next_char (ref_l, ref_s, A68_TRUE);\
47 }\
48 SCAN_ERROR (!IS_DIGIT (c), *start_l, *start_c, ERROR_EXPONENT_DIGIT);\
49 SCAN_DIGITS (c)
50
51 //! @brief Save scanner state, for character look-ahead.
52
53 void save_state (LINE_T * ref_l, char *ref_s, char ch)
54 {
55 SCAN_STATE_L (&A68_JOB) = ref_l;
56 SCAN_STATE_S (&A68_JOB) = ref_s;
57 SCAN_STATE_C (&A68_JOB) = ch;
58 }
59
60 //! @brief Restore scanner state, for character look-ahead.
61
62 void restore_state (LINE_T ** ref_l, char **ref_s, char *ch)
63 {
64 *ref_l = SCAN_STATE_L (&A68_JOB);
65 *ref_s = SCAN_STATE_S (&A68_JOB);
66 *ch = SCAN_STATE_C (&A68_JOB);
67 }
68
69 //! @brief New_source_line.
70
71 LINE_T *new_source_line (void)
72 {
73 LINE_T *z = (LINE_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (LINE_T));
74 MARKER (z)[0] = NULL_CHAR;
75 STRING (z) = NO_TEXT;
76 FILENAME (z) = NO_TEXT;
77 DIAGNOSTICS (z) = NO_DIAGNOSTIC;
78 NUMBER (z) = 0;
79 PRINT_STATUS (z) = 0;
80 LIST (z) = A68_TRUE;
81 NEXT (z) = NO_LINE;
82 PREVIOUS (z) = NO_LINE;
83 return z;
84 }
85
86 //! @brief Append a source line to the internal source file.
87
88 void append_source_line (char *str, LINE_T ** ref_l, int *line_num, char *filename)
89 {
90 LINE_T *z = new_source_line ();
91 // Allow shell command in first line, f.i. "#!/usr/share/bin/a68g".
92 if (*line_num == 1) {
93 if (strlen (str) >= 2 && strncmp (str, "#!", 2) == 0) {
94 ABEND (strstr (str, "run-script") != NO_TEXT, ERROR_SHELL_SCRIPT, __func__);
95 (*line_num)++;
96 return;
97 }
98 }
99 // Link line into the chain.
100 STRING (z) = new_fixed_string (str);
101 FILENAME (z) = filename;
102 NUMBER (z) = (*line_num)++;
103 PRINT_STATUS (z) = NOT_PRINTED;
104 LIST (z) = A68_TRUE;
105 DIAGNOSTICS (z) = NO_DIAGNOSTIC;
106 NEXT (z) = NO_LINE;
107 PREVIOUS (z) = *ref_l;
108 if (TOP_LINE (&A68_JOB) == NO_LINE) {
109 TOP_LINE (&A68_JOB) = z;
110 }
111 if (*ref_l != NO_LINE) {
112 NEXT (*ref_l) = z;
113 }
114 *ref_l = z;
115 }
116
117 // Scanner, tokenises the source code.
118
119 //! @brief Whether ch is unworthy.
120
121 void unworthy (LINE_T * u, char *v, char ch)
122 {
123 if (IS_PRINT (ch)) {
124 ASSERT (snprintf (A68 (edit_line), SNPRINTF_SIZE, "*%s", ERROR_UNWORTHY_CHARACTER) >= 0);
125 } else {
126 ASSERT (snprintf (A68 (edit_line), SNPRINTF_SIZE, "*%s %s", ERROR_UNWORTHY_CHARACTER, ctrl_char (ch)) >= 0);
127 }
128 scan_error (u, v, A68 (edit_line));
129 }
130
131 //! @brief Concatenate lines that terminate in '\' with next line.
132
133 void concatenate_lines (LINE_T * top)
134 {
135 LINE_T *q;
136 // Work from bottom backwards.
137 for (q = top; q != NO_LINE && NEXT (q) != NO_LINE; q = NEXT (q)) {
138 ;
139 }
140 for (; q != NO_LINE; BACKWARD (q)) {
141 char *z = STRING (q);
142 int len = (int) strlen (z);
143 if (len >= 2 && z[len - 2] == BACKSLASH_CHAR && z[len - 1] == NEWLINE_CHAR && NEXT (q) != NO_LINE && STRING (NEXT (q)) != NO_TEXT) {
144 z[len - 2] = NULL_CHAR;
145 len += (int) strlen (STRING (NEXT (q)));
146 z = (char *) get_fixed_heap_space ((size_t) (len + 1));
147 bufcpy (z, STRING (q), len + 1);
148 bufcat (z, STRING (NEXT (q)), len + 1);
149 STRING (NEXT (q))[0] = NULL_CHAR;
150 STRING (q) = z;
151 }
152 }
153 }
154
155 //! @brief Whether u is bold tag v, independent of stropping regime.
156
157 BOOL_T is_bold (char *u, char *v)
158 {
159 unt len = (unt) strlen (v);
160 if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
161 if (u[0] == '\'') {
162 return (BOOL_T) (strncmp (++u, v, len) == 0 && u[len] == '\'');
163 } else {
164 return A68_FALSE;
165 }
166 } else {
167 return (BOOL_T) (strncmp (u, v, len) == 0 && !IS_UPPER (u[len]));
168 }
169 }
170
171 //! @brief Skip string.
172
173 BOOL_T skip_string (LINE_T ** top, char **ch)
174 {
175 LINE_T *u = *top;
176 char *v = *ch;
177 v++;
178 while (u != NO_LINE) {
179 while (v[0] != NULL_CHAR) {
180 if (v[0] == QUOTE_CHAR && v[1] != QUOTE_CHAR) {
181 *top = u;
182 *ch = &v[1];
183 return A68_TRUE;
184 } else if (v[0] == QUOTE_CHAR && v[1] == QUOTE_CHAR) {
185 v += 2;
186 } else {
187 v++;
188 }
189 }
190 FORWARD (u);
191 if (u != NO_LINE) {
192 v = &(STRING (u)[0]);
193 } else {
194 v = NO_TEXT;
195 }
196 }
197 return A68_FALSE;
198 }
199
200 //! @brief Skip comment.
201
202 BOOL_T skip_comment (LINE_T ** top, char **ch, int delim)
203 {
204 LINE_T *u = *top;
205 char *v = *ch;
206 v++;
207 while (u != NO_LINE) {
208 while (v[0] != NULL_CHAR) {
209 if (is_bold (v, "COMMENT") && delim == BOLD_COMMENT_SYMBOL) {
210 *top = u;
211 *ch = &v[1];
212 return A68_TRUE;
213 } else if (is_bold (v, "CO") && delim == STYLE_I_COMMENT_SYMBOL) {
214 *top = u;
215 *ch = &v[1];
216 return A68_TRUE;
217 } else if (v[0] == '#' && delim == STYLE_II_COMMENT_SYMBOL) {
218 *top = u;
219 *ch = &v[1];
220 return A68_TRUE;
221 } else {
222 v++;
223 }
224 }
225 FORWARD (u);
226 if (u != NO_LINE) {
227 v = &(STRING (u)[0]);
228 } else {
229 v = NO_TEXT;
230 }
231 }
232 return A68_FALSE;
233 }
234
235 //! @brief Skip rest of pragmat.
236
237 BOOL_T skip_pragmat (LINE_T ** top, char **ch, int delim, BOOL_T whitespace)
238 {
239 LINE_T *u = *top;
240 char *v = *ch;
241 while (u != NO_LINE) {
242 while (v[0] != NULL_CHAR) {
243 if (is_bold (v, "PRAGMAT") && delim == BOLD_PRAGMAT_SYMBOL) {
244 *top = u;
245 *ch = &v[1];
246 return A68_TRUE;
247 } else if (is_bold (v, "PR") && delim == STYLE_I_PRAGMAT_SYMBOL) {
248 *top = u;
249 *ch = &v[1];
250 return A68_TRUE;
251 } else {
252 if (whitespace && !IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
253 scan_error (u, v, ERROR_PRAGMENT);
254 } else if (IS_UPPER (v[0])) {
255 // Skip a bold word as you may trigger on REPR, for instance ...
256 while (IS_UPPER (v[0])) {
257 v++;
258 }
259 } else {
260 v++;
261 }
262 }
263 }
264 FORWARD (u);
265 if (u != NO_LINE) {
266 v = &(STRING (u)[0]);
267 } else {
268 v = NO_TEXT;
269 }
270 }
271 return A68_FALSE;
272 }
273
274 //! @brief Return pointer to next token within pragmat.
275
276 char *get_pragmat_item (LINE_T ** top, char **ch)
277 {
278 LINE_T *u = *top;
279 char *v = *ch;
280 while (u != NO_LINE) {
281 while (v[0] != NULL_CHAR) {
282 if (!IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
283 *top = u;
284 *ch = v;
285 return v;
286 } else {
287 v++;
288 }
289 }
290 FORWARD (u);
291 if (u != NO_LINE) {
292 v = &(STRING (u)[0]);
293 } else {
294 v = NO_TEXT;
295 }
296 }
297 return NO_TEXT;
298 }
299
300 //! @brief Case insensitive strncmp for at most the number of chars in 'v'.
301
302 int streq (char *u, char *v)
303 {
304 int diff;
305 for (diff = 0; diff == 0 && u[0] != NULL_CHAR && v[0] != NULL_CHAR; u++, v++) {
306 diff = ((int) TO_LOWER (u[0])) - ((int) TO_LOWER (v[0]));
307 }
308 return diff;
309 }
310
311 //! @brief Scan for next pragmat and yield first pragmat item.
312
313 char *next_preprocessor_item (LINE_T ** top, char **ch, int *delim)
314 {
315 LINE_T *u = *top;
316 char *v = *ch;
317 *delim = 0;
318 while (u != NO_LINE) {
319 while (v[0] != NULL_CHAR) {
320 LINE_T *start_l = u;
321 char *start_c = v;
322 // STRINGs must be skipped.
323 if (v[0] == QUOTE_CHAR) {
324 SCAN_ERROR (!skip_string (&u, &v), start_l, start_c, ERROR_UNTERMINATED_STRING);
325 }
326 // COMMENTS must be skipped.
327 else if (is_bold (v, "COMMENT")) {
328 SCAN_ERROR (!skip_comment (&u, &v, BOLD_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
329 } else if (is_bold (v, "CO")) {
330 SCAN_ERROR (!skip_comment (&u, &v, STYLE_I_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
331 } else if (v[0] == '#') {
332 SCAN_ERROR (!skip_comment (&u, &v, STYLE_II_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
333 } else if (is_bold (v, "PRAGMAT") || is_bold (v, "PR")) {
334 // We caught a PRAGMAT.
335 char *item;
336 if (is_bold (v, "PRAGMAT")) {
337 *delim = BOLD_PRAGMAT_SYMBOL;
338 v = &v[strlen ("PRAGMAT")];
339 } else if (is_bold (v, "PR")) {
340 *delim = STYLE_I_PRAGMAT_SYMBOL;
341 v = &v[strlen ("PR")];
342 }
343 item = get_pragmat_item (&u, &v);
344 SCAN_ERROR (item == NO_TEXT, start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
345 // Item "preprocessor" restarts preprocessing if it is off.
346 if (A68_PARSER (no_preprocessing) && streq (item, "PREPROCESSOR") == 0) {
347 A68_PARSER (no_preprocessing) = A68_FALSE;
348 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
349 }
350 // If preprocessing is switched off, we idle to closing bracket.
351 else if (A68_PARSER (no_preprocessing)) {
352 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
353 }
354 // Item "nopreprocessor" stops preprocessing if it is on.
355 if (streq (item, "NOPREPROCESSOR") == 0) {
356 A68_PARSER (no_preprocessing) = A68_TRUE;
357 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
358 }
359 // Item "INCLUDE" includes a file.
360 else if (streq (item, "INCLUDE") == 0) {
361 *top = u;
362 *ch = v;
363 return item;
364 }
365 // Item "READ" includes a file.
366 else if (streq (item, "READ") == 0) {
367 *top = u;
368 *ch = v;
369 return item;
370 }
371 // Unrecognised item - probably options handled later by the tokeniser.
372 else {
373 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
374 }
375 } else if (IS_UPPER (v[0])) {
376 // Skip a bold word as you may trigger on REPR, for instance ...
377 while (IS_UPPER (v[0])) {
378 v++;
379 }
380 } else {
381 v++;
382 }
383 }
384 FORWARD (u);
385 if (u != NO_LINE) {
386 v = &(STRING (u)[0]);
387 } else {
388 v = NO_TEXT;
389 }
390 }
391 *top = u;
392 *ch = v;
393 return NO_TEXT;
394 }
395
396 //! @brief Include files.
397
398 void include_files (LINE_T * top)
399 {
400 // include_files
401 //
402 // syntax: PR read "filename" PR
403 // PR include "filename" PR
404 //
405 // The file gets inserted before the line containing the pragmat. In this way
406 // correct line numbers are preserved which helps diagnostics. A file that has
407 // been included will not be included a second time - it will be ignored.
408 // A rigorous fail-safe, but there is no mechanism to prevent recursive includes
409 // in A68 source code. User reports do not indicate sophisticated use of INCLUDE,
410 // so this is fine for now.
411 // TODO - some day we might need `app', analogous to `cpp'.
412 //
413 BOOL_T make_pass = A68_TRUE;
414 while (make_pass) {
415 LINE_T *s, *t, *u = top;
416 char *v = &(STRING (u)[0]);
417 make_pass = A68_FALSE;
418 errno = 0;
419 while (u != NO_LINE) {
420 int pr_lim;
421 char *item = next_preprocessor_item (&u, &v, &pr_lim);
422 LINE_T *start_l = u;
423 char *start_c = v;
424 // Search for PR include "filename" PR.
425 if (item != NO_TEXT && (streq (item, "INCLUDE") == 0 || streq (item, "READ") == 0)) {
426 FILE_T fd;
427 int n, linum, fsize, k, bytes_read, fnwid;
428 char *fbuf, delim;
429 BUFFER fnb;
430 char *fn;
431 // Skip to filename.
432 while (IS_ALPHA (v[0])) {
433 v++;
434 }
435 while (IS_SPACE (v[0])) {
436 v++;
437 }
438 // Scan quoted filename.
439 SCAN_ERROR ((v[0] != QUOTE_CHAR && v[0] != '\''), start_l, start_c, ERROR_INCORRECT_FILENAME);
440 delim = (v++)[0];
441 n = 0;
442 fnb[0] = NULL_CHAR;
443 // Scan Algol 68 string (note: "" denotes a ", while in C it concatenates).
444 do {
445 SCAN_ERROR (EOL (v[0]), start_l, start_c, ERROR_INCORRECT_FILENAME);
446 SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
447 if (v[0] == delim) {
448 while (v[0] == delim && v[1] == delim) {
449 SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
450 fnb[n++] = delim;
451 fnb[n] = NULL_CHAR;
452 v += 2;
453 }
454 } else if (IS_PRINT (v[0])) {
455 fnb[n++] = *(v++);
456 fnb[n] = NULL_CHAR;
457 } else {
458 SCAN_ERROR (A68_TRUE, start_l, start_c, ERROR_INCORRECT_FILENAME);
459 }
460 } while (v[0] != delim);
461 // Insist that the pragmat is closed properly.
462 v = &v[1];
463 SCAN_ERROR (!skip_pragmat (&u, &v, pr_lim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
464 SCAN_ERROR (n == 0, start_l, start_c, ERROR_INCORRECT_FILENAME);
465 // Make the name relative to the position of the source file (C preprocessor standard).
466 if (FILENAME (u) != NO_TEXT) {
467 fn = a68_relpath (a68_dirname (FILENAME (u)), a68_dirname (fnb), a68_basename (fnb));
468 } else {
469 fn = a68_relpath (FILE_PATH (&A68_JOB), a68_dirname (fnb), a68_basename (fnb));
470 }
471 // Do not check errno, since errno may be undefined here after a successful call.
472 if (fn != NO_TEXT) {
473 bufcpy (fnb, fn, BUFFER_SIZE);
474 } else {
475 char err[PATH_MAX + 1];
476 bufcpy (err, ERROR_SOURCE_FILE_OPEN, PATH_MAX);
477 bufcat (err, " ", PATH_MAX);
478 bufcat (err, fnb, PATH_MAX);
479 SCAN_ERROR (A68_TRUE, NO_LINE, NO_TEXT, err);
480 }
481 fnwid = (int) strlen (fnb) + 1;
482 fn = (char *) get_fixed_heap_space ((size_t) fnwid);
483 bufcpy (fn, fnb, fnwid);
484 // Ignore the file when included more than once.
485 for (t = top; t != NO_LINE; t = NEXT (t)) {
486 if (strcmp (FILENAME (t), fn) == 0) {
487 goto search_next_pragmat;
488 }
489 }
490 t = NO_LINE;
491 // Access the file.
492 errno = 0;
493 fd = open (fn, O_RDONLY | O_BINARY);
494 ASSERT (snprintf (A68 (edit_line), SNPRINTF_SIZE, "*%s \"%s\"", ERROR_SOURCE_FILE_OPEN, fn) >= 0);
495 SCAN_ERROR (fd == -1, start_l, start_c, A68 (edit_line));
496 errno = 0;
497 fsize = (int) lseek (fd, 0, SEEK_END);
498 ASSERT (fsize >= 0);
499 SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
500 fbuf = (char *) get_temp_heap_space ((unt) (8 + fsize));
501 errno = 0;
502 ASSERT (lseek (fd, 0, SEEK_SET) >= 0);
503 SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
504 errno = 0;
505 bytes_read = (int) io_read (fd, fbuf, (size_t) fsize);
506 SCAN_ERROR (errno != 0 || bytes_read != fsize, start_l, start_c, ERROR_FILE_READ);
507 // Buffer still usable?.
508 if (fsize > A68_PARSER (max_scan_buf_length)) {
509 A68_PARSER (max_scan_buf_length) = fsize;
510 A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (max_scan_buf_length)));
511 }
512 // Link all lines into the list.
513 linum = 1;
514 s = u;
515 t = PREVIOUS (u);
516 k = 0;
517 if (fsize == 0) {
518 // If file is empty, insert single empty line.
519 A68_PARSER (scan_buf)[0] = NEWLINE_CHAR;
520 A68_PARSER (scan_buf)[1] = NULL_CHAR;
521 append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
522 } else
523 while (k < fsize) {
524 n = 0;
525 A68_PARSER (scan_buf)[0] = NULL_CHAR;
526 while (k < fsize && fbuf[k] != NEWLINE_CHAR) {
527 SCAN_ERROR ((IS_CNTRL (fbuf[k]) && !IS_SPACE (fbuf[k])) || fbuf[k] == STOP_CHAR, start_l, start_c, ERROR_FILE_INCLUDE_CTRL);
528 A68_PARSER (scan_buf)[n++] = fbuf[k++];
529 A68_PARSER (scan_buf)[n] = NULL_CHAR;
530 }
531 A68_PARSER (scan_buf)[n++] = NEWLINE_CHAR;
532 A68_PARSER (scan_buf)[n] = NULL_CHAR;
533 if (k < fsize) {
534 k++;
535 }
536 append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
537 }
538 // Conclude and go find another include directive, if any.
539 NEXT (t) = s;
540 PREVIOUS (s) = t;
541 concatenate_lines (top);
542 ASSERT (close (fd) == 0);
543 make_pass = A68_TRUE;
544 }
545 search_next_pragmat:_SKIP_;
546 }
547 }
548 }
549
550 //! @brief Size of source file.
551
552 int get_source_size (void)
553 {
554 FILE_T f = FILE_SOURCE_FD (&A68_JOB);
555 // This is why WIN32 must open as "read binary".
556 return (int) lseek (f, 0, SEEK_END);
557 }
558
559 //! @brief Append environment source lines.
560
561 void append_environ (char *str[], LINE_T ** ref_l, int *line_num, char *name)
562 {
563 int k;
564 for (k = 0; str[k] != NO_TEXT; k++) {
565 int zero_line_num = 0;
566 (*line_num)++;
567 append_source_line (str[k], ref_l, &zero_line_num, name);
568 }
569 }
570
571 //! @brief Read script file and make internal copy.
572
573 BOOL_T read_script_file (void)
574 {
575 LINE_T *ref_l = NO_LINE;
576 int k, n, num;
577 unt len;
578 BOOL_T file_end = A68_FALSE;
579 BUFFER filename, linenum;
580 char ch, *fn, *line;
581 char *buffer = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (source_file_size)));
582 FILE_T source = FILE_SOURCE_FD (&A68_JOB);
583 ABEND (source == -1, ERROR_ACTION, __func__);
584 buffer[0] = NULL_CHAR;
585 n = 0;
586 len = (unt) (8 + A68_PARSER (source_file_size));
587 buffer = (char *) get_temp_heap_space (len);
588 ASSERT (lseek (source, 0, SEEK_SET) >= 0);
589 while (!file_end) {
590 // Read the original file name.
591 filename[0] = NULL_CHAR;
592 k = 0;
593 if (io_read (source, &ch, 1) == 0) {
594 file_end = A68_TRUE;
595 continue;
596 }
597 while (ch != NEWLINE_CHAR) {
598 filename[k++] = ch;
599 ASSERT (io_read (source, &ch, 1) == 1);
600 }
601 filename[k] = NULL_CHAR;
602 fn = TEXT (add_token (&A68 (top_token), filename));
603 // Read the original file number.
604 linenum[0] = NULL_CHAR;
605 k = 0;
606 ASSERT (io_read (source, &ch, 1) == 1);
607 while (ch != NEWLINE_CHAR) {
608 linenum[k++] = ch;
609 ASSERT (io_read (source, &ch, 1) == 1);
610 }
611 linenum[k] = NULL_CHAR;
612 num = (int) strtol (linenum, NO_VAR, 10);
613 ABEND (errno == ERANGE, ERROR_INTERNAL_CONSISTENCY, __func__);
614 // COPY original line into buffer.
615 ASSERT (io_read (source, &ch, 1) == 1);
616 line = &buffer[n];
617 while (ch != NEWLINE_CHAR) {
618 buffer[n++] = ch;
619 ASSERT (io_read (source, &ch, 1) == 1);
620 ABEND ((unt) n >= len, ERROR_ACTION, __func__);
621 }
622 buffer[n++] = NEWLINE_CHAR;
623 buffer[n] = NULL_CHAR;
624 append_source_line (line, &ref_l, &num, fn);
625 }
626 return A68_TRUE;
627 }
628
629 //! @brief Read source file and make internal copy.
630
631 BOOL_T read_source_file (void)
632 {
633 LINE_T *ref_l = NO_LINE;
634 int line_num = 0, k, bytes_read;
635 ssize_t l;
636 FILE_T f = FILE_SOURCE_FD (&A68_JOB);
637 char **prelude_start, **postlude, *buffer;
638 // Prelude.
639 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
640 prelude_start = bold_prelude_start;
641 postlude = bold_postlude;
642 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
643 prelude_start = quote_prelude_start;
644 postlude = quote_postlude;
645 } else {
646 prelude_start = postlude = NO_VAR;
647 }
648 append_environ (prelude_start, &ref_l, &line_num, "prelude");
649 // Read the file into a single buffer, so we save on system calls.
650 line_num = 1;
651 errno = 0;
652 buffer = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (source_file_size)));
653 ABEND (errno != 0 || buffer == NO_TEXT, ERROR_ALLOCATION, __func__);
654 ASSERT (lseek (f, 0, SEEK_SET) >= 0);
655 ABEND (errno != 0, ERROR_ACTION, __func__);
656 errno = 0;
657 bytes_read = (int) io_read (f, buffer, (size_t) A68_PARSER (source_file_size));
658 ABEND (errno != 0 || bytes_read != A68_PARSER (source_file_size), ERROR_ACTION, __func__);
659 // Link all lines into the list.
660 k = 0;
661 while (k < A68_PARSER (source_file_size)) {
662 l = 0;
663 A68_PARSER (scan_buf)[0] = NULL_CHAR;
664 while (k < A68_PARSER (source_file_size) && buffer[k] != NEWLINE_CHAR) {
665 if (k < A68_PARSER (source_file_size) - 1 && buffer[k] == CR_CHAR && buffer[k + 1] == NEWLINE_CHAR) {
666 k++;
667 } else {
668 A68_PARSER (scan_buf)[l++] = buffer[k++];
669 A68_PARSER (scan_buf)[l] = NULL_CHAR;
670 }
671 }
672 A68_PARSER (scan_buf)[l++] = NEWLINE_CHAR;
673 A68_PARSER (scan_buf)[l] = NULL_CHAR;
674 if (k < A68_PARSER (source_file_size)) {
675 k++;
676 }
677 append_source_line (A68_PARSER (scan_buf), &ref_l, &line_num, FILE_SOURCE_NAME (&A68_JOB));
678 SCAN_ERROR (l != (ssize_t) strlen (A68_PARSER (scan_buf)), NO_LINE, NO_TEXT, ERROR_FILE_SOURCE_CTRL);
679 }
680 // Postlude.
681 append_environ (postlude, &ref_l, &line_num, "postlude");
682 // Concatenate lines.
683 concatenate_lines (TOP_LINE (&A68_JOB));
684 // Include files.
685 include_files (TOP_LINE (&A68_JOB));
686 return A68_TRUE;
687 }
688
689 //! @brief Next_char get next character from internal copy of source file.
690
691 char next_char (LINE_T ** ref_l, char **ref_s, BOOL_T allow_typo)
692 {
693 char ch;
694 #if defined (NO_TYPO)
695 allow_typo = A68_FALSE;
696 #endif
697 LOW_STACK_ALERT (NO_NODE);
698 // Source empty?.
699 if (*ref_l == NO_LINE) {
700 return STOP_CHAR;
701 } else {
702 LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68_JOB) & SOURCE_MASK ? A68_TRUE : A68_FALSE);
703 // Take new line?.
704 if ((*ref_s)[0] == NEWLINE_CHAR || (*ref_s)[0] == NULL_CHAR) {
705 *ref_l = NEXT (*ref_l);
706 if (*ref_l == NO_LINE) {
707 return STOP_CHAR;
708 }
709 *ref_s = STRING (*ref_l);
710 } else {
711 (*ref_s)++;
712 }
713 // Deliver next char.
714 ch = (*ref_s)[0];
715 if (allow_typo && (IS_SPACE (ch) || ch == FORMFEED_CHAR)) {
716 ch = next_char (ref_l, ref_s, allow_typo);
717 }
718 return ch;
719 }
720 }
721
722 //! @brief Find first character that can start a valid symbol.
723
724 void get_good_char (char *ref_c, LINE_T ** ref_l, char **ref_s)
725 {
726 while (*ref_c != STOP_CHAR && (IS_SPACE (*ref_c) || (*ref_c == NULL_CHAR))) {
727 if (*ref_l != NO_LINE) {
728 LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68_JOB) & SOURCE_MASK ? A68_TRUE : A68_FALSE);
729 }
730 *ref_c = next_char (ref_l, ref_s, A68_FALSE);
731 }
732 }
733
734 //! @brief Handle a pragment (pragmat or comment).
735
736 char *pragment (int type, LINE_T ** ref_l, char **ref_c)
737 {
738 #define INIT_BUFFER {chars_in_buf = 0; A68_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
739 #define ADD_ONE_CHAR(ch) {A68_PARSER (scan_buf)[chars_in_buf ++] = ch; A68_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
740 char c = **ref_c, *term_s = NO_TEXT, *start_c = *ref_c;
741 char *z = NO_TEXT;
742 LINE_T *start_l = *ref_l;
743 int term_s_length, chars_in_buf;
744 BOOL_T stop, pragmat = A68_FALSE;
745 // Set terminator.
746 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
747 if (type == STYLE_I_COMMENT_SYMBOL) {
748 term_s = "CO";
749 } else if (type == STYLE_II_COMMENT_SYMBOL) {
750 term_s = "#";
751 } else if (type == BOLD_COMMENT_SYMBOL) {
752 term_s = "COMMENT";
753 } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
754 term_s = "PR";
755 pragmat = A68_TRUE;
756 } else if (type == BOLD_PRAGMAT_SYMBOL) {
757 term_s = "PRAGMAT";
758 pragmat = A68_TRUE;
759 }
760 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
761 if (type == STYLE_I_COMMENT_SYMBOL) {
762 term_s = "'CO'";
763 } else if (type == STYLE_II_COMMENT_SYMBOL) {
764 term_s = "#";
765 } else if (type == BOLD_COMMENT_SYMBOL) {
766 term_s = "'COMMENT'";
767 } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
768 term_s = "'PR'";
769 pragmat = A68_TRUE;
770 } else if (type == BOLD_PRAGMAT_SYMBOL) {
771 term_s = "'PRAGMAT'";
772 pragmat = A68_TRUE;
773 }
774 }
775 term_s_length = (int) strlen (term_s);
776 // Scan for terminator.
777 INIT_BUFFER;
778 stop = A68_FALSE;
779 while (stop == A68_FALSE) {
780 SCAN_ERROR (c == STOP_CHAR, start_l, start_c, ERROR_UNTERMINATED_PRAGMENT);
781 // A ".." or '..' delimited string in a PRAGMAT.
782 if (pragmat && (c == QUOTE_CHAR || (c == '\'' && OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING))) {
783 char delim = c;
784 BOOL_T eos = A68_FALSE;
785 ADD_ONE_CHAR (c);
786 c = next_char (ref_l, ref_c, A68_FALSE);
787 while (!eos) {
788 SCAN_ERROR (EOL (c), start_l, start_c, ERROR_LONG_STRING);
789 if (c == delim) {
790 ADD_ONE_CHAR (delim);
791 save_state (*ref_l, *ref_c, c);
792 c = next_char (ref_l, ref_c, A68_FALSE);
793 if (c == delim) {
794 c = next_char (ref_l, ref_c, A68_FALSE);
795 } else {
796 restore_state (ref_l, ref_c, &c);
797 eos = A68_TRUE;
798 }
799 } else if (IS_PRINT (c)) {
800 ADD_ONE_CHAR (c);
801 c = next_char (ref_l, ref_c, A68_FALSE);
802 } else {
803 unworthy (start_l, start_c, c);
804 }
805 }
806 } else if (EOL (c)) {
807 ADD_ONE_CHAR (NEWLINE_CHAR);
808 } else if (IS_PRINT (c) || IS_SPACE (c)) {
809 ADD_ONE_CHAR (c);
810 }
811 if (chars_in_buf >= term_s_length) {
812 // Check whether we encountered the terminator.
813 stop = (BOOL_T) (strcmp (term_s, &(A68_PARSER (scan_buf)[chars_in_buf - term_s_length])) == 0);
814 }
815 c = next_char (ref_l, ref_c, A68_FALSE);
816 }
817 A68_PARSER (scan_buf)[chars_in_buf - term_s_length] = NULL_CHAR;
818 z = new_string (term_s, A68_PARSER (scan_buf), term_s, NO_TEXT);
819 if (type == STYLE_I_PRAGMAT_SYMBOL || type == BOLD_PRAGMAT_SYMBOL) {
820 isolate_options (A68_PARSER (scan_buf), start_l);
821 }
822 return z;
823 #undef ADD_ONE_CHAR
824 #undef INIT_BUFFER
825 }
826
827 //! @brief Attribute for format item.
828
829 int get_format_item (char ch)
830 {
831 switch (TO_LOWER (ch)) {
832 case 'a':{
833 return FORMAT_ITEM_A;
834 }
835 case 'b':{
836 return FORMAT_ITEM_B;
837 }
838 case 'c':{
839 return FORMAT_ITEM_C;
840 }
841 case 'd':{
842 return FORMAT_ITEM_D;
843 }
844 case 'e':{
845 return FORMAT_ITEM_E;
846 }
847 case 'f':{
848 return FORMAT_ITEM_F;
849 }
850 case 'g':{
851 return FORMAT_ITEM_G;
852 }
853 case 'h':{
854 return FORMAT_ITEM_H;
855 }
856 case 'i':{
857 return FORMAT_ITEM_I;
858 }
859 case 'j':{
860 return FORMAT_ITEM_J;
861 }
862 case 'k':{
863 return FORMAT_ITEM_K;
864 }
865 case 'l':
866 case '/':{
867 return FORMAT_ITEM_L;
868 }
869 case 'm':{
870 return FORMAT_ITEM_M;
871 }
872 case 'n':{
873 return FORMAT_ITEM_N;
874 }
875 case 'o':{
876 return FORMAT_ITEM_O;
877 }
878 case 'p':{
879 return FORMAT_ITEM_P;
880 }
881 case 'q':{
882 return FORMAT_ITEM_Q;
883 }
884 case 'r':{
885 return FORMAT_ITEM_R;
886 }
887 case 's':{
888 return FORMAT_ITEM_S;
889 }
890 case 't':{
891 return FORMAT_ITEM_T;
892 }
893 case 'u':{
894 return FORMAT_ITEM_U;
895 }
896 case 'v':{
897 return FORMAT_ITEM_V;
898 }
899 case 'w':{
900 return FORMAT_ITEM_W;
901 }
902 case 'x':{
903 return FORMAT_ITEM_X;
904 }
905 case 'y':{
906 return FORMAT_ITEM_Y;
907 }
908 case 'z':{
909 return FORMAT_ITEM_Z;
910 }
911 case '+':{
912 return FORMAT_ITEM_PLUS;
913 }
914 case '-':{
915 return FORMAT_ITEM_MINUS;
916 }
917 case POINT_CHAR:{
918 return FORMAT_ITEM_POINT;
919 }
920 case '%':{
921 return FORMAT_ITEM_ESCAPE;
922 }
923 default:{
924 return 0;
925 }
926 }
927 }
928
929 //! @brief Whether input shows exponent character.
930
931 BOOL_T is_exp_char (LINE_T ** ref_l, char **ref_s, char *ch)
932 {
933 BOOL_T ret = A68_FALSE;
934 char exp_syms[3];
935 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
936 exp_syms[0] = EXPONENT_CHAR;
937 exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
938 exp_syms[2] = NULL_CHAR;
939 } else {
940 exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
941 exp_syms[1] = BACKSLASH_CHAR;
942 exp_syms[2] = NULL_CHAR;
943 }
944 save_state (*ref_l, *ref_s, *ch);
945 if (strchr (exp_syms, *ch) != NO_TEXT) {
946 *ch = next_char (ref_l, ref_s, A68_TRUE);
947 ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
948 }
949 restore_state (ref_l, ref_s, ch);
950 return ret;
951 }
952
953 //! @brief Whether input shows radix character.
954
955 BOOL_T is_radix_char (LINE_T ** ref_l, char **ref_s, char *ch)
956 {
957 BOOL_T ret = A68_FALSE;
958 save_state (*ref_l, *ref_s, *ch);
959 if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
960 if (*ch == TO_UPPER (RADIX_CHAR)) {
961 *ch = next_char (ref_l, ref_s, A68_TRUE);
962 ret = (BOOL_T) (strchr ("0123456789ABCDEF", *ch) != NO_TEXT);
963 }
964 } else {
965 if (*ch == RADIX_CHAR) {
966 *ch = next_char (ref_l, ref_s, A68_TRUE);
967 ret = (BOOL_T) (strchr ("0123456789abcdef", *ch) != NO_TEXT);
968 }
969 }
970 restore_state (ref_l, ref_s, ch);
971 return ret;
972 }
973
974 //! @brief Whether input shows decimal point.
975
976 BOOL_T is_decimal_point (LINE_T ** ref_l, char **ref_s, char *ch)
977 {
978 BOOL_T ret = A68_FALSE;
979 save_state (*ref_l, *ref_s, *ch);
980 if (*ch == POINT_CHAR) {
981 char exp_syms[3];
982 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
983 exp_syms[0] = EXPONENT_CHAR;
984 exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
985 exp_syms[2] = NULL_CHAR;
986 } else {
987 exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
988 exp_syms[1] = BACKSLASH_CHAR;
989 exp_syms[2] = NULL_CHAR;
990 }
991 *ch = next_char (ref_l, ref_s, A68_TRUE);
992 if (strchr (exp_syms, *ch) != NO_TEXT) {
993 *ch = next_char (ref_l, ref_s, A68_TRUE);
994 ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
995 } else {
996 ret = (BOOL_T) (strchr ("0123456789", *ch) != NO_TEXT);
997 }
998 }
999 restore_state (ref_l, ref_s, ch);
1000 return ret;
1001 }
1002
1003 //! @brief Get next token from internal copy of source file..
1004
1005 void get_next_token (BOOL_T in_format, LINE_T ** ref_l, char **ref_s, LINE_T ** start_l, char **start_c, int *att)
1006 {
1007 char c = **ref_s, *sym = A68_PARSER (scan_buf);
1008 sym[0] = NULL_CHAR;
1009 get_good_char (&c, ref_l, ref_s);
1010 *start_l = *ref_l;
1011 *start_c = *ref_s;
1012 if (c == STOP_CHAR) {
1013 // We are at EOF.
1014 (sym++)[0] = STOP_CHAR;
1015 sym[0] = NULL_CHAR;
1016 return;
1017 }
1018 // In a format.
1019 if (in_format) {
1020 char *format_items;
1021 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1022 format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1023 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1024 format_items = "/%\\+-.ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1025 } else {
1026 format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1027 }
1028 if (strchr (format_items, c) != NO_TEXT) {
1029 // General format items.
1030 (sym++)[0] = c;
1031 sym[0] = NULL_CHAR;
1032 *att = get_format_item (c);
1033 (void) next_char (ref_l, ref_s, A68_FALSE);
1034 return;
1035 }
1036 if (IS_DIGIT (c)) {
1037 // INT denotation for static replicator.
1038 SCAN_DIGITS (c);
1039 sym[0] = NULL_CHAR;
1040 *att = STATIC_REPLICATOR;
1041 return;
1042 }
1043 }
1044 // Not in a format.
1045 if (IS_UPPER (c)) {
1046 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1047 // Upper case word - bold tag.
1048 while (IS_UPPER (c) || c == '_') {
1049 (sym++)[0] = c;
1050 c = next_char (ref_l, ref_s, A68_FALSE);
1051 }
1052 sym[0] = NULL_CHAR;
1053 *att = BOLD_TAG;
1054 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1055 while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1056 (sym++)[0] = c;
1057 c = next_char (ref_l, ref_s, A68_TRUE);
1058 }
1059 sym[0] = NULL_CHAR;
1060 *att = IDENTIFIER;
1061 }
1062 } else if (c == '\'') {
1063 // Quote, uppercase word, quote - bold tag.
1064 int k = 0;
1065 c = next_char (ref_l, ref_s, A68_FALSE);
1066 while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1067 (sym++)[0] = c;
1068 k++;
1069 c = next_char (ref_l, ref_s, A68_TRUE);
1070 }
1071 SCAN_ERROR (k == 0, *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1072 sym[0] = NULL_CHAR;
1073 *att = BOLD_TAG;
1074 // Skip terminating quote, or complain if it is not there.
1075 SCAN_ERROR (c != '\'', *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1076 c = next_char (ref_l, ref_s, A68_FALSE);
1077 } else if (IS_LOWER (c)) {
1078 // Lower case word - identifier.
1079 while (IS_LOWER (c) || IS_DIGIT (c) || c == '_') {
1080 (sym++)[0] = c;
1081 c = next_char (ref_l, ref_s, A68_TRUE);
1082 }
1083 sym[0] = NULL_CHAR;
1084 *att = IDENTIFIER;
1085 } else if (c == POINT_CHAR) {
1086 // Begins with a point symbol - point, dotdot, L REAL denotation.
1087 if (is_decimal_point (ref_l, ref_s, &c)) {
1088 (sym++)[0] = '0';
1089 (sym++)[0] = POINT_CHAR;
1090 c = next_char (ref_l, ref_s, A68_TRUE);
1091 SCAN_DIGITS (c);
1092 if (is_exp_char (ref_l, ref_s, &c)) {
1093 SCAN_EXPONENT_PART (c);
1094 }
1095 sym[0] = NULL_CHAR;
1096 *att = REAL_DENOTATION;
1097 } else {
1098 c = next_char (ref_l, ref_s, A68_TRUE);
1099 if (c == POINT_CHAR) {
1100 (sym++)[0] = POINT_CHAR;
1101 (sym++)[0] = POINT_CHAR;
1102 sym[0] = NULL_CHAR;
1103 *att = DOTDOT_SYMBOL;
1104 c = next_char (ref_l, ref_s, A68_FALSE);
1105 } else {
1106 (sym++)[0] = POINT_CHAR;
1107 sym[0] = NULL_CHAR;
1108 *att = POINT_SYMBOL;
1109 }
1110 }
1111 } else if (IS_DIGIT (c)) {
1112 // Something that begins with a digit - L INT denotation, L REAL denotation.
1113 SCAN_DIGITS (c);
1114 if (is_decimal_point (ref_l, ref_s, &c)) {
1115 c = next_char (ref_l, ref_s, A68_TRUE);
1116 if (is_exp_char (ref_l, ref_s, &c)) {
1117 (sym++)[0] = POINT_CHAR;
1118 (sym++)[0] = '0';
1119 SCAN_EXPONENT_PART (c);
1120 *att = REAL_DENOTATION;
1121 } else {
1122 (sym++)[0] = POINT_CHAR;
1123 SCAN_DIGITS (c);
1124 if (is_exp_char (ref_l, ref_s, &c)) {
1125 SCAN_EXPONENT_PART (c);
1126 }
1127 *att = REAL_DENOTATION;
1128 }
1129 } else if (is_exp_char (ref_l, ref_s, &c)) {
1130 SCAN_EXPONENT_PART (c);
1131 *att = REAL_DENOTATION;
1132 } else if (is_radix_char (ref_l, ref_s, &c)) {
1133 (sym++)[0] = c;
1134 c = next_char (ref_l, ref_s, A68_TRUE);
1135 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1136 while (IS_DIGIT (c) || strchr ("abcdef", c) != NO_TEXT) {
1137 (sym++)[0] = c;
1138 c = next_char (ref_l, ref_s, A68_TRUE);
1139 }
1140 } else {
1141 while (IS_DIGIT (c) || strchr ("ABCDEF", c) != NO_TEXT) {
1142 (sym++)[0] = c;
1143 c = next_char (ref_l, ref_s, A68_TRUE);
1144 }
1145 }
1146 *att = BITS_DENOTATION;
1147 } else {
1148 *att = INT_DENOTATION;
1149 }
1150 sym[0] = NULL_CHAR;
1151 } else if (c == QUOTE_CHAR) {
1152 // STRING denotation.
1153 BOOL_T stop = A68_FALSE;
1154 while (!stop) {
1155 c = next_char (ref_l, ref_s, A68_FALSE);
1156 while (c != QUOTE_CHAR && c != STOP_CHAR) {
1157 SCAN_ERROR (EOL (c), *start_l, *start_c, ERROR_LONG_STRING);
1158 (sym++)[0] = c;
1159 c = next_char (ref_l, ref_s, A68_FALSE);
1160 }
1161 SCAN_ERROR (*ref_l == NO_LINE, *start_l, *start_c, ERROR_UNTERMINATED_STRING);
1162 c = next_char (ref_l, ref_s, A68_FALSE);
1163 if (c == QUOTE_CHAR) {
1164 (sym++)[0] = QUOTE_CHAR;
1165 } else {
1166 stop = A68_TRUE;
1167 }
1168 }
1169 sym[0] = NULL_CHAR;
1170 *att = (in_format ? LITERAL : ROW_CHAR_DENOTATION);
1171 } else if (strchr ("#$()[]{},;@", c) != NO_TEXT) {
1172 // Single character symbols.
1173 (sym++)[0] = c;
1174 (void) next_char (ref_l, ref_s, A68_FALSE);
1175 sym[0] = NULL_CHAR;
1176 *att = 0;
1177 } else if (c == '|') {
1178 // Bar.
1179 (sym++)[0] = c;
1180 c = next_char (ref_l, ref_s, A68_FALSE);
1181 if (c == ':') {
1182 (sym++)[0] = c;
1183 (void) next_char (ref_l, ref_s, A68_FALSE);
1184 }
1185 sym[0] = NULL_CHAR;
1186 *att = 0;
1187 } else if (c == '!' && OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1188 // Bar, will be replaced with modern variant.
1189 // For this reason ! is not a MONAD with quote-stropping.
1190 (sym++)[0] = '|';
1191 c = next_char (ref_l, ref_s, A68_FALSE);
1192 if (c == ':') {
1193 (sym++)[0] = c;
1194 (void) next_char (ref_l, ref_s, A68_FALSE);
1195 }
1196 sym[0] = NULL_CHAR;
1197 *att = 0;
1198 } else if (c == ':') {
1199 // Colon, semicolon, IS, ISNT.
1200 (sym++)[0] = c;
1201 c = next_char (ref_l, ref_s, A68_FALSE);
1202 if (c == '=') {
1203 (sym++)[0] = c;
1204 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == ':') {
1205 (sym++)[0] = c;
1206 c = next_char (ref_l, ref_s, A68_FALSE);
1207 }
1208 } else if (c == '/') {
1209 (sym++)[0] = c;
1210 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == '=') {
1211 (sym++)[0] = c;
1212 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == ':') {
1213 (sym++)[0] = c;
1214 c = next_char (ref_l, ref_s, A68_FALSE);
1215 }
1216 }
1217 } else if (c == ':') {
1218 (sym++)[0] = c;
1219 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == '=') {
1220 (sym++)[0] = c;
1221 }
1222 }
1223 sym[0] = NULL_CHAR;
1224 *att = 0;
1225 } else if (c == '=') {
1226 // Operator starting with "=".
1227 char *scanned = sym;
1228 (sym++)[0] = c;
1229 c = next_char (ref_l, ref_s, A68_FALSE);
1230 if (strchr (NOMADS, c) != NO_TEXT) {
1231 (sym++)[0] = c;
1232 c = next_char (ref_l, ref_s, A68_FALSE);
1233 }
1234 if (c == '=') {
1235 (sym++)[0] = c;
1236 if (next_char (ref_l, ref_s, A68_FALSE) == ':') {
1237 (sym++)[0] = ':';
1238 c = next_char (ref_l, ref_s, A68_FALSE);
1239 if (strlen (sym) < 4 && c == '=') {
1240 (sym++)[0] = '=';
1241 (void) next_char (ref_l, ref_s, A68_FALSE);
1242 }
1243 }
1244 } else if (c == ':') {
1245 (sym++)[0] = c;
1246 sym[0] = NULL_CHAR;
1247 if (next_char (ref_l, ref_s, A68_FALSE) == '=') {
1248 (sym++)[0] = '=';
1249 (void) next_char (ref_l, ref_s, A68_FALSE);
1250 } else {
1251 SCAN_ERROR (!(strcmp (scanned, "=:") == 0 || strcmp (scanned, "==:") == 0), *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1252 }
1253 }
1254 sym[0] = NULL_CHAR;
1255 if (strcmp (scanned, "=") == 0) {
1256 *att = EQUALS_SYMBOL;
1257 } else {
1258 *att = OPERATOR;
1259 }
1260 } else if (strchr (MONADS, c) != NO_TEXT || strchr (NOMADS, c) != NO_TEXT) {
1261 // Operator.
1262 char *scanned = sym;
1263 (sym++)[0] = c;
1264 c = next_char (ref_l, ref_s, A68_FALSE);
1265 if (strchr (NOMADS, c) != NO_TEXT) {
1266 (sym++)[0] = c;
1267 c = next_char (ref_l, ref_s, A68_FALSE);
1268 }
1269 if (c == '=') {
1270 (sym++)[0] = c;
1271 if (next_char (ref_l, ref_s, A68_FALSE) == ':') {
1272 (sym++)[0] = ':';
1273 c = next_char (ref_l, ref_s, A68_FALSE);
1274 if (strlen (scanned) < 4 && c == '=') {
1275 (sym++)[0] = '=';
1276 (void) next_char (ref_l, ref_s, A68_FALSE);
1277 }
1278 }
1279 } else if (c == ':') {
1280 (sym++)[0] = c;
1281 sym[0] = NULL_CHAR;
1282 if (next_char (ref_l, ref_s, A68_FALSE) == '=') {
1283 (sym++)[0] = '=';
1284 sym[0] = NULL_CHAR;
1285 (void) next_char (ref_l, ref_s, A68_FALSE);
1286 } else {
1287 SCAN_ERROR (strcmp (&(scanned[1]), "=:") != 0, *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1288 }
1289 }
1290 sym[0] = NULL_CHAR;
1291 *att = OPERATOR;
1292 } else {
1293 // Afuuus ... strange characters!.
1294 unworthy (*start_l, *start_c, (int) c);
1295 }
1296 }
1297
1298 //! @brief Whether att opens an embedded clause.
1299
1300 BOOL_T open_nested_clause (int att)
1301 {
1302 switch (att) {
1303 case OPEN_SYMBOL:
1304 case BEGIN_SYMBOL:
1305 case PAR_SYMBOL:
1306 case IF_SYMBOL:
1307 case CASE_SYMBOL:
1308 case FOR_SYMBOL:
1309 case FROM_SYMBOL:
1310 case BY_SYMBOL:
1311 case TO_SYMBOL:
1312 case DOWNTO_SYMBOL:
1313 case WHILE_SYMBOL:
1314 case DO_SYMBOL:
1315 case SUB_SYMBOL:
1316 case ACCO_SYMBOL:
1317 {
1318 return A68_TRUE;
1319 }
1320 }
1321 return A68_FALSE;
1322 }
1323
1324 //! @brief Whether att closes an embedded clause.
1325
1326 BOOL_T close_nested_clause (int att)
1327 {
1328 switch (att) {
1329 case CLOSE_SYMBOL:
1330 case END_SYMBOL:
1331 case FI_SYMBOL:
1332 case ESAC_SYMBOL:
1333 case OD_SYMBOL:
1334 case BUS_SYMBOL:
1335 case OCCA_SYMBOL:
1336 {
1337 return A68_TRUE;
1338 }
1339 }
1340 return A68_FALSE;
1341 }
1342
1343 //! @brief Cast a string to lower case.
1344
1345 void make_lower_case (char *p)
1346 {
1347 for (; p != NO_TEXT && p[0] != NULL_CHAR; p++) {
1348 p[0] = (char) TO_LOWER (p[0]);
1349 }
1350 }
1351
1352 //! @brief Construct a linear list of tokens.
1353
1354 void tokenise_source (NODE_T ** root, int level, BOOL_T in_format, LINE_T ** l, char **s, LINE_T ** start_l, char **start_c)
1355 {
1356 char *lpr = NO_TEXT;
1357 int lprt = 0;
1358 while (l != NO_VAR && !A68_PARSER (stop_scanner)) {
1359 int att = 0;
1360 get_next_token (in_format, l, s, start_l, start_c, &att);
1361 if (A68_PARSER (scan_buf)[0] == STOP_CHAR) {
1362 A68_PARSER (stop_scanner) = A68_TRUE;
1363 } else if (strlen (A68_PARSER (scan_buf)) > 0 || att == ROW_CHAR_DENOTATION || att == LITERAL) {
1364 KEYWORD_T *kw;
1365 char *c = NO_TEXT;
1366 BOOL_T make_node = A68_TRUE;
1367 char *trailing = NO_TEXT;
1368 if (att != IDENTIFIER) {
1369 kw = find_keyword (A68 (top_keyword), A68_PARSER (scan_buf));
1370 } else {
1371 kw = NO_KEYWORD;
1372 }
1373 if (!(kw != NO_KEYWORD && att != ROW_CHAR_DENOTATION)) {
1374 if (att == IDENTIFIER) {
1375 make_lower_case (A68_PARSER (scan_buf));
1376 }
1377 if (att != ROW_CHAR_DENOTATION && att != LITERAL) {
1378 int len = (int) strlen (A68_PARSER (scan_buf));
1379 while (len >= 1 && A68_PARSER (scan_buf)[len - 1] == '_') {
1380 trailing = "_";
1381 A68_PARSER (scan_buf)[len - 1] = NULL_CHAR;
1382 len--;
1383 }
1384 }
1385 c = TEXT (add_token (&A68 (top_token), A68_PARSER (scan_buf)));
1386 } else {
1387 if (IS (kw, TO_SYMBOL)) {
1388 // Merge GO and TO to GOTO.
1389 if (*root != NO_NODE && IS (*root, GO_SYMBOL)) {
1390 ATTRIBUTE (*root) = GOTO_SYMBOL;
1391 NSYMBOL (*root) = TEXT (find_keyword (A68 (top_keyword), "GOTO"));
1392 make_node = A68_FALSE;
1393 } else {
1394 att = ATTRIBUTE (kw);
1395 c = TEXT (kw);
1396 }
1397 } else {
1398 if (att == 0 || att == BOLD_TAG) {
1399 att = ATTRIBUTE (kw);
1400 }
1401 c = TEXT (kw);
1402 // Handle pragments.
1403 if (att == STYLE_II_COMMENT_SYMBOL || att == STYLE_I_COMMENT_SYMBOL || att == BOLD_COMMENT_SYMBOL) {
1404 char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1405 if (lpr == NO_TEXT || (int) strlen (lpr) == 0) {
1406 lpr = nlpr;
1407 } else {
1408 char *stale = lpr;
1409 lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1410 a68_free (nlpr);
1411 a68_free (stale);
1412 }
1413 lprt = att;
1414 make_node = A68_FALSE;
1415 } else if (att == STYLE_I_PRAGMAT_SYMBOL || att == BOLD_PRAGMAT_SYMBOL) {
1416 char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1417 if (lpr == NO_TEXT || (int) strlen (lpr) == 0) {
1418 lpr = nlpr;
1419 } else {
1420 char *stale = lpr;
1421 lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1422 a68_free (nlpr);
1423 a68_free (stale);
1424 }
1425 lprt = att;
1426 if (!A68_PARSER (stop_scanner)) {
1427 (void) set_options (OPTION_LIST (&A68_JOB), A68_FALSE);
1428 make_node = A68_FALSE;
1429 }
1430 }
1431 }
1432 }
1433 // Add token to the tree.
1434 if (make_node) {
1435 NODE_T *q = new_node ();
1436 INFO (q) = new_node_info ();
1437 switch (att) {
1438 case ASSIGN_SYMBOL:
1439 case END_SYMBOL:
1440 case ESAC_SYMBOL:
1441 case OD_SYMBOL:
1442 case OF_SYMBOL:
1443 case FI_SYMBOL:
1444 case CLOSE_SYMBOL:
1445 case BUS_SYMBOL:
1446 case COLON_SYMBOL:
1447 case COMMA_SYMBOL:
1448 case DOTDOT_SYMBOL:
1449 case SEMI_SYMBOL:
1450 {
1451 GINFO (q) = NO_GINFO;
1452 break;
1453 }
1454 default:{
1455 GINFO (q) = new_genie_info ();
1456 break;
1457 }
1458 }
1459 STATUS (q) = OPTION_NODEMASK (&A68_JOB);
1460 LINE (INFO (q)) = *start_l;
1461 CHAR_IN_LINE (INFO (q)) = *start_c;
1462 PRIO (INFO (q)) = 0;
1463 PROCEDURE_LEVEL (INFO (q)) = 0;
1464 ATTRIBUTE (q) = att;
1465 NSYMBOL (q) = c;
1466 PREVIOUS (q) = *root;
1467 SUB (q) = NEXT (q) = NO_NODE;
1468 TABLE (q) = NO_TABLE;
1469 MOID (q) = NO_MOID;
1470 TAX (q) = NO_TAG;
1471 if (lpr != NO_TEXT) {
1472 NPRAGMENT (q) = lpr;
1473 NPRAGMENT_TYPE (q) = lprt;
1474 lpr = NO_TEXT;
1475 lprt = 0;
1476 }
1477 if (*root != NO_NODE) {
1478 NEXT (*root) = q;
1479 }
1480 if (TOP_NODE (&A68_JOB) == NO_NODE) {
1481 TOP_NODE (&A68_JOB) = q;
1482 }
1483 *root = q;
1484 if (trailing != NO_TEXT) {
1485 diagnostic (A68_WARNING | A68_FORCE_DIAGNOSTICS, q, WARNING_TRAILING, trailing, att);
1486 }
1487 }
1488 // Redirection in tokenising formats. The scanner is a recursive-descent type as
1489 // to know when it scans a format text and when not.
1490 if (in_format && att == FORMAT_DELIMITER_SYMBOL) {
1491 return;
1492 } else if (!in_format && att == FORMAT_DELIMITER_SYMBOL) {
1493 tokenise_source (root, level + 1, A68_TRUE, l, s, start_l, start_c);
1494 } else if (in_format && open_nested_clause (att)) {
1495 NODE_T *z = PREVIOUS (*root);
1496 if (z != NO_NODE && is_one_of (z, FORMAT_ITEM_N, FORMAT_ITEM_G, FORMAT_ITEM_H, FORMAT_ITEM_F, STOP)) {
1497 tokenise_source (root, level, A68_FALSE, l, s, start_l, start_c);
1498 } else if (att == OPEN_SYMBOL) {
1499 ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1500 } else if (OPTION_BRACKETS (&A68_JOB) && att == SUB_SYMBOL) {
1501 ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1502 } else if (OPTION_BRACKETS (&A68_JOB) && att == ACCO_SYMBOL) {
1503 ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1504 }
1505 } else if (!in_format && level > 0 && open_nested_clause (att)) {
1506 tokenise_source (root, level + 1, A68_FALSE, l, s, start_l, start_c);
1507 } else if (!in_format && level > 0 && close_nested_clause (att)) {
1508 return;
1509 } else if (in_format && att == CLOSE_SYMBOL) {
1510 ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1511 } else if (OPTION_BRACKETS (&A68_JOB) && in_format && att == BUS_SYMBOL) {
1512 ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1513 } else if (OPTION_BRACKETS (&A68_JOB) && in_format && att == OCCA_SYMBOL) {
1514 ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1515 }
1516 }
1517 }
1518 }
1519
1520 //! @brief Tokenise source file, build initial syntax tree.
1521
1522 BOOL_T lexical_analyser (void)
1523 {
1524 LINE_T *l = NO_LINE, *start_l = NO_LINE;
1525 char *s = NO_TEXT, *start_c = NO_TEXT;
1526 NODE_T *root = NO_NODE;
1527 A68_PARSER (scan_buf) = NO_TEXT;
1528 A68_PARSER (max_scan_buf_length) = A68_PARSER (source_file_size) = get_source_size ();
1529 // Errors in file?.
1530 if (A68_PARSER (max_scan_buf_length) == 0) {
1531 return A68_FALSE;
1532 }
1533 if (OPTION_RUN_SCRIPT (&A68_JOB)) {
1534 A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (max_scan_buf_length)));
1535 if (!read_script_file ()) {
1536 return A68_FALSE;
1537 }
1538 } else {
1539 A68_PARSER (max_scan_buf_length) += KILOBYTE; // for the environ, more than enough
1540 A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) A68_PARSER (max_scan_buf_length));
1541 // Errors in file?.
1542 if (!read_source_file ()) {
1543 return A68_FALSE;
1544 }
1545 }
1546 // Start tokenising.
1547 A68_PARSER (read_error) = A68_FALSE;
1548 A68_PARSER (stop_scanner) = A68_FALSE;
1549 if ((l = TOP_LINE (&A68_JOB)) != NO_LINE) {
1550 s = STRING (l);
1551 }
1552 tokenise_source (&root, 0, A68_FALSE, &l, &s, &start_l, &start_c);
1553 return A68_TRUE;
1554 }