parser-scanner.c
1 //! @file parser-scanner.c
2 //! @author J. Marcel van der Veer
3
4 //! @section Copyright
5 //!
6 //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
7 //! Copyright 2001-2024 J. Marcel van der Veer [algol68g@xs4all.nl].
8
9 //! @section License
10 //!
11 //! This program is free software; you can redistribute it and/or modify it
12 //! under the terms of the GNU General Public License as published by the
13 //! Free Software Foundation; either version 3 of the License, or
14 //! (at your option) any later version.
15 //!
16 //! This program is distributed in the hope that it will be useful, but
17 //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 //! more details. You should have received a copy of the GNU General Public
20 //! License along with this program. If not, see [http://www.gnu.org/licenses/].
21
22 //! @section Synopsis
23 //!
24 //! Context-dependent Algol 68 tokeniser.
25
26 #include "a68g.h"
27 #include "a68g-parser.h"
28 #include "a68g-prelude.h"
29 #include "a68g-options.h"
30 #include "a68g-environ.h"
31
32 // Macros.
33
34 #define SCAN_DIGITS(c)\
35 while (IS_DIGIT (c)) {\
36 (sym++)[0] = (c);\
37 (c) = next_char (ref_l, ref_s, A68_TRUE);\
38 }
39
40 #define SCAN_EXPONENT_PART(c)\
41 (sym++)[0] = EXPONENT_CHAR;\
42 (c) = next_char (ref_l, ref_s, A68_TRUE);\
43 if ((c) == '+' || (c) == '-') {\
44 (sym++)[0] = (c);\
45 (c) = next_char (ref_l, ref_s, A68_TRUE);\
46 }\
47 SCAN_ERROR (!IS_DIGIT (c), *start_l, *start_c, ERROR_EXPONENT_DIGIT);\
48 SCAN_DIGITS (c)
49
50 //! @brief Save scanner state, for character look-ahead.
51
52 void save_state (LINE_T * ref_l, char *ref_s, char ch)
53 {
54 SCAN_STATE_L (&A68_JOB) = ref_l;
55 SCAN_STATE_S (&A68_JOB) = ref_s;
56 SCAN_STATE_C (&A68_JOB) = ch;
57 }
58
59 //! @brief Restore scanner state, for character look-ahead.
60
61 void restore_state (LINE_T ** ref_l, char **ref_s, char *ch)
62 {
63 *ref_l = SCAN_STATE_L (&A68_JOB);
64 *ref_s = SCAN_STATE_S (&A68_JOB);
65 *ch = SCAN_STATE_C (&A68_JOB);
66 }
67
68 //! @brief New_source_line.
69
70 LINE_T *new_source_line (void)
71 {
72 LINE_T *z = (LINE_T *) get_fixed_heap_space ((size_t) SIZE_ALIGNED (LINE_T));
73 MARKER (z)[0] = NULL_CHAR;
74 STRING (z) = NO_TEXT;
75 FILENAME (z) = NO_TEXT;
76 DIAGNOSTICS (z) = NO_DIAGNOSTIC;
77 NUMBER (z) = 0;
78 PRINT_STATUS (z) = 0;
79 LIST (z) = A68_TRUE;
80 NEXT (z) = NO_LINE;
81 PREVIOUS (z) = NO_LINE;
82 return z;
83 }
84
85 //! @brief Append a source line to the internal source file.
86
87 void append_source_line (char *str, LINE_T ** ref_l, int *line_num, char *filename)
88 {
89 LINE_T *z = new_source_line ();
90 // Allow shell command in first line, f.i. "#!/usr/share/bin/a68g".
91 if (*line_num == 1) {
92 if (strlen (str) >= 2 && strncmp (str, "#!", 2) == 0) {
93 ABEND (strstr (str, "run-script") != NO_TEXT, ERROR_SHELL_SCRIPT, __func__);
94 (*line_num)++;
95 return;
96 }
97 }
98 // Link line into the chain.
99 STRING (z) = new_fixed_string (str);
100 FILENAME (z) = filename;
101 NUMBER (z) = (*line_num)++;
102 PRINT_STATUS (z) = NOT_PRINTED;
103 LIST (z) = A68_TRUE;
104 DIAGNOSTICS (z) = NO_DIAGNOSTIC;
105 NEXT (z) = NO_LINE;
106 PREVIOUS (z) = *ref_l;
107 if (TOP_LINE (&A68_JOB) == NO_LINE) {
108 TOP_LINE (&A68_JOB) = z;
109 }
110 if (*ref_l != NO_LINE) {
111 NEXT (*ref_l) = z;
112 }
113 *ref_l = z;
114 }
115
116 // Scanner, tokenises the source code.
117
118 //! @brief Whether ch is unworthy.
119
120 void unworthy (LINE_T * u, char *v, char ch)
121 {
122 if (IS_PRINT (ch)) {
123 ASSERT (a68_bufprt (A68 (edit_line), SNPRINTF_SIZE, "*%s", ERROR_UNWORTHY_CHARACTER) >= 0);
124 } else {
125 ASSERT (a68_bufprt (A68 (edit_line), SNPRINTF_SIZE, "*%s %s", ERROR_UNWORTHY_CHARACTER, ctrl_char (ch)) >= 0);
126 }
127 scan_error (u, v, A68 (edit_line));
128 }
129
130 //! @brief Concatenate lines that terminate in '\' with next line.
131
132 void concatenate_lines (LINE_T * top)
133 {
134 LINE_T *q;
135 // Work from bottom backwards.
136 for (q = top; q != NO_LINE && NEXT (q) != NO_LINE; q = NEXT (q)) {
137 ;
138 }
139 for (; q != NO_LINE; BACKWARD (q)) {
140 char *z = STRING (q);
141 int len = (int) strlen (z);
142 if (len >= 2 && z[len - 2] == BACKSLASH_CHAR && z[len - 1] == NEWLINE_CHAR && NEXT (q) != NO_LINE && STRING (NEXT (q)) != NO_TEXT) {
143 z[len - 2] = NULL_CHAR;
144 len += (int) strlen (STRING (NEXT (q)));
145 z = (char *) get_fixed_heap_space ((size_t) (len + 1));
146 a68_bufcpy (z, STRING (q), len + 1);
147 a68_bufcat (z, STRING (NEXT (q)), len + 1);
148 STRING (NEXT (q))[0] = NULL_CHAR;
149 STRING (q) = z;
150 }
151 }
152 }
153
154 //! @brief Whether u is bold tag v, independent of stropping regime.
155
156 BOOL_T is_bold (char *u, char *v)
157 {
158 unt len = (unt) strlen (v);
159 if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
160 if (u[0] == '\'') {
161 return (BOOL_T) (strncmp (++u, v, len) == 0 && u[len] == '\'');
162 } else {
163 return A68_FALSE;
164 }
165 } else {
166 return (BOOL_T) (strncmp (u, v, len) == 0 && !IS_UPPER (u[len]));
167 }
168 }
169
170 //! @brief Skip string.
171
172 BOOL_T skip_string (LINE_T ** top, char **ch)
173 {
174 LINE_T *u = *top;
175 char *v = *ch;
176 v++;
177 while (u != NO_LINE) {
178 while (v[0] != NULL_CHAR) {
179 if (v[0] == QUOTE_CHAR && v[1] != QUOTE_CHAR) {
180 *top = u;
181 *ch = &v[1];
182 return A68_TRUE;
183 } else if (v[0] == QUOTE_CHAR && v[1] == QUOTE_CHAR) {
184 v += 2;
185 } else {
186 v++;
187 }
188 }
189 FORWARD (u);
190 if (u != NO_LINE) {
191 v = &(STRING (u)[0]);
192 } else {
193 v = NO_TEXT;
194 }
195 }
196 return A68_FALSE;
197 }
198
199 //! @brief Skip comment.
200
201 BOOL_T skip_comment (LINE_T ** top, char **ch, int delim)
202 {
203 LINE_T *u = *top;
204 char *v = *ch;
205 v++;
206 while (u != NO_LINE) {
207 while (v[0] != NULL_CHAR) {
208 if (is_bold (v, "COMMENT") && delim == BOLD_COMMENT_SYMBOL) {
209 *top = u;
210 *ch = &v[1];
211 return A68_TRUE;
212 } else if (is_bold (v, "CO") && delim == STYLE_I_COMMENT_SYMBOL) {
213 *top = u;
214 *ch = &v[1];
215 return A68_TRUE;
216 } else if (v[0] == '#' && delim == STYLE_II_COMMENT_SYMBOL) {
217 *top = u;
218 *ch = &v[1];
219 return A68_TRUE;
220 } else {
221 v++;
222 }
223 }
224 FORWARD (u);
225 if (u != NO_LINE) {
226 v = &(STRING (u)[0]);
227 } else {
228 v = NO_TEXT;
229 }
230 }
231 return A68_FALSE;
232 }
233
234 //! @brief Skip rest of pragmat.
235
236 BOOL_T skip_pragmat (LINE_T ** top, char **ch, int delim, BOOL_T whitespace)
237 {
238 LINE_T *u = *top;
239 char *v = *ch;
240 while (u != NO_LINE) {
241 while (v[0] != NULL_CHAR) {
242 if (is_bold (v, "PRAGMAT") && delim == BOLD_PRAGMAT_SYMBOL) {
243 *top = u;
244 *ch = &v[1];
245 return A68_TRUE;
246 } else if (is_bold (v, "PR") && delim == STYLE_I_PRAGMAT_SYMBOL) {
247 *top = u;
248 *ch = &v[1];
249 return A68_TRUE;
250 } else {
251 if (whitespace && !IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
252 scan_error (u, v, ERROR_PRAGMENT);
253 } else if (IS_UPPER (v[0])) {
254 // Skip a bold word as you may trigger on REPR, for instance ...
255 while (IS_UPPER (v[0])) {
256 v++;
257 }
258 } else {
259 v++;
260 }
261 }
262 }
263 FORWARD (u);
264 if (u != NO_LINE) {
265 v = &(STRING (u)[0]);
266 } else {
267 v = NO_TEXT;
268 }
269 }
270 return A68_FALSE;
271 }
272
273 //! @brief Return pointer to next token within pragmat.
274
275 char *get_pragmat_item (LINE_T ** top, char **ch)
276 {
277 LINE_T *u = *top;
278 char *v = *ch;
279 while (u != NO_LINE) {
280 while (v[0] != NULL_CHAR) {
281 if (!IS_SPACE (v[0]) && v[0] != NEWLINE_CHAR) {
282 *top = u;
283 *ch = v;
284 return v;
285 } else {
286 v++;
287 }
288 }
289 FORWARD (u);
290 if (u != NO_LINE) {
291 v = &(STRING (u)[0]);
292 } else {
293 v = NO_TEXT;
294 }
295 }
296 return NO_TEXT;
297 }
298
299 //! @brief Case insensitive strncmp for at most the number of chars in 'v'.
300
301 int streq (char *u, char *v)
302 {
303 int diff;
304 for (diff = 0; diff == 0 && u[0] != NULL_CHAR && v[0] != NULL_CHAR; u++, v++) {
305 diff = ((int) TO_LOWER (u[0])) - ((int) TO_LOWER (v[0]));
306 }
307 return diff;
308 }
309
310 //! @brief Scan for next pragmat and yield first pragmat item.
311
312 char *next_preprocessor_item (LINE_T ** top, char **ch, int *delim)
313 {
314 LINE_T *u = *top;
315 char *v = *ch;
316 *delim = 0;
317 while (u != NO_LINE) {
318 while (v[0] != NULL_CHAR) {
319 LINE_T *start_l = u;
320 char *start_c = v;
321 // STRINGs must be skipped.
322 if (v[0] == QUOTE_CHAR) {
323 SCAN_ERROR (!skip_string (&u, &v), start_l, start_c, ERROR_UNTERMINATED_STRING);
324 }
325 // COMMENTS must be skipped.
326 else if (is_bold (v, "COMMENT")) {
327 SCAN_ERROR (!skip_comment (&u, &v, BOLD_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
328 } else if (is_bold (v, "CO")) {
329 SCAN_ERROR (!skip_comment (&u, &v, STYLE_I_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
330 } else if (v[0] == '#') {
331 SCAN_ERROR (!skip_comment (&u, &v, STYLE_II_COMMENT_SYMBOL), start_l, start_c, ERROR_UNTERMINATED_COMMENT);
332 } else if (is_bold (v, "PRAGMAT") || is_bold (v, "PR")) {
333 // We caught a PRAGMAT.
334 char *item;
335 if (is_bold (v, "PRAGMAT")) {
336 *delim = BOLD_PRAGMAT_SYMBOL;
337 v = &v[strlen ("PRAGMAT")];
338 } else if (is_bold (v, "PR")) {
339 *delim = STYLE_I_PRAGMAT_SYMBOL;
340 v = &v[strlen ("PR")];
341 }
342 item = get_pragmat_item (&u, &v);
343 SCAN_ERROR (item == NO_TEXT, start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
344 // Item "preprocessor" restarts preprocessing if it is off.
345 if (A68_PARSER (no_preprocessing) && streq (item, "PREPROCESSOR") == 0) {
346 A68_PARSER (no_preprocessing) = A68_FALSE;
347 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
348 }
349 // If preprocessing is switched off, we idle to closing bracket.
350 else if (A68_PARSER (no_preprocessing)) {
351 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
352 }
353 // Item "nopreprocessor" stops preprocessing if it is on.
354 if (streq (item, "NOPREPROCESSOR") == 0) {
355 A68_PARSER (no_preprocessing) = A68_TRUE;
356 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
357 }
358 // Item "INCLUDE" includes a file.
359 else if (streq (item, "INCLUDE") == 0) {
360 *top = u;
361 *ch = v;
362 return item;
363 }
364 // Item "READ" includes a file.
365 else if (streq (item, "READ") == 0) {
366 *top = u;
367 *ch = v;
368 return item;
369 }
370 // Unrecognised item - probably options handled later by the tokeniser.
371 else {
372 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, A68_FALSE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
373 }
374 } else if (IS_UPPER (v[0])) {
375 // Skip a bold word as you may trigger on REPR, for instance ...
376 while (IS_UPPER (v[0])) {
377 v++;
378 }
379 } else {
380 v++;
381 }
382 }
383 FORWARD (u);
384 if (u != NO_LINE) {
385 v = &(STRING (u)[0]);
386 } else {
387 v = NO_TEXT;
388 }
389 }
390 *top = u;
391 *ch = v;
392 return NO_TEXT;
393 }
394
395 //! @brief Include files.
396
397 void include_files (LINE_T * top)
398 {
399 // include_files
400 //
401 // syntax: PR read "filename" PR
402 // PR include "filename" PR
403 //
404 // The file gets inserted before the line containing the pragmat. In this way
405 // correct line numbers are preserved which helps diagnostics. A file that has
406 // been included will not be included a second time - it will be ignored.
407 // A rigorous fail-safe, but there is no mechanism to prevent recursive includes
408 // in A68 source code. User reports do not indicate sophisticated use of INCLUDE,
409 // so this is fine for now.
410 // TODO - some day we might need `app', analogous to `cpp'.
411 BOOL_T make_pass = A68_TRUE;
412 while (make_pass) {
413 LINE_T *s, *t, *u = top;
414 char *v = &(STRING (u)[0]);
415 make_pass = A68_FALSE;
416 errno = 0;
417 while (u != NO_LINE) {
418 int pr_lim;
419 char *item = next_preprocessor_item (&u, &v, &pr_lim);
420 LINE_T *start_l = u;
421 char *start_c = v;
422 // Search for PR include "filename" PR.
423 if (item != NO_TEXT && (streq (item, "INCLUDE") == 0 || streq (item, "READ") == 0)) {
424 FILE_T fd;
425 int n, linum, fsize, k, bytes_read, fnwid;
426 char *fbuf, delim;
427 BUFFER fnb;
428 char *fn;
429 // Skip to filename.
430 while (IS_ALPHA (v[0])) {
431 v++;
432 }
433 while (IS_SPACE (v[0])) {
434 v++;
435 }
436 // Scan quoted filename.
437 SCAN_ERROR ((v[0] != QUOTE_CHAR && v[0] != '\''), start_l, start_c, ERROR_INCORRECT_FILENAME);
438 delim = (v++)[0];
439 n = 0;
440 fnb[0] = NULL_CHAR;
441 // Scan Algol 68 string (note: "" denotes a ", while in C it concatenates).
442 do {
443 SCAN_ERROR (EOL (v[0]), start_l, start_c, ERROR_INCORRECT_FILENAME);
444 SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
445 if (v[0] == delim) {
446 while (v[0] == delim && v[1] == delim) {
447 SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c, ERROR_INCORRECT_FILENAME);
448 fnb[n++] = delim;
449 fnb[n] = NULL_CHAR;
450 v += 2;
451 }
452 } else if (IS_PRINT (v[0])) {
453 fnb[n++] = *(v++);
454 fnb[n] = NULL_CHAR;
455 } else {
456 SCAN_ERROR (A68_TRUE, start_l, start_c, ERROR_INCORRECT_FILENAME);
457 }
458 } while (v[0] != delim);
459 // Insist that the pragmat is closed properly.
460 v = &v[1];
461 SCAN_ERROR (!skip_pragmat (&u, &v, pr_lim, A68_TRUE), start_l, start_c, ERROR_UNTERMINATED_PRAGMAT);
462 SCAN_ERROR (n == 0, start_l, start_c, ERROR_INCORRECT_FILENAME);
463 // Make the name relative to the position of the source file (C preprocessor standard).
464 if (FILENAME (u) != NO_TEXT) {
465 fn = a68_relpath (a68_dirname (FILENAME (u)), a68_dirname (fnb), a68_basename (fnb));
466 } else {
467 fn = a68_relpath (FILE_PATH (&A68_JOB), a68_dirname (fnb), a68_basename (fnb));
468 }
469 // Do not check errno, since errno may be undefined here after a successful call.
470 if (fn != NO_TEXT) {
471 a68_bufcpy (fnb, fn, BUFFER_SIZE);
472 } else {
473 SCAN_ERROR (A68_TRUE, NO_LINE, NO_TEXT, ERROR_SOURCE_FILE_INCLUDE_OPEN);
474 }
475 fnwid = (int) strlen (fnb) + 1;
476 fn = (char *) get_fixed_heap_space ((size_t) fnwid);
477 a68_bufcpy (fn, fnb, fnwid);
478 // Ignore the file when included more than once.
479 for (t = top; t != NO_LINE; t = NEXT (t)) {
480 if (strcmp (FILENAME (t), fn) == 0) {
481 goto search_next_pragmat;
482 }
483 }
484 // Access the file.
485 errno = 0;
486 fd = open (fn, O_RDONLY | O_BINARY);
487 SCAN_ERROR (fd == -1, start_l, start_c, ERROR_SOURCE_FILE_INCLUDE_OPEN);
488 errno = 0;
489 fsize = (int) lseek (fd, 0, SEEK_END);
490 ASSERT (fsize >= 0);
491 SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
492 fbuf = (char *) get_temp_heap_space ((unt) (8 + fsize));
493 errno = 0;
494 ASSERT (lseek (fd, 0, SEEK_SET) >= 0);
495 SCAN_ERROR (errno != 0, start_l, start_c, ERROR_FILE_READ);
496 errno = 0;
497 bytes_read = (int) io_read (fd, fbuf, (size_t) fsize);
498 SCAN_ERROR (errno != 0 || bytes_read != fsize, start_l, start_c, ERROR_FILE_READ);
499 // Buffer still usable?.
500 if (fsize > A68_PARSER (max_scan_buf_length)) {
501 A68_PARSER (max_scan_buf_length) = fsize;
502 A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (max_scan_buf_length)));
503 }
504 // Link all lines into the list.
505 linum = 1;
506 s = u;
507 t = PREVIOUS (u);
508 k = 0;
509 if (fsize == 0) {
510 // If file is empty, insert single empty line.
511 A68_PARSER (scan_buf)[0] = NEWLINE_CHAR;
512 A68_PARSER (scan_buf)[1] = NULL_CHAR;
513 append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
514 } else
515 while (k < fsize) {
516 n = 0;
517 A68_PARSER (scan_buf)[0] = NULL_CHAR;
518 while (k < fsize && fbuf[k] != NEWLINE_CHAR) {
519 SCAN_ERROR ((IS_CNTRL (fbuf[k]) && !IS_SPACE (fbuf[k])) || fbuf[k] == STOP_CHAR, start_l, start_c, ERROR_FILE_INCLUDE_CTRL);
520 A68_PARSER (scan_buf)[n++] = fbuf[k++];
521 A68_PARSER (scan_buf)[n] = NULL_CHAR;
522 }
523 A68_PARSER (scan_buf)[n++] = NEWLINE_CHAR;
524 A68_PARSER (scan_buf)[n] = NULL_CHAR;
525 if (k < fsize) {
526 k++;
527 }
528 append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
529 }
530 // Conclude and go find another include directive, if any.
531 NEXT (t) = s;
532 PREVIOUS (s) = t;
533 concatenate_lines (top);
534 ASSERT (close (fd) == 0);
535 make_pass = A68_TRUE;
536 }
537 search_next_pragmat:_SKIP_;
538 }
539 }
540 }
541
542 //! @brief Size of source file.
543
544 int get_source_size (void)
545 {
546 FILE_T f = FILE_SOURCE_FD (&A68_JOB);
547 // This is why WIN32 must open as "read binary".
548 return (int) lseek (f, 0, SEEK_END);
549 }
550
551 //! @brief Append environment source lines.
552
553 void append_environ (char *str[], LINE_T ** ref_l, int *line_num, char *name)
554 {
555 for (int k = 0; str[k] != NO_TEXT; k++) {
556 int zero_line_num = 0;
557 (*line_num)++;
558 append_source_line (str[k], ref_l, &zero_line_num, name);
559 }
560 }
561
562 //! @brief Read script file and make internal copy.
563
564 BOOL_T read_script_file (void)
565 {
566 LINE_T *ref_l = NO_LINE;
567 int k, n, num;
568 unt len;
569 BOOL_T file_end = A68_FALSE;
570 BUFFER filename, linenum;
571 char ch, *fn, *line;
572 char *buffer = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (source_file_size)));
573 FILE_T source = FILE_SOURCE_FD (&A68_JOB);
574 ABEND (source == -1, ERROR_ACTION, __func__);
575 buffer[0] = NULL_CHAR;
576 n = 0;
577 len = (unt) (8 + A68_PARSER (source_file_size));
578 buffer = (char *) get_temp_heap_space (len);
579 ASSERT (lseek (source, 0, SEEK_SET) >= 0);
580 while (!file_end) {
581 // Read the original file name.
582 filename[0] = NULL_CHAR;
583 k = 0;
584 if (io_read (source, &ch, 1) == 0) {
585 file_end = A68_TRUE;
586 continue;
587 }
588 while (ch != NEWLINE_CHAR) {
589 filename[k++] = ch;
590 ASSERT (io_read (source, &ch, 1) == 1);
591 }
592 filename[k] = NULL_CHAR;
593 fn = TEXT (add_token (&A68 (top_token), filename));
594 // Read the original file number.
595 linenum[0] = NULL_CHAR;
596 k = 0;
597 ASSERT (io_read (source, &ch, 1) == 1);
598 while (ch != NEWLINE_CHAR) {
599 linenum[k++] = ch;
600 ASSERT (io_read (source, &ch, 1) == 1);
601 }
602 linenum[k] = NULL_CHAR;
603 num = (int) strtol (linenum, NO_VAR, 10);
604 ABEND (errno == ERANGE, ERROR_INTERNAL_CONSISTENCY, __func__);
605 // COPY original line into buffer.
606 ASSERT (io_read (source, &ch, 1) == 1);
607 line = &buffer[n];
608 while (ch != NEWLINE_CHAR) {
609 buffer[n++] = ch;
610 ASSERT (io_read (source, &ch, 1) == 1);
611 ABEND ((unt) n >= len, ERROR_ACTION, __func__);
612 }
613 buffer[n++] = NEWLINE_CHAR;
614 buffer[n] = NULL_CHAR;
615 append_source_line (line, &ref_l, &num, fn);
616 }
617 return A68_TRUE;
618 }
619
620 //! @brief Read source file and make internal copy.
621
622 BOOL_T read_source_file (void)
623 {
624 LINE_T *ref_l = NO_LINE;
625 int line_num = 0, k, bytes_read;
626 ssize_t l;
627 FILE_T f = FILE_SOURCE_FD (&A68_JOB);
628 char **prelude_start, **postlude, *buffer;
629 // Prelude.
630 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
631 prelude_start = bold_prelude_start;
632 postlude = bold_postlude;
633 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
634 prelude_start = quote_prelude_start;
635 postlude = quote_postlude;
636 } else {
637 prelude_start = postlude = NO_VAR;
638 }
639 append_environ (prelude_start, &ref_l, &line_num, "prelude");
640 // Read the file into a single buffer, so we save on system calls.
641 line_num = 1;
642 errno = 0;
643 buffer = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (source_file_size)));
644 ABEND (errno != 0 || buffer == NO_TEXT, ERROR_ALLOCATION, __func__);
645 ASSERT (lseek (f, 0, SEEK_SET) >= 0);
646 ABEND (errno != 0, ERROR_ACTION, __func__);
647 errno = 0;
648 bytes_read = (int) io_read (f, buffer, (size_t) A68_PARSER (source_file_size));
649 ABEND (errno != 0 || bytes_read != A68_PARSER (source_file_size), ERROR_ACTION, __func__);
650 // Link all lines into the list.
651 k = 0;
652 while (k < A68_PARSER (source_file_size)) {
653 l = 0;
654 A68_PARSER (scan_buf)[0] = NULL_CHAR;
655 while (k < A68_PARSER (source_file_size) && buffer[k] != NEWLINE_CHAR) {
656 if (k < A68_PARSER (source_file_size) - 1 && buffer[k] == CR_CHAR && buffer[k + 1] == NEWLINE_CHAR) {
657 k++;
658 } else {
659 A68_PARSER (scan_buf)[l++] = buffer[k++];
660 A68_PARSER (scan_buf)[l] = NULL_CHAR;
661 }
662 }
663 A68_PARSER (scan_buf)[l++] = NEWLINE_CHAR;
664 A68_PARSER (scan_buf)[l] = NULL_CHAR;
665 if (k < A68_PARSER (source_file_size)) {
666 k++;
667 }
668 append_source_line (A68_PARSER (scan_buf), &ref_l, &line_num, FILE_SOURCE_NAME (&A68_JOB));
669 SCAN_ERROR (l != (ssize_t) strlen (A68_PARSER (scan_buf)), NO_LINE, NO_TEXT, ERROR_FILE_SOURCE_CTRL);
670 }
671 // Postlude.
672 append_environ (postlude, &ref_l, &line_num, "postlude");
673 // Concatenate lines.
674 concatenate_lines (TOP_LINE (&A68_JOB));
675 // Include files.
676 include_files (TOP_LINE (&A68_JOB));
677 return A68_TRUE;
678 }
679
680 //! @brief Next_char get next character from internal copy of source file.
681
682 char next_char (LINE_T ** ref_l, char **ref_s, BOOL_T allow_typo)
683 {
684 char ch;
685 #if defined (NO_TYPO)
686 allow_typo = A68_FALSE;
687 #endif
688 LOW_STACK_ALERT (NO_NODE);
689 // Source empty?.
690 if (*ref_l == NO_LINE) {
691 return STOP_CHAR;
692 } else {
693 LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68_JOB) & SOURCE_MASK ? A68_TRUE : A68_FALSE);
694 // Take new line?.
695 if ((*ref_s)[0] == NEWLINE_CHAR || (*ref_s)[0] == NULL_CHAR) {
696 *ref_l = NEXT (*ref_l);
697 if (*ref_l == NO_LINE) {
698 return STOP_CHAR;
699 }
700 *ref_s = STRING (*ref_l);
701 } else {
702 (*ref_s)++;
703 }
704 // Deliver next char.
705 ch = (*ref_s)[0];
706 if (allow_typo && (IS_SPACE (ch) || ch == FORMFEED_CHAR)) {
707 ch = next_char (ref_l, ref_s, allow_typo);
708 }
709 return ch;
710 }
711 }
712
713 //! @brief Find first character that can start a valid symbol.
714
715 void get_good_char (char *ref_c, LINE_T ** ref_l, char **ref_s)
716 {
717 while (*ref_c != STOP_CHAR && (IS_SPACE (*ref_c) || (*ref_c == NULL_CHAR))) {
718 if (*ref_l != NO_LINE) {
719 LIST (*ref_l) = (BOOL_T) (OPTION_NODEMASK (&A68_JOB) & SOURCE_MASK ? A68_TRUE : A68_FALSE);
720 }
721 *ref_c = next_char (ref_l, ref_s, A68_FALSE);
722 }
723 }
724
725 //! @brief Handle a pragment (pragmat or comment).
726
727 char *pragment (int type, LINE_T ** ref_l, char **ref_c)
728 {
729 #define INIT_BUFFER {chars_in_buf = 0; A68_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
730 #define ADD_ONE_CHAR(ch) {A68_PARSER (scan_buf)[chars_in_buf ++] = ch; A68_PARSER (scan_buf)[chars_in_buf] = NULL_CHAR;}
731 char c = **ref_c, *term_s = NO_TEXT, *start_c = *ref_c;
732 char *z = NO_TEXT;
733 LINE_T *start_l = *ref_l;
734 int term_s_length, chars_in_buf;
735 BOOL_T stop, pragmat = A68_FALSE;
736 // Set terminator.
737 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
738 if (type == STYLE_I_COMMENT_SYMBOL) {
739 term_s = "CO";
740 } else if (type == STYLE_II_COMMENT_SYMBOL) {
741 term_s = "#";
742 } else if (type == BOLD_COMMENT_SYMBOL) {
743 term_s = "COMMENT";
744 } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
745 term_s = "PR";
746 pragmat = A68_TRUE;
747 } else if (type == BOLD_PRAGMAT_SYMBOL) {
748 term_s = "PRAGMAT";
749 pragmat = A68_TRUE;
750 }
751 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
752 if (type == STYLE_I_COMMENT_SYMBOL) {
753 term_s = "'CO'";
754 } else if (type == STYLE_II_COMMENT_SYMBOL) {
755 term_s = "#";
756 } else if (type == BOLD_COMMENT_SYMBOL) {
757 term_s = "'COMMENT'";
758 } else if (type == STYLE_I_PRAGMAT_SYMBOL) {
759 term_s = "'PR'";
760 pragmat = A68_TRUE;
761 } else if (type == BOLD_PRAGMAT_SYMBOL) {
762 term_s = "'PRAGMAT'";
763 pragmat = A68_TRUE;
764 }
765 }
766 term_s_length = (int) strlen (term_s);
767 // Scan for terminator.
768 INIT_BUFFER;
769 stop = A68_FALSE;
770 while (stop == A68_FALSE) {
771 SCAN_ERROR (c == STOP_CHAR, start_l, start_c, ERROR_UNTERMINATED_PRAGMENT);
772 // A ".." or '..' delimited string in a PRAGMAT.
773 if (pragmat && (c == QUOTE_CHAR || (c == '\'' && OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING))) {
774 char delim = c;
775 BOOL_T eos = A68_FALSE;
776 ADD_ONE_CHAR (c);
777 c = next_char (ref_l, ref_c, A68_FALSE);
778 while (!eos) {
779 SCAN_ERROR (EOL (c), start_l, start_c, ERROR_LONG_STRING);
780 if (c == delim) {
781 ADD_ONE_CHAR (delim);
782 save_state (*ref_l, *ref_c, c);
783 c = next_char (ref_l, ref_c, A68_FALSE);
784 if (c == delim) {
785 c = next_char (ref_l, ref_c, A68_FALSE);
786 } else {
787 restore_state (ref_l, ref_c, &c);
788 eos = A68_TRUE;
789 }
790 } else if (IS_PRINT (c)) {
791 ADD_ONE_CHAR (c);
792 c = next_char (ref_l, ref_c, A68_FALSE);
793 } else {
794 unworthy (start_l, start_c, c);
795 }
796 }
797 } else if (EOL (c)) {
798 ADD_ONE_CHAR (NEWLINE_CHAR);
799 } else if (IS_PRINT (c) || IS_SPACE (c)) {
800 ADD_ONE_CHAR (c);
801 }
802 if (chars_in_buf >= term_s_length) {
803 // Check whether we encountered the terminator.
804 stop = (BOOL_T) (strcmp (term_s, &(A68_PARSER (scan_buf)[chars_in_buf - term_s_length])) == 0);
805 }
806 c = next_char (ref_l, ref_c, A68_FALSE);
807 }
808 A68_PARSER (scan_buf)[chars_in_buf - term_s_length] = NULL_CHAR;
809 z = new_string (term_s, A68_PARSER (scan_buf), term_s, NO_TEXT);
810 if (type == STYLE_I_PRAGMAT_SYMBOL || type == BOLD_PRAGMAT_SYMBOL) {
811 isolate_options (A68_PARSER (scan_buf), start_l);
812 }
813 return z;
814 #undef ADD_ONE_CHAR
815 #undef INIT_BUFFER
816 }
817
818 //! @brief Attribute for format item.
819
820 int get_format_item (char ch)
821 {
822 switch (TO_LOWER (ch)) {
823 case 'a': {
824 return FORMAT_ITEM_A;
825 }
826 case 'b': {
827 return FORMAT_ITEM_B;
828 }
829 case 'c': {
830 return FORMAT_ITEM_C;
831 }
832 case 'd': {
833 return FORMAT_ITEM_D;
834 }
835 case 'e': {
836 return FORMAT_ITEM_E;
837 }
838 case 'f': {
839 return FORMAT_ITEM_F;
840 }
841 case 'g': {
842 return FORMAT_ITEM_G;
843 }
844 case 'h': {
845 return FORMAT_ITEM_H;
846 }
847 case 'i': {
848 return FORMAT_ITEM_I;
849 }
850 case 'j': {
851 return FORMAT_ITEM_J;
852 }
853 case 'k': {
854 return FORMAT_ITEM_K;
855 }
856 case 'l':
857 case '/': {
858 return FORMAT_ITEM_L;
859 }
860 case 'm': {
861 return FORMAT_ITEM_M;
862 }
863 case 'n': {
864 return FORMAT_ITEM_N;
865 }
866 case 'o': {
867 return FORMAT_ITEM_O;
868 }
869 case 'p': {
870 return FORMAT_ITEM_P;
871 }
872 case 'q': {
873 return FORMAT_ITEM_Q;
874 }
875 case 'r': {
876 return FORMAT_ITEM_R;
877 }
878 case 's': {
879 return FORMAT_ITEM_S;
880 }
881 case 't': {
882 return FORMAT_ITEM_T;
883 }
884 case 'u': {
885 return FORMAT_ITEM_U;
886 }
887 case 'v': {
888 return FORMAT_ITEM_V;
889 }
890 case 'w': {
891 return FORMAT_ITEM_W;
892 }
893 case 'x': {
894 return FORMAT_ITEM_X;
895 }
896 case 'y': {
897 return FORMAT_ITEM_Y;
898 }
899 case 'z': {
900 return FORMAT_ITEM_Z;
901 }
902 case '+': {
903 return FORMAT_ITEM_PLUS;
904 }
905 case '-': {
906 return FORMAT_ITEM_MINUS;
907 }
908 case POINT_CHAR: {
909 return FORMAT_ITEM_POINT;
910 }
911 case '%': {
912 return FORMAT_ITEM_ESCAPE;
913 }
914 default: {
915 return 0;
916 }
917 }
918 }
919
920 //! @brief Whether input shows exponent character.
921
922 BOOL_T is_exp_char (LINE_T ** ref_l, char **ref_s, char *ch)
923 {
924 BOOL_T ret = A68_FALSE;
925 char exp_syms[3];
926 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
927 exp_syms[0] = EXPONENT_CHAR;
928 exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
929 exp_syms[2] = NULL_CHAR;
930 } else {
931 exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
932 exp_syms[1] = BACKSLASH_CHAR;
933 exp_syms[2] = NULL_CHAR;
934 }
935 save_state (*ref_l, *ref_s, *ch);
936 if (strchr (exp_syms, *ch) != NO_TEXT) {
937 *ch = next_char (ref_l, ref_s, A68_TRUE);
938 ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
939 }
940 restore_state (ref_l, ref_s, ch);
941 return ret;
942 }
943
944 //! @brief Whether input shows radix character.
945
946 BOOL_T is_radix_char (LINE_T ** ref_l, char **ref_s, char *ch)
947 {
948 BOOL_T ret = A68_FALSE;
949 save_state (*ref_l, *ref_s, *ch);
950 if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
951 if (*ch == TO_UPPER (RADIX_CHAR)) {
952 *ch = next_char (ref_l, ref_s, A68_TRUE);
953 ret = (BOOL_T) (strchr ("0123456789ABCDEF", *ch) != NO_TEXT);
954 }
955 } else {
956 if (*ch == RADIX_CHAR) {
957 *ch = next_char (ref_l, ref_s, A68_TRUE);
958 ret = (BOOL_T) (strchr ("0123456789abcdef", *ch) != NO_TEXT);
959 }
960 }
961 restore_state (ref_l, ref_s, ch);
962 return ret;
963 }
964
965 //! @brief Whether input shows decimal point.
966
967 BOOL_T is_decimal_point (LINE_T ** ref_l, char **ref_s, char *ch)
968 {
969 BOOL_T ret = A68_FALSE;
970 save_state (*ref_l, *ref_s, *ch);
971 if (*ch == POINT_CHAR) {
972 char exp_syms[3];
973 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
974 exp_syms[0] = EXPONENT_CHAR;
975 exp_syms[1] = (char) TO_UPPER (EXPONENT_CHAR);
976 exp_syms[2] = NULL_CHAR;
977 } else {
978 exp_syms[0] = (char) TO_UPPER (EXPONENT_CHAR);
979 exp_syms[1] = BACKSLASH_CHAR;
980 exp_syms[2] = NULL_CHAR;
981 }
982 *ch = next_char (ref_l, ref_s, A68_TRUE);
983 if (strchr (exp_syms, *ch) != NO_TEXT) {
984 *ch = next_char (ref_l, ref_s, A68_TRUE);
985 ret = (BOOL_T) (strchr ("+-0123456789", *ch) != NO_TEXT);
986 } else {
987 ret = (BOOL_T) (strchr ("0123456789", *ch) != NO_TEXT);
988 }
989 }
990 restore_state (ref_l, ref_s, ch);
991 return ret;
992 }
993
994 //! @brief Get next token from internal copy of source file..
995
996 void get_next_token (BOOL_T in_format, LINE_T ** ref_l, char **ref_s, LINE_T ** start_l, char **start_c, int *att)
997 {
998 char c = **ref_s, *sym = A68_PARSER (scan_buf);
999 sym[0] = NULL_CHAR;
1000 get_good_char (&c, ref_l, ref_s);
1001 *start_l = *ref_l;
1002 *start_c = *ref_s;
1003 if (c == STOP_CHAR) {
1004 // We are at EOF.
1005 (sym++)[0] = STOP_CHAR;
1006 sym[0] = NULL_CHAR;
1007 return;
1008 }
1009 // In a format.
1010 if (in_format) {
1011 char *format_items;
1012 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1013 format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1014 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1015 format_items = "/%\\+-.ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1016 } else {
1017 format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
1018 }
1019 if (strchr (format_items, c) != NO_TEXT) {
1020 // General format items.
1021 (sym++)[0] = c;
1022 sym[0] = NULL_CHAR;
1023 *att = get_format_item (c);
1024 (void) next_char (ref_l, ref_s, A68_FALSE);
1025 return;
1026 }
1027 if (IS_DIGIT (c)) {
1028 // INT denotation for static replicator.
1029 SCAN_DIGITS (c);
1030 sym[0] = NULL_CHAR;
1031 *att = STATIC_REPLICATOR;
1032 return;
1033 }
1034 }
1035 // Not in a format.
1036 if (IS_UPPER (c)) {
1037 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1038 // Upper case word - bold tag.
1039 while (IS_UPPER (c) || c == '_') {
1040 (sym++)[0] = c;
1041 c = next_char (ref_l, ref_s, A68_FALSE);
1042 }
1043 sym[0] = NULL_CHAR;
1044 *att = BOLD_TAG;
1045 } else if (OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1046 while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1047 (sym++)[0] = c;
1048 c = next_char (ref_l, ref_s, A68_TRUE);
1049 }
1050 sym[0] = NULL_CHAR;
1051 *att = IDENTIFIER;
1052 }
1053 } else if (c == '\'') {
1054 // Quote, uppercase word, quote - bold tag.
1055 int k = 0;
1056 c = next_char (ref_l, ref_s, A68_FALSE);
1057 while (IS_UPPER (c) || IS_DIGIT (c) || c == '_') {
1058 (sym++)[0] = c;
1059 k++;
1060 c = next_char (ref_l, ref_s, A68_TRUE);
1061 }
1062 SCAN_ERROR (k == 0, *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1063 sym[0] = NULL_CHAR;
1064 *att = BOLD_TAG;
1065 // Skip terminating quote, or complain if it is not there.
1066 SCAN_ERROR (c != '\'', *start_l, *start_c, ERROR_QUOTED_BOLD_TAG);
1067 c = next_char (ref_l, ref_s, A68_FALSE);
1068 } else if (IS_LOWER (c)) {
1069 // Lower case word - identifier.
1070 while (IS_LOWER (c) || IS_DIGIT (c) || c == '_') {
1071 (sym++)[0] = c;
1072 c = next_char (ref_l, ref_s, A68_TRUE);
1073 }
1074 sym[0] = NULL_CHAR;
1075 *att = IDENTIFIER;
1076 } else if (c == POINT_CHAR) {
1077 // Begins with a point symbol - point, dotdot, L REAL denotation.
1078 if (is_decimal_point (ref_l, ref_s, &c)) {
1079 (sym++)[0] = '0';
1080 (sym++)[0] = POINT_CHAR;
1081 c = next_char (ref_l, ref_s, A68_TRUE);
1082 SCAN_DIGITS (c);
1083 if (is_exp_char (ref_l, ref_s, &c)) {
1084 SCAN_EXPONENT_PART (c);
1085 }
1086 sym[0] = NULL_CHAR;
1087 *att = REAL_DENOTATION;
1088 } else {
1089 c = next_char (ref_l, ref_s, A68_TRUE);
1090 if (c == POINT_CHAR) {
1091 (sym++)[0] = POINT_CHAR;
1092 (sym++)[0] = POINT_CHAR;
1093 sym[0] = NULL_CHAR;
1094 *att = DOTDOT_SYMBOL;
1095 c = next_char (ref_l, ref_s, A68_FALSE);
1096 } else {
1097 (sym++)[0] = POINT_CHAR;
1098 sym[0] = NULL_CHAR;
1099 *att = POINT_SYMBOL;
1100 }
1101 }
1102 } else if (IS_DIGIT (c)) {
1103 // Something that begins with a digit - L INT denotation, L REAL denotation.
1104 SCAN_DIGITS (c);
1105 if (is_decimal_point (ref_l, ref_s, &c)) {
1106 c = next_char (ref_l, ref_s, A68_TRUE);
1107 if (is_exp_char (ref_l, ref_s, &c)) {
1108 (sym++)[0] = POINT_CHAR;
1109 (sym++)[0] = '0';
1110 SCAN_EXPONENT_PART (c);
1111 *att = REAL_DENOTATION;
1112 } else {
1113 (sym++)[0] = POINT_CHAR;
1114 SCAN_DIGITS (c);
1115 if (is_exp_char (ref_l, ref_s, &c)) {
1116 SCAN_EXPONENT_PART (c);
1117 }
1118 *att = REAL_DENOTATION;
1119 }
1120 } else if (is_exp_char (ref_l, ref_s, &c)) {
1121 SCAN_EXPONENT_PART (c);
1122 *att = REAL_DENOTATION;
1123 } else if (is_radix_char (ref_l, ref_s, &c)) {
1124 (sym++)[0] = c;
1125 c = next_char (ref_l, ref_s, A68_TRUE);
1126 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING) {
1127 while (IS_DIGIT (c) || strchr ("abcdef", c) != NO_TEXT) {
1128 (sym++)[0] = c;
1129 c = next_char (ref_l, ref_s, A68_TRUE);
1130 }
1131 } else {
1132 while (IS_DIGIT (c) || strchr ("ABCDEF", c) != NO_TEXT) {
1133 (sym++)[0] = c;
1134 c = next_char (ref_l, ref_s, A68_TRUE);
1135 }
1136 }
1137 *att = BITS_DENOTATION;
1138 } else {
1139 *att = INT_DENOTATION;
1140 }
1141 sym[0] = NULL_CHAR;
1142 } else if (c == QUOTE_CHAR) {
1143 // STRING denotation.
1144 BOOL_T stop = A68_FALSE;
1145 while (!stop) {
1146 c = next_char (ref_l, ref_s, A68_FALSE);
1147 while (c != QUOTE_CHAR && c != STOP_CHAR) {
1148 SCAN_ERROR (EOL (c), *start_l, *start_c, ERROR_LONG_STRING);
1149 (sym++)[0] = c;
1150 c = next_char (ref_l, ref_s, A68_FALSE);
1151 }
1152 SCAN_ERROR (*ref_l == NO_LINE, *start_l, *start_c, ERROR_UNTERMINATED_STRING);
1153 c = next_char (ref_l, ref_s, A68_FALSE);
1154 if (c == QUOTE_CHAR) {
1155 (sym++)[0] = QUOTE_CHAR;
1156 } else {
1157 stop = A68_TRUE;
1158 }
1159 }
1160 sym[0] = NULL_CHAR;
1161 *att = (in_format ? LITERAL : ROW_CHAR_DENOTATION);
1162 } else if (strchr ("#$()[]{},;@", c) != NO_TEXT) {
1163 // Single character symbols.
1164 (sym++)[0] = c;
1165 (void) next_char (ref_l, ref_s, A68_FALSE);
1166 sym[0] = NULL_CHAR;
1167 *att = 0;
1168 } else if (c == '|') {
1169 // Bar.
1170 (sym++)[0] = c;
1171 c = next_char (ref_l, ref_s, A68_FALSE);
1172 if (c == ':') {
1173 (sym++)[0] = c;
1174 (void) next_char (ref_l, ref_s, A68_FALSE);
1175 }
1176 sym[0] = NULL_CHAR;
1177 *att = 0;
1178 } else if (c == '!' && OPTION_STROPPING (&A68_JOB) == QUOTE_STROPPING) {
1179 // Bar, will be replaced with modern variant.
1180 // For this reason ! is not a MONAD with quote-stropping.
1181 (sym++)[0] = '|';
1182 c = next_char (ref_l, ref_s, A68_FALSE);
1183 if (c == ':') {
1184 (sym++)[0] = c;
1185 (void) next_char (ref_l, ref_s, A68_FALSE);
1186 }
1187 sym[0] = NULL_CHAR;
1188 *att = 0;
1189 } else if (c == ':') {
1190 // Colon, semicolon, IS, ISNT.
1191 (sym++)[0] = c;
1192 c = next_char (ref_l, ref_s, A68_FALSE);
1193 if (c == '=') {
1194 (sym++)[0] = c;
1195 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == ':') {
1196 (sym++)[0] = c;
1197 c = next_char (ref_l, ref_s, A68_FALSE);
1198 }
1199 } else if (c == '/') {
1200 (sym++)[0] = c;
1201 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == '=') {
1202 (sym++)[0] = c;
1203 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == ':') {
1204 (sym++)[0] = c;
1205 c = next_char (ref_l, ref_s, A68_FALSE);
1206 }
1207 }
1208 } else if (c == ':') {
1209 (sym++)[0] = c;
1210 if ((c = next_char (ref_l, ref_s, A68_FALSE)) == '=') {
1211 (sym++)[0] = c;
1212 }
1213 }
1214 sym[0] = NULL_CHAR;
1215 *att = 0;
1216 } else if (c == '=') {
1217 // Operator starting with "=".
1218 char *scanned = sym;
1219 (sym++)[0] = c;
1220 c = next_char (ref_l, ref_s, A68_FALSE);
1221 if (strchr (NOMADS, c) != NO_TEXT) {
1222 (sym++)[0] = c;
1223 c = next_char (ref_l, ref_s, A68_FALSE);
1224 }
1225 if (c == '=') {
1226 (sym++)[0] = c;
1227 if (next_char (ref_l, ref_s, A68_FALSE) == ':') {
1228 (sym++)[0] = ':';
1229 c = next_char (ref_l, ref_s, A68_FALSE);
1230 if (strlen (sym) < 4 && c == '=') {
1231 (sym++)[0] = '=';
1232 (void) next_char (ref_l, ref_s, A68_FALSE);
1233 }
1234 }
1235 } else if (c == ':') {
1236 (sym++)[0] = c;
1237 sym[0] = NULL_CHAR;
1238 if (next_char (ref_l, ref_s, A68_FALSE) == '=') {
1239 (sym++)[0] = '=';
1240 (void) next_char (ref_l, ref_s, A68_FALSE);
1241 } else {
1242 SCAN_ERROR (!(strcmp (scanned, "=:") == 0 || strcmp (scanned, "==:") == 0), *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1243 }
1244 }
1245 sym[0] = NULL_CHAR;
1246 if (strcmp (scanned, "=") == 0) {
1247 *att = EQUALS_SYMBOL;
1248 } else {
1249 *att = OPERATOR;
1250 }
1251 } else if (strchr (MONADS, c) != NO_TEXT || strchr (NOMADS, c) != NO_TEXT) {
1252 // Operator.
1253 char *scanned = sym;
1254 (sym++)[0] = c;
1255 c = next_char (ref_l, ref_s, A68_FALSE);
1256 if (strchr (NOMADS, c) != NO_TEXT) {
1257 (sym++)[0] = c;
1258 c = next_char (ref_l, ref_s, A68_FALSE);
1259 }
1260 if (c == '=') {
1261 (sym++)[0] = c;
1262 if (next_char (ref_l, ref_s, A68_FALSE) == ':') {
1263 (sym++)[0] = ':';
1264 c = next_char (ref_l, ref_s, A68_FALSE);
1265 if (strlen (scanned) < 4 && c == '=') {
1266 (sym++)[0] = '=';
1267 (void) next_char (ref_l, ref_s, A68_FALSE);
1268 }
1269 }
1270 } else if (c == ':') {
1271 (sym++)[0] = c;
1272 sym[0] = NULL_CHAR;
1273 if (next_char (ref_l, ref_s, A68_FALSE) == '=') {
1274 (sym++)[0] = '=';
1275 sym[0] = NULL_CHAR;
1276 (void) next_char (ref_l, ref_s, A68_FALSE);
1277 } else {
1278 SCAN_ERROR (strcmp (&(scanned[1]), "=:") != 0, *start_l, *start_c, ERROR_INVALID_OPERATOR_TAG);
1279 }
1280 }
1281 sym[0] = NULL_CHAR;
1282 *att = OPERATOR;
1283 } else {
1284 // Afuuus ... strange characters!.
1285 unworthy (*start_l, *start_c, (int) c);
1286 }
1287 }
1288
1289 //! @brief Whether att opens an embedded clause.
1290
1291 BOOL_T open_nested_clause (int att)
1292 {
1293 switch (att) {
1294 case OPEN_SYMBOL:
1295 case BEGIN_SYMBOL:
1296 case PAR_SYMBOL:
1297 case IF_SYMBOL:
1298 case CASE_SYMBOL:
1299 case FOR_SYMBOL:
1300 case FROM_SYMBOL:
1301 case BY_SYMBOL:
1302 case TO_SYMBOL:
1303 case DOWNTO_SYMBOL:
1304 case WHILE_SYMBOL:
1305 case DO_SYMBOL:
1306 case SUB_SYMBOL:
1307 case ACCO_SYMBOL: {
1308 return A68_TRUE;
1309 }
1310 }
1311 return A68_FALSE;
1312 }
1313
1314 //! @brief Whether att closes an embedded clause.
1315
1316 BOOL_T close_nested_clause (int att)
1317 {
1318 switch (att) {
1319 case CLOSE_SYMBOL:
1320 case END_SYMBOL:
1321 case FI_SYMBOL:
1322 case ESAC_SYMBOL:
1323 case OD_SYMBOL:
1324 case BUS_SYMBOL:
1325 case OCCA_SYMBOL: {
1326 return A68_TRUE;
1327 }
1328 }
1329 return A68_FALSE;
1330 }
1331
1332 //! @brief Cast a string to lower case.
1333
1334 void make_lower_case (char *p)
1335 {
1336 for (; p != NO_TEXT && p[0] != NULL_CHAR; p++) {
1337 p[0] = (char) TO_LOWER (p[0]);
1338 }
1339 }
1340
1341 //! @brief Construct a linear list of tokens.
1342
1343 void tokenise_source (NODE_T ** root, int level, BOOL_T in_format, LINE_T ** l, char **s, LINE_T ** start_l, char **start_c)
1344 {
1345 char *lpr = NO_TEXT;
1346 int lprt = 0;
1347 while (l != NO_VAR && !A68_PARSER (stop_scanner)) {
1348 int att = 0;
1349 get_next_token (in_format, l, s, start_l, start_c, &att);
1350 if (A68_PARSER (scan_buf)[0] == STOP_CHAR) {
1351 A68_PARSER (stop_scanner) = A68_TRUE;
1352 } else if (strlen (A68_PARSER (scan_buf)) > 0 || att == ROW_CHAR_DENOTATION || att == LITERAL) {
1353 KEYWORD_T *kw;
1354 char *c = NO_TEXT;
1355 BOOL_T make_node = A68_TRUE;
1356 char *trailing = NO_TEXT;
1357 if (att != IDENTIFIER) {
1358 kw = find_keyword (A68 (top_keyword), A68_PARSER (scan_buf));
1359 } else {
1360 kw = NO_KEYWORD;
1361 }
1362 if (!(kw != NO_KEYWORD && att != ROW_CHAR_DENOTATION)) {
1363 if (att == IDENTIFIER) {
1364 make_lower_case (A68_PARSER (scan_buf));
1365 }
1366 if (att != ROW_CHAR_DENOTATION && att != LITERAL) {
1367 int len = (int) strlen (A68_PARSER (scan_buf));
1368 while (len >= 1 && A68_PARSER (scan_buf)[len - 1] == '_') {
1369 trailing = "_";
1370 A68_PARSER (scan_buf)[len - 1] = NULL_CHAR;
1371 len--;
1372 }
1373 }
1374 c = TEXT (add_token (&A68 (top_token), A68_PARSER (scan_buf)));
1375 } else {
1376 if (IS (kw, TO_SYMBOL)) {
1377 // Merge GO and TO to GOTO.
1378 if (*root != NO_NODE && IS (*root, GO_SYMBOL)) {
1379 ATTRIBUTE (*root) = GOTO_SYMBOL;
1380 NSYMBOL (*root) = TEXT (find_keyword (A68 (top_keyword), "GOTO"));
1381 make_node = A68_FALSE;
1382 } else {
1383 att = ATTRIBUTE (kw);
1384 c = TEXT (kw);
1385 }
1386 } else {
1387 if (att == 0 || att == BOLD_TAG) {
1388 att = ATTRIBUTE (kw);
1389 }
1390 c = TEXT (kw);
1391 // Handle pragments.
1392 if (att == STYLE_II_COMMENT_SYMBOL || att == STYLE_I_COMMENT_SYMBOL || att == BOLD_COMMENT_SYMBOL) {
1393 char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1394 if (lpr == NO_TEXT || (int) strlen (lpr) == 0) {
1395 lpr = nlpr;
1396 } else {
1397 char *stale = lpr;
1398 lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1399 a68_free (nlpr);
1400 a68_free (stale);
1401 }
1402 lprt = att;
1403 make_node = A68_FALSE;
1404 } else if (att == STYLE_I_PRAGMAT_SYMBOL || att == BOLD_PRAGMAT_SYMBOL) {
1405 char *nlpr = pragment (ATTRIBUTE (kw), l, s);
1406 if (lpr == NO_TEXT || (int) strlen (lpr) == 0) {
1407 lpr = nlpr;
1408 } else {
1409 char *stale = lpr;
1410 lpr = new_string (lpr, "\n\n", nlpr, NO_TEXT);
1411 a68_free (nlpr);
1412 a68_free (stale);
1413 }
1414 lprt = att;
1415 if (!A68_PARSER (stop_scanner)) {
1416 (void) set_options (OPTION_LIST (&A68_JOB), A68_FALSE);
1417 make_node = A68_FALSE;
1418 }
1419 }
1420 }
1421 }
1422 // Add token to the tree.
1423 if (make_node) {
1424 NODE_T *q = new_node ();
1425 INFO (q) = new_node_info ();
1426 switch (att) {
1427 case ASSIGN_SYMBOL:
1428 case END_SYMBOL:
1429 case ESAC_SYMBOL:
1430 case OD_SYMBOL:
1431 case OF_SYMBOL:
1432 case FI_SYMBOL:
1433 case CLOSE_SYMBOL:
1434 case BUS_SYMBOL:
1435 case COLON_SYMBOL:
1436 case COMMA_SYMBOL:
1437 case DOTDOT_SYMBOL:
1438 case SEMI_SYMBOL: {
1439 GINFO (q) = NO_GINFO;
1440 break;
1441 }
1442 default: {
1443 GINFO (q) = new_genie_info ();
1444 break;
1445 }
1446 }
1447 STATUS (q) = OPTION_NODEMASK (&A68_JOB);
1448 LINE (INFO (q)) = *start_l;
1449 CHAR_IN_LINE (INFO (q)) = *start_c;
1450 PRIO (INFO (q)) = 0;
1451 PROCEDURE_LEVEL (INFO (q)) = 0;
1452 ATTRIBUTE (q) = att;
1453 NSYMBOL (q) = c;
1454 PREVIOUS (q) = *root;
1455 SUB (q) = NEXT (q) = NO_NODE;
1456 TABLE (q) = NO_TABLE;
1457 MOID (q) = NO_MOID;
1458 TAX (q) = NO_TAG;
1459 if (lpr != NO_TEXT) {
1460 NPRAGMENT (q) = lpr;
1461 NPRAGMENT_TYPE (q) = lprt;
1462 lpr = NO_TEXT;
1463 lprt = 0;
1464 }
1465 if (*root != NO_NODE) {
1466 NEXT (*root) = q;
1467 }
1468 if (TOP_NODE (&A68_JOB) == NO_NODE) {
1469 TOP_NODE (&A68_JOB) = q;
1470 }
1471 *root = q;
1472 if (trailing != NO_TEXT) {
1473 diagnostic (A68_WARNING | A68_FORCE_DIAGNOSTICS, q, WARNING_TRAILING, trailing, att);
1474 }
1475 }
1476 // Redirection in tokenising formats. The scanner is a recursive-descent type as
1477 // to know when it scans a format text and when not.
1478 if (in_format && att == FORMAT_DELIMITER_SYMBOL) {
1479 return;
1480 } else if (!in_format && att == FORMAT_DELIMITER_SYMBOL) {
1481 tokenise_source (root, level + 1, A68_TRUE, l, s, start_l, start_c);
1482 } else if (in_format && open_nested_clause (att)) {
1483 NODE_T *z = PREVIOUS (*root);
1484 if (z != NO_NODE && is_one_of (z, FORMAT_ITEM_N, FORMAT_ITEM_G, FORMAT_ITEM_H, FORMAT_ITEM_F, STOP)) {
1485 tokenise_source (root, level, A68_FALSE, l, s, start_l, start_c);
1486 } else if (att == OPEN_SYMBOL) {
1487 ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1488 } else if (OPTION_BRACKETS (&A68_JOB) && att == SUB_SYMBOL) {
1489 ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1490 } else if (OPTION_BRACKETS (&A68_JOB) && att == ACCO_SYMBOL) {
1491 ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
1492 }
1493 } else if (!in_format && level > 0 && open_nested_clause (att)) {
1494 tokenise_source (root, level + 1, A68_FALSE, l, s, start_l, start_c);
1495 } else if (!in_format && level > 0 && close_nested_clause (att)) {
1496 return;
1497 } else if (in_format && att == CLOSE_SYMBOL) {
1498 ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1499 } else if (OPTION_BRACKETS (&A68_JOB) && in_format && att == BUS_SYMBOL) {
1500 ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1501 } else if (OPTION_BRACKETS (&A68_JOB) && in_format && att == OCCA_SYMBOL) {
1502 ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
1503 }
1504 }
1505 }
1506 }
1507
1508 //! @brief Tokenise source file, build initial syntax tree.
1509
1510 BOOL_T lexical_analyser (void)
1511 {
1512 LINE_T *l = NO_LINE, *start_l = NO_LINE;
1513 char *s = NO_TEXT, *start_c = NO_TEXT;
1514 NODE_T *root = NO_NODE;
1515 A68_PARSER (scan_buf) = NO_TEXT;
1516 A68_PARSER (max_scan_buf_length) = A68_PARSER (source_file_size) = get_source_size ();
1517 // Errors in file?.
1518 if (A68_PARSER (max_scan_buf_length) == 0) {
1519 return A68_FALSE;
1520 }
1521 if (OPTION_RUN_SCRIPT (&A68_JOB)) {
1522 A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) (8 + A68_PARSER (max_scan_buf_length)));
1523 if (!read_script_file ()) {
1524 return A68_FALSE;
1525 }
1526 } else {
1527 A68_PARSER (max_scan_buf_length) += KILOBYTE; // for the environ, more than enough
1528 A68_PARSER (scan_buf) = (char *) get_temp_heap_space ((unt) A68_PARSER (max_scan_buf_length));
1529 // Errors in file?.
1530 if (!read_source_file ()) {
1531 return A68_FALSE;
1532 }
1533 }
1534 // Start tokenising.
1535 A68_PARSER (read_error) = A68_FALSE;
1536 A68_PARSER (stop_scanner) = A68_FALSE;
1537 if ((l = TOP_LINE (&A68_JOB)) != NO_LINE) {
1538 s = STRING (l);
1539 }
1540 tokenise_source (&root, 0, A68_FALSE, &l, &s, &start_l, &start_c);
1541 return A68_TRUE;
1542 }