scan.c
1 //! @file scan.c
2 //! @author J. Marcel van der Veer
3 //
4 //! @section Copyright
5 //
6 // This file is part of VIF - vintage FORTRAN compiler.
7 // Copyright 2020-2026 J. Marcel van der Veer <algol68g@xs4all.nl>.
8 //
9 //! @section License
10 //
11 // This program is free software; you can redistribute it and/or modify it
12 // under the terms of the GNU General Public License as published by the
13 // Free Software Foundation; either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful, but
17 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 // more details. You should have received a copy of the GNU General Public
20 // License along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22 //! @section Synopsis
23 //!
24 //! Fortran scanner.
25
26 #include <vif.h>
27
28 #define ADD_CHAR(c) {curlex[k++] = (c);}
29 #define ADD_LEX ADD_CHAR (CUR_COL)
30 #define ADD_LEX_NEXT {ADD_LEX; next_col (ALLOW_CONT);}
31 #define ADD_RAW_NEXT {curlex[k++] = CUR_LIN.text[curcol]; next_col (ALLOW_CONT);}
32 #define ADD_CHR(c) {curlex[k++] = tolower (c);}
33
34 #define SKIP_SPACE\
35 while (CUR_COL == ' ') {\
36 next_col (ALLOW_CONT);\
37 }
38
39 void strip_leading_zeroes (char *s)
40 {
41 // Strip leading zeroes.
42 if (s != NO_TEXT) {
43 NEW_RECORD (t);
44 RECCPY (t, s);
45 RECCLR (s);
46 int_4 len = strlen (t), k = 0;
47 while (k < len && len > 1 && t[k] == '0') {
48 k++;
49 len--;
50 }
51 strcpy (s, &t[k]);
52 } else {
53 SYNTAX (2801, s);
54 }
55 }
56
57 void next_col (int_4 cont_allowed)
58 {
59 // Early FORTRAN allowed 20 cards for one source line (1 + 19 continuation cards).
60 // Below code allows for an arbitratry number of continuation cards.
61 //
62 curcol++;
63 if (column_72 && curcol == 72) { // Vintage source line cannot exceed column 72.
64 while (CUR_COL != '\0') {
65 curcol++;
66 }
67 }
68 if (CUR_COL == '\0' && cont_allowed) {
69 SAVE_POS (1);
70 curlin++;
71 if (curlin >= nftnlines) {
72 RESTORE_POS (1);
73 return;
74 }
75 if (strlen (CUR_LIN.text) < 6) {
76 RESTORE_POS (1);
77 return;
78 }
79 if (IS_COMMENT (POS (0)) || POS (5) == ' ') {
80 RESTORE_POS (1);
81 return;
82 }
83 for (int_4 i = 0; i < 5; i++) {
84 if (POS (i) != ' ') {
85 SYNTAX (2802, "continuation card columns 1-5");
86 }
87 }
88 curcol = 6;
89 }
90 }
91
92 void skip_card (int_4 check)
93 {
94 if (check) {
95 // When a correct statement has left the scanner at the final token.
96 if (!EQUAL (prelex, curlex)) {
97 SYNTAX (2803, curlex);
98 } else if (prelin == curlin) {
99 (void) scan (EXPECT_NONE);
100 if (prelin == curlin) {
101 if (!EQUAL (prelex, curlex)) {
102 SYNTAX (2804, curlex);
103 }
104 }
105 }
106 }
107 // VIF is sloppy about trailing text.
108 // This is intentional, old code can have text from column 73 onward.
109 if (prelin == curlin) {
110 int_4 rc;
111 do {
112 rc = scan (EXPECT_NONE);
113 }
114 while (rc != END_OF_LINE && rc != END_OF_MODULE);
115 } else if (CUR_LIN.text == NO_TEXT) {
116 return;
117 } else if (strlen (CUR_LIN.text) >= 6 && POS(5) != ' ') {
118 int_4 rc;
119 do {
120 rc = scan (EXPECT_NONE);
121 }
122 while (rc != END_OF_LINE && rc != END_OF_MODULE);
123 }
124 }
125
126 int_4 scan_hollerith (void)
127 {
128 int_4 k = 0, N = 0;
129 if (hollerith) {
130 SYNTAX (2805, "Hollerith strings are disabled");
131 }
132 if (!is_int4 (curlex, &N)) {
133 SCANER (2806, "invalid hollerith length", NO_TEXT);
134 return curret = END_OF_MODULE;
135 } else {
136 k = 0;
137 RECCLR (curlex);
138 ADD_CHR ('"');
139 next_col (ALLOW_CONT);
140 for (int_4 chr = 0, go_on = TRUE; chr < N && go_on; chr++) {
141 if (CUR_COL == '\0') {
142 go_on = FALSE;
143 } else if (CUR_COL == '"') {
144 ADD_CHR ('\"');
145 next_col (ALLOW_CONT);
146 } else {
147 ADD_RAW_NEXT;
148 }
149 }
150 ADD_CHR ('"');
151 return curret = TEXT;
152 }
153 }
154
155 int_4 scan_string (void)
156 {
157 int_4 k = 0;
158 ADD_CHR ('"');
159 next_col (ALLOW_CONT);
160 int_4 go_on = TRUE;
161 while (go_on) {
162 if (CUR_COL == '\0') {
163 SCANER (2807, "unterminated string", NO_TEXT);
164 ADD_CHR ('"');
165 return curret = END_OF_MODULE;
166 } else if (CUR_COL == '\'') {
167 SAVE_POS (1);
168 next_col (ALLOW_CONT);
169 if (CUR_COL == '\'') {
170 ADD_CHR ('\'');
171 next_col (ALLOW_CONT);
172 } else {
173 RESTORE_POS (1);
174 go_on = FALSE;
175 }
176 } else if (CUR_COL == '"') {
177 ADD_CHR ('\\');
178 ADD_CHR ('"');
179 next_col (ALLOW_CONT);
180 } else {
181 ADD_RAW_NEXT;
182 }
183 }
184 if (CUR_COL != '\'') {
185 SCANER (2808, "unterminated string", NO_TEXT);
186 return curret = END_OF_MODULE;
187 } else {
188 ADD_CHR ('"');
189 next_col (ALLOW_CONT);
190 return curret = TEXT;
191 }
192 }
193
194 int_4 scan_string_alt (void)
195 {
196 int_4 k = 0;
197 ADD_CHR ('"');
198 next_col (ALLOW_CONT);
199 int_4 go_on = TRUE;
200 while (go_on) {
201 if (CUR_COL == '\0') {
202 SCANER (2809, "unterminated string", NO_TEXT);
203 ADD_CHR ('"');
204 return curret = END_OF_MODULE;
205 } else if (CUR_COL == '"') {
206 SAVE_POS (1);
207 next_col (ALLOW_CONT);
208 if (CUR_COL == '"') {
209 ADD_CHR ('"');
210 next_col (ALLOW_CONT);
211 } else {
212 RESTORE_POS (1);
213 go_on = FALSE;
214 }
215 } else if (CUR_COL == '"') {
216 ADD_CHR ('\\');
217 ADD_CHR ('"');
218 next_col (ALLOW_CONT);
219 } else {
220 ADD_RAW_NEXT;
221 }
222 }
223 if (CUR_COL != '"') {
224 SCANER (2810, "unterminated string", NO_TEXT);
225 return curret = END_OF_MODULE;
226 } else {
227 ADD_CHR ('"');
228 next_col (ALLOW_CONT);
229 return curret = TEXT;
230 }
231 }
232
233 int_4 scan_exponent (void)
234 {
235 int_4 k = strlen (curlex);
236 if (EXPONENT (curcol)) {
237 ADD_LEX_NEXT;
238 SKIP_SPACE;
239 if (CUR_COL == '+' || CUR_COL == '-' || isdigit (CUR_COL)) {
240 ADD_LEX_NEXT;
241 SKIP_SPACE;
242 while (isdigit (CUR_COL)) {
243 ADD_LEX_NEXT;
244 SKIP_SPACE;
245 }
246 }
247 }
248 return curret = FLT_NUMBER;
249 }
250
251 int_4 scan_declarer (int_4 k)
252 {
253 if (TOKEN ("integer")) {
254 SKIP_SPACE;
255 if (CUR_COL == '*') {
256 ADD_LEX_NEXT;
257 SKIP_SPACE;
258 CHECKDIGIT (2811, CUR_COL);
259 while (isdigit (CUR_COL)) {
260 ADD_LEX_NEXT;
261 SKIP_SPACE;
262 }
263 }
264 return DECLAR;
265 }
266 if (TOKEN ("logical")) {
267 SKIP_SPACE;
268 if (CUR_COL == '*') {
269 ADD_LEX_NEXT;
270 SKIP_SPACE;
271 CHECKDIGIT (2812, CUR_COL);
272 while (isdigit (CUR_COL)) {
273 ADD_LEX_NEXT;
274 SKIP_SPACE;
275 }
276 }
277 return DECLAR;
278 }
279 if (TOKEN ("real")) {
280 SKIP_SPACE;
281 if (CUR_COL == '*') {
282 ADD_LEX_NEXT;
283 SKIP_SPACE;
284 CHECKDIGIT (2813, CUR_COL);
285 while (isdigit (CUR_COL)) {
286 ADD_LEX_NEXT;
287 SKIP_SPACE;
288 }
289 }
290 return DECLAR;
291 }
292 if (TOKEN ("complex")) {
293 SKIP_SPACE;
294 if (CUR_COL == '*') {
295 ADD_LEX_NEXT;
296 SKIP_SPACE;
297 CHECKDIGIT (2814, CUR_COL);
298 while (isdigit (CUR_COL)) {
299 ADD_LEX_NEXT;
300 SKIP_SPACE;
301 }
302 }
303 return DECLAR;
304 }
305 if (TOKEN ("character")) {
306 SKIP_SPACE;
307 if (CUR_COL == '*') {
308 ADD_LEX_NEXT;
309 SKIP_SPACE;
310 if (CUR_COL == '(') {
311 ADD_LEX_NEXT;
312 SKIP_SPACE;
313 if (CUR_COL == '*') {
314 ADD_LEX_NEXT;
315 } else {
316 while (islower (CUR_COL) || isdigit (CUR_COL) || CUR_COL == '_') {
317 ADD_LEX_NEXT;
318 }
319 }
320 SKIP_SPACE;
321 if (CUR_COL == ')') {
322 ADD_LEX_NEXT;
323 }
324 } else {
325 CHECKDIGIT (2815, CUR_COL);
326 while (isdigit (CUR_COL)) {
327 ADD_LEX_NEXT;
328 SKIP_SPACE;
329 }
330 }
331 }
332 return DECLAR;
333 }
334 return LEXEME;
335 }
336
337 int_4 scan_bin (int_4 k)
338 {
339 // Radix 2 number.
340 ADD_CHR ('0');
341 ADD_CHR ('b');
342 next_col (ALLOW_CONT);
343 while (CUR_COL != '\'' && CUR_COL != '\0') {
344 if (CUR_COL != '0' && CUR_COL != '1') {
345 SCANER (2816, "invalid binary digit", NO_TEXT);
346 return END_OF_MODULE;
347 }
348 ADD_LEX_NEXT;
349 }
350 if (CUR_COL != '\'') {
351 SCANER (2817, "invalid denotation", NO_TEXT);
352 return END_OF_MODULE;
353 }
354 next_col (ALLOW_CONT);
355 strip_leading_zeroes (curlex);
356 return INT_NUMBER;
357 }
358
359 int_4 scan_hex (int_4 k)
360 {
361 // Radix 16 number.
362 ADD_CHR ('0');
363 ADD_CHR ('x');
364 next_col (ALLOW_CONT);
365 while (CUR_COL != '\'' && CUR_COL != '\0') {
366 if (!isxdigit (CUR_COL)) {
367 SCANER (2818, "invalid hex digit", NO_TEXT);
368 return END_OF_MODULE;
369 }
370 ADD_LEX_NEXT;
371 }
372 if (CUR_COL != '\'') {
373 SCANER (2819, "invalid denotation", NO_TEXT);
374 return END_OF_MODULE;
375 }
376 next_col (ALLOW_CONT);
377 strip_leading_zeroes (curlex);
378 return INT_NUMBER;
379 }
380
381 int_4 scan_part (char *expect)
382 {
383 int_4 k = 0;
384 RECCLR (curlex);
385 CUR_LIN.proc = nprocs;
386 // Skip empty lines.
387 if (curcol == START_OF_LINE && curlin < nftnlines) {
388 if (POS (0) == '\0') {
389 curlin++;
390 return curret = scan_part (expect);
391 }
392 }
393 if (curcol > START_OF_LINE && CUR_COL == '\0') {
394 // Next scan starts at new line.
395 curlin++;
396 curcol = START_OF_LINE;
397 }
398 if (curlin >= nftnlines) {
399 return curret = END_OF_MODULE;
400 }
401 if (curcol == START_OF_LINE) {
402 while (POS (0) == '\0' || IS_COMMENT (POS (0))) {
403 if (POS (0) == '/') {
404 vif_jcl ();
405 }
406 curlin++;
407 if (curlin >= nftnlines) {
408 return curret = END_OF_MODULE;
409 }
410 }
411 if (CUR_LIN.isn > 0) {
412 if (POS (5) == ' ') {
413 curcol = 0;
414 return curret = END_OF_LINE;
415 } else {
416 // All but first line can be continuations.
417 curcol = 6;
418 }
419 } else {
420 curcol = 0;
421 }
422 }
423 // Skip trailing blanks.
424 SKIP_SPACE;
425 if (CUR_COL == '\0') {
426 // No symbol left at card, scan again on next card.
427 curlin++;
428 curcol = START_OF_LINE;
429 return curret = scan_part (expect);
430 }
431 // Mark start of lexeme for messages.
432 prelin = curlin;
433 precol = curcol;
434 if (islower (CUR_COL)) {
435 // A letter, possibly a radix.
436 if (CUR_COL == 'b') {
437 SAVE_POS (1);
438 next_col (ALLOW_CONT);
439 if (CUR_COL != '\'') {
440 RESTORE_POS (1);
441 } else {
442 return curret = scan_bin (k);
443 }
444 } else if (CUR_COL == 'x') {
445 SAVE_POS (1);
446 next_col (ALLOW_CONT);
447 if (CUR_COL != '\'') {
448 RESTORE_POS (1);
449 } else {
450 return curret = scan_hex (k);
451 }
452 }
453 // Fetch identifier or keyword.
454 // Identifiers may contain spaces if the part upto the first space is not a keyword.
455 // Here VIF differs from vintage FORTRAN.
456 int_4 space_chk = TRUE;
457 while (_IDFCHAR (CUR_COL)) {
458 if (CUR_COL == ' ') {
459 if (space_chk && reserved (curlex)) {
460 break;
461 } else {
462 space_chk = FALSE;
463 }
464 } else if (CUR_COL == '$') {
465 ADD_CHAR ('_');
466 } else {
467 ADD_LEX;
468 }
469 next_col (ALLOW_CONT);
470 }
471 // END, END DO, END IF, END WHILE
472 if (TOKEN ("end")) {
473 SKIP_SPACE;
474 while (islower (CUR_COL)) {
475 ADD_LEX_NEXT;
476 }
477 }
478 // ELSE IF
479 if (TOKEN ("else")) {
480 SKIP_SPACE;
481 while (islower (CUR_COL)) {
482 ADD_LEX_NEXT;
483 }
484 }
485 // Catch declarers here.
486 if (scan_declarer (k) == DECLAR) {
487 return curret = DECLAR;
488 } else {
489 return curret = WORD;
490 }
491 } else if (isdigit (CUR_COL)) {
492 if (curcol < 5) {
493 // Label.
494 while (curcol < 5 && isdigit (CUR_COL)) {
495 ADD_LEX_NEXT;
496 SKIP_SPACE;
497 }
498 return curret = LABEL;
499 }
500 // Number.
501 while (isdigit (CUR_COL)) {
502 ADD_LEX_NEXT;
503 SKIP_SPACE;
504 }
505 if (EQUAL (expect, EXPECT_LABEL)) {
506 return curret = LABEL;
507 }
508 if (CUR_COL == 'h') {
509 // Hollerith operand
510 return scan_hollerith ();
511 }
512 if (CUR_COL != '.' && !EXPONENT (curcol)) {
513 strip_leading_zeroes (curlex);
514 return curret = INT_NUMBER;
515 } else {
516 if (CUR_COL == '.') {
517 // Special symbols .XYZZY. like (n/2.eq.1).
518 SAVE_POS (1);
519 next_col (ALLOW_CONT);
520 while (islower (CUR_COL)) {
521 next_col (ALLOW_CONT);
522 }
523 if (CUR_COL == '.') {
524 RESTORE_POS (1);
525 strip_leading_zeroes (curlex);
526 return curret = INT_NUMBER;
527 } else {
528 RESTORE_POS (1);
529 }
530 // Fraction.
531 ADD_LEX_NEXT;
532 SKIP_SPACE;
533 while (isdigit (CUR_COL)) {
534 ADD_LEX_NEXT;
535 SKIP_SPACE;
536 }
537 }
538 // Exponent part.
539 return scan_exponent ();
540 }
541 } else if (CUR_COL == '.') {
542 ADD_LEX_NEXT;
543 SKIP_SPACE;
544 // Fraction.
545 if (isdigit (CUR_COL)) {
546 while (isdigit (CUR_COL)) {
547 ADD_LEX_NEXT;
548 SKIP_SPACE;
549 }
550 // Exponent part.
551 return scan_exponent ();
552 }
553 // Special symbols .XYZZY. .
554 if (CUR_COL == '.') {
555 next_col (ALLOW_CONT);
556 }
557 while (islower (CUR_COL)) {
558 ADD_LEX_NEXT;
559 }
560 if (CUR_COL == '.') {
561 ADD_LEX_NEXT;
562 } else {
563 SCANER (2820, "stray dot", NO_TEXT);
564 }
565 } else if (CUR_COL == '*') {
566 ADD_LEX_NEXT;
567 if (CUR_COL == '*') {
568 ADD_LEX_NEXT;
569 } else if (CUR_COL == '%') {
570 ADD_LEX_NEXT;
571 }
572 } else if (CUR_COL == '/') {
573 // Concatenation.
574 ADD_LEX_NEXT;
575 if (CUR_COL == '/') {
576 ADD_LEX_NEXT;
577 } else if (CUR_COL == '=') {
578 ADD_LEX_NEXT;
579 }
580 } else if (CUR_COL == '\'') {
581 // Character string.
582 return scan_string ();
583 } else if (CUR_COL == '"') {
584 // Character string.
585 return scan_string_alt ();
586 } else if (CUR_COL == '=') {
587 ADD_LEX_NEXT;
588 if (CUR_COL == '=') {
589 ADD_LEX_NEXT;
590 }
591 } else if (CUR_COL == '!') {
592 ADD_LEX_NEXT;
593 if (CUR_COL == '=') {
594 ADD_LEX_NEXT;
595 }
596 } else if (CUR_COL == '<') {
597 ADD_LEX_NEXT;
598 if (CUR_COL == '=') {
599 ADD_LEX_NEXT;
600 }
601 } else if (CUR_COL == '>') {
602 ADD_LEX_NEXT;
603 if (CUR_COL == '=') {
604 ADD_LEX_NEXT;
605 }
606 } else if (CUR_COL != '\0') {
607 // Something else.
608 ADD_LEX_NEXT;
609 } else {
610 // No symbol left at card, scan again on next card.
611 curlin++;
612 curcol = START_OF_LINE;
613 return curret = scan_part (expect);
614 }
615 return curret = LEXEME;
616 }
617
618 int_4 scan (char *expect)
619 {
620 int_4 rc;
621 RECCPY (prelex, curlex);
622 preret = curret;
623 RECCLR (curlex);
624 rc = scan_part (expect);
625 if (rc == END_OF_LINE || rc == END_OF_MODULE) {
626 return curret = rc;
627 }
628 if (rc == LABEL) {
629 return curret = rc;
630 }
631 if (TOKEN ("double")) {
632 scan_part (EXPECT_NONE);
633 if (TOKEN ("precision")) {
634 // RECCPY (curlex, "real*8");
635 RECCPY (curlex, "doubleprecision");
636 } else if (TOKEN ("complex")) {
637 // RECCPY (curlex, "complex*16");
638 RECCPY (curlex, "doublecomplex");
639 } else {
640 RECCPY (curlex, "real*8");
641 EXPECT (2821, "precision");
642 }
643 return curret = DECLAR;
644 } else if (TOKEN ("go")) {
645 scan_part (EXPECT_NONE);
646 if (!TOKEN ("to")) {
647 SCANER (2822, "invalid goto", NO_TEXT);
648 }
649 RECCPY (curlex, "goto");
650 return curret = WORD;
651 }
652 if (EQUAL (expect, EXPECT_LABEL)) {
653 ;
654 } else if (expect != NO_TEXT && !EQUAL (curlex, expect)) {
655 NEW_RECORD (str);
656 _srecordf (str, "%s but found %s", expect, curlex);
657 EXPECT (2823, str);
658 return curret = ERR;
659 }
660 return curret = rc;
661 }
662
663 int_4 scan_fmt (void)
664 {
665 int_4 k = 0;
666 RECCPY (prelex, curlex);
667 preret = curret;
668 RECCLR (curlex);
669 CUR_LIN.proc = nprocs;
670 // Skip empty lines.
671 if (curcol == START_OF_LINE && curlin < nftnlines) {
672 if (POS (0) == '\0') {
673 curlin++;
674 return curret = scan_fmt ();
675 }
676 }
677 if (curcol > START_OF_LINE && CUR_COL == '\0') {
678 // Next scan starts at new line.
679 curlin++;
680 curcol = START_OF_LINE;
681 }
682 if (curlin >= nftnlines) {
683 return curret = END_OF_MODULE;
684 }
685 if (curcol == START_OF_LINE) {
686 while (IS_COMMENT (POS (0))) {
687 curlin++;
688 if (curlin >= nftnlines) {
689 return curret = END_OF_MODULE;
690 }
691 }
692 if (CUR_LIN.isn > 1) {
693 if (POS (5) == ' ') {
694 curcol = 0;
695 return curret = END_OF_LINE;
696 } else {
697 // All but first line can be continuations.
698 curcol = 6;
699 }
700 } else {
701 curcol = 0;
702 }
703 }
704 // Skip trailing blanks.
705 SKIP_SPACE;
706 if (CUR_COL == '\0') {
707 // No symbol left at card, scan again on next card.
708 curlin++;
709 curcol = START_OF_LINE;
710 return curret = scan_fmt ();
711 }
712 // Mark start of lexeme for messages.
713 prelin = curlin;
714 precol = curcol;
715 if (islower (CUR_COL)) {
716 // Format specifier.
717 while (islower (CUR_COL) || isdigit (CUR_COL)) {
718 ADD_RAW_NEXT;
719 }
720 if (CUR_COL == '.') {
721 ADD_LEX_NEXT;
722 }
723 while (isdigit (CUR_COL)) {
724 ADD_LEX_NEXT;
725 }
726 if (islower (CUR_COL)) {
727 ADD_RAW_NEXT;
728 while (isdigit (CUR_COL)) {
729 ADD_LEX_NEXT;
730 }
731 }
732 return curret = LEXEME;
733 } else if (isdigit (CUR_COL)) {
734 // Number.
735 while (isdigit (CUR_COL)) {
736 ADD_LEX_NEXT;
737 }
738 if (CUR_COL == 'h') {
739 // Hollerith format item
740 return scan_hollerith ();
741 } else {
742 strip_leading_zeroes (curlex);
743 return curret = INT_NUMBER;
744 }
745 } else if (CUR_COL == '\'') {
746 // Character string.
747 return scan_string ();
748 } else if (CUR_COL == '"') {
749 // Character string.
750 return scan_string_alt ();
751 } else if (CUR_COL != '\0') {
752 // Something else.
753 ADD_LEX_NEXT;
754 } else {
755 // No symbol left at card, scan again on next card.
756 curlin++;
757 curcol = START_OF_LINE;
758 return curret = scan_fmt ();
759 }
760 return curret = LEXEME;
761 }
762
763 logical_4 lookahead(char *expect)
764 {
765 (void) scan (EXPECT_NONE);
766 logical_4 check = TOKEN (expect);
767 UNSCAN;
768 return check;
769 }
|
© 2002-2026 J.M. van der Veer (jmvdveer@xs4all.nl)
|