scan.c
1 //! @file scan.c
2 //! @author J. Marcel van der Veer
3 //
4 //! @section Copyright
5 //
6 // This file is part of VIF - vintage FORTRAN compiler.
7 // Copyright 2020-2025 J. Marcel van der Veer <algol68g@xs4all.nl>.
8 //
9 //! @section License
10 //
11 // This program is free software; you can redistribute it and/or modify it
12 // under the terms of the GNU General Public License as published by the
13 // Free Software Foundation; either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful, but
17 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 // more details. You should have received a copy of the GNU General Public
20 // License along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22 //! @section Synopsis
23 //!
24 //! Fortran scanner.
25
26 #include <vif.h>
27
28 #define ADD_CHAR(c) {curlex[k++] = (c);}
29 #define ADD_LEX ADD_CHAR (CUR_COL)
30 #define ADD_LEX_NEXT {ADD_LEX; next_col (TRUE);}
31 #define ADD_RAW_NEXT {curlex[k++] = CUR_LIN.text[curcol]; next_col (TRUE);}
32 #define ADD_CHR(c) {curlex[k++] = tolower (c);}
33
34 #define SKIP_SPACE\
35 while (CUR_COL == ' ') {\
36 next_col (TRUE);\
37 }
38
39 void strip_leading_zeroes (char *s)
40 {
41 // Strip leading zeroes.
42 if (s != NO_TEXT) {
43 NEW_RECORD (t);
44 RECCPY (t, s);
45 RECCLR (s);
46 int_4 len = strlen (t), k = 0;
47 while (k < len && len > 1 && t[k] == '0') {
48 k++;
49 len--;
50 }
51 strcpy (s, &t[k]);
52 } else {
53 SYNTAX (2801, s);
54 }
55 }
56
57 void next_col (int_4 cont_allowed)
58 {
59 // Early FORTRAN allowed 20 cards for one source line (1 + 19 continuation cards).
60 // Below code allows for an arbitratry number of continuation cards.
61 curcol++;
62 if (CUR_COL == '\0') {
63 if (! cont_allowed) {
64 return;
65 }
66 SAVE_POS;
67 curlin++;
68 if (curlin >= nftnlines) {
69 RESTORE_POS;
70 return;
71 }
72 if (strlen (CUR_LIN.text) < 6) {
73 RESTORE_POS;
74 return;
75 }
76 if (IS_COMMENT (POS (0)) || POS (5) == ' ') {
77 RESTORE_POS;
78 return;
79 }
80 for (int_4 i = 0; i < 5; i++) {
81 if (POS (i) != ' ') {
82 SYNTAX (2802, "continuation card columns 1-5");
83 }
84 }
85 curcol = 6;
86 }
87 }
88
89 void skip_card (int_4 check)
90 {
91 if (check) {
92 // When a correct statement has left the scanner at the final token.
93 if (!EQUAL (prelex, curlex)) {
94 SYNTAX (2803, curlex);
95 } else if (prelin == curlin) {
96 (void) scan (EXPECT_NONE);
97 if (prelin == curlin) {
98 if (!EQUAL (prelex, curlex)) {
99 SYNTAX (2804, curlex);
100 }
101 }
102 }
103 }
104 // VIF is sloppy about trailing text.
105 // This is intentional, old code can have text from column 73 onward.
106 if (prelin == curlin) {
107 int_4 rc;
108 do {
109 rc = scan (EXPECT_NONE);
110 }
111 while (rc != END_OF_LINE && rc != END_OF_MODULE);
112 } else if (CUR_LIN.text == NO_TEXT) {
113 return;
114 } else if (strlen (CUR_LIN.text) >= 6 && POS(5) != ' ') {
115 int_4 rc;
116 do {
117 rc = scan (EXPECT_NONE);
118 }
119 while (rc != END_OF_LINE && rc != END_OF_MODULE);
120 }
121 }
122
123 int_4 scan_hollerith (void)
124 {
125 int_4 k = 0, N = 0;
126 if (hollerith) {
127 SYNTAX (2805, "disabled Hollerith string");
128 }
129 if (!is_int4 (curlex, &N)) {
130 SCANER (2806, "invalid hollerith length", NO_TEXT);
131 return curret = END_OF_MODULE;
132 } else {
133 k = 0;
134 RECCLR (curlex);
135 ADD_CHR ('"');
136 next_col (TRUE);
137 for (int_4 chr = 0, go_on = TRUE; chr < N && go_on; chr++) {
138 if (CUR_COL == '\0') {
139 go_on = FALSE;
140 } else if (CUR_COL == '"') {
141 ADD_CHR ('\"');
142 next_col (TRUE);
143 } else {
144 ADD_RAW_NEXT;
145 }
146 }
147 ADD_CHR ('"');
148 return curret = TEXT;
149 }
150 }
151
152 int_4 scan_string (void)
153 {
154 int_4 k = 0;
155 ADD_CHR ('"');
156 next_col (TRUE);
157 int_4 go_on = TRUE;
158 while (go_on) {
159 if (CUR_COL == '\0') {
160 SCANER (2807, "unterminated string", NO_TEXT);
161 ADD_CHR ('"');
162 return curret = END_OF_MODULE;
163 } else if (CUR_COL == '\'') {
164 SAVE_POS;
165 next_col (TRUE);
166 if (CUR_COL == '\'') {
167 ADD_CHR ('\'');
168 next_col (TRUE);
169 } else {
170 RESTORE_POS;
171 go_on = FALSE;
172 }
173 } else if (CUR_COL == '"') {
174 ADD_CHR ('\\');
175 ADD_CHR ('"');
176 next_col (TRUE);
177 } else {
178 ADD_RAW_NEXT;
179 }
180 }
181 if (CUR_COL != '\'') {
182 SCANER (2808, "unterminated string", NO_TEXT);
183 return curret = END_OF_MODULE;
184 } else {
185 ADD_CHR ('"');
186 next_col (TRUE);
187 return curret = TEXT;
188 }
189 }
190
191 int_4 scan_string_alt (void)
192 {
193 int_4 k = 0;
194 ADD_CHR ('"');
195 next_col (TRUE);
196 int_4 go_on = TRUE;
197 while (go_on) {
198 if (CUR_COL == '\0') {
199 SCANER (2809, "unterminated string", NO_TEXT);
200 ADD_CHR ('"');
201 return curret = END_OF_MODULE;
202 } else if (CUR_COL == '"') {
203 SAVE_POS;
204 next_col (TRUE);
205 if (CUR_COL == '"') {
206 ADD_CHR ('"');
207 next_col (TRUE);
208 } else {
209 RESTORE_POS;
210 go_on = FALSE;
211 }
212 } else if (CUR_COL == '"') {
213 ADD_CHR ('\\');
214 ADD_CHR ('"');
215 next_col (TRUE);
216 } else {
217 ADD_RAW_NEXT;
218 }
219 }
220 if (CUR_COL != '"') {
221 SCANER (2810, "unterminated string", NO_TEXT);
222 return curret = END_OF_MODULE;
223 } else {
224 ADD_CHR ('"');
225 next_col (TRUE);
226 return curret = TEXT;
227 }
228 }
229
230 int_4 scan_exponent (void)
231 {
232 int_4 k = strlen (curlex);
233 if (EXPONENT (curcol)) {
234 ADD_LEX_NEXT;
235 SKIP_SPACE;
236 if (CUR_COL == '+' || CUR_COL == '-' || isdigit (CUR_COL)) {
237 ADD_LEX_NEXT;
238 SKIP_SPACE;
239 while (isdigit (CUR_COL)) {
240 ADD_LEX_NEXT;
241 SKIP_SPACE;
242 }
243 }
244 }
245 return curret = FLT_NUMBER;
246 }
247
248 int_4 scan_declarer (int_4 k)
249 {
250 if (TOKEN ("integer")) {
251 SKIP_SPACE;
252 if (CUR_COL == '*') {
253 ADD_LEX_NEXT;
254 SKIP_SPACE;
255 CHECKDIGIT (2811, CUR_COL);
256 while (isdigit (CUR_COL)) {
257 ADD_LEX_NEXT;
258 SKIP_SPACE;
259 }
260 }
261 return DECLAR;
262 }
263 if (TOKEN ("logical")) {
264 SKIP_SPACE;
265 if (CUR_COL == '*') {
266 ADD_LEX_NEXT;
267 SKIP_SPACE;
268 CHECKDIGIT (2812, CUR_COL);
269 while (isdigit (CUR_COL)) {
270 ADD_LEX_NEXT;
271 SKIP_SPACE;
272 }
273 }
274 return DECLAR;
275 }
276 if (TOKEN ("real")) {
277 SKIP_SPACE;
278 if (CUR_COL == '*') {
279 ADD_LEX_NEXT;
280 SKIP_SPACE;
281 CHECKDIGIT (2813, CUR_COL);
282 while (isdigit (CUR_COL)) {
283 ADD_LEX_NEXT;
284 SKIP_SPACE;
285 }
286 }
287 return DECLAR;
288 }
289 if (TOKEN ("complex")) {
290 SKIP_SPACE;
291 if (CUR_COL == '*') {
292 ADD_LEX_NEXT;
293 SKIP_SPACE;
294 CHECKDIGIT (2814, CUR_COL);
295 while (isdigit (CUR_COL)) {
296 ADD_LEX_NEXT;
297 SKIP_SPACE;
298 }
299 }
300 return DECLAR;
301 }
302 if (TOKEN ("character")) {
303 SKIP_SPACE;
304 if (CUR_COL == '*') {
305 ADD_LEX_NEXT;
306 SKIP_SPACE;
307 if (CUR_COL == '(') {
308 ADD_LEX_NEXT;
309 SKIP_SPACE;
310 if (CUR_COL == '*') {
311 ADD_LEX_NEXT;
312 } else {
313 while (islower (CUR_COL) || isdigit (CUR_COL) || CUR_COL == '_') {
314 ADD_LEX_NEXT;
315 }
316 }
317 SKIP_SPACE;
318 if (CUR_COL == ')') {
319 ADD_LEX_NEXT;
320 }
321 } else {
322 CHECKDIGIT (2815, CUR_COL);
323 while (isdigit (CUR_COL)) {
324 ADD_LEX_NEXT;
325 SKIP_SPACE;
326 }
327 }
328 }
329 return DECLAR;
330 }
331 return LEXEME;
332 }
333
334 int_4 scan_bin (int_4 k)
335 {
336 // Radix 2 number.
337 ADD_CHR ('0');
338 ADD_CHR ('b');
339 next_col (TRUE);
340 while (CUR_COL != '\'' && CUR_COL != '\0') {
341 if (CUR_COL != '0' && CUR_COL != '1') {
342 SCANER (2816, "invalid binary digit", NO_TEXT);
343 return END_OF_MODULE;
344 }
345 ADD_LEX_NEXT;
346 }
347 if (CUR_COL != '\'') {
348 SCANER (2817, "invalid denotation", NO_TEXT);
349 return END_OF_MODULE;
350 }
351 next_col (TRUE);
352 strip_leading_zeroes (curlex);
353 return INT_NUMBER;
354 }
355
356 int_4 scan_hex (int_4 k)
357 {
358 // Radix 16 number.
359 ADD_CHR ('0');
360 ADD_CHR ('x');
361 next_col (TRUE);
362 while (CUR_COL != '\'' && CUR_COL != '\0') {
363 if (!isxdigit (CUR_COL)) {
364 SCANER (2818, "invalid hex digit", NO_TEXT);
365 return END_OF_MODULE;
366 }
367 ADD_LEX_NEXT;
368 }
369 if (CUR_COL != '\'') {
370 SCANER (2819, "invalid denotation", NO_TEXT);
371 return END_OF_MODULE;
372 }
373 next_col (TRUE);
374 strip_leading_zeroes (curlex);
375 return INT_NUMBER;
376 }
377
378 int_4 scan_part (char *expect)
379 {
380 int_4 k = 0;
381 RECCLR (curlex);
382 CUR_LIN.proc = nprocs;
383 // Skip empty lines.
384 if (curcol == START_OF_LINE && curlin < nftnlines) {
385 if (POS (0) == '\0') {
386 curlin++;
387 return curret = scan_part (expect);
388 }
389 }
390 if (curcol > START_OF_LINE && CUR_COL == '\0') {
391 // Next scan starts at new line.
392 curlin++;
393 curcol = START_OF_LINE;
394 }
395 if (curlin >= nftnlines) {
396 return curret = END_OF_MODULE;
397 }
398 if (curcol == START_OF_LINE) {
399 while (POS (0) == '\0' || IS_COMMENT (POS (0))) {
400 if (POS (0) == '/') {
401 vif_jcl ();
402 }
403 curlin++;
404 if (curlin >= nftnlines) {
405 return curret = END_OF_MODULE;
406 }
407 }
408 if (CUR_LIN.isn > 0) {
409 if (POS (5) == ' ') {
410 curcol = 0;
411 return curret = END_OF_LINE;
412 } else {
413 // All but first line can be continuations.
414 curcol = 6;
415 }
416 } else {
417 curcol = 0;
418 }
419 }
420 // Skip trailing blanks.
421 SKIP_SPACE;
422 if (CUR_COL == '\0') {
423 // No symbol left at card, scan again on next card.
424 curlin++;
425 curcol = START_OF_LINE;
426 return curret = scan_part (expect);
427 }
428 // Mark start of lexeme for messages.
429 prelin = curlin;
430 precol = curcol;
431 if (islower (CUR_COL)) {
432 // A letter, possibly a radix.
433 if (CUR_COL == 'b') {
434 SAVE_POS;
435 next_col (TRUE);
436 if (CUR_COL != '\'') {
437 RESTORE_POS;
438 } else {
439 return curret = scan_bin (k);
440 }
441 } else if (CUR_COL == 'x') {
442 SAVE_POS;
443 next_col (TRUE);
444 if (CUR_COL != '\'') {
445 RESTORE_POS;
446 } else {
447 return curret = scan_hex (k);
448 }
449 }
450 // Fetch identifier or keyword.
451 // Identifiers may contain spaces if the part upto the first space is not a keyword.
452 // Here VIF differs from vintage FORTRAN.
453 int_4 space_chk = TRUE;
454 while (_IDFCHAR (CUR_COL)) {
455 if (CUR_COL == ' ') {
456 if (space_chk && reserved (curlex)) {
457 break;
458 } else {
459 space_chk = FALSE;
460 }
461 } else if (CUR_COL == '$') {
462 ADD_CHAR ('_');
463 } else {
464 ADD_LEX;
465 }
466 next_col (TRUE);
467 }
468 // END, END DO, END IF, END WHILE
469 if (TOKEN ("end")) {
470 SKIP_SPACE;
471 while (islower (CUR_COL)) {
472 ADD_LEX_NEXT;
473 }
474 }
475 // ELSE IF
476 if (TOKEN ("else")) {
477 SKIP_SPACE;
478 while (islower (CUR_COL)) {
479 ADD_LEX_NEXT;
480 }
481 }
482 // Catch declarers here.
483 if (scan_declarer (k) == DECLAR) {
484 return curret = DECLAR;
485 } else {
486 return curret = WORD;
487 }
488 } else if (isdigit (CUR_COL)) {
489 if (curcol < 5) {
490 // Label.
491 while (curcol < 5 && isdigit (CUR_COL)) {
492 ADD_LEX_NEXT;
493 SKIP_SPACE;
494 }
495 return curret = LABEL;
496 }
497 // Number.
498 while (isdigit (CUR_COL)) {
499 ADD_LEX_NEXT;
500 SKIP_SPACE;
501 }
502 if (EQUAL (expect, EXPECT_LABEL)) {
503 return curret = LABEL;
504 }
505 if (CUR_COL == 'h') {
506 // Hollerith operand
507 return scan_hollerith ();
508 }
509 if (CUR_COL != '.' && !EXPONENT (curcol)) {
510 strip_leading_zeroes (curlex);
511 return curret = INT_NUMBER;
512 } else {
513 if (CUR_COL == '.') {
514 // Special symbols .XYZZY. like (n/2.eq.1).
515 SAVE_POS;
516 next_col (TRUE);
517 while (islower (CUR_COL)) {
518 next_col (TRUE);
519 }
520 if (CUR_COL == '.') {
521 RESTORE_POS;
522 strip_leading_zeroes (curlex);
523 return curret = INT_NUMBER;
524 } else {
525 RESTORE_POS;
526 }
527 // Fraction.
528 ADD_LEX_NEXT;
529 SKIP_SPACE;
530 while (isdigit (CUR_COL)) {
531 ADD_LEX_NEXT;
532 SKIP_SPACE;
533 }
534 }
535 // Exponent part.
536 return scan_exponent ();
537 }
538 } else if (CUR_COL == '.') {
539 ADD_LEX_NEXT;
540 SKIP_SPACE;
541 // Fraction.
542 if (isdigit (CUR_COL)) {
543 while (isdigit (CUR_COL)) {
544 ADD_LEX_NEXT;
545 SKIP_SPACE;
546 }
547 // Exponent part.
548 return scan_exponent ();
549 }
550 // Special symbols .XYZZY. .
551 if (CUR_COL == '.') {
552 next_col (TRUE);
553 }
554 while (islower (CUR_COL)) {
555 ADD_LEX_NEXT;
556 }
557 if (CUR_COL == '.') {
558 ADD_LEX_NEXT;
559 } else {
560 SCANER (2820, "stray dot", NO_TEXT);
561 }
562 } else if (CUR_COL == '*') {
563 ADD_LEX_NEXT;
564 if (CUR_COL == '*') {
565 ADD_LEX_NEXT;
566 } else if (CUR_COL == '%') {
567 ADD_LEX_NEXT;
568 }
569 } else if (CUR_COL == '/') {
570 // Concatenation.
571 ADD_LEX_NEXT;
572 if (CUR_COL == '/') {
573 ADD_LEX_NEXT;
574 } else if (CUR_COL == '=') {
575 ADD_LEX_NEXT;
576 }
577 } else if (CUR_COL == '\'') {
578 // Character string.
579 return scan_string ();
580 } else if (CUR_COL == '"') {
581 // Character string.
582 return scan_string_alt ();
583 } else if (CUR_COL == '=') {
584 ADD_LEX_NEXT;
585 if (CUR_COL == '=') {
586 ADD_LEX_NEXT;
587 }
588 } else if (CUR_COL == '!') {
589 ADD_LEX_NEXT;
590 if (CUR_COL == '=') {
591 ADD_LEX_NEXT;
592 }
593 } else if (CUR_COL == '<') {
594 ADD_LEX_NEXT;
595 if (CUR_COL == '=') {
596 ADD_LEX_NEXT;
597 }
598 } else if (CUR_COL == '>') {
599 ADD_LEX_NEXT;
600 if (CUR_COL == '=') {
601 ADD_LEX_NEXT;
602 }
603 } else if (CUR_COL != '\0') {
604 // Something else.
605 ADD_LEX_NEXT;
606 } else {
607 // No symbol left at card, scan again on next card.
608 curlin++;
609 curcol = START_OF_LINE;
610 return curret = scan_part (expect);
611 }
612 return curret = LEXEME;
613 }
614
615 int_4 scan (char *expect)
616 {
617 int_4 rc;
618 RECCPY (prelex, curlex);
619 preret = curret;
620 RECCLR (curlex);
621 rc = scan_part (expect);
622 if (rc == END_OF_LINE || rc == END_OF_MODULE) {
623 return curret = rc;
624 }
625 if (rc == LABEL) {
626 return curret = rc;
627 }
628 if (TOKEN ("double")) {
629 scan_part (EXPECT_NONE);
630 if (TOKEN ("precision")) {
631 RECCPY (curlex, "real*8");
632 } else if (TOKEN ("complex")) {
633 RECCPY (curlex, "complex*16");
634 } else {
635 RECCPY (curlex, "real*8");
636 EXPECT (2821, "precision");
637 }
638 return curret = DECLAR;
639 } else if (TOKEN ("go")) {
640 scan_part (EXPECT_NONE);
641 if (!TOKEN ("to")) {
642 SCANER (2822, "invalid goto", NO_TEXT);
643 }
644 RECCPY (curlex, "goto");
645 return curret = WORD;
646 }
647 if (EQUAL (expect, EXPECT_LABEL)) {
648 ;
649 } else if (expect != NO_TEXT && !EQUAL (curlex, expect)) {
650 NEW_RECORD (str);
651 _srecordf (str, "%s but found %s", expect, curlex);
652 EXPECT (2823, str);
653 return curret = ERR;
654 }
655 return curret = rc;
656 }
657
658 int_4 scan_fmt (void)
659 {
660 int_4 k = 0;
661 RECCPY (prelex, curlex);
662 preret = curret;
663 RECCLR (curlex);
664 CUR_LIN.proc = nprocs;
665 // Skip empty lines.
666 if (curcol == START_OF_LINE && curlin < nftnlines) {
667 if (POS (0) == '\0') {
668 curlin++;
669 return curret = scan_fmt ();
670 }
671 }
672 if (curcol > START_OF_LINE && CUR_COL == '\0') {
673 // Next scan starts at new line.
674 curlin++;
675 curcol = START_OF_LINE;
676 }
677 if (curlin >= nftnlines) {
678 return curret = END_OF_MODULE;
679 }
680 if (curcol == START_OF_LINE) {
681 while (IS_COMMENT (POS (0))) {
682 curlin++;
683 if (curlin >= nftnlines) {
684 return curret = END_OF_MODULE;
685 }
686 }
687 if (CUR_LIN.isn > 1) {
688 if (POS (5) == ' ') {
689 curcol = 0;
690 return curret = END_OF_LINE;
691 } else {
692 // All but first line can be continuations.
693 curcol = 6;
694 }
695 } else {
696 curcol = 0;
697 }
698 }
699 // Skip trailing blanks.
700 SKIP_SPACE;
701 if (CUR_COL == '\0') {
702 // No symbol left at card, scan again on next card.
703 curlin++;
704 curcol = START_OF_LINE;
705 return curret = scan_fmt ();
706 }
707 // Mark start of lexeme for messages.
708 prelin = curlin;
709 precol = curcol;
710 if (islower (CUR_COL)) {
711 // Format specifier.
712 while (islower (CUR_COL) || isdigit (CUR_COL)) {
713 ADD_RAW_NEXT;
714 }
715 if (CUR_COL == '.') {
716 ADD_LEX_NEXT;
717 }
718 while (isdigit (CUR_COL)) {
719 ADD_LEX_NEXT;
720 }
721 if (islower (CUR_COL)) {
722 ADD_RAW_NEXT;
723 while (isdigit (CUR_COL)) {
724 ADD_LEX_NEXT;
725 }
726 }
727 return curret = LEXEME;
728 } else if (isdigit (CUR_COL)) {
729 // Number.
730 while (isdigit (CUR_COL)) {
731 ADD_LEX_NEXT;
732 }
733 if (CUR_COL == 'h') {
734 // Hollerith format item
735 return scan_hollerith ();
736 } else {
737 strip_leading_zeroes (curlex);
738 return curret = INT_NUMBER;
739 }
740 } else if (CUR_COL == '\'') {
741 // Character string.
742 return scan_string ();
743 } else if (CUR_COL == '"') {
744 // Character string.
745 return scan_string_alt ();
746 } else if (CUR_COL != '\0') {
747 // Something else.
748 ADD_LEX_NEXT;
749 } else {
750 // No symbol left at card, scan again on next card.
751 curlin++;
752 curcol = START_OF_LINE;
753 return curret = scan_fmt ();
754 }
755 return curret = LEXEME;
756 }
757
758 logical_4 lookahead(char *expect)
759 {
760 (void) scan (EXPECT_NONE);
761 logical_4 check = TOKEN (expect);
762 UNSCAN;
763 return check;
764 }
© 2002-2025 J.M. van der Veer (jmvdveer@xs4all.nl)
|