scan.c
1 //! @file scan.c
2 //! @author J. Marcel van der Veer
3 //
4 //! @section Copyright
5 //
6 // This file is part of VIF - vintage FORTRAN compiler.
7 // Copyright 2020-2025 J. Marcel van der Veer <algol68g@xs4all.nl>.
8 //
9 //! @section License
10 //
11 // This program is free software; you can redistribute it and/or modify it
12 // under the terms of the GNU General Public License as published by the
13 // Free Software Foundation; either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful, but
17 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 // more details. You should have received a copy of the GNU General Public
20 // License along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22 //! @section Synopsis
23 //!
24 //! Fortran scanner.
25
26 #include <vif.h>
27
28 #define ADD_CHAR(c) {curlex[k++] = (c);}
29 #define ADD_LEX ADD_CHAR (CUR_COL)
30 #define ADD_LEX_NEXT {ADD_LEX; next_col (TRUE);}
31 #define ADD_RAW_NEXT {curlex[k++] = CUR_LIN.text[curcol]; next_col (TRUE);}
32 #define ADD_CHR(c) {curlex[k++] = tolower (c);}
33
34 #define SKIP_SPACE\
35 while (CUR_COL == ' ') {\
36 next_col (TRUE);\
37 }
38
39 void next_col (int_4 cont_allowed)
40 {
41 // Early FORTRAN allowed 20 cards for one source line (1 + 19 continuation cards).
42 // Below code allows for an arbitratry number of continuation cards.
43 curcol++;
44 if (CUR_COL == '\0') {
45 if (! cont_allowed) {
46 return;
47 }
48 SAVE_POS;
49 curlin++;
50 if (curlin >= nftnlines) {
51 RESTORE_POS;
52 return;
53 }
54 if (strlen (CUR_LIN.text) < 6) {
55 RESTORE_POS;
56 return;
57 }
58 if (IS_COMMENT (POS (0)) || POS (5) == ' ') {
59 RESTORE_POS;
60 return;
61 }
62 for (int_4 i = 0; i < 5; i++) {
63 if (POS (i) != ' ') {
64 SYNTAX (2601, "continuation card columns 1-5");
65 }
66 }
67 curcol = 6;
68 }
69 }
70
71 void skip_card (int_4 check)
72 {
73 if (check) {
74 // When a correct statement has left the scanner at the final token.
75 if (!EQUAL (prelex, curlex)) {
76 SYNTAX (2602, curlex);
77 } else if (prelin == curlin) {
78 (void) scan (EXPECT_NONE);
79 if (prelin == curlin) {
80 if (!EQUAL (prelex, curlex)) {
81 SYNTAX (2603, curlex);
82 }
83 }
84 }
85 }
86 // VIF is sloppy about trailing text.
87 // This is intentional, old code can have text from column 73 onward.
88 if (prelin == curlin) {
89 int_4 rc;
90 do {
91 rc = scan (EXPECT_NONE);
92 }
93 while (rc != END_OF_LINE && rc != END_OF_MODULE);
94 } else if (CUR_LIN.text == NO_TEXT) {
95 return;
96 } else if (strlen (CUR_LIN.text) >= 6 && POS(5) != ' ') {
97 int_4 rc;
98 do {
99 rc = scan (EXPECT_NONE);
100 }
101 while (rc != END_OF_LINE && rc != END_OF_MODULE);
102 }
103 }
104
105 int_4 scan_hollerith (void)
106 {
107 int_4 k = 0, N = 0;
108 if (hollerith) {
109 SYNTAX (2604, "disabled Hollerith string");
110 }
111 if (!isint_4 (curlex, &N)) {
112 SCANER (2605, "invalid hollerith length", NO_TEXT);
113 return curret = END_OF_MODULE;
114 } else {
115 k = 0;
116 RECCLR (curlex);
117 ADD_CHR ('"');
118 next_col (TRUE);
119 for (int_4 chr = 0, go_on = TRUE; chr < N && go_on; chr++) {
120 if (CUR_COL == '\0') {
121 go_on = FALSE;
122 } else if (CUR_COL == '"') {
123 ADD_CHR ('\"');
124 next_col (TRUE);
125 } else {
126 ADD_RAW_NEXT;
127 }
128 }
129 ADD_CHR ('"');
130 return curret = TEXT;
131 }
132 }
133
134 int_4 scan_string (void)
135 {
136 int_4 k = 0;
137 ADD_CHR ('"');
138 next_col (TRUE);
139 int_4 go_on = TRUE;
140 while (go_on) {
141 if (CUR_COL == '\0') {
142 SCANER (2606, "unterminated string", NO_TEXT);
143 ADD_CHR ('"');
144 return curret = END_OF_MODULE;
145 } else if (CUR_COL == '\'') {
146 SAVE_POS;
147 next_col (TRUE);
148 if (CUR_COL == '\'') {
149 ADD_CHR ('\'');
150 next_col (TRUE);
151 } else {
152 RESTORE_POS;
153 go_on = FALSE;
154 }
155 } else if (CUR_COL == '"') {
156 ADD_CHR ('\\');
157 ADD_CHR ('"');
158 next_col (TRUE);
159 } else {
160 ADD_RAW_NEXT;
161 }
162 }
163 if (CUR_COL != '\'') {
164 SCANER (2607, "unterminated string", NO_TEXT);
165 return curret = END_OF_MODULE;
166 } else {
167 ADD_CHR ('"');
168 next_col (TRUE);
169 return curret = TEXT;
170 }
171 }
172
173 int_4 scan_string_alt (void)
174 {
175 int_4 k = 0;
176 ADD_CHR ('"');
177 next_col (TRUE);
178 int_4 go_on = TRUE;
179 while (go_on) {
180 if (CUR_COL == '\0') {
181 SCANER (2608, "unterminated string", NO_TEXT);
182 ADD_CHR ('"');
183 return curret = END_OF_MODULE;
184 } else if (CUR_COL == '"') {
185 SAVE_POS;
186 next_col (TRUE);
187 if (CUR_COL == '"') {
188 ADD_CHR ('"');
189 next_col (TRUE);
190 } else {
191 RESTORE_POS;
192 go_on = FALSE;
193 }
194 } else if (CUR_COL == '"') {
195 ADD_CHR ('\\');
196 ADD_CHR ('"');
197 next_col (TRUE);
198 } else {
199 ADD_RAW_NEXT;
200 }
201 }
202 if (CUR_COL != '"') {
203 SCANER (2609, "unterminated string", NO_TEXT);
204 return curret = END_OF_MODULE;
205 } else {
206 ADD_CHR ('"');
207 next_col (TRUE);
208 return curret = TEXT;
209 }
210 }
211
212 int_4 scan_exponent (void)
213 {
214 int_4 k = strlen (curlex);
215 if (EXPONENT (curcol)) {
216 ADD_LEX_NEXT;
217 SKIP_SPACE;
218 if (CUR_COL == '+' || CUR_COL == '-' || isdigit (CUR_COL)) {
219 ADD_LEX_NEXT;
220 SKIP_SPACE;
221 while (isdigit (CUR_COL)) {
222 ADD_LEX_NEXT;
223 SKIP_SPACE;
224 }
225 }
226 }
227 return curret = FLT_NUMBER;
228 }
229
230 int_4 scan_declarer (int_4 k)
231 {
232 if (TOKEN ("integer")) {
233 SKIP_SPACE;
234 if (CUR_COL == '*') {
235 ADD_LEX_NEXT;
236 SKIP_SPACE;
237 CHECKDIGIT (2610, CUR_COL);
238 while (isdigit (CUR_COL)) {
239 ADD_LEX_NEXT;
240 SKIP_SPACE;
241 }
242 }
243 return DECLAR;
244 }
245 if (TOKEN ("logical")) {
246 SKIP_SPACE;
247 if (CUR_COL == '*') {
248 ADD_LEX_NEXT;
249 SKIP_SPACE;
250 CHECKDIGIT (2611, CUR_COL);
251 while (isdigit (CUR_COL)) {
252 ADD_LEX_NEXT;
253 SKIP_SPACE;
254 }
255 }
256 return DECLAR;
257 }
258 if (TOKEN ("real")) {
259 SKIP_SPACE;
260 if (CUR_COL == '*') {
261 ADD_LEX_NEXT;
262 SKIP_SPACE;
263 CHECKDIGIT (2612, CUR_COL);
264 while (isdigit (CUR_COL)) {
265 ADD_LEX_NEXT;
266 SKIP_SPACE;
267 }
268 }
269 return DECLAR;
270 }
271 if (TOKEN ("complex")) {
272 SKIP_SPACE;
273 if (CUR_COL == '*') {
274 ADD_LEX_NEXT;
275 SKIP_SPACE;
276 CHECKDIGIT (2613, CUR_COL);
277 while (isdigit (CUR_COL)) {
278 ADD_LEX_NEXT;
279 SKIP_SPACE;
280 }
281 }
282 return DECLAR;
283 }
284 if (TOKEN ("character")) {
285 SKIP_SPACE;
286 if (CUR_COL == '*') {
287 ADD_LEX_NEXT;
288 SKIP_SPACE;
289 if (CUR_COL == '(') {
290 ADD_LEX_NEXT;
291 SKIP_SPACE;
292 if (CUR_COL == '*') {
293 ADD_LEX_NEXT;
294 } else {
295 while (islower (CUR_COL) || isdigit (CUR_COL) || CUR_COL == '_') {
296 ADD_LEX_NEXT;
297 }
298 }
299 SKIP_SPACE;
300 if (CUR_COL == ')') {
301 ADD_LEX_NEXT;
302 }
303 } else {
304 CHECKDIGIT (2614, CUR_COL);
305 while (isdigit (CUR_COL)) {
306 ADD_LEX_NEXT;
307 SKIP_SPACE;
308 }
309 }
310 }
311 return DECLAR;
312 }
313 return LEXEME;
314 }
315
316 int_4 scan_bin (int_4 k)
317 {
318 // Radix 2 number.
319 ADD_CHR ('0');
320 ADD_CHR ('b');
321 next_col (TRUE);
322 while (CUR_COL != '\'' && CUR_COL != '\0') {
323 if (CUR_COL != '0' && CUR_COL != '1') {
324 SCANER (2615, "invalid binary digit", NO_TEXT);
325 return END_OF_MODULE;
326 }
327 ADD_LEX_NEXT;
328 }
329 if (CUR_COL != '\'') {
330 SCANER (2616, "invalid denotation", NO_TEXT);
331 return END_OF_MODULE;
332 }
333 next_col (TRUE);
334 return INT_NUMBER;
335 }
336
337 int_4 scan_hex (int_4 k)
338 {
339 // Radix 16 number.
340 ADD_CHR ('0');
341 ADD_CHR ('x');
342 next_col (TRUE);
343 while (CUR_COL != '\'' && CUR_COL != '\0') {
344 if (!isxdigit (CUR_COL)) {
345 SCANER (2617, "invalid hex digit", NO_TEXT);
346 return END_OF_MODULE;
347 }
348 ADD_LEX_NEXT;
349 }
350 if (CUR_COL != '\'') {
351 SCANER (2618, "invalid denotation", NO_TEXT);
352 return END_OF_MODULE;
353 }
354 next_col (TRUE);
355 return INT_NUMBER;
356 }
357
358 int_4 scan_part (char *expect)
359 {
360 int_4 k = 0;
361 RECCLR (curlex);
362 CUR_LIN.proc = nprocs;
363 // Skip empty lines.
364 if (curcol == START_OF_LINE && curlin < nftnlines) {
365 if (POS (0) == '\0') {
366 curlin++;
367 return curret = scan_part (expect);
368 }
369 }
370 if (curcol > START_OF_LINE && CUR_COL == '\0') {
371 // Next scan starts at new line.
372 curlin++;
373 curcol = START_OF_LINE;
374 }
375 if (curlin >= nftnlines) {
376 return curret = END_OF_MODULE;
377 }
378 if (curcol == START_OF_LINE) {
379 while (POS (0) == '\0' || IS_COMMENT (POS (0))) {
380 if (POS (0) == '/') {
381 vif_jcl ();
382 }
383 curlin++;
384 if (curlin >= nftnlines) {
385 return curret = END_OF_MODULE;
386 }
387 }
388 if (CUR_LIN.isn > 0) {
389 if (POS (5) == ' ') {
390 curcol = 0;
391 return curret = END_OF_LINE;
392 } else {
393 // All but first line can be continuations.
394 curcol = 6;
395 }
396 } else {
397 curcol = 0;
398 }
399 }
400 // Skip trailing blanks.
401 SKIP_SPACE;
402 if (CUR_COL == '\0') {
403 // No symbol left at card, scan again on next card.
404 curlin++;
405 curcol = START_OF_LINE;
406 return curret = scan_part (expect);
407 }
408 // Mark start of lexeme for messages.
409 prelin = curlin;
410 precol = curcol;
411 if (islower (CUR_COL)) {
412 // A letter, possibly a radix.
413 if (CUR_COL == 'b') {
414 SAVE_POS;
415 next_col (TRUE);
416 if (CUR_COL != '\'') {
417 RESTORE_POS;
418 } else {
419 return curret = scan_bin (k);
420 }
421 } else if (CUR_COL == 'x') {
422 SAVE_POS;
423 next_col (TRUE);
424 if (CUR_COL != '\'') {
425 RESTORE_POS;
426 } else {
427 return curret = scan_hex (k);
428 }
429 }
430 // Fetch identifier or keyword.
431 // Identifiers may contain spaces if the part upto the first space is not a keyword.
432 // Here VIF differs from vintage FORTRAN.
433 int_4 space_chk = TRUE;
434 while (_IDFCHAR (CUR_COL)) {
435 if (CUR_COL == ' ') {
436 if (space_chk && reserved (curlex)) {
437 break;
438 } else {
439 space_chk = FALSE;
440 }
441 } else if (CUR_COL == '$') {
442 ADD_CHAR ('_');
443 } else {
444 ADD_LEX;
445 }
446 next_col (TRUE);
447 }
448 // END, END DO, END IF, END WHILE
449 if (TOKEN ("end")) {
450 SKIP_SPACE;
451 while (islower (CUR_COL)) {
452 ADD_LEX_NEXT;
453 }
454 }
455 // ELSE IF
456 if (TOKEN ("else")) {
457 SKIP_SPACE;
458 while (islower (CUR_COL)) {
459 ADD_LEX_NEXT;
460 }
461 }
462 // Catch declarers here.
463 if (scan_declarer (k) == DECLAR) {
464 return curret = DECLAR;
465 } else {
466 return curret = WORD;
467 }
468 } else if (isdigit (CUR_COL)) {
469 if (curcol < 5) {
470 // Label.
471 while (curcol < 5 && isdigit (CUR_COL)) {
472 ADD_LEX_NEXT;
473 SKIP_SPACE;
474 }
475 return curret = LABEL;
476 }
477 // Number.
478 while (isdigit (CUR_COL)) {
479 ADD_LEX_NEXT;
480 SKIP_SPACE;
481 }
482 if (EQUAL (expect, EXPECT_LABEL)) {
483 return curret = LABEL;
484 }
485 if (CUR_COL == 'h') {
486 // Hollerith operand
487 return scan_hollerith ();
488 }
489 if (CUR_COL != '.' && !EXPONENT (curcol)) {
490 return curret = INT_NUMBER;
491 } else {
492 if (CUR_COL == '.') {
493 // Special symbols .XYZZY. like (n/2.eq.1).
494 SAVE_POS;
495 next_col (TRUE);
496 while (islower (CUR_COL)) {
497 next_col (TRUE);
498 }
499 if (CUR_COL == '.') {
500 RESTORE_POS;
501 return curret = INT_NUMBER;
502 } else {
503 RESTORE_POS;
504 }
505 // Fraction.
506 ADD_LEX_NEXT;
507 SKIP_SPACE;
508 while (isdigit (CUR_COL)) {
509 ADD_LEX_NEXT;
510 SKIP_SPACE;
511 }
512 }
513 // Exponent part.
514 return scan_exponent ();
515 }
516 } else if (CUR_COL == '.') {
517 ADD_LEX_NEXT;
518 SKIP_SPACE;
519 // Fraction.
520 if (isdigit (CUR_COL)) {
521 while (isdigit (CUR_COL)) {
522 ADD_LEX_NEXT;
523 SKIP_SPACE;
524 }
525 // Exponent part.
526 return scan_exponent ();
527 }
528 // Special symbols .XYZZY. .
529 if (CUR_COL == '.') {
530 next_col (TRUE);
531 }
532 while (islower (CUR_COL)) {
533 ADD_LEX_NEXT;
534 }
535 if (CUR_COL == '.') {
536 ADD_LEX_NEXT;
537 } else {
538 SCANER (2619, "stray dot", NO_TEXT);
539 }
540 } else if (CUR_COL == '*') {
541 ADD_LEX_NEXT;
542 if (CUR_COL == '*') {
543 ADD_LEX_NEXT;
544 } else if (CUR_COL == '%') {
545 ADD_LEX_NEXT;
546 }
547 } else if (CUR_COL == '/') {
548 // Concatenation.
549 ADD_LEX_NEXT;
550 if (CUR_COL == '/') {
551 ADD_LEX_NEXT;
552 } else if (CUR_COL == '=') {
553 ADD_LEX_NEXT;
554 }
555 } else if (CUR_COL == '\'') {
556 // Character string.
557 return scan_string ();
558 } else if (CUR_COL == '"') {
559 // Character string.
560 return scan_string_alt ();
561 } else if (CUR_COL == '=') {
562 ADD_LEX_NEXT;
563 if (CUR_COL == '=') {
564 ADD_LEX_NEXT;
565 }
566 } else if (CUR_COL == '!') {
567 ADD_LEX_NEXT;
568 if (CUR_COL == '=') {
569 ADD_LEX_NEXT;
570 }
571 } else if (CUR_COL == '<') {
572 ADD_LEX_NEXT;
573 if (CUR_COL == '=') {
574 ADD_LEX_NEXT;
575 }
576 } else if (CUR_COL == '>') {
577 ADD_LEX_NEXT;
578 if (CUR_COL == '=') {
579 ADD_LEX_NEXT;
580 }
581 } else if (CUR_COL != '\0') {
582 // Something else.
583 ADD_LEX_NEXT;
584 } else {
585 // No symbol left at card, scan again on next card.
586 curlin++;
587 curcol = START_OF_LINE;
588 return curret = scan_part (expect);
589 }
590 return curret = LEXEME;
591 }
592
593 int_4 scan (char *expect)
594 {
595 int_4 rc;
596 strcpy (prelex, curlex);
597 preret = curret;
598 RECCLR (curlex);
599 rc = scan_part (expect);
600 if (rc == END_OF_LINE || rc == END_OF_MODULE) {
601 return curret = rc;
602 }
603 if (rc == LABEL) {
604 return curret = rc;
605 }
606 if (TOKEN ("double")) {
607 scan_part (EXPECT_NONE);
608 if (TOKEN ("precision")) {
609 strcpy (curlex, "real*8");
610 } else if (TOKEN ("complex")) {
611 strcpy (curlex, "complex*16");
612 } else {
613 strcpy (curlex, "real*8");
614 EXPECT (2620, "precision");
615 }
616 return curret = DECLAR;
617 } else if (TOKEN ("go")) {
618 scan_part (EXPECT_NONE);
619 if (!TOKEN ("to")) {
620 SCANER (2621, "invalid goto", NO_TEXT);
621 }
622 strcpy (curlex, "goto");
623 return curret = WORD;
624 }
625 if (EQUAL (expect, EXPECT_LABEL)) {
626 ;
627 } else if (expect != NO_TEXT && !EQUAL (curlex, expect)) {
628 RECORD str;
629 _srecordf (str, "%s but found %s", expect, curlex);
630 EXPECT (2622, str);
631 return curret = ERR;
632 }
633 return curret = rc;
634 }
635
636 int_4 scan_fmt (void)
637 {
638 int_4 k = 0;
639 strcpy (prelex, curlex);
640 preret = curret;
641 RECCLR (curlex);
642 CUR_LIN.proc = nprocs;
643 // Skip empty lines.
644 if (curcol == START_OF_LINE && curlin < nftnlines) {
645 if (POS (0) == '\0') {
646 curlin++;
647 return curret = scan_fmt ();
648 }
649 }
650 if (curcol > START_OF_LINE && CUR_COL == '\0') {
651 // Next scan starts at new line.
652 curlin++;
653 curcol = START_OF_LINE;
654 }
655 if (curlin >= nftnlines) {
656 return curret = END_OF_MODULE;
657 }
658 if (curcol == START_OF_LINE) {
659 while (IS_COMMENT (POS (0))) {
660 curlin++;
661 if (curlin >= nftnlines) {
662 return curret = END_OF_MODULE;
663 }
664 }
665 if (CUR_LIN.isn > 1) {
666 if (POS (5) == ' ') {
667 curcol = 0;
668 return curret = END_OF_LINE;
669 } else {
670 // All but first line can be continuations.
671 curcol = 6;
672 }
673 } else {
674 curcol = 0;
675 }
676 }
677 // Skip trailing blanks.
678 SKIP_SPACE;
679 if (CUR_COL == '\0') {
680 // No symbol left at card, scan again on next card.
681 curlin++;
682 curcol = START_OF_LINE;
683 return curret = scan_fmt ();
684 }
685 // Mark start of lexeme for messages.
686 prelin = curlin;
687 precol = curcol;
688 if (islower (CUR_COL)) {
689 // Format specifier.
690 while (islower (CUR_COL) || isdigit (CUR_COL)) {
691 ADD_RAW_NEXT;
692 }
693 if (CUR_COL == '.') {
694 ADD_LEX_NEXT;
695 }
696 while (isdigit (CUR_COL)) {
697 ADD_LEX_NEXT;
698 }
699 if (islower (CUR_COL)) {
700 ADD_RAW_NEXT;
701 while (isdigit (CUR_COL)) {
702 ADD_LEX_NEXT;
703 }
704 }
705 return curret = LEXEME;
706 } else if (isdigit (CUR_COL)) {
707 // Number.
708 while (isdigit (CUR_COL)) {
709 ADD_LEX_NEXT;
710 }
711 if (CUR_COL == 'h') {
712 // Hollerith format item
713 return scan_hollerith ();
714 } else {
715 return curret = INT_NUMBER;
716 }
717 } else if (CUR_COL == '\'') {
718 // Character string.
719 return scan_string ();
720 } else if (CUR_COL == '"') {
721 // Character string.
722 return scan_string_alt ();
723 } else if (CUR_COL != '\0') {
724 // Something else.
725 ADD_LEX_NEXT;
726 } else {
727 // No symbol left at card, scan again on next card.
728 curlin++;
729 curcol = START_OF_LINE;
730 return curret = scan_fmt ();
731 }
732 return curret = LEXEME;
733 }
734
© 2002-2025 J.M. van der Veer (jmvdveer@xs4all.nl)
|