scan.c
1 //! @file scan.c
2 //! @author J. Marcel van der Veer
3 //
4 //! @section Copyright
5 //
6 // This file is part of VIF - vintage FORTRAN compiler.
7 // Copyright 2020-2024 J. Marcel van der Veer <algol68g@xs4all.nl>.
8 //
9 //! @section License
10 //
11 // This program is free software; you can redistribute it and/or modify it
12 // under the terms of the GNU General Public License as published by the
13 // Free Software Foundation; either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful, but
17 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 // more details. You should have received a copy of the GNU General Public
20 // License along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22 //! @section Synopsis
23 //!
24 //! Fortran scanner.
25
26 #include <vif.h>
27
28 #define POS(n) (tolower (source[curlin].text[n]))
29 #define EXPONENT(n) _EXPCHAR (POS (n))
30 #define CUR_COL (POS (curcol))
31 #define ADD_LEX curlex[k++] = CUR_COL;
32 #define ADD_LEX_NEXT ADD_LEX; next_col (TRUE);
33 #define ADD_RAW_NEXT curlex[k++] = source[curlin].text[curcol]; next_col (TRUE);
34 #define ADD_CHR(c) curlex[k++] = tolower (c);
35
36 #define SKIP_SPACE\
37 while (CUR_COL == ' ') {\
38 next_col (TRUE);\
39 }
40
41 void next_col (int_4 cont_allowed)
42 {
43 // Early FORTRAN allowed 20 cards for one source line (1 + 19 continuation cards).
44 // Below code allows for an arbitratry number of continuation cards.
45 curcol++;
46 if (CUR_COL == '\0') {
47 if (! cont_allowed) {
48 return;
49 }
50 SAVE_POS;
51 curlin++;
52 if (curlin >= nftnlines) {
53 RESTORE_POS;
54 return;
55 }
56 if (strlen (source[curlin].text) < 6) {
57 RESTORE_POS;
58 return;
59 }
60 if (IS_COMMENT (POS (0)) || POS (5) == ' ') {
61 RESTORE_POS;
62 return;
63 }
64 for (int_4 i = 0; i < 5; i++) {
65 if (POS (i) != ' ') {
66 SYNTAX (2501, "continuation card columns 1-5");
67 }
68 }
69 curcol = 6;
70 }
71 }
72
73 void skip_card ()
74 {
75 if (prelin == curlin) {
76 int_4 rc;
77 do {
78 rc = scan (NULL);
79 }
80 while (rc != END_OF_LINE && rc != END_OF_MODULE);
81 } else if (source[curlin].text == NULL) {
82 return;
83 } else if (strlen (source[curlin].text) >= 6 && POS(5) != ' ') {
84 int_4 rc;
85 do {
86 rc = scan (NULL);
87 }
88 while (rc != END_OF_LINE && rc != END_OF_MODULE);
89 }
90 }
91
92 void skip_card_expr ()
93 {
94 // A correct expression leaves the scanner at the last symbol of that expression.
95 // So we do careful look-ahead.
96 if (strcmp (prelex, curlex) != 0) {
97 SYNTAX (2502, curlex);
98 } else if (prelin == curlin) {
99 (void) scan (NULL);
100 if (prelin == curlin) {
101 if (strcmp (prelex, curlex) != 0) {
102 SYNTAX (2503, curlex);
103 }
104 }
105 }
106 skip_card ();
107 }
108
109 int_4 scan_hollerith (void)
110 {
111 int_4 k = 0, N = 0;
112 if (hollerith) {
113 SYNTAX (2504, "disabled Hollerith string");
114 }
115 if (!isint_4 (curlex, &N)) {
116 SCANER (2505, "invalid hollerith length", NULL);
117 return curret = END_OF_MODULE;
118 } else {
119 k = 0;
120 RECCLR (curlex);
121 ADD_CHR ('"');
122 next_col (TRUE);
123 for (int_4 chr = 0, go_on = TRUE; chr < N && go_on; chr++) {
124 if (CUR_COL == '\0') {
125 go_on = FALSE;
126 } else if (CUR_COL == '"') {
127 ADD_CHR ('\"');
128 next_col (TRUE);
129 } else {
130 ADD_RAW_NEXT;
131 }
132 }
133 ADD_CHR ('"');
134 return curret = TEXT;
135 }
136 }
137
138 int_4 scan_string (void)
139 {
140 int_4 k = 0;
141 ADD_CHR ('"');
142 next_col (TRUE);
143 int_4 go_on = TRUE;
144 while (go_on) {
145 if (CUR_COL == '\0') {
146 SCANER (2506, "unterminated string", NULL);
147 ADD_CHR ('"');
148 return curret = END_OF_MODULE;
149 } else if (CUR_COL == '\'') {
150 SAVE_POS;
151 next_col (TRUE);
152 if (CUR_COL == '\'') {
153 ADD_CHR ('\'');
154 next_col (TRUE);
155 } else {
156 RESTORE_POS;
157 go_on = FALSE;
158 }
159 } else if (CUR_COL == '"') {
160 ADD_CHR ('\\');
161 ADD_CHR ('"');
162 next_col (TRUE);
163 } else {
164 ADD_RAW_NEXT;
165 }
166 }
167 if (CUR_COL != '\'') {
168 SCANER (2507, "unterminated string", NULL);
169 return curret = END_OF_MODULE;
170 } else {
171 ADD_CHR ('"');
172 next_col (TRUE);
173 return curret = TEXT;
174 }
175 }
176
177 int_4 scan_string_alt (void)
178 {
179 int_4 k = 0;
180 ADD_CHR ('"');
181 next_col (TRUE);
182 int_4 go_on = TRUE;
183 while (go_on) {
184 if (CUR_COL == '\0') {
185 SCANER (2508, "unterminated string", NULL);
186 ADD_CHR ('"');
187 return curret = END_OF_MODULE;
188 } else if (CUR_COL == '"') {
189 SAVE_POS;
190 next_col (TRUE);
191 if (CUR_COL == '"') {
192 ADD_CHR ('"');
193 next_col (TRUE);
194 } else {
195 RESTORE_POS;
196 go_on = FALSE;
197 }
198 } else if (CUR_COL == '"') {
199 ADD_CHR ('\\');
200 ADD_CHR ('"');
201 next_col (TRUE);
202 } else {
203 ADD_RAW_NEXT;
204 }
205 }
206 if (CUR_COL != '"') {
207 SCANER (2509, "unterminated string", NULL);
208 return curret = END_OF_MODULE;
209 } else {
210 ADD_CHR ('"');
211 next_col (TRUE);
212 return curret = TEXT;
213 }
214 }
215
216 int_4 scan_exponent (void)
217 {
218 int_4 k = strlen (curlex);
219 if (EXPONENT (curcol)) {
220 ADD_LEX_NEXT;
221 SKIP_SPACE;
222 if (CUR_COL == '+' || CUR_COL == '-' || isdigit (CUR_COL)) {
223 ADD_LEX_NEXT;
224 SKIP_SPACE;
225 while (isdigit (CUR_COL)) {
226 ADD_LEX_NEXT;
227 SKIP_SPACE;
228 }
229 }
230 }
231 return curret = FLT_NUMBER;
232 }
233
234 int_4 scan_part (void)
235 {
236 int_4 k = 0;
237 RECCLR (curlex);
238 source[curlin].proc = nprocs;
239 // Skip empty lines.
240 if (curcol == START_OF_LINE && curlin < nftnlines) {
241 if (POS (0) == '\0') {
242 curlin++;
243 return curret = scan_part ();
244 }
245 }
246 if (curcol > START_OF_LINE && CUR_COL == '\0') {
247 // Next scan starts at new line.
248 curlin++;
249 curcol = START_OF_LINE;
250 }
251 if (curlin >= nftnlines) {
252 return curret = END_OF_MODULE;
253 }
254 if (curcol == START_OF_LINE) {
255 while (POS (0) == '\0' || IS_COMMENT (POS (0))) {
256 curlin++;
257 if (curlin >= nftnlines) {
258 return curret = END_OF_MODULE;
259 }
260 }
261 if (source[curlin].isn > 0) {
262 if (POS (5) == ' ') {
263 curcol = 0;
264 return curret = END_OF_LINE;
265 } else {
266 // All but first line can be continuations.
267 curcol = 6;
268 }
269 } else {
270 curcol = 0;
271 }
272 }
273 // Skip trailing blanks.
274 SKIP_SPACE;
275 if (CUR_COL == '\0') {
276 // No symbol left at card, scan again on next card.
277 curlin++;
278 curcol = START_OF_LINE;
279 return curret = scan_part ();
280 }
281 // Mark start of lexeme for messages.
282 prelin = curlin;
283 precol = curcol;
284 if (islower (CUR_COL)) {
285 // A letter, possibly a radix.
286 if (CUR_COL == 'b') {
287 SAVE_POS;
288 next_col (TRUE);
289 if (CUR_COL != '\'') {
290 RESTORE_POS;
291 } else {
292 // Radix 2 number.
293 ADD_CHR ('0');
294 ADD_CHR ('b');
295 next_col (TRUE);
296 while (CUR_COL != '\'' && CUR_COL != '\0') {
297 if (CUR_COL != '0' && CUR_COL != '1') {
298 SCANER (2510, "invalid binary digit", NULL);
299 return curret = END_OF_MODULE;
300 }
301 ADD_LEX_NEXT;
302 }
303 if (CUR_COL != '\'') {
304 SCANER (2511, "invalid denotation", NULL);
305 return curret = END_OF_MODULE;
306 }
307 next_col (TRUE);
308 return curret = INT_NUMBER;
309 }
310 } else if (CUR_COL == 'x') {
311 SAVE_POS;
312 next_col (TRUE);
313 if (CUR_COL != '\'') {
314 RESTORE_POS;
315 } else {
316 // Radix 16 number.
317 ADD_CHR ('0');
318 ADD_CHR ('x');
319 next_col (TRUE);
320 while (CUR_COL != '\'' && CUR_COL != '\0') {
321 if (!isxdigit (CUR_COL)) {
322 SCANER (2512, "invalid hex digit", NULL);
323 return curret = END_OF_MODULE;
324 }
325 ADD_LEX_NEXT;
326 }
327 if (CUR_COL != '\'') {
328 SCANER (2513, "invalid denotation", NULL);
329 return curret = END_OF_MODULE;
330 }
331 next_col (TRUE);
332 return curret = INT_NUMBER;
333 }
334 }
335 // Fetch identifier or keyword.
336 while (islower (CUR_COL) || isdigit (CUR_COL) || CUR_COL == '_') {
337 ADD_LEX;
338 // next_col (FALSE); // End-of-card is end of symbol.
339 next_col (TRUE);
340 }
341 // END, END DO, END IF, END WHILE
342 if (TOKEN ("end")) {
343 SKIP_SPACE;
344 while (islower (CUR_COL)) {
345 ADD_LEX_NEXT;
346 }
347 }
348 // ELSE IF
349 if (TOKEN ("else")) {
350 SKIP_SPACE;
351 while (islower (CUR_COL)) {
352 ADD_LEX_NEXT;
353 }
354 }
355 // Catch declarers here.
356 if (TOKEN ("integer")) {
357 SKIP_SPACE;
358 if (CUR_COL == '*') {
359 ADD_LEX_NEXT;
360 SKIP_SPACE;
361 CHECKDIGIT (2514, CUR_COL);
362 while (isdigit (CUR_COL)) {
363 ADD_LEX_NEXT;
364 }
365 }
366 return curret = DECLAR;
367 }
368 if (TOKEN ("logical")) {
369 SKIP_SPACE;
370 if (CUR_COL == '*') {
371 ADD_LEX_NEXT;
372 SKIP_SPACE;
373 CHECKDIGIT (2515, CUR_COL);
374 while (isdigit (CUR_COL)) {
375 ADD_LEX_NEXT;
376 }
377 }
378 return curret = DECLAR;
379 }
380 if (TOKEN ("real")) {
381 SKIP_SPACE;
382 if (CUR_COL == '*') {
383 ADD_LEX_NEXT;
384 SKIP_SPACE;
385 CHECKDIGIT (2516, CUR_COL);
386 while (isdigit (CUR_COL)) {
387 ADD_LEX_NEXT;
388 }
389 }
390 return curret = DECLAR;
391 }
392 if (TOKEN ("complex")) {
393 SKIP_SPACE;
394 if (CUR_COL == '*') {
395 ADD_LEX_NEXT;
396 SKIP_SPACE;
397 CHECKDIGIT (2517, CUR_COL);
398 while (isdigit (CUR_COL)) {
399 ADD_LEX_NEXT;
400 }
401 }
402 return curret = DECLAR;
403 }
404 if (TOKEN ("character")) {
405 SKIP_SPACE;
406 if (CUR_COL == '*') {
407 ADD_LEX_NEXT;
408 SKIP_SPACE;
409 if (CUR_COL == '(') {
410 ADD_LEX_NEXT;
411 SKIP_SPACE;
412 if (CUR_COL == '*') {
413 ADD_LEX_NEXT;
414 } else {
415 while (islower (CUR_COL) || isdigit (CUR_COL) || CUR_COL == '_') {
416 ADD_LEX_NEXT;
417 }
418 }
419 SKIP_SPACE;
420 if (CUR_COL == ')') {
421 ADD_LEX_NEXT;
422 }
423 } else {
424 CHECKDIGIT (2518, CUR_COL);
425 while (isdigit (CUR_COL)) {
426 ADD_LEX_NEXT;
427 }
428 }
429 }
430 return curret = DECLAR;
431 }
432 // Word.
433 return curret = WORD;
434 } else if (isdigit (CUR_COL)) {
435 if (curcol < 5) {
436 // Label.
437 while (curcol < 5 && isdigit (CUR_COL)) {
438 ADD_LEX_NEXT;
439 }
440 return curret = LABEL;
441 }
442 // Number.
443 while (isdigit (CUR_COL)) {
444 ADD_LEX_NEXT;
445 }
446 if (CUR_COL == 'h') {
447 // Hollerith operand
448 return scan_hollerith ();
449 }
450 if (CUR_COL != '.' && !EXPONENT (curcol)) {
451 return curret = INT_NUMBER;
452 } else {
453 if (CUR_COL == '.') {
454 // Special symbols .XYZZY. like (n/2.eq.1).
455 SAVE_POS;
456 next_col (TRUE);
457 while (islower (CUR_COL)) {
458 next_col (TRUE);
459 }
460 if (CUR_COL == '.') {
461 RESTORE_POS;
462 return curret = INT_NUMBER;
463 } else {
464 RESTORE_POS;
465 }
466 // Fraction.
467 ADD_LEX_NEXT;
468 SKIP_SPACE;
469 while (isdigit (CUR_COL)) {
470 ADD_LEX_NEXT;
471 SKIP_SPACE;
472 }
473 }
474 // Exponent part.
475 return scan_exponent ();
476 }
477 } else if (CUR_COL == '.') {
478 ADD_LEX_NEXT;
479 SKIP_SPACE;
480 // Fraction.
481 if (isdigit (CUR_COL)) {
482 while (isdigit (CUR_COL)) {
483 ADD_LEX_NEXT;
484 SKIP_SPACE;
485 }
486 // Exponent part.
487 return scan_exponent ();
488 }
489 // Special symbols .XYZZY. .
490 if (CUR_COL == '.') {
491 next_col (TRUE);
492 }
493 while (islower (CUR_COL)) {
494 ADD_LEX_NEXT;
495 }
496 if (CUR_COL == '.') {
497 ADD_LEX_NEXT;
498 } else {
499 SCANER (2519, "stray dot", NULL);
500 }
501 } else if (CUR_COL == '*') {
502 ADD_LEX_NEXT;
503 if (CUR_COL == '*') {
504 ADD_LEX_NEXT;
505 } else if (CUR_COL == '%') {
506 ADD_LEX_NEXT;
507 }
508 } else if (CUR_COL == '/') {
509 // Concatenation.
510 ADD_LEX_NEXT;
511 if (CUR_COL == '/') {
512 ADD_LEX_NEXT;
513 } else if (CUR_COL == '=') {
514 ADD_LEX_NEXT;
515 }
516 } else if (CUR_COL == '\'') {
517 // Character string.
518 return scan_string ();
519 } else if (CUR_COL == '"') {
520 // Character string.
521 return scan_string_alt ();
522 } else if (CUR_COL == '=') {
523 ADD_LEX_NEXT;
524 if (CUR_COL == '=') {
525 ADD_LEX_NEXT;
526 }
527 } else if (CUR_COL == '!') {
528 ADD_LEX_NEXT;
529 if (CUR_COL == '=') {
530 ADD_LEX_NEXT;
531 }
532 } else if (CUR_COL == '<') {
533 ADD_LEX_NEXT;
534 if (CUR_COL == '=') {
535 ADD_LEX_NEXT;
536 }
537 } else if (CUR_COL == '>') {
538 ADD_LEX_NEXT;
539 if (CUR_COL == '=') {
540 ADD_LEX_NEXT;
541 }
542 } else if (CUR_COL != '\0') {
543 // Something else.
544 ADD_LEX_NEXT;
545 } else {
546 // No symbol left at card, scan again on next card.
547 curlin++;
548 curcol = START_OF_LINE;
549 return curret = scan_part ();
550 }
551 return curret = LEXEME;
552 }
553
554 int_4 scan (char *expect)
555 {
556 int_4 rc;
557 strcpy (prelex, curlex);
558 preret = curret;
559 RECCLR (curlex);
560 rc = scan_part ();
561 if (rc == END_OF_LINE || rc == END_OF_MODULE) {
562 return curret = rc;
563 }
564 if (rc == LABEL) {
565 return curret = rc;
566 }
567 if (TOKEN ("double")) {
568 scan_part ();
569 if (TOKEN ("precision")) {
570 strcpy (curlex, "real*8");
571 } else if (TOKEN ("complex")) {
572 strcpy (curlex, "complex*16");
573 } else {
574 strcpy (curlex, "real*8");
575 EXPECT (2520, "precision");
576 }
577 return curret = DECLAR;
578 } else if (TOKEN ("go")) {
579 scan_part ();
580 if (!TOKEN ("to")) {
581 SCANER (2521, "invalid goto", NULL);
582 }
583 strcpy (curlex, "goto");
584 return curret = WORD;
585 }
586 if (expect != NULL && !EQUAL (curlex, expect)) {
587 RECORD str;
588 _srecordf (str, "%s but found %s", expect, curlex);
589 EXPECT (2522, str);
590 return curret = ERR;
591 }
592 return curret = rc;
593 }
594
595 int_4 scan_fmt (void)
596 {
597 int_4 k = 0;
598 strcpy (prelex, curlex);
599 preret = curret;
600 RECCLR (curlex);
601 source[curlin].proc = nprocs;
602 // Skip empty lines.
603 if (curcol == START_OF_LINE && curlin < nftnlines) {
604 if (POS (0) == '\0') {
605 curlin++;
606 return curret = scan_fmt ();
607 }
608 }
609 if (curcol > START_OF_LINE && CUR_COL == '\0') {
610 // Next scan starts at new line.
611 curlin++;
612 curcol = START_OF_LINE;
613 }
614 if (curlin >= nftnlines) {
615 return curret = END_OF_MODULE;
616 }
617 if (curcol == START_OF_LINE) {
618 while (IS_COMMENT (POS (0))) {
619 curlin++;
620 if (curlin >= nftnlines) {
621 return curret = END_OF_MODULE;
622 }
623 }
624 if (source[curlin].isn > 1) {
625 if (POS (5) == ' ') {
626 curcol = 0;
627 return curret = END_OF_LINE;
628 } else {
629 // All but first line can be continuations.
630 curcol = 6;
631 }
632 } else {
633 curcol = 0;
634 }
635 }
636 // Skip trailing blanks.
637 SKIP_SPACE;
638 if (CUR_COL == '\0') {
639 // No symbol left at card, scan again on next card.
640 curlin++;
641 curcol = START_OF_LINE;
642 return curret = scan_fmt ();
643 }
644 // Mark start of lexeme for messages.
645 prelin = curlin;
646 precol = curcol;
647 if (islower (CUR_COL)) {
648 // Format specifier.
649 while (islower (CUR_COL) || isdigit (CUR_COL)) {
650 ADD_RAW_NEXT;
651 }
652 if (CUR_COL == '.') {
653 ADD_LEX_NEXT;
654 }
655 while (isdigit (CUR_COL)) {
656 ADD_LEX_NEXT;
657 }
658 if (islower (CUR_COL)) {
659 ADD_RAW_NEXT;
660 while (isdigit (CUR_COL)) {
661 ADD_LEX_NEXT;
662 }
663 }
664 return curret = LEXEME;
665 } else if (isdigit (CUR_COL)) {
666 // Number.
667 while (isdigit (CUR_COL)) {
668 ADD_LEX_NEXT;
669 }
670 if (CUR_COL == 'h') {
671 // Hollerith format item
672 return scan_hollerith ();
673 } else {
674 return curret = INT_NUMBER;
675 }
676 } else if (CUR_COL == '\'') {
677 // Character string.
678 return scan_string ();
679 } else if (CUR_COL == '"') {
680 // Character string.
681 return scan_string_alt ();
682 } else if (CUR_COL != '\0') {
683 // Something else.
684 ADD_LEX_NEXT;
685 } else {
686 // No symbol left at card, scan again on next card.
687 curlin++;
688 curcol = START_OF_LINE;
689 return curret = scan_fmt ();
690 }
691 return curret = LEXEME;
692 }
693
© 2002-2024 J.M. van der Veer (jmvdveer@xs4all.nl)
|