scan.c

     
   1  //! @file scan.c
   2  //! @author J. Marcel van der Veer
   3  //
   4  //! @section Copyright
   5  //
   6  // This file is part of VIF - vintage FORTRAN compiler.
   7  // Copyright 2020-2024 J. Marcel van der Veer <algol68g@xs4all.nl>.
   8  //
   9  //! @section License
  10  //
  11  // This program is free software; you can redistribute it and/or modify it 
  12  // under the terms of the GNU General Public License as published by the 
  13  // Free Software Foundation; either version 3 of the License, or 
  14  // (at your option) any later version.
  15  //
  16  // This program is distributed in the hope that it will be useful, but 
  17  // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  18  // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
  19  // more details. You should have received a copy of the GNU General Public 
  20  // License along with this program. If not, see <http://www.gnu.org/licenses/>.
  21  
  22  //! @section Synopsis
  23  //!
  24  //! Fortran scanner.
  25  
  26  #include <vif.h>
  27  
  28  #define POS(n) (tolower (source[curlin].text[n]))
  29  #define EXPONENT(n) _EXPCHAR (POS (n))
  30  #define CUR_COL (POS (curcol)) 
  31  #define ADD_LEX curlex[k++] = CUR_COL;
  32  #define ADD_LEX_NEXT ADD_LEX; next_col (TRUE);
  33  #define ADD_RAW_NEXT curlex[k++] = source[curlin].text[curcol]; next_col (TRUE);
  34  #define ADD_CHR(c) curlex[k++] = tolower (c);
  35  
  36  #define SKIP_SPACE\
  37    while (CUR_COL == ' ') {\
  38      next_col (TRUE);\
  39    }
  40  
  41  void next_col (int_4 cont_allowed)
  42  {
  43  // Early FORTRAN allowed 20 cards for one source line (1 + 19 continuation cards).
  44  // Below code allows for an arbitratry number of continuation cards.
  45    curcol++;
  46    if (CUR_COL == '\0') {
  47      if (! cont_allowed) {
  48        return;
  49      }
  50      SAVE_POS;
  51      curlin++;
  52      if (curlin >= nftnlines) {
  53        RESTORE_POS;
  54        return;
  55      }
  56      if (strlen (source[curlin].text) < 6) {
  57        RESTORE_POS;
  58        return;
  59      }
  60      if (IS_COMMENT (POS (0)) || POS (5) == ' ') {
  61        RESTORE_POS;
  62        return;
  63      }
  64      for (int_4 i = 0; i < 5; i++) {
  65        if (POS (i) != ' ') {
  66          SYNTAX (2501, "continuation card columns 1-5");
  67        }
  68      }
  69      curcol = 6;
  70    }
  71  }
  72  
  73  void skip_card ()
  74  {
  75    if (prelin == curlin) {
  76      int_4 rc;
  77      do {
  78        rc = scan (NULL);
  79      }
  80      while (rc != END_OF_LINE && rc != END_OF_MODULE);
  81    } else if (source[curlin].text == NULL) {
  82      return;
  83    } else if (strlen (source[curlin].text) >= 6 && POS(5) != ' ') {
  84      int_4 rc;
  85      do {
  86        rc = scan (NULL);
  87      }
  88      while (rc != END_OF_LINE && rc != END_OF_MODULE);
  89    }
  90  }
  91  
  92  void skip_card_expr ()
  93  {
  94  // A correct expression leaves the scanner at the last symbol of that expression.
  95  // So we do careful look-ahead.
  96    if (strcmp (prelex, curlex) != 0) {
  97      SYNTAX (2502, curlex);
  98    } else if (prelin == curlin) {
  99      (void) scan (NULL);
 100      if (prelin == curlin) {
 101        if (strcmp (prelex, curlex) != 0) {
 102          SYNTAX (2503, curlex);
 103        }
 104      }
 105    }
 106    skip_card ();
 107  }
 108  
 109  int_4 scan_hollerith (void)
 110  {
 111    int_4 k = 0, N = 0;
 112    if (hollerith) {
 113      SYNTAX (2504, "disabled Hollerith string");
 114    }
 115    if (!isint_4 (curlex, &N)) {
 116      SCANER (2505, "invalid hollerith length", NULL);
 117      return curret = END_OF_MODULE;
 118    } else {
 119      k = 0;
 120      RECCLR (curlex);
 121      ADD_CHR ('"');
 122      next_col (TRUE);
 123      for (int_4 chr = 0, go_on = TRUE; chr < N && go_on; chr++) {
 124        if (CUR_COL == '\0') {
 125          go_on = FALSE;
 126        } else if (CUR_COL == '"') {
 127          ADD_CHR ('\"');
 128          next_col (TRUE);
 129        } else {
 130          ADD_RAW_NEXT;
 131        }
 132      }
 133      ADD_CHR ('"');
 134      return curret = TEXT;
 135    }
 136  }
 137  
 138  int_4 scan_string (void)
 139  {
 140    int_4 k = 0;
 141    ADD_CHR ('"');
 142    next_col (TRUE);
 143    int_4 go_on = TRUE;
 144    while (go_on) {
 145      if (CUR_COL == '\0') {
 146        SCANER (2506, "unterminated string", NULL);
 147        ADD_CHR ('"');
 148        return curret = END_OF_MODULE;
 149      } else if (CUR_COL == '\'') {
 150        SAVE_POS;
 151        next_col (TRUE);
 152        if (CUR_COL == '\'') {
 153          ADD_CHR ('\'');
 154          next_col (TRUE);
 155        } else {
 156          RESTORE_POS;
 157          go_on = FALSE;
 158        }
 159      } else if (CUR_COL == '"') {
 160        ADD_CHR ('\\');
 161        ADD_CHR ('"');
 162        next_col (TRUE);
 163      } else {
 164        ADD_RAW_NEXT;
 165      }
 166    }
 167    if (CUR_COL != '\'') {
 168      SCANER (2507, "unterminated string", NULL);
 169      return curret = END_OF_MODULE;
 170    } else {
 171      ADD_CHR ('"');
 172      next_col (TRUE);
 173      return curret = TEXT;
 174    }
 175  }
 176  
 177  int_4 scan_string_alt (void)
 178  {
 179    int_4 k = 0;
 180    ADD_CHR ('"');
 181    next_col (TRUE);
 182    int_4 go_on = TRUE;
 183    while (go_on) {
 184      if (CUR_COL == '\0') {
 185        SCANER (2508, "unterminated string", NULL);
 186        ADD_CHR ('"');
 187        return curret = END_OF_MODULE;
 188      } else if (CUR_COL == '"') {
 189        SAVE_POS;
 190        next_col (TRUE);
 191        if (CUR_COL == '"') {
 192          ADD_CHR ('"');
 193          next_col (TRUE);
 194        } else {
 195          RESTORE_POS;
 196          go_on = FALSE;
 197        }
 198      } else if (CUR_COL == '"') {
 199        ADD_CHR ('\\');
 200        ADD_CHR ('"');
 201        next_col (TRUE);
 202      } else {
 203        ADD_RAW_NEXT;
 204      }
 205    }
 206    if (CUR_COL != '"') {
 207      SCANER (2509, "unterminated string", NULL);
 208      return curret = END_OF_MODULE;
 209    } else {
 210      ADD_CHR ('"');
 211      next_col (TRUE);
 212      return curret = TEXT;
 213    }
 214  }
 215  
 216  int_4 scan_exponent (void)
 217  {
 218    int_4 k = strlen (curlex);
 219    if (EXPONENT (curcol)) {
 220      ADD_LEX_NEXT;
 221      SKIP_SPACE;
 222      if (CUR_COL == '+' || CUR_COL == '-' || isdigit (CUR_COL)) {
 223        ADD_LEX_NEXT;
 224        SKIP_SPACE;
 225        while (isdigit (CUR_COL)) {
 226          ADD_LEX_NEXT;
 227          SKIP_SPACE;
 228        }
 229      }
 230    }
 231    return curret = FLT_NUMBER;
 232  }
 233  
 234  int_4 scan_part (void)
 235  {
 236    int_4 k = 0;
 237    RECCLR (curlex);
 238    source[curlin].proc = nprocs;
 239  // Skip empty lines.
 240    if (curcol == START_OF_LINE && curlin < nftnlines) {
 241      if (POS (0) == '\0') {
 242        curlin++;
 243        return curret = scan_part ();
 244      }
 245    }
 246    if (curcol > START_OF_LINE && CUR_COL == '\0') {
 247  // Next scan starts at new line.
 248      curlin++;
 249      curcol = START_OF_LINE;
 250    }
 251    if (curlin >= nftnlines) {
 252      return curret = END_OF_MODULE;
 253    }
 254    if (curcol == START_OF_LINE) {
 255      while (POS (0) == '\0' || IS_COMMENT (POS (0))) {
 256        curlin++;
 257        if (curlin >= nftnlines) {
 258          return curret = END_OF_MODULE;
 259        }
 260      }
 261      if (source[curlin].isn > 0) {
 262        if (POS (5) == ' ') {
 263          curcol = 0;
 264          return curret = END_OF_LINE;
 265        } else {
 266  // All but first line can be continuations.
 267          curcol = 6;
 268        }
 269      } else {
 270        curcol = 0;
 271      }
 272    }
 273  // Skip trailing blanks.
 274    SKIP_SPACE;
 275    if (CUR_COL == '\0') {
 276  // No symbol left at card, scan again on next card.
 277      curlin++;
 278      curcol = START_OF_LINE;
 279      return curret = scan_part ();
 280    }
 281  // Mark start of lexeme for messages.
 282    prelin = curlin;
 283    precol = curcol;
 284    if (islower (CUR_COL)) {
 285  // A letter, possibly a radix.
 286      if (CUR_COL == 'b') {
 287        SAVE_POS;
 288        next_col (TRUE);
 289        if (CUR_COL != '\'') {
 290          RESTORE_POS;
 291        } else {
 292  // Radix 2 number.
 293          ADD_CHR ('0');
 294          ADD_CHR ('b');
 295          next_col (TRUE);
 296          while (CUR_COL != '\'' && CUR_COL != '\0') {
 297            if (CUR_COL != '0' && CUR_COL != '1') {
 298              SCANER (2510, "invalid binary digit", NULL);
 299              return curret = END_OF_MODULE;
 300            }
 301            ADD_LEX_NEXT;
 302          }
 303          if (CUR_COL != '\'') {
 304            SCANER (2511, "invalid denotation", NULL);
 305            return curret = END_OF_MODULE;
 306          }
 307          next_col (TRUE);
 308          return curret = INT_NUMBER;
 309        }
 310      } else if (CUR_COL == 'x') {
 311        SAVE_POS;
 312        next_col (TRUE);
 313        if (CUR_COL != '\'') {
 314          RESTORE_POS;
 315        } else {
 316  // Radix 16 number.
 317          ADD_CHR ('0');
 318          ADD_CHR ('x');
 319          next_col (TRUE);
 320          while (CUR_COL != '\'' && CUR_COL != '\0') {
 321            if (!isxdigit (CUR_COL)) {
 322              SCANER (2512, "invalid hex digit", NULL);
 323              return curret = END_OF_MODULE;
 324            }
 325            ADD_LEX_NEXT;
 326          }
 327          if (CUR_COL != '\'') {
 328            SCANER (2513, "invalid denotation", NULL);
 329            return curret = END_OF_MODULE;
 330          }
 331          next_col (TRUE);
 332          return curret = INT_NUMBER;
 333        }
 334      }
 335  // Fetch identifier or keyword.
 336      while (islower (CUR_COL) || isdigit (CUR_COL) || CUR_COL == '_') {
 337        ADD_LEX;
 338  //    next_col (FALSE); // End-of-card is end of symbol.
 339        next_col (TRUE);
 340      }
 341  // END, END DO, END IF, END WHILE
 342      if (TOKEN ("end")) {
 343        SKIP_SPACE;
 344        while (islower (CUR_COL)) {
 345          ADD_LEX_NEXT;
 346        }
 347      }
 348  // ELSE IF
 349      if (TOKEN ("else")) {
 350        SKIP_SPACE;
 351        while (islower (CUR_COL)) {
 352          ADD_LEX_NEXT;
 353        }
 354      }
 355  // Catch declarers here.
 356      if (TOKEN ("integer")) {
 357        SKIP_SPACE;
 358        if (CUR_COL == '*') {
 359          ADD_LEX_NEXT;
 360          SKIP_SPACE;
 361          CHECKDIGIT (2514, CUR_COL);
 362          while (isdigit (CUR_COL)) {
 363            ADD_LEX_NEXT;
 364          }
 365        }
 366        return curret = DECLAR;
 367      }
 368      if (TOKEN ("logical")) {
 369        SKIP_SPACE;
 370        if (CUR_COL == '*') {
 371          ADD_LEX_NEXT;
 372          SKIP_SPACE;
 373          CHECKDIGIT (2515, CUR_COL);
 374          while (isdigit (CUR_COL)) {
 375            ADD_LEX_NEXT;
 376          }
 377        }
 378        return curret = DECLAR;
 379      }
 380      if (TOKEN ("real")) {
 381        SKIP_SPACE;
 382        if (CUR_COL == '*') {
 383          ADD_LEX_NEXT;
 384          SKIP_SPACE;
 385          CHECKDIGIT (2516, CUR_COL);
 386          while (isdigit (CUR_COL)) {
 387            ADD_LEX_NEXT;
 388          }
 389        }
 390        return curret = DECLAR;
 391      }
 392      if (TOKEN ("complex")) {
 393        SKIP_SPACE;
 394        if (CUR_COL == '*') {
 395          ADD_LEX_NEXT;
 396          SKIP_SPACE;
 397          CHECKDIGIT (2517, CUR_COL);
 398          while (isdigit (CUR_COL)) {
 399            ADD_LEX_NEXT;
 400          }
 401        }
 402        return curret = DECLAR;
 403      }
 404      if (TOKEN ("character")) {
 405        SKIP_SPACE;
 406        if (CUR_COL == '*') {
 407          ADD_LEX_NEXT;
 408          SKIP_SPACE;
 409          if (CUR_COL == '(') {
 410            ADD_LEX_NEXT;
 411            SKIP_SPACE;
 412            if (CUR_COL == '*') {
 413              ADD_LEX_NEXT;
 414            } else {
 415              while (islower (CUR_COL) || isdigit (CUR_COL) || CUR_COL == '_') {
 416                ADD_LEX_NEXT;
 417              }
 418            }
 419            SKIP_SPACE;
 420            if (CUR_COL == ')') {
 421              ADD_LEX_NEXT;
 422            }
 423          } else {
 424            CHECKDIGIT (2518, CUR_COL);
 425            while (isdigit (CUR_COL)) {
 426              ADD_LEX_NEXT;
 427            }
 428          }
 429        }
 430        return curret = DECLAR;
 431      }
 432  // Word.
 433      return curret = WORD;
 434    } else if (isdigit (CUR_COL)) {
 435      if (curcol < 5) {
 436  // Label.
 437        while (curcol < 5 && isdigit (CUR_COL)) {
 438          ADD_LEX_NEXT;
 439        }
 440        return curret = LABEL;
 441      }
 442  // Number.
 443      while (isdigit (CUR_COL)) {
 444        ADD_LEX_NEXT;
 445      }
 446      if (CUR_COL == 'h') {
 447  // Hollerith operand
 448        return scan_hollerith ();
 449      }
 450      if (CUR_COL != '.' && !EXPONENT (curcol)) {
 451        return curret = INT_NUMBER;
 452      } else {
 453        if (CUR_COL == '.') {
 454  // Special symbols .XYZZY. like (n/2.eq.1).
 455          SAVE_POS;
 456          next_col (TRUE);
 457          while (islower (CUR_COL)) {
 458            next_col (TRUE);
 459          }
 460          if (CUR_COL == '.') {
 461            RESTORE_POS;
 462            return curret = INT_NUMBER;
 463          } else {
 464            RESTORE_POS;
 465          }
 466  // Fraction.
 467          ADD_LEX_NEXT;
 468          SKIP_SPACE;
 469          while (isdigit (CUR_COL)) {
 470            ADD_LEX_NEXT;
 471            SKIP_SPACE;
 472          }
 473        }
 474  // Exponent part.
 475        return scan_exponent ();
 476      }
 477    } else if (CUR_COL == '.') {
 478      ADD_LEX_NEXT;
 479      SKIP_SPACE;
 480  // Fraction.
 481      if (isdigit (CUR_COL)) {
 482        while (isdigit (CUR_COL)) {
 483          ADD_LEX_NEXT;
 484          SKIP_SPACE;
 485        }
 486  // Exponent part.
 487        return scan_exponent ();
 488      }
 489  // Special symbols .XYZZY. .
 490      if (CUR_COL == '.') {
 491        next_col (TRUE);
 492      }
 493      while (islower (CUR_COL)) {
 494        ADD_LEX_NEXT;
 495      }
 496      if (CUR_COL == '.') {
 497        ADD_LEX_NEXT;
 498      } else {
 499        SCANER (2519, "stray dot", NULL);
 500      }
 501    } else if (CUR_COL == '*') {
 502      ADD_LEX_NEXT;
 503      if (CUR_COL == '*') {
 504        ADD_LEX_NEXT;
 505      } else if (CUR_COL == '%') {
 506        ADD_LEX_NEXT;
 507      }
 508    } else if (CUR_COL == '/') {
 509  // Concatenation.
 510      ADD_LEX_NEXT;
 511      if (CUR_COL == '/') {
 512        ADD_LEX_NEXT;
 513      } else if (CUR_COL == '=') {
 514        ADD_LEX_NEXT;
 515      }
 516    } else if (CUR_COL == '\'') {
 517  // Character string.
 518      return scan_string ();
 519    } else if (CUR_COL == '"') {
 520  // Character string.
 521      return scan_string_alt ();
 522    } else if (CUR_COL == '=') {
 523      ADD_LEX_NEXT;
 524      if (CUR_COL == '=') {
 525        ADD_LEX_NEXT;
 526      }
 527    } else if (CUR_COL == '!') {
 528      ADD_LEX_NEXT;
 529      if (CUR_COL == '=') {
 530        ADD_LEX_NEXT;
 531      }
 532    } else if (CUR_COL == '<') {
 533      ADD_LEX_NEXT;
 534      if (CUR_COL == '=') {
 535        ADD_LEX_NEXT;
 536      }
 537    } else if (CUR_COL == '>') {
 538      ADD_LEX_NEXT;
 539      if (CUR_COL == '=') {
 540        ADD_LEX_NEXT;
 541      }
 542    } else if (CUR_COL != '\0') {
 543  // Something else.
 544      ADD_LEX_NEXT;
 545    } else {
 546  // No symbol left at card, scan again on next card.
 547      curlin++;
 548      curcol = START_OF_LINE;
 549      return curret = scan_part ();
 550    }
 551    return curret = LEXEME;
 552  }
 553  
 554  int_4 scan (char *expect)
 555  {
 556    int_4 rc;
 557    strcpy (prelex, curlex);
 558    preret = curret;
 559    RECCLR (curlex);
 560    rc = scan_part ();
 561    if (rc == END_OF_LINE || rc == END_OF_MODULE) {
 562      return curret = rc;
 563    }
 564    if (rc == LABEL) {
 565      return curret = rc;
 566    }
 567    if (TOKEN ("double")) {
 568      scan_part ();
 569      if (TOKEN ("precision")) {
 570        strcpy (curlex, "real*8");
 571      } else if (TOKEN ("complex")) {
 572        strcpy (curlex, "complex*16");
 573      } else {
 574        strcpy (curlex, "real*8");
 575        EXPECT (2520, "precision");
 576      }
 577      return curret = DECLAR;
 578    } else if (TOKEN ("go")) {
 579      scan_part ();
 580      if (!TOKEN ("to")) {
 581        SCANER (2521, "invalid goto", NULL);
 582      }
 583      strcpy (curlex, "goto");
 584      return curret = WORD;
 585    }
 586    if (expect != NULL && !EQUAL (curlex, expect)) {
 587      RECORD str;
 588      _srecordf (str, "%s but found %s", expect, curlex);
 589      EXPECT (2522, str);
 590      return curret = ERR;
 591    }
 592    return curret = rc;
 593  }
 594  
 595  int_4 scan_fmt (void)
 596  {
 597    int_4 k = 0;
 598    strcpy (prelex, curlex);
 599    preret = curret;
 600    RECCLR (curlex);
 601    source[curlin].proc = nprocs;
 602  // Skip empty lines.
 603    if (curcol == START_OF_LINE && curlin < nftnlines) {
 604      if (POS (0) == '\0') {
 605        curlin++;
 606        return curret = scan_fmt ();
 607      }
 608    }
 609    if (curcol > START_OF_LINE && CUR_COL == '\0') {
 610  // Next scan starts at new line.
 611      curlin++;
 612      curcol = START_OF_LINE;
 613    }
 614    if (curlin >= nftnlines) {
 615      return curret = END_OF_MODULE;
 616    }
 617    if (curcol == START_OF_LINE) {
 618      while (IS_COMMENT (POS (0))) {
 619        curlin++;
 620        if (curlin >= nftnlines) {
 621          return curret = END_OF_MODULE;
 622        }
 623      }
 624      if (source[curlin].isn > 1) {
 625        if (POS (5) == ' ') {
 626          curcol = 0;
 627          return curret = END_OF_LINE;
 628        } else {
 629  // All but first line can be continuations.
 630          curcol = 6;
 631        }
 632      } else {
 633        curcol = 0;
 634      }
 635    }
 636  // Skip trailing blanks.
 637    SKIP_SPACE;
 638    if (CUR_COL == '\0') {
 639  // No symbol left at card, scan again on next card.
 640      curlin++;
 641      curcol = START_OF_LINE;
 642      return curret = scan_fmt ();
 643    }
 644  // Mark start of lexeme for messages.
 645    prelin = curlin;
 646    precol = curcol;
 647    if (islower (CUR_COL)) {
 648  // Format specifier.
 649      while (islower (CUR_COL) || isdigit (CUR_COL)) {
 650        ADD_RAW_NEXT;
 651      }
 652      if (CUR_COL == '.') {
 653        ADD_LEX_NEXT;
 654      }
 655      while (isdigit (CUR_COL)) {
 656        ADD_LEX_NEXT;
 657      }
 658      if (islower (CUR_COL)) {
 659        ADD_RAW_NEXT;
 660        while (isdigit (CUR_COL)) {
 661          ADD_LEX_NEXT;
 662        }
 663      }
 664      return curret = LEXEME;
 665    } else if (isdigit (CUR_COL)) {
 666  // Number.
 667      while (isdigit (CUR_COL)) {
 668        ADD_LEX_NEXT;
 669      }
 670      if (CUR_COL == 'h') {
 671  // Hollerith format item
 672        return scan_hollerith ();
 673      } else {
 674        return curret = INT_NUMBER;
 675      }
 676    } else if (CUR_COL == '\'') {
 677  // Character string.
 678      return scan_string ();
 679    } else if (CUR_COL == '"') {
 680  // Character string.
 681      return scan_string_alt ();
 682    } else if (CUR_COL != '\0') {
 683  // Something else.
 684      ADD_LEX_NEXT;
 685    } else {
 686  // No symbol left at card, scan again on next card.
 687      curlin++;
 688      curcol = START_OF_LINE;
 689      return curret = scan_fmt ();
 690    }
 691    return curret = LEXEME;
 692  }
 693  
     


© 2002-2024 J.M. van der Veer (jmvdveer@xs4all.nl)