genie-regex.c

     
   1  //! @file genie-regex.c
   2  //! @author J. Marcel van der Veer
   3  //!
   4  //! @section Copyright
   5  //!
   6  //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
   7  //! Copyright 2001-2023 J. Marcel van der Veer [algol68g@xs4all.nl].
   8  //!
   9  //! @section License
  10  //!
  11  //! This program is free software; you can redistribute it and/or modify it 
  12  //! under the terms of the GNU General Public License as published by the 
  13  //! Free Software Foundation; either version 3 of the License, or 
  14  //! (at your option) any later version.
  15  //!
  16  //! This program is distributed in the hope that it will be useful, but 
  17  //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  18  //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
  19  //! more details. You should have received a copy of the GNU General Public 
  20  //! License along with this program. If not, see [http://www.gnu.org/licenses/].
  21  
  22  //! @section Synopsis
  23  //!
  24  //! Low-level regular expression routines.
  25  
  26  #include "a68g.h"
  27  #include "a68g-genie.h"
  28  #include "a68g-prelude.h"
  29  #include "a68g-mp.h"
  30  #include "a68g-double.h"
  31  #include "a68g-transput.h"
  32  
  33  //! @brief grep in string (STRING, STRING, REF INT, REF INT) INT.
  34  
  35  int grep_in_string (char *pat, char *str, int *start, int *end)
  36  {
  37    int rc, nmatch, k, max_k, widest;
  38    regex_t compiled;
  39    regmatch_t *matches;
  40    rc = regcomp (&compiled, pat, REG_NEWLINE | REG_EXTENDED);
  41    if (rc != 0) {
  42      regfree (&compiled);
  43      return rc;
  44    }
  45    nmatch = (int) (RE_NSUB (&compiled));
  46    if (nmatch == 0) {
  47      nmatch = 1;
  48    }
  49    matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
  50    if (nmatch > 0 && matches == NO_REGMATCH) {
  51      regfree (&compiled);
  52      return 2;
  53    }
  54    rc = regexec (&compiled, str, (size_t) nmatch, matches, 0);
  55    if (rc != 0) {
  56      regfree (&compiled);
  57      return rc;
  58    }
  59  // Find widest match. Do not assume it is the first one.
  60    widest = 0;
  61    max_k = 0;
  62    for (k = 0; k < nmatch; k++) {
  63      int dif = (int) RM_EO (&matches[k]) - (int) RM_SO (&matches[k]);
  64      if (dif > widest) {
  65        widest = dif;
  66        max_k = k;
  67      }
  68    }
  69    if (start != NO_INT) {
  70      (*start) = (int) RM_SO (&matches[max_k]);
  71    }
  72    if (end != NO_INT) {
  73      (*end) = (int) RM_EO (&matches[max_k]);
  74    }
  75    a68_free (matches);
  76    return 0;
  77  }
  78  
  79  //! @brief Return code for regex interface.
  80  
  81  void push_grep_rc (NODE_T * p, int rc)
  82  {
  83    switch (rc) {
  84    case 0:
  85      {
  86        PUSH_VALUE (p, 0, A68_INT);
  87        return;
  88      }
  89    case REG_NOMATCH:
  90      {
  91        PUSH_VALUE (p, 1, A68_INT);
  92        return;
  93      }
  94    case REG_ESPACE:
  95      {
  96        PUSH_VALUE (p, 3, A68_INT);
  97        return;
  98      }
  99    default:
 100      {
 101        PUSH_VALUE (p, 2, A68_INT);
 102        return;
 103      }
 104    }
 105  }
 106  
 107  //! @brief PROC grep in string = (STRING, STRING, REF INT, REF INT) INT
 108  
 109  void genie_grep_in_string (NODE_T * p)
 110  {
 111    A68_REF ref_pat, ref_beg, ref_end, ref_str, row;
 112    A68_ARRAY *arr;
 113    A68_TUPLE *tup;
 114    int rc, nmatch, k, max_k, widest;
 115    regex_t compiled;
 116    regmatch_t *matches;
 117    POP_REF (p, &ref_end);
 118    POP_REF (p, &ref_beg);
 119    POP_REF (p, &ref_str);
 120    POP_REF (p, &ref_pat);
 121    row = *(A68_REF *) & ref_str;
 122    CHECK_INIT (p, INITIALISED (&row), M_ROWS);
 123    GET_DESCRIPTOR (arr, tup, &row);
 124    reset_transput_buffer (PATTERN_BUFFER);
 125    reset_transput_buffer (STRING_BUFFER);
 126    add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
 127    add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) & ref_str);
 128    rc = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
 129    if (rc != 0) {
 130      push_grep_rc (p, rc);
 131      regfree (&compiled);
 132      return;
 133    }
 134    nmatch = (int) (RE_NSUB (&compiled));
 135    if (nmatch == 0) {
 136      nmatch = 1;
 137    }
 138    matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
 139    if (nmatch > 0 && matches == NULL) {
 140      rc = 2;
 141      PUSH_VALUE (p, rc, A68_INT);
 142      regfree (&compiled);
 143      return;
 144    }
 145    rc = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, 0);
 146    if (rc != 0) {
 147      push_grep_rc (p, rc);
 148      regfree (&compiled);
 149      return;
 150    }
 151  // Find widest match. Do not assume it is the first one.
 152    widest = 0;
 153    max_k = 0;
 154    for (k = 0; k < nmatch; k++) {
 155      int dif = (int) (RM_EO (&(matches[k]))) - (int) (RM_SO (&(matches[k])));
 156      if (dif > widest) {
 157        widest = dif;
 158        max_k = k;
 159      }
 160    }
 161    if (!IS_NIL (ref_beg)) {
 162      A68_INT *i = DEREF (A68_INT, &ref_beg);
 163      STATUS (i) = INIT_MASK;
 164      VALUE (i) = (int) (RM_SO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup));
 165    }
 166    if (!IS_NIL (ref_end)) {
 167      A68_INT *i = DEREF (A68_INT, &ref_end);
 168      STATUS (i) = INIT_MASK;
 169      VALUE (i) = (int) (RM_EO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup)) - 1;
 170    }
 171    a68_free (matches);
 172    push_grep_rc (p, 0);
 173  }
 174  
 175  //! @brief PROC grep in substring = (STRING, STRING, REF INT, REF INT) INT
 176  
 177  void genie_grep_in_substring (NODE_T * p)
 178  {
 179    A68_REF ref_pat, ref_beg, ref_end, ref_str, row;
 180    A68_ARRAY *arr;
 181    A68_TUPLE *tup;
 182    int rc, nmatch, k, max_k, widest;
 183    regex_t compiled;
 184    regmatch_t *matches;
 185    POP_REF (p, &ref_end);
 186    POP_REF (p, &ref_beg);
 187    POP_REF (p, &ref_str);
 188    POP_REF (p, &ref_pat);
 189    row = *(A68_REF *) & ref_str;
 190    CHECK_INIT (p, INITIALISED (&row), M_ROWS);
 191    GET_DESCRIPTOR (arr, tup, &row);
 192    reset_transput_buffer (PATTERN_BUFFER);
 193    reset_transput_buffer (STRING_BUFFER);
 194    add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
 195    add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) & ref_str);
 196    rc = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
 197    if (rc != 0) {
 198      push_grep_rc (p, rc);
 199      regfree (&compiled);
 200      return;
 201    }
 202    nmatch = (int) (RE_NSUB (&compiled));
 203    if (nmatch == 0) {
 204      nmatch = 1;
 205    }
 206    matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
 207    if (nmatch > 0 && matches == NULL) {
 208      rc = 2;
 209      PUSH_VALUE (p, rc, A68_INT);
 210      regfree (&compiled);
 211      return;
 212    }
 213    rc = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, REG_NOTBOL);
 214    if (rc != 0) {
 215      push_grep_rc (p, rc);
 216      regfree (&compiled);
 217      return;
 218    }
 219  // Find widest match. Do not assume it is the first one.
 220    widest = 0;
 221    max_k = 0;
 222    for (k = 0; k < nmatch; k++) {
 223      int dif = (int) (RM_EO (&(matches[k]))) - (int) (RM_SO (&(matches[k])));
 224      if (dif > widest) {
 225        widest = dif;
 226        max_k = k;
 227      }
 228    }
 229    if (!IS_NIL (ref_beg)) {
 230      A68_INT *i = DEREF (A68_INT, &ref_beg);
 231      STATUS (i) = INIT_MASK;
 232      VALUE (i) = (int) (RM_SO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup));
 233    }
 234    if (!IS_NIL (ref_end)) {
 235      A68_INT *i = DEREF (A68_INT, &ref_end);
 236      STATUS (i) = INIT_MASK;
 237      VALUE (i) = (int) (RM_EO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup)) - 1;
 238    }
 239    a68_free (matches);
 240    push_grep_rc (p, 0);
 241  }
 242  
 243  //! @brief PROC sub in string = (STRING, STRING, REF STRING) INT
 244  
 245  void genie_sub_in_string (NODE_T * p)
 246  {
 247    A68_REF ref_pat, ref_rep, ref_str;
 248    int rc, nmatch, k, max_k, widest, begin, end;
 249    char *txt;
 250    regex_t compiled;
 251    regmatch_t *matches;
 252    POP_REF (p, &ref_str);
 253    POP_REF (p, &ref_rep);
 254    POP_REF (p, &ref_pat);
 255    if (IS_NIL (ref_str)) {
 256      PUSH_VALUE (p, 3, A68_INT);
 257      return;
 258    }
 259    reset_transput_buffer (STRING_BUFFER);
 260    reset_transput_buffer (REPLACE_BUFFER);
 261    reset_transput_buffer (PATTERN_BUFFER);
 262    add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
 263    add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) DEREF (A68_REF, &ref_str));
 264    rc = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
 265    if (rc != 0) {
 266      push_grep_rc (p, rc);
 267      regfree (&compiled);
 268      return;
 269    }
 270    nmatch = (int) (RE_NSUB (&compiled));
 271    if (nmatch == 0) {
 272      nmatch = 1;
 273    }
 274    matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
 275    if (nmatch > 0 && matches == NULL) {
 276      PUSH_VALUE (p, rc, A68_INT);
 277      regfree (&compiled);
 278      return;
 279    }
 280    rc = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, 0);
 281    if (rc != 0) {
 282      push_grep_rc (p, rc);
 283      regfree (&compiled);
 284      return;
 285    }
 286  // Find widest match. Do not assume it is the first one.
 287    widest = 0;
 288    max_k = 0;
 289    for (k = 0; k < nmatch; k++) {
 290      int dif = (int) RM_EO (&(matches[k])) - (int) RM_SO (&(matches[k]));
 291      if (dif > widest) {
 292        widest = dif;
 293        max_k = k;
 294      }
 295    }
 296    begin = (int) RM_SO (&(matches[max_k])) + 1;
 297    end = (int) RM_EO (&(matches[max_k]));
 298  // Substitute text.
 299    txt = get_transput_buffer (STRING_BUFFER);
 300    for (k = 0; k < begin - 1; k++) {
 301      plusab_transput_buffer (p, REPLACE_BUFFER, txt[k]);
 302    }
 303    add_a_string_transput_buffer (p, REPLACE_BUFFER, (BYTE_T *) & ref_rep);
 304    for (k = end; k < get_transput_buffer_size (STRING_BUFFER); k++) {
 305      plusab_transput_buffer (p, REPLACE_BUFFER, txt[k]);
 306    }
 307    *DEREF (A68_REF, &ref_str) = c_to_a_string (p, get_transput_buffer (REPLACE_BUFFER), DEFAULT_WIDTH);
 308    a68_free (matches);
 309    push_grep_rc (p, 0);
 310  }