genie-regex.c

     
   1  //! @file genie-regex.c
   2  //! @author J. Marcel van der Veer
   3  
   4  //! @section Copyright
   5  //!
   6  //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
   7  //! Copyright 2001-2024 J. Marcel van der Veer [algol68g@xs4all.nl].
   8  
   9  //! @section License
  10  //!
  11  //! This program is free software; you can redistribute it and/or modify it 
  12  //! under the terms of the GNU General Public License as published by the 
  13  //! Free Software Foundation; either version 3 of the License, or 
  14  //! (at your option) any later version.
  15  //!
  16  //! This program is distributed in the hope that it will be useful, but 
  17  //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  18  //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
  19  //! more details. You should have received a copy of the GNU General Public 
  20  //! License along with this program. If not, see [http://www.gnu.org/licenses/].
  21  
  22  //! @section Synopsis
  23  //!
  24  //! Low-level regular expression routines.
  25  
  26  #include "a68g.h"
  27  #include "a68g-genie.h"
  28  #include "a68g-prelude.h"
  29  #include "a68g-transput.h"
  30  
  31  //! @brief Return code for regex interface.
  32  
  33  static void push_grep_ret (NODE_T * p, int ret)
  34  {
  35    switch (ret) {
  36    case 0: {
  37        PUSH_VALUE (p, 0, A68_INT);
  38        return;
  39      }
  40    case REG_NOMATCH: {
  41        PUSH_VALUE (p, 1, A68_INT);
  42        return;
  43      }
  44    case REG_ESPACE: {
  45        PUSH_VALUE (p, 3, A68_INT);
  46        return;
  47      }
  48    default: {
  49        PUSH_VALUE (p, 2, A68_INT);
  50        return;
  51      }
  52    }
  53  }
  54  
  55  //! @brief grep in string (STRING, STRING, REF INT, REF INT) INT.
  56  
  57  int grep_in_string (char *pat, char *str, int *start, int *end)
  58  {
  59    regex_t compiled;
  60    int ret = regcomp (&compiled, pat, REG_NEWLINE | REG_EXTENDED);
  61    if (ret != 0) {
  62      regfree (&compiled);
  63      return ret;
  64    }
  65    int nmatch = (int) (RE_NSUB (&compiled));
  66    if (nmatch == 0) {
  67      nmatch = 1;
  68    }
  69    regmatch_t *matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
  70    if (nmatch > 0 && matches == NO_REGMATCH) {
  71      regfree (&compiled);
  72      return 2;
  73    }
  74    ret = regexec (&compiled, str, (size_t) nmatch, matches, 0);
  75    if (ret != 0) {
  76      regfree (&compiled);
  77      return ret;
  78    }
  79  // Find widest match. Do not assume it is the first one.
  80    int widest = 0, max_k = 0;
  81    for (int k = 0; k < nmatch; k++) {
  82      int dif = (int) RM_EO (&matches[k]) - (int) RM_SO (&matches[k]);
  83      if (dif > widest) {
  84        widest = dif;
  85        max_k = k;
  86      }
  87    }
  88    if (start != NO_INT) {
  89      (*start) = (int) RM_SO (&matches[max_k]);
  90    }
  91    if (end != NO_INT) {
  92      (*end) = (int) RM_EO (&matches[max_k]);
  93    }
  94    a68_free (matches);
  95    return 0;
  96  }
  97  
  98  //! @brief PROC grep in string = (STRING, STRING, REF INT, REF INT) INT
  99  
 100  void genie_grep_in_string (NODE_T * p)
 101  {
 102    A68_REF ref_pat, ref_beg, ref_end, ref_str;
 103    POP_REF (p, &ref_end);
 104    POP_REF (p, &ref_beg);
 105    POP_REF (p, &ref_str);
 106    POP_REF (p, &ref_pat);
 107    A68_REF row = *(A68_REF *) & ref_str;
 108    CHECK_INIT (p, INITIALISED (&row), M_ROWS);
 109    A68_ARRAY *arr; A68_TUPLE *tup;
 110    GET_DESCRIPTOR (arr, tup, &row);
 111    reset_transput_buffer (PATTERN_BUFFER);
 112    reset_transput_buffer (STRING_BUFFER);
 113    add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
 114    add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) & ref_str);
 115    regex_t compiled;
 116    int ret = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
 117    if (ret != 0) {
 118      push_grep_ret (p, ret);
 119      regfree (&compiled);
 120      return;
 121    }
 122    int nmatch = (int) (RE_NSUB (&compiled));
 123    if (nmatch == 0) {
 124      nmatch = 1;
 125    }
 126    regmatch_t *matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
 127    if (nmatch > 0 && matches == NULL) {
 128      ret = 2;
 129      PUSH_VALUE (p, ret, A68_INT);
 130      regfree (&compiled);
 131      return;
 132    }
 133    ret = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, 0);
 134    if (ret != 0) {
 135      push_grep_ret (p, ret);
 136      regfree (&compiled);
 137      return;
 138    }
 139  // Find widest match. Do not assume it is the first one.
 140    int widest = 0, max_k = 0;
 141    for (int k = 0; k < nmatch; k++) {
 142      int dif = (int) (RM_EO (&(matches[k]))) - (int) (RM_SO (&(matches[k])));
 143      if (dif > widest) {
 144        widest = dif;
 145        max_k = k;
 146      }
 147    }
 148    if (!IS_NIL (ref_beg)) {
 149      A68_INT *i = DEREF (A68_INT, &ref_beg);
 150      STATUS (i) = INIT_MASK;
 151      VALUE (i) = (int) (RM_SO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup));
 152    }
 153    if (!IS_NIL (ref_end)) {
 154      A68_INT *i = DEREF (A68_INT, &ref_end);
 155      STATUS (i) = INIT_MASK;
 156      VALUE (i) = (int) (RM_EO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup)) - 1;
 157    }
 158    a68_free (matches);
 159    push_grep_ret (p, 0);
 160  }
 161  
 162  //! @brief PROC grep in substring = (STRING, STRING, REF INT, REF INT) INT
 163  
 164  void genie_grep_in_substring (NODE_T * p)
 165  {
 166    A68_REF ref_pat, ref_beg, ref_end, ref_str;
 167    POP_REF (p, &ref_end);
 168    POP_REF (p, &ref_beg);
 169    POP_REF (p, &ref_str);
 170    POP_REF (p, &ref_pat);
 171    A68_REF row = *(A68_REF *) & ref_str;
 172    CHECK_INIT (p, INITIALISED (&row), M_ROWS);
 173    A68_ARRAY *arr; A68_TUPLE *tup;
 174    GET_DESCRIPTOR (arr, tup, &row);
 175    reset_transput_buffer (PATTERN_BUFFER);
 176    reset_transput_buffer (STRING_BUFFER);
 177    add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
 178    add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) & ref_str);
 179    regex_t compiled;
 180    int ret = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
 181    if (ret != 0) {
 182      push_grep_ret (p, ret);
 183      regfree (&compiled);
 184      return;
 185    }
 186    int nmatch = (int) (RE_NSUB (&compiled));
 187    if (nmatch == 0) {
 188      nmatch = 1;
 189    }
 190    regmatch_t *matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
 191    if (nmatch > 0 && matches == NULL) {
 192      ret = 2;
 193      PUSH_VALUE (p, ret, A68_INT);
 194      regfree (&compiled);
 195      return;
 196    }
 197    ret = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, REG_NOTBOL);
 198    if (ret != 0) {
 199      push_grep_ret (p, ret);
 200      regfree (&compiled);
 201      return;
 202    }
 203  // Find widest match. Do not assume it is the first one.
 204    int widest = 0, max_k = 0;
 205    for (int k = 0; k < nmatch; k++) {
 206      int dif = (int) (RM_EO (&(matches[k]))) - (int) (RM_SO (&(matches[k])));
 207      if (dif > widest) {
 208        widest = dif;
 209        max_k = k;
 210      }
 211    }
 212    if (!IS_NIL (ref_beg)) {
 213      A68_INT *i = DEREF (A68_INT, &ref_beg);
 214      STATUS (i) = INIT_MASK;
 215      VALUE (i) = (int) (RM_SO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup));
 216    }
 217    if (!IS_NIL (ref_end)) {
 218      A68_INT *i = DEREF (A68_INT, &ref_end);
 219      STATUS (i) = INIT_MASK;
 220      VALUE (i) = (int) (RM_EO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup)) - 1;
 221    }
 222    a68_free (matches);
 223    push_grep_ret (p, 0);
 224  }
 225  
 226  //! @brief PROC sub in string = (STRING, STRING, REF STRING) INT
 227  
 228  void genie_sub_in_string (NODE_T * p)
 229  {
 230    A68_REF ref_pat, ref_rep, ref_str;
 231    POP_REF (p, &ref_str);
 232    POP_REF (p, &ref_rep);
 233    POP_REF (p, &ref_pat);
 234    if (IS_NIL (ref_str)) {
 235      PUSH_VALUE (p, 3, A68_INT);
 236      return;
 237    }
 238    reset_transput_buffer (STRING_BUFFER);
 239    reset_transput_buffer (REPLACE_BUFFER);
 240    reset_transput_buffer (PATTERN_BUFFER);
 241    add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
 242    add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) DEREF (A68_REF, &ref_str));
 243    regex_t compiled;
 244    int ret = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
 245    if (ret != 0) {
 246      push_grep_ret (p, ret);
 247      regfree (&compiled);
 248      return;
 249    }
 250    int nmatch = (int) (RE_NSUB (&compiled));
 251    if (nmatch == 0) {
 252      nmatch = 1;
 253    }
 254    regmatch_t *matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
 255    if (nmatch > 0 && matches == NULL) {
 256      PUSH_VALUE (p, ret, A68_INT);
 257      regfree (&compiled);
 258      return;
 259    }
 260    ret = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, 0);
 261    if (ret != 0) {
 262      push_grep_ret (p, ret);
 263      regfree (&compiled);
 264      return;
 265    }
 266  // Find widest match. Do not assume it is the first one.
 267    int widest = 0, max_k = 0;
 268    for (int k = 0; k < nmatch; k++) {
 269      int dif = (int) RM_EO (&(matches[k])) - (int) RM_SO (&(matches[k]));
 270      if (dif > widest) {
 271        widest = dif;
 272        max_k = k;
 273      }
 274    }
 275  // Substitute text.
 276    int begin = (int) RM_SO (&(matches[max_k])) + 1, end = (int) RM_EO (&(matches[max_k]));
 277    char *txt = get_transput_buffer (STRING_BUFFER);
 278    for (int k = 0; k < begin - 1; k++) {
 279      plusab_transput_buffer (p, REPLACE_BUFFER, txt[k]);
 280    }
 281    add_a_string_transput_buffer (p, REPLACE_BUFFER, (BYTE_T *) & ref_rep);
 282    for (int k = end; k < get_transput_buffer_size (STRING_BUFFER); k++) {
 283      plusab_transput_buffer (p, REPLACE_BUFFER, txt[k]);
 284    }
 285    *DEREF (A68_REF, &ref_str) = c_to_a_string (p, get_transput_buffer (REPLACE_BUFFER), DEFAULT_WIDTH);
 286    a68_free (matches);
 287    push_grep_ret (p, 0);
 288  }