genie-regex.c
1 //! @file genie-regex.c
2 //! @author J. Marcel van der Veer
3 //!
4 //! @section Copyright
5 //!
6 //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
7 //! Copyright 2001-2023 J. Marcel van der Veer [algol68g@xs4all.nl].
8 //!
9 //! @section License
10 //!
11 //! This program is free software; you can redistribute it and/or modify it
12 //! under the terms of the GNU General Public License as published by the
13 //! Free Software Foundation; either version 3 of the License, or
14 //! (at your option) any later version.
15 //!
16 //! This program is distributed in the hope that it will be useful, but
17 //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 //! more details. You should have received a copy of the GNU General Public
20 //! License along with this program. If not, see [http://www.gnu.org/licenses/].
21
22 //! @section Synopsis
23 //!
24 //! Low-level regular expression routines.
25
26 #include "a68g.h"
27 #include "a68g-genie.h"
28 #include "a68g-prelude.h"
29 #include "a68g-mp.h"
30 #include "a68g-double.h"
31 #include "a68g-transput.h"
32
33 //! @brief grep in string (STRING, STRING, REF INT, REF INT) INT.
34
35 int grep_in_string (char *pat, char *str, int *start, int *end)
36 {
37 int rc, nmatch, k, max_k, widest;
38 regex_t compiled;
39 regmatch_t *matches;
40 rc = regcomp (&compiled, pat, REG_NEWLINE | REG_EXTENDED);
41 if (rc != 0) {
42 regfree (&compiled);
43 return rc;
44 }
45 nmatch = (int) (RE_NSUB (&compiled));
46 if (nmatch == 0) {
47 nmatch = 1;
48 }
49 matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
50 if (nmatch > 0 && matches == NO_REGMATCH) {
51 regfree (&compiled);
52 return 2;
53 }
54 rc = regexec (&compiled, str, (size_t) nmatch, matches, 0);
55 if (rc != 0) {
56 regfree (&compiled);
57 return rc;
58 }
59 // Find widest match. Do not assume it is the first one.
60 widest = 0;
61 max_k = 0;
62 for (k = 0; k < nmatch; k++) {
63 int dif = (int) RM_EO (&matches[k]) - (int) RM_SO (&matches[k]);
64 if (dif > widest) {
65 widest = dif;
66 max_k = k;
67 }
68 }
69 if (start != NO_INT) {
70 (*start) = (int) RM_SO (&matches[max_k]);
71 }
72 if (end != NO_INT) {
73 (*end) = (int) RM_EO (&matches[max_k]);
74 }
75 a68_free (matches);
76 return 0;
77 }
78
79 //! @brief Return code for regex interface.
80
81 void push_grep_rc (NODE_T * p, int rc)
82 {
83 switch (rc) {
84 case 0:
85 {
86 PUSH_VALUE (p, 0, A68_INT);
87 return;
88 }
89 case REG_NOMATCH:
90 {
91 PUSH_VALUE (p, 1, A68_INT);
92 return;
93 }
94 case REG_ESPACE:
95 {
96 PUSH_VALUE (p, 3, A68_INT);
97 return;
98 }
99 default:
100 {
101 PUSH_VALUE (p, 2, A68_INT);
102 return;
103 }
104 }
105 }
106
107 //! @brief PROC grep in string = (STRING, STRING, REF INT, REF INT) INT
108
109 void genie_grep_in_string (NODE_T * p)
110 {
111 A68_REF ref_pat, ref_beg, ref_end, ref_str, row;
112 A68_ARRAY *arr;
113 A68_TUPLE *tup;
114 int rc, nmatch, k, max_k, widest;
115 regex_t compiled;
116 regmatch_t *matches;
117 POP_REF (p, &ref_end);
118 POP_REF (p, &ref_beg);
119 POP_REF (p, &ref_str);
120 POP_REF (p, &ref_pat);
121 row = *(A68_REF *) & ref_str;
122 CHECK_INIT (p, INITIALISED (&row), M_ROWS);
123 GET_DESCRIPTOR (arr, tup, &row);
124 reset_transput_buffer (PATTERN_BUFFER);
125 reset_transput_buffer (STRING_BUFFER);
126 add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
127 add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) & ref_str);
128 rc = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
129 if (rc != 0) {
130 push_grep_rc (p, rc);
131 regfree (&compiled);
132 return;
133 }
134 nmatch = (int) (RE_NSUB (&compiled));
135 if (nmatch == 0) {
136 nmatch = 1;
137 }
138 matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
139 if (nmatch > 0 && matches == NULL) {
140 rc = 2;
141 PUSH_VALUE (p, rc, A68_INT);
142 regfree (&compiled);
143 return;
144 }
145 rc = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, 0);
146 if (rc != 0) {
147 push_grep_rc (p, rc);
148 regfree (&compiled);
149 return;
150 }
151 // Find widest match. Do not assume it is the first one.
152 widest = 0;
153 max_k = 0;
154 for (k = 0; k < nmatch; k++) {
155 int dif = (int) (RM_EO (&(matches[k]))) - (int) (RM_SO (&(matches[k])));
156 if (dif > widest) {
157 widest = dif;
158 max_k = k;
159 }
160 }
161 if (!IS_NIL (ref_beg)) {
162 A68_INT *i = DEREF (A68_INT, &ref_beg);
163 STATUS (i) = INIT_MASK;
164 VALUE (i) = (int) (RM_SO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup));
165 }
166 if (!IS_NIL (ref_end)) {
167 A68_INT *i = DEREF (A68_INT, &ref_end);
168 STATUS (i) = INIT_MASK;
169 VALUE (i) = (int) (RM_EO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup)) - 1;
170 }
171 a68_free (matches);
172 push_grep_rc (p, 0);
173 }
174
175 //! @brief PROC grep in substring = (STRING, STRING, REF INT, REF INT) INT
176
177 void genie_grep_in_substring (NODE_T * p)
178 {
179 A68_REF ref_pat, ref_beg, ref_end, ref_str, row;
180 A68_ARRAY *arr;
181 A68_TUPLE *tup;
182 int rc, nmatch, k, max_k, widest;
183 regex_t compiled;
184 regmatch_t *matches;
185 POP_REF (p, &ref_end);
186 POP_REF (p, &ref_beg);
187 POP_REF (p, &ref_str);
188 POP_REF (p, &ref_pat);
189 row = *(A68_REF *) & ref_str;
190 CHECK_INIT (p, INITIALISED (&row), M_ROWS);
191 GET_DESCRIPTOR (arr, tup, &row);
192 reset_transput_buffer (PATTERN_BUFFER);
193 reset_transput_buffer (STRING_BUFFER);
194 add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
195 add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) & ref_str);
196 rc = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
197 if (rc != 0) {
198 push_grep_rc (p, rc);
199 regfree (&compiled);
200 return;
201 }
202 nmatch = (int) (RE_NSUB (&compiled));
203 if (nmatch == 0) {
204 nmatch = 1;
205 }
206 matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
207 if (nmatch > 0 && matches == NULL) {
208 rc = 2;
209 PUSH_VALUE (p, rc, A68_INT);
210 regfree (&compiled);
211 return;
212 }
213 rc = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, REG_NOTBOL);
214 if (rc != 0) {
215 push_grep_rc (p, rc);
216 regfree (&compiled);
217 return;
218 }
219 // Find widest match. Do not assume it is the first one.
220 widest = 0;
221 max_k = 0;
222 for (k = 0; k < nmatch; k++) {
223 int dif = (int) (RM_EO (&(matches[k]))) - (int) (RM_SO (&(matches[k])));
224 if (dif > widest) {
225 widest = dif;
226 max_k = k;
227 }
228 }
229 if (!IS_NIL (ref_beg)) {
230 A68_INT *i = DEREF (A68_INT, &ref_beg);
231 STATUS (i) = INIT_MASK;
232 VALUE (i) = (int) (RM_SO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup));
233 }
234 if (!IS_NIL (ref_end)) {
235 A68_INT *i = DEREF (A68_INT, &ref_end);
236 STATUS (i) = INIT_MASK;
237 VALUE (i) = (int) (RM_EO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup)) - 1;
238 }
239 a68_free (matches);
240 push_grep_rc (p, 0);
241 }
242
243 //! @brief PROC sub in string = (STRING, STRING, REF STRING) INT
244
245 void genie_sub_in_string (NODE_T * p)
246 {
247 A68_REF ref_pat, ref_rep, ref_str;
248 int rc, nmatch, k, max_k, widest, begin, end;
249 char *txt;
250 regex_t compiled;
251 regmatch_t *matches;
252 POP_REF (p, &ref_str);
253 POP_REF (p, &ref_rep);
254 POP_REF (p, &ref_pat);
255 if (IS_NIL (ref_str)) {
256 PUSH_VALUE (p, 3, A68_INT);
257 return;
258 }
259 reset_transput_buffer (STRING_BUFFER);
260 reset_transput_buffer (REPLACE_BUFFER);
261 reset_transput_buffer (PATTERN_BUFFER);
262 add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
263 add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) DEREF (A68_REF, &ref_str));
264 rc = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
265 if (rc != 0) {
266 push_grep_rc (p, rc);
267 regfree (&compiled);
268 return;
269 }
270 nmatch = (int) (RE_NSUB (&compiled));
271 if (nmatch == 0) {
272 nmatch = 1;
273 }
274 matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
275 if (nmatch > 0 && matches == NULL) {
276 PUSH_VALUE (p, rc, A68_INT);
277 regfree (&compiled);
278 return;
279 }
280 rc = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, 0);
281 if (rc != 0) {
282 push_grep_rc (p, rc);
283 regfree (&compiled);
284 return;
285 }
286 // Find widest match. Do not assume it is the first one.
287 widest = 0;
288 max_k = 0;
289 for (k = 0; k < nmatch; k++) {
290 int dif = (int) RM_EO (&(matches[k])) - (int) RM_SO (&(matches[k]));
291 if (dif > widest) {
292 widest = dif;
293 max_k = k;
294 }
295 }
296 begin = (int) RM_SO (&(matches[max_k])) + 1;
297 end = (int) RM_EO (&(matches[max_k]));
298 // Substitute text.
299 txt = get_transput_buffer (STRING_BUFFER);
300 for (k = 0; k < begin - 1; k++) {
301 plusab_transput_buffer (p, REPLACE_BUFFER, txt[k]);
302 }
303 add_a_string_transput_buffer (p, REPLACE_BUFFER, (BYTE_T *) & ref_rep);
304 for (k = end; k < get_transput_buffer_size (STRING_BUFFER); k++) {
305 plusab_transput_buffer (p, REPLACE_BUFFER, txt[k]);
306 }
307 *DEREF (A68_REF, &ref_str) = c_to_a_string (p, get_transput_buffer (REPLACE_BUFFER), DEFAULT_WIDTH);
308 a68_free (matches);
309 push_grep_rc (p, 0);
310 }