genie-regex.c
1 //! @file genie-regex.c
2 //! @author J. Marcel van der Veer
3
4 //! @section Copyright
5 //!
6 //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
7 //! Copyright 2001-2025 J. Marcel van der Veer [algol68g@xs4all.nl].
8
9 //! @section License
10 //!
11 //! This program is free software; you can redistribute it and/or modify it
12 //! under the terms of the GNU General Public License as published by the
13 //! Free Software Foundation; either version 3 of the License, or
14 //! (at your option) any later version.
15 //!
16 //! This program is distributed in the hope that it will be useful, but
17 //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 //! more details. You should have received a copy of the GNU General Public
20 //! License along with this program. If not, see [http://www.gnu.org/licenses/].
21
22 //! @section Synopsis
23 //!
24 //! Low-level regular expression routines.
25
26 #include "a68g.h"
27 #include "a68g-genie.h"
28 #include "a68g-prelude.h"
29 #include "a68g-transput.h"
30
31 //! @brief Return code for regex interface.
32
33 static void push_grep_ret (NODE_T * p, int ret)
34 {
35 switch (ret) {
36 case 0: {
37 PUSH_VALUE (p, 0, A68_INT);
38 return;
39 }
40 case REG_NOMATCH: {
41 PUSH_VALUE (p, 1, A68_INT);
42 return;
43 }
44 case REG_ESPACE: {
45 PUSH_VALUE (p, 3, A68_INT);
46 return;
47 }
48 default: {
49 PUSH_VALUE (p, 2, A68_INT);
50 return;
51 }
52 }
53 }
54
55 //! @brief grep in string (STRING, STRING, REF INT, REF INT) INT.
56
57 int grep_in_string (char *pat, char *str, int *start, int *end)
58 {
59 regex_t compiled;
60 int ret = regcomp (&compiled, pat, REG_NEWLINE | REG_EXTENDED);
61 if (ret != 0) {
62 regfree (&compiled);
63 return ret;
64 }
65 int nmatch = (int) (RE_NSUB (&compiled));
66 if (nmatch == 0) {
67 nmatch = 1;
68 }
69 regmatch_t *matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
70 if (nmatch > 0 && matches == NO_REGMATCH) {
71 regfree (&compiled);
72 return 2;
73 }
74 ret = regexec (&compiled, str, (size_t) nmatch, matches, 0);
75 if (ret != 0) {
76 regfree (&compiled);
77 return ret;
78 }
79 // Find widest match. Do not assume it is the first one.
80 int widest = 0, max_k = 0;
81 for (int k = 0; k < nmatch; k++) {
82 int dif = (int) RM_EO (&matches[k]) - (int) RM_SO (&matches[k]);
83 if (dif > widest) {
84 widest = dif;
85 max_k = k;
86 }
87 }
88 if (start != NO_INT) {
89 (*start) = (int) RM_SO (&matches[max_k]);
90 }
91 if (end != NO_INT) {
92 (*end) = (int) RM_EO (&matches[max_k]);
93 }
94 a68_free (matches);
95 return 0;
96 }
97
98 //! @brief PROC grep in string = (STRING, STRING, REF INT, REF INT) INT
99
100 void genie_grep_in_string (NODE_T * p)
101 {
102 A68_REF ref_pat, ref_beg, ref_end, ref_str;
103 POP_REF (p, &ref_end);
104 POP_REF (p, &ref_beg);
105 POP_REF (p, &ref_str);
106 POP_REF (p, &ref_pat);
107 A68_REF row = *(A68_REF *) & ref_str;
108 CHECK_INIT (p, INITIALISED (&row), M_ROWS);
109 A68_ARRAY *arr; A68_TUPLE *tup;
110 GET_DESCRIPTOR (arr, tup, &row);
111 reset_transput_buffer (PATTERN_BUFFER);
112 reset_transput_buffer (STRING_BUFFER);
113 add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
114 add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) & ref_str);
115 regex_t compiled;
116 int ret = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
117 if (ret != 0) {
118 push_grep_ret (p, ret);
119 regfree (&compiled);
120 return;
121 }
122 int nmatch = (int) (RE_NSUB (&compiled));
123 if (nmatch == 0) {
124 nmatch = 1;
125 }
126 regmatch_t *matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
127 if (nmatch > 0 && matches == NULL) {
128 ret = 2;
129 PUSH_VALUE (p, ret, A68_INT);
130 regfree (&compiled);
131 return;
132 }
133 ret = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, 0);
134 if (ret != 0) {
135 push_grep_ret (p, ret);
136 regfree (&compiled);
137 return;
138 }
139 // Find widest match. Do not assume it is the first one.
140 int widest = 0, max_k = 0;
141 for (int k = 0; k < nmatch; k++) {
142 int dif = (int) (RM_EO (&(matches[k]))) - (int) (RM_SO (&(matches[k])));
143 if (dif > widest) {
144 widest = dif;
145 max_k = k;
146 }
147 }
148 if (!IS_NIL (ref_beg)) {
149 A68_INT *i = DEREF (A68_INT, &ref_beg);
150 STATUS (i) = INIT_MASK;
151 VALUE (i) = (int) (RM_SO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup));
152 }
153 if (!IS_NIL (ref_end)) {
154 A68_INT *i = DEREF (A68_INT, &ref_end);
155 STATUS (i) = INIT_MASK;
156 VALUE (i) = (int) (RM_EO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup)) - 1;
157 }
158 a68_free (matches);
159 push_grep_ret (p, 0);
160 }
161
162 //! @brief PROC grep in substring = (STRING, STRING, REF INT, REF INT) INT
163
164 void genie_grep_in_substring (NODE_T * p)
165 {
166 A68_REF ref_pat, ref_beg, ref_end, ref_str;
167 POP_REF (p, &ref_end);
168 POP_REF (p, &ref_beg);
169 POP_REF (p, &ref_str);
170 POP_REF (p, &ref_pat);
171 A68_REF row = *(A68_REF *) & ref_str;
172 CHECK_INIT (p, INITIALISED (&row), M_ROWS);
173 A68_ARRAY *arr; A68_TUPLE *tup;
174 GET_DESCRIPTOR (arr, tup, &row);
175 reset_transput_buffer (PATTERN_BUFFER);
176 reset_transput_buffer (STRING_BUFFER);
177 add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
178 add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) & ref_str);
179 regex_t compiled;
180 int ret = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
181 if (ret != 0) {
182 push_grep_ret (p, ret);
183 regfree (&compiled);
184 return;
185 }
186 int nmatch = (int) (RE_NSUB (&compiled));
187 if (nmatch == 0) {
188 nmatch = 1;
189 }
190 regmatch_t *matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
191 if (nmatch > 0 && matches == NULL) {
192 ret = 2;
193 PUSH_VALUE (p, ret, A68_INT);
194 regfree (&compiled);
195 return;
196 }
197 ret = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, REG_NOTBOL);
198 if (ret != 0) {
199 push_grep_ret (p, ret);
200 regfree (&compiled);
201 return;
202 }
203 // Find widest match. Do not assume it is the first one.
204 int widest = 0, max_k = 0;
205 for (int k = 0; k < nmatch; k++) {
206 int dif = (int) (RM_EO (&(matches[k]))) - (int) (RM_SO (&(matches[k])));
207 if (dif > widest) {
208 widest = dif;
209 max_k = k;
210 }
211 }
212 if (!IS_NIL (ref_beg)) {
213 A68_INT *i = DEREF (A68_INT, &ref_beg);
214 STATUS (i) = INIT_MASK;
215 VALUE (i) = (int) (RM_SO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup));
216 }
217 if (!IS_NIL (ref_end)) {
218 A68_INT *i = DEREF (A68_INT, &ref_end);
219 STATUS (i) = INIT_MASK;
220 VALUE (i) = (int) (RM_EO (&(matches[max_k]))) + (int) (LOWER_BOUND (tup)) - 1;
221 }
222 a68_free (matches);
223 push_grep_ret (p, 0);
224 }
225
226 //! @brief PROC sub in string = (STRING, STRING, REF STRING) INT
227
228 void genie_sub_in_string (NODE_T * p)
229 {
230 A68_REF ref_pat, ref_rep, ref_str;
231 POP_REF (p, &ref_str);
232 POP_REF (p, &ref_rep);
233 POP_REF (p, &ref_pat);
234 if (IS_NIL (ref_str)) {
235 PUSH_VALUE (p, 3, A68_INT);
236 return;
237 }
238 reset_transput_buffer (STRING_BUFFER);
239 reset_transput_buffer (REPLACE_BUFFER);
240 reset_transput_buffer (PATTERN_BUFFER);
241 add_a_string_transput_buffer (p, PATTERN_BUFFER, (BYTE_T *) & ref_pat);
242 add_a_string_transput_buffer (p, STRING_BUFFER, (BYTE_T *) DEREF (A68_REF, &ref_str));
243 regex_t compiled;
244 int ret = regcomp (&compiled, get_transput_buffer (PATTERN_BUFFER), REG_NEWLINE | REG_EXTENDED);
245 if (ret != 0) {
246 push_grep_ret (p, ret);
247 regfree (&compiled);
248 return;
249 }
250 int nmatch = (int) (RE_NSUB (&compiled));
251 if (nmatch == 0) {
252 nmatch = 1;
253 }
254 regmatch_t *matches = a68_alloc ((size_t) (nmatch * SIZE_ALIGNED (regmatch_t)), __func__, __LINE__);
255 if (nmatch > 0 && matches == NULL) {
256 PUSH_VALUE (p, ret, A68_INT);
257 regfree (&compiled);
258 return;
259 }
260 ret = regexec (&compiled, get_transput_buffer (STRING_BUFFER), (size_t) nmatch, matches, 0);
261 if (ret != 0) {
262 push_grep_ret (p, ret);
263 regfree (&compiled);
264 return;
265 }
266 // Find widest match. Do not assume it is the first one.
267 int widest = 0, max_k = 0;
268 for (int k = 0; k < nmatch; k++) {
269 int dif = (int) RM_EO (&(matches[k])) - (int) RM_SO (&(matches[k]));
270 if (dif > widest) {
271 widest = dif;
272 max_k = k;
273 }
274 }
275 // Substitute text.
276 int begin = (int) RM_SO (&(matches[max_k])) + 1, end = (int) RM_EO (&(matches[max_k]));
277 char *txt = get_transput_buffer (STRING_BUFFER);
278 for (int k = 0; k < begin - 1; k++) {
279 plusab_transput_buffer (p, REPLACE_BUFFER, txt[k]);
280 }
281 add_a_string_transput_buffer (p, REPLACE_BUFFER, (BYTE_T *) & ref_rep);
282 for (int k = end; k < get_transput_buffer_size (STRING_BUFFER); k++) {
283 plusab_transput_buffer (p, REPLACE_BUFFER, txt[k]);
284 }
285 *DEREF (A68_REF, &ref_str) = c_to_a_string (p, get_transput_buffer (REPLACE_BUFFER), DEFAULT_WIDTH);
286 a68_free (matches);
287 push_grep_ret (p, 0);
288 }
© 2002-2025 J.M. van der Veer (jmvdveer@xs4all.nl)
|