Blame


1 2228b436 2024-02-10 benni #ifdef __OpenBSD__
2 2228b436 2024-02-10 benni # define _BSD_SOURCE
3 2228b436 2024-02-10 benni #elif defined(__GNUC__)
4 2228b436 2024-02-10 benni # define _DEFAULT_SOURCE
5 2228b436 2024-02-10 benni # define _GNU_SOURCE
6 2228b436 2024-02-10 benni #endif
7 2228b436 2024-02-10 benni
8 2228b436 2024-02-10 benni #include <stdbool.h>
9 2228b436 2024-02-10 benni #include <stdlib.h>
10 2228b436 2024-02-10 benni #include <string.h>
11 2228b436 2024-02-10 benni #include <unistd.h>
12 2228b436 2024-02-10 benni #include <ctype.h>
13 2228b436 2024-02-10 benni #include <stdio.h>
14 2228b436 2024-02-10 benni #include <err.h>
15 2228b436 2024-02-10 benni
/*
 * Allocate one zero-initialised object of type T with calloc() and yield it
 * as a T *.  The result is NULL when calloc() fails.
 */
#define new(T) ((T *)calloc (1, sizeof (T)))
17 2228b436 2024-02-10 benni
/* Node kinds of the syntax tree built by the syn_*() parsers. */
enum syntax_type {
	S_SUB,		// parenthesised sub-expression: ( ... )
	S_STRING,	// double-quoted string literal
	S_PLUS,		// operand followed by '+'
	S_STAR,		// operand followed by '*'
	S_OPT,		// operand followed by '?'
	S_BINARY,	// two operands joined by a named operator
	S_ANYOF,	// character set: [...]
	S_EXCEPT,	// negated character set: [^...]
	S_EPSILON,	// empty operand
	S_IDENT,	// reference to a name
};
30 2228b436 2024-02-10 benni
/*
 * One node of a parsed syntax tree.  `type` selects which member of the
 * anonymous union is meaningful.
 */
struct syntax {
	enum syntax_type type;
	union {
		// S_SUB, S_PLUS, S_STAR, S_OPT: the single operand.
		struct syntax *syn;
		// S_STRING, S_ANYOF, S_EXCEPT, S_IDENT: heap-allocated text.
		char *string;
		// S_BINARY: operator name and both operands.
		struct {
			const char *op;
			struct syntax *left, *right;
		};
		// NOTE(review): begin/end are not referenced in the visible part
		// of this file; their purpose is unconfirmed.
		struct {
			char begin, end;
		};

	};
};
46 2228b436 2024-02-10 benni
/* A named rule: `name` is a heap-allocated identifier, `syn` its syntax tree. */
struct definition {
	char *name;
	struct syntax *syn;
};
51 2228b436 2024-02-10 benni
/*
 * Visibility selected for the generated members.
 * NOTE(review): the values appear to index visibilities[]; confirm in main().
 */
enum Visibility {
	V_PRIVATE = 0,
	V_PUBLIC = 1,
	V_PACKAGE = 2,
};
57 2228b436 2024-02-10 benni
58 2228b436 2024-02-10 benni static const char *visibilities[] = {
59 2228b436 2024-02-10 benni "private ",
60 2228b436 2024-02-10 benni "public ",
61 2228b436 2024-02-10 benni "",
62 2228b436 2024-02-10 benni };
63 2228b436 2024-02-10 benni
/*
 * Skip leading whitespace.
 *
 * Returns a pointer to the first character of `s` for which isspace() is
 * false; this is the terminating NUL when `s` is empty or all whitespace.
 */
static const char *strip_ws (const char *s)
{
	// <ctype.h> functions require an unsigned char value (or EOF); a plain
	// char may be negative for bytes >= 0x80.  isspace(0) is false, so the
	// loop also stops at the terminator.
	while (isspace ((unsigned char)*s))
		++s;
	return s;
}
71 2228b436 2024-02-10 benni
/* Return true when `s` begins with `prefix` (an empty prefix always matches). */
static bool starts_with (const char *s, const char *prefix)
{
	for (; *prefix != '\0'; ++s, ++prefix) {
		// Also catches `s` ending first: its NUL differs from *prefix.
		if (*s != *prefix)
			return false;
	}
	return true;
}
76 2228b436 2024-02-10 benni
/*
 * Read characters from *s up to (not including) the first unescaped `end`.
 *
 * A backslash escapes the following character, so an escaped terminator does
 * not end the string; the backslash itself is kept in the result.
 *
 * On success returns a heap-allocated, NUL-terminated copy (caller frees) and
 * leaves *s just past the terminator.  If the input ends first, returns NULL
 * with *s on the terminating NUL.  Exits via err() when out of memory.
 */
static char *parse_string (const char **s, char end)
{
	size_t len = 0, cap = 10;
	bool esc = false;
	char *str = malloc (cap + 1);

	if (str == NULL)
		err (1, "malloc");

	for (;;) {
		const char ch = *(*s)++;

		if (ch == end && !esc)
			break;
		if (ch == '\0') {
			// Unterminated: keep the cursor on the NUL and release the
			// partial buffer.
			--*s;
			free (str);
			return NULL;
		}

		esc = !esc && (ch == '\\');

		if (len == cap) {
			char *grown = realloc (str, cap * 2 + 1);

			if (grown == NULL)
				err (1, "realloc");
			str = grown;
			cap *= 2;
		}

		str[len++] = ch;
	}
	str[len] = '\0';
	return str;
}
106 2228b436 2024-02-10 benni
/*
 * Read the longest run of alphabetic characters at *s.
 *
 * Returns a heap-allocated, NUL-terminated copy (caller frees; it is empty
 * when *s does not start with a letter) and advances *s past the run.
 * Exits via err() when out of memory.
 */
static char *parse_ident (const char **s)
{
	size_t len = 0, cap = 10;
	char *str = malloc (cap + 1);

	if (str == NULL)
		err (1, "malloc");

	// The cast keeps isalpha() defined for bytes >= 0x80 on platforms where
	// plain char is signed.
	while (isalpha ((unsigned char)**s)) {
		if (len == cap) {
			char *grown = realloc (str, cap * 2 + 1);

			if (grown == NULL)
				err (1, "realloc");
			str = grown;
			cap *= 2;
		}
		str[len++] = *(*s)++;
	}
	str[len] = '\0';
	return str;
}
129 2228b436 2024-02-10 benni
130 2228b436 2024-02-10 benni static struct syntax *parse_range (struct syntax *syn, const char **s)
131 2228b436 2024-02-10 benni {
132 2228b436 2024-02-10 benni size_t len, cap;
133 2228b436 2024-02-10 benni char ch, prev, *str;
134 2228b436 2024-02-10 benni
135 2228b436 2024-02-10 benni if (**s == '^') {
136 2228b436 2024-02-10 benni syn->type = S_EXCEPT;
137 2228b436 2024-02-10 benni ++*s;
138 2228b436 2024-02-10 benni } else {
139 2228b436 2024-02-10 benni syn->type = S_ANYOF;
140 2228b436 2024-02-10 benni }
141 2228b436 2024-02-10 benni
142 2228b436 2024-02-10 benni len = 0;
143 2228b436 2024-02-10 benni cap = 10;
144 2228b436 2024-02-10 benni str = malloc (cap + 1);
145 2228b436 2024-02-10 benni
146 2228b436 2024-02-10 benni while (1) {
147 2228b436 2024-02-10 benni ch = *(*s)++;
148 2228b436 2024-02-10 benni switch (ch) {
149 2228b436 2024-02-10 benni case '\0':
150 2228b436 2024-02-10 benni return NULL;
151 2228b436 2024-02-10 benni case ']':
152 2228b436 2024-02-10 benni goto end;
153 2228b436 2024-02-10 benni case '-':
154 2228b436 2024-02-10 benni if (len == 0)
155 2228b436 2024-02-10 benni return NULL;
156 2228b436 2024-02-10 benni ch = *(*s)++;
157 2228b436 2024-02-10 benni
158 2228b436 2024-02-10 benni if (ch == ']') {
159 2228b436 2024-02-10 benni if (len == cap) {
160 2228b436 2024-02-10 benni cap *= 2;
161 2228b436 2024-02-10 benni str = realloc (str, cap + 1);
162 2228b436 2024-02-10 benni }
163 2228b436 2024-02-10 benni str[len++] = '-';
164 2228b436 2024-02-10 benni goto end;
165 2228b436 2024-02-10 benni }
166 2228b436 2024-02-10 benni
167 2228b436 2024-02-10 benni prev = str[len - 1];
168 2228b436 2024-02-10 benni if (prev > ch)
169 2228b436 2024-02-10 benni return NULL;
170 2228b436 2024-02-10 benni
171 2228b436 2024-02-10 benni if ((int)(cap - len) < (ch - prev)) {
172 2228b436 2024-02-10 benni cap = cap * 2 + (ch - prev);
173 2228b436 2024-02-10 benni str = realloc (str, cap + 1);
174 2228b436 2024-02-10 benni }
175 2228b436 2024-02-10 benni
176 2228b436 2024-02-10 benni for (char i = prev + 1; i <= ch; ++i)
177 2228b436 2024-02-10 benni str[len++] = i;
178 2228b436 2024-02-10 benni break;
179 2228b436 2024-02-10 benni case '\\':
180 2228b436 2024-02-10 benni ch = *(*s)++;
181 2228b436 2024-02-10 benni switch (ch) {
182 2228b436 2024-02-10 benni case 'a': ch = '\a'; break;
183 2228b436 2024-02-10 benni case 'b': ch = '\b'; break;
184 2228b436 2024-02-10 benni case 'f': ch = '\f'; break;
185 2228b436 2024-02-10 benni case 'n': ch = '\n'; break;
186 2228b436 2024-02-10 benni case 'r': ch = '\r'; break;
187 2228b436 2024-02-10 benni case 't': ch = '\t'; break;
188 2228b436 2024-02-10 benni case 'v': ch = '\v'; break;
189 2228b436 2024-02-10 benni case '\\': ch = '\\'; break;
190 2228b436 2024-02-10 benni case '\'': ch = '\''; break;
191 2228b436 2024-02-10 benni case '"': ch = '\"'; break;
192 2228b436 2024-02-10 benni default: break;
193 2228b436 2024-02-10 benni }
194 2228b436 2024-02-10 benni
195 2228b436 2024-02-10 benni /* FALLTHROUGH */
196 2228b436 2024-02-10 benni default:
197 2228b436 2024-02-10 benni if (len == cap) {
198 2228b436 2024-02-10 benni cap *= 2;
199 2228b436 2024-02-10 benni str = realloc (str, cap + 1);
200 2228b436 2024-02-10 benni }
201 2228b436 2024-02-10 benni str[len++] = ch;
202 2228b436 2024-02-10 benni break;
203 2228b436 2024-02-10 benni }
204 2228b436 2024-02-10 benni
205 2228b436 2024-02-10 benni }
206 2228b436 2024-02-10 benni end:
207 2228b436 2024-02-10 benni str[len++] = '\0';
208 2228b436 2024-02-10 benni syn->string = realloc (str, len);
209 2228b436 2024-02-10 benni return syn;
210 2228b436 2024-02-10 benni }
211 2228b436 2024-02-10 benni
212 2228b436 2024-02-10 benni static struct syntax *parse_syn (const char **s);
213 2228b436 2024-02-10 benni static struct syntax *syn_atom (const char **s)
214 2228b436 2024-02-10 benni {
215 2228b436 2024-02-10 benni struct syntax *syn = new (struct syntax);
216 2228b436 2024-02-10 benni char ch;
217 2228b436 2024-02-10 benni
218 2228b436 2024-02-10 benni for (; isspace (**s); ++*s);
219 2228b436 2024-02-10 benni
220 2228b436 2024-02-10 benni ch = *(*s)++;
221 2228b436 2024-02-10 benni
222 2228b436 2024-02-10 benni switch (ch) {
223 2228b436 2024-02-10 benni case '(':
224 2228b436 2024-02-10 benni syn->type = S_SUB;
225 2228b436 2024-02-10 benni syn->syn = parse_syn (s);
226 2228b436 2024-02-10 benni if (syn->syn == NULL || **s != ')')
227 2228b436 2024-02-10 benni return NULL;
228 2228b436 2024-02-10 benni ++*s;
229 2228b436 2024-02-10 benni break;
230 2228b436 2024-02-10 benni case '[':
231 2228b436 2024-02-10 benni return parse_range (syn, s);
232 2228b436 2024-02-10 benni case '"':
233 2228b436 2024-02-10 benni syn->type = S_STRING;
234 2228b436 2024-02-10 benni syn->string = parse_string (s, '"');
235 2228b436 2024-02-10 benni if (syn->string == NULL)
236 2228b436 2024-02-10 benni return NULL;
237 2228b436 2024-02-10 benni break;
238 2228b436 2024-02-10 benni case ';':
239 2228b436 2024-02-10 benni case ')':
240 2228b436 2024-02-10 benni case '|':
241 2228b436 2024-02-10 benni case '&':
242 2228b436 2024-02-10 benni syn->type = S_EPSILON;
243 2228b436 2024-02-10 benni --*s;
244 2228b436 2024-02-10 benni break;
245 2228b436 2024-02-10 benni default:
246 2228b436 2024-02-10 benni if (!isalpha (ch)) {
247 2228b436 2024-02-10 benni warnx ("invalid input: '%c'", ch);
248 2228b436 2024-02-10 benni return NULL;
249 2228b436 2024-02-10 benni }
250 2228b436 2024-02-10 benni --*s;
251 2228b436 2024-02-10 benni syn->type = S_IDENT;
252 2228b436 2024-02-10 benni syn->string = parse_ident (s);
253 2228b436 2024-02-10 benni break;
254 2228b436 2024-02-10 benni }
255 2228b436 2024-02-10 benni
256 2228b436 2024-02-10 benni return syn;
257 2228b436 2024-02-10 benni }
258 2228b436 2024-02-10 benni
259 2228b436 2024-02-10 benni static struct syntax *syn_suffix (const char **s)
260 2228b436 2024-02-10 benni {
261 2228b436 2024-02-10 benni struct syntax *syn, *n;
262 2228b436 2024-02-10 benni char ch;
263 2228b436 2024-02-10 benni
264 2228b436 2024-02-10 benni syn = syn_atom (s);
265 2228b436 2024-02-10 benni if (syn == NULL)
266 2228b436 2024-02-10 benni return NULL;
267 2228b436 2024-02-10 benni
268 2228b436 2024-02-10 benni while (**s == '*' || **s == '+' || **s == '?') {
269 2228b436 2024-02-10 benni n = new (struct syntax);
270 2228b436 2024-02-10 benni ch = *(*s)++;
271 2228b436 2024-02-10 benni switch (ch) {
272 2228b436 2024-02-10 benni case '*':
273 2228b436 2024-02-10 benni n->type = S_STAR;
274 2228b436 2024-02-10 benni break;
275 2228b436 2024-02-10 benni case '+':
276 2228b436 2024-02-10 benni n->type = S_PLUS;
277 2228b436 2024-02-10 benni break;
278 2228b436 2024-02-10 benni case '?':
279 2228b436 2024-02-10 benni n->type = S_OPT;
280 2228b436 2024-02-10 benni break;
281 2228b436 2024-02-10 benni }
282 2228b436 2024-02-10 benni n->syn = syn;
283 2228b436 2024-02-10 benni syn = n;
284 2228b436 2024-02-10 benni }
285 2228b436 2024-02-10 benni
286 2228b436 2024-02-10 benni return syn;
287 2228b436 2024-02-10 benni }
288 2228b436 2024-02-10 benni
289 2228b436 2024-02-10 benni static struct syntax *syn_andthen (const char **s)
290 2228b436 2024-02-10 benni {
291 2228b436 2024-02-10 benni struct syntax *syn, *n;
292 2228b436 2024-02-10 benni
293 2228b436 2024-02-10 benni syn = syn_suffix (s);
294 2228b436 2024-02-10 benni if (syn == NULL)
295 2228b436 2024-02-10 benni return NULL;
296 2228b436 2024-02-10 benni
297 2228b436 2024-02-10 benni while (1) {
298 2228b436 2024-02-10 benni for (; isspace (**s); ++*s);
299 dcf32492 2024-02-17 benni if (**s == ';' || **s == '|' || **s == ')' || **s == '\\' || **s == '&')
300 2228b436 2024-02-10 benni break;
301 2228b436 2024-02-10 benni
302 2228b436 2024-02-10 benni n = new (struct syntax);
303 2228b436 2024-02-10 benni n->type = S_BINARY;
304 2228b436 2024-02-10 benni n->op = "andThen";
305 2228b436 2024-02-10 benni n->left = syn;
306 2228b436 2024-02-10 benni n->right = syn_suffix (s);
307 2228b436 2024-02-10 benni if (n->right == NULL)
308 2228b436 2024-02-10 benni return NULL;
309 2228b436 2024-02-10 benni syn = n;
310 2228b436 2024-02-10 benni }
311 2228b436 2024-02-10 benni
312 2228b436 2024-02-10 benni return syn;
313 2228b436 2024-02-10 benni }
314 2228b436 2024-02-10 benni
315 2228b436 2024-02-10 benni static struct syntax *syn_butonly (const char **s)
316 2228b436 2024-02-10 benni {
317 2228b436 2024-02-10 benni struct syntax *syn, *n;
318 2228b436 2024-02-10 benni
319 2228b436 2024-02-10 benni syn = syn_andthen (s);
320 2228b436 2024-02-10 benni if (syn == NULL)
321 2228b436 2024-02-10 benni return NULL;
322 2228b436 2024-02-10 benni
323 2228b436 2024-02-10 benni while (1) {
324 2228b436 2024-02-10 benni for (; isspace (**s); ++*s);
325 2228b436 2024-02-10 benni if (**s != '&')
326 2228b436 2024-02-10 benni break;
327 2228b436 2024-02-10 benni ++*s;
328 2228b436 2024-02-10 benni
329 2228b436 2024-02-10 benni n = new (struct syntax);
330 2228b436 2024-02-10 benni n->type = S_BINARY;
331 2228b436 2024-02-10 benni n->op = "butOnly";
332 2228b436 2024-02-10 benni n->left = syn;
333 2228b436 2024-02-10 benni n->right = syn_andthen (s);
334 2228b436 2024-02-10 benni if (n->right == NULL)
335 2228b436 2024-02-10 benni return NULL;
336 2228b436 2024-02-10 benni syn = n;
337 2228b436 2024-02-10 benni }
338 2228b436 2024-02-10 benni
339 2228b436 2024-02-10 benni return syn;
340 2228b436 2024-02-10 benni }
341 2228b436 2024-02-10 benni
342 2228b436 2024-02-10 benni static struct syntax *syn_butnot (const char **s)
343 2228b436 2024-02-10 benni {
344 2228b436 2024-02-10 benni struct syntax *syn, *n;
345 2228b436 2024-02-10 benni
346 2228b436 2024-02-10 benni syn = syn_butonly (s);
347 2228b436 2024-02-10 benni if (syn == NULL)
348 2228b436 2024-02-10 benni return NULL;
349 2228b436 2024-02-10 benni
350 2228b436 2024-02-10 benni while (1) {
351 2228b436 2024-02-10 benni for (; isspace (**s); ++*s);
352 2228b436 2024-02-10 benni if (**s != '\\')
353 2228b436 2024-02-10 benni break;
354 2228b436 2024-02-10 benni ++*s;
355 2228b436 2024-02-10 benni
356 2228b436 2024-02-10 benni n = new (struct syntax);
357 2228b436 2024-02-10 benni n->type = S_BINARY;
358 2228b436 2024-02-10 benni n->op = "butNot";
359 2228b436 2024-02-10 benni n->left = syn;
360 2228b436 2024-02-10 benni n->right = syn_butonly (s);
361 2228b436 2024-02-10 benni if (n->right == NULL)
362 2228b436 2024-02-10 benni return NULL;
363 2228b436 2024-02-10 benni syn = n;
364 2228b436 2024-02-10 benni }
365 2228b436 2024-02-10 benni
366 2228b436 2024-02-10 benni return syn;
367 2228b436 2024-02-10 benni }
368 2228b436 2024-02-10 benni
369 2228b436 2024-02-10 benni static struct syntax *syn_or (const char **s)
370 2228b436 2024-02-10 benni {
371 2228b436 2024-02-10 benni struct syntax *syn, *n;
372 2228b436 2024-02-10 benni
373 2228b436 2024-02-10 benni syn = syn_butnot (s);
374 2228b436 2024-02-10 benni if (syn == NULL)
375 2228b436 2024-02-10 benni return NULL;
376 2228b436 2024-02-10 benni
377 2228b436 2024-02-10 benni while (1) {
378 2228b436 2024-02-10 benni for (; isspace (**s); ++*s);
379 2228b436 2024-02-10 benni if (**s != '|')
380 2228b436 2024-02-10 benni break;
381 2228b436 2024-02-10 benni ++*s;
382 2228b436 2024-02-10 benni
383 2228b436 2024-02-10 benni n = new (struct syntax);
384 2228b436 2024-02-10 benni n->type = S_BINARY;
385 2228b436 2024-02-10 benni n->op = "orElse";
386 2228b436 2024-02-10 benni n->left = syn;
387 2228b436 2024-02-10 benni n->right = syn_butnot (s);
388 2228b436 2024-02-10 benni if (n->right == NULL)
389 2228b436 2024-02-10 benni return NULL;
390 2228b436 2024-02-10 benni syn = n;
391 2228b436 2024-02-10 benni }
392 2228b436 2024-02-10 benni
393 2228b436 2024-02-10 benni return syn;
394 2228b436 2024-02-10 benni }
395 2228b436 2024-02-10 benni
/*
 * Entry point of the expression parser.  Parsing starts at the loosest
 * binding level, syn_or(), which descends through syn_butnot(),
 * syn_butonly(), syn_andthen() and syn_suffix() to syn_atom().
 *
 * Returns the parsed tree, or NULL on a syntax error.
 */
static struct syntax *parse_syn (const char **s)
{
	struct syntax *root = syn_or (s);

	return root;
}
400 2228b436 2024-02-10 benni
/*
 * Write `ch` to `file` as a Java character literal.
 *
 * Characters with a Java escape (\b \f \n \r \t \\ \') use it.  Every other
 * ASCII control character, including \a and \v for which Java has no named
 * escape, is written as an octal escape such as '\7'.  All remaining
 * characters are written verbatim between single quotes.
 */
static void print_char (FILE *file, char ch)
{
	const unsigned char uc = (unsigned char)ch;

	switch (ch) {
	case '\b': fputs ("'\\b'", file); return;
	case '\f': fputs ("'\\f'", file); return;
	case '\n': fputs ("'\\n'", file); return;
	case '\r': fputs ("'\\r'", file); return;
	case '\t': fputs ("'\\t'", file); return;
	case '\\': fputs ("'\\\\'", file); return;
	case '\'': fputs ("'\\''", file); return;
	default:
		break;
	}

	// Bytes >= 0x80 are passed through untouched, as before.
	if (uc < 0x80 && iscntrl (uc))
		fprintf (file, "'\\%o'", (unsigned)uc);
	else
		fprintf (file, "'%c'", ch);
}
420 2228b436 2024-02-10 benni
421 2228b436 2024-02-10 benni static void print_syntax (FILE *file, const struct syntax *syn, int nesting)
422 2228b436 2024-02-10 benni {
423 2228b436 2024-02-10 benni switch (syn->type) {
424 2228b436 2024-02-10 benni case S_EPSILON:
425 2228b436 2024-02-10 benni fprintf (file, "TokenFragment.epsilon()");
426 2228b436 2024-02-10 benni break;
427 2228b436 2024-02-10 benni case S_SUB:
428 2228b436 2024-02-10 benni print_syntax (file, syn->syn, nesting);
429 2228b436 2024-02-10 benni break;
430 2228b436 2024-02-10 benni case S_STRING:
431 2228b436 2024-02-10 benni fprintf (file, "TokenFragment.of(\"%s\")", syn->string);
432 2228b436 2024-02-10 benni break;
433 2228b436 2024-02-10 benni case S_ANYOF:
434 2228b436 2024-02-10 benni case S_EXCEPT:
435 2228b436 2024-02-10 benni fprintf (file, "TokenFragment.%s(", syn->type == S_ANYOF ? "anyOf" : "except");
436 2228b436 2024-02-10 benni print_char (file, syn->string[0]);
437 2228b436 2024-02-10 benni for (size_t i = 1; syn->string[i] != '\0'; ++i) {
438 2228b436 2024-02-10 benni fputs (", ", file);
439 2228b436 2024-02-10 benni print_char (file, syn->string[i]);
440 2228b436 2024-02-10 benni }
441 2228b436 2024-02-10 benni fputc (')', file);
442 2228b436 2024-02-10 benni break;
443 2228b436 2024-02-10 benni case S_PLUS:
444 2228b436 2024-02-10 benni print_syntax (file, syn->syn, nesting);
445 2228b436 2024-02-10 benni fprintf (file, ".plus()");
446 2228b436 2024-02-10 benni break;
447 2228b436 2024-02-10 benni case S_STAR:
448 2228b436 2024-02-10 benni print_syntax (file, syn->syn, nesting);
449 2228b436 2024-02-10 benni fprintf (file, ".star()");
450 2228b436 2024-02-10 benni break;
451 2228b436 2024-02-10 benni case S_OPT:
452 2228b436 2024-02-10 benni print_syntax (file, syn->syn, nesting);
453 2228b436 2024-02-10 benni fprintf (file, ".optional()");
454 2228b436 2024-02-10 benni break;
455 2228b436 2024-02-10 benni case S_BINARY:
456 2228b436 2024-02-10 benni print_syntax (file, syn->left, nesting);
457 2228b436 2024-02-10 benni ++nesting;
458 2228b436 2024-02-10 benni fputc ('\n', file);
459 2228b436 2024-02-10 benni for (int i = 0; i < nesting; ++i)
460 2228b436 2024-02-10 benni fputc ('\t', file);
461 2228b436 2024-02-10 benni fprintf (file, ".%s(", syn->op);
462 2228b436 2024-02-10 benni print_syntax (file, syn->right, nesting);
463 2228b436 2024-02-10 benni fprintf (file, ")");
464 2228b436 2024-02-10 benni break;
465 2228b436 2024-02-10 benni case S_IDENT:
466 2228b436 2024-02-10 benni fprintf (file, "%c%s", tolower (syn->string[0]), syn->string + 1);
467 2228b436 2024-02-10 benni break;
468 2228b436 2024-02-10 benni }
469 2228b436 2024-02-10 benni }
470 2228b436 2024-02-10 benni
471 2228b436 2024-02-10 benni static int parse_def (const char *s, struct definition *def)
472 2228b436 2024-02-10 benni {
473 2228b436 2024-02-10 benni size_t i;
474 2228b436 2024-02-10 benni
475 2228b436 2024-02-10 benni for (i = 0; s[i] != '\0' && isalpha (s[i]); ++i);
476 2228b436 2024-02-10 benni if (i == 0)
477 2228b436 2024-02-10 benni return 1;
478 2228b436 2024-02-10 benni
479 2228b436 2024-02-10 benni def->name = strndup (s, i);
480 2228b436 2024-02-10 benni s += i;
481 2228b436 2024-02-10 benni
482 2228b436 2024-02-10 benni for (; isspace (*s); ++s);
483 2228b436 2024-02-10 benni
484 2228b436 2024-02-10 benni if (*s++ != ':')
485 2228b436 2024-02-10 benni return 1;
486 2228b436 2024-02-10 benni
487 2228b436 2024-02-10 benni def->syn = parse_syn (&s);
488 2228b436 2024-02-10 benni if (def->syn == NULL)
489 2228b436 2024-02-10 benni return 1;
490 2228b436 2024-02-10 benni
491 2228b436 2024-02-10 benni if (*s != ';')
492 2228b436 2024-02-10 benni return 1;
493 2228b436 2024-02-10 benni
494 2228b436 2024-02-10 benni return 0;
495 2228b436 2024-02-10 benni }
496 2228b436 2024-02-10 benni
497 2228b436 2024-02-10 benni static int parse_vardef (const char *s, struct definition *def)
498 2228b436 2024-02-10 benni {
499 2228b436 2024-02-10 benni size_t i;
500 2228b436 2024-02-10 benni
501 2228b436 2024-02-10 benni for (i = 0; s[i] != '\0' && isalpha (s[i]); ++i);
502 2228b436 2024-02-10 benni if (i == 0)
503 2228b436 2024-02-10 benni return 1;
504 2228b436 2024-02-10 benni
505 2228b436 2024-02-10 benni def->name = strndup (s, i);
506 2228b436 2024-02-10 benni s += i;
507 2228b436 2024-02-10 benni
508 2228b436 2024-02-10 benni for (; isspace (*s); ++s);
509 2228b436 2024-02-10 benni if (*s++ != '=')
510 2228b436 2024-02-10 benni return 1;
511 2228b436 2024-02-10 benni
512 2228b436 2024-02-10 benni def->syn = parse_syn (&s);
513 2228b436 2024-02-10 benni if (def->syn == NULL)
514 2228b436 2024-02-10 benni return 1;
515 2228b436 2024-02-10 benni
516 2228b436 2024-02-10 benni if (*s != ';')
517 2228b436 2024-02-10 benni return 1;
518 2228b436 2024-02-10 benni
519 2228b436 2024-02-10 benni return 0;
520 2228b436 2024-02-10 benni }
521 2228b436 2024-02-10 benni
522 2228b436 2024-02-10 benni static int compile (FILE *infile, const char *infilename, FILE *outfile, const char *vis, const char *package, const char *classname)
523 2228b436 2024-02-10 benni {
524 2228b436 2024-02-10 benni size_t linenum = 0, ntok, tokcap, nvar, varcap;
525 2228b436 2024-02-10 benni char line[1024];
526 2228b436 2024-02-10 benni const char *s;
527 2228b436 2024-02-10 benni struct definition *tokens, *vars;
528 2228b436 2024-02-10 benni char *eof = NULL;
529 2228b436 2024-02-10 benni char *error = NULL;
530 2228b436 2024-02-10 benni
531 2228b436 2024-02-10 benni if (package != NULL)
532 2228b436 2024-02-10 benni fprintf (outfile, "package %s;\n\n", package);
533 2228b436 2024-02-10 benni
534 2228b436 2024-02-10 benni nvar = 0;
535 2228b436 2024-02-10 benni varcap = 10;
536 2228b436 2024-02-10 benni vars = calloc (varcap, sizeof (struct definition));
537 2228b436 2024-02-10 benni
538 2228b436 2024-02-10 benni // Parse declarations
539 2228b436 2024-02-10 benni while (1) {
540 2228b436 2024-02-10 benni ++linenum;
541 2228b436 2024-02-10 benni if (fgets (line, sizeof (line), infile) == NULL) {
542 2228b436 2024-02-10 benni warnx ("%s: %zu: unexpected end of file", infilename, linenum);
543 2228b436 2024-02-10 benni return 1;
544 2228b436 2024-02-10 benni }
545 2228b436 2024-02-10 benni
546 2228b436 2024-02-10 benni line[strcspn (line, "\n")] = '\0';
547 2228b436 2024-02-10 benni s = strip_ws (line);
548 2228b436 2024-02-10 benni if (*s == '\0' || starts_with (s, "//"))
549 2228b436 2024-02-10 benni continue;
550 2228b436 2024-02-10 benni
551 2228b436 2024-02-10 benni if (strcmp (s, "%%") == 0)
552 2228b436 2024-02-10 benni break;
553 2228b436 2024-02-10 benni
554 2228b436 2024-02-10 benni if (starts_with (s, "%import ")) {
555 2228b436 2024-02-10 benni fprintf (outfile, "%s;\n", s + 1);
556 2228b436 2024-02-10 benni } else if (starts_with (s, "%eof ")) {
557 2228b436 2024-02-10 benni s = strip_ws (s + 4);
558 2228b436 2024-02-10 benni eof = strdup (s);
559 2228b436 2024-02-10 benni } else if (starts_with (s, "%error ")) {
560 2228b436 2024-02-10 benni s = strip_ws (s + 6);
561 2228b436 2024-02-10 benni error = strdup (s);
562 2228b436 2024-02-10 benni } else {
563 2228b436 2024-02-10 benni if (nvar == varcap) {
564 2228b436 2024-02-10 benni varcap *= 2;
565 2228b436 2024-02-10 benni vars = reallocarray (vars, varcap, sizeof (struct definition));
566 2228b436 2024-02-10 benni }
567 2228b436 2024-02-10 benni
568 2228b436 2024-02-10 benni if (parse_vardef (s, &vars[nvar++]) != 0) {
569 2228b436 2024-02-10 benni warnx ("%s: %zu: unrecognized statement: %s", infilename, linenum, s);
570 2228b436 2024-02-10 benni return 1;
571 2228b436 2024-02-10 benni }
572 2228b436 2024-02-10 benni }
573 2228b436 2024-02-10 benni }
574 2228b436 2024-02-10 benni
575 2228b436 2024-02-10 benni if (eof == NULL)
576 2228b436 2024-02-10 benni warnx ("%s: %zu: end-of-file token not defined. Use `%%eof NAME` to define an EOF token.", infilename, linenum);
577 2228b436 2024-02-10 benni if (error == NULL)
578 2228b436 2024-02-10 benni warnx ("%s: %zu: error token not defined. Use `%%error NAME` to define an error token.", infilename, linenum);
579 2228b436 2024-02-10 benni
580 2228b436 2024-02-10 benni fprintf (outfile, "\npublic class %s {\n", classname);
581 2228b436 2024-02-10 benni
582 2228b436 2024-02-10 benni ntok = 0;
583 2228b436 2024-02-10 benni tokcap = 10;
584 2228b436 2024-02-10 benni tokens = calloc (tokcap, sizeof (struct definition));
585 2228b436 2024-02-10 benni
586 2228b436 2024-02-10 benni // Parse definitions
587 2228b436 2024-02-10 benni while (fgets (line, sizeof (line), infile) != NULL) {
588 2228b436 2024-02-10 benni ++linenum;
589 2228b436 2024-02-10 benni line[strcspn (line, "\n")] = '\0';
590 2228b436 2024-02-10 benni s = strip_ws (line);
591 2228b436 2024-02-10 benni if (*s == '\0' || starts_with (s, "//"))
592 2228b436 2024-02-10 benni continue;
593 2228b436 2024-02-10 benni
594 2228b436 2024-02-10 benni if (strcmp (s, "%%") == 0)
595 2228b436 2024-02-10 benni break;
596 2228b436 2024-02-10 benni
597 2228b436 2024-02-10 benni if (ntok == tokcap) {
598 2228b436 2024-02-10 benni tokcap *= 2;
599 2228b436 2024-02-10 benni tokens = reallocarray (tokens, tokcap, sizeof (struct definition));
600 2228b436 2024-02-10 benni }
601 2228b436 2024-02-10 benni
602 2228b436 2024-02-10 benni if (parse_def (s, &tokens[ntok++]) != 0) {
603 2228b436 2024-02-10 benni warnx ("%s: %zu: invalid token definition", infilename, linenum);
604 2228b436 2024-02-10 benni return 1;
605 2228b436 2024-02-10 benni }
606 2228b436 2024-02-10 benni }
607 2228b436 2024-02-10 benni
608 2228b436 2024-02-10 benni if (ntok == 0) {
609 2228b436 2024-02-10 benni warnx ("%s: %zu: no token definitions", infilename, linenum);
610 2228b436 2024-02-10 benni return 1;
611 2228b436 2024-02-10 benni }
612 2228b436 2024-02-10 benni
613 2228b436 2024-02-10 benni fprintf (outfile, "\t%sstatic enum TokenType {\n", vis);
614 2228b436 2024-02-10 benni if (eof != NULL)
615 2228b436 2024-02-10 benni fprintf (outfile, "\t\t%s,\n", eof);
616 2228b436 2024-02-10 benni if (error != NULL)
617 2228b436 2024-02-10 benni fprintf (outfile, "\t\t%s,\n", error);
618 2228b436 2024-02-10 benni for (size_t i = 0; i < ntok; ++i) {
619 2228b436 2024-02-10 benni fprintf (outfile, "\t\t%s,\n", tokens[i].name);
620 2228b436 2024-02-10 benni }
621 2228b436 2024-02-10 benni fprintf (outfile, "\t}\n\n");
622 2228b436 2024-02-10 benni
623 2228b436 2024-02-10 benni if (nvar > 0) {
624 2228b436 2024-02-10 benni fprintf (outfile, "\t// Variable definitions\n");
625 2228b436 2024-02-10 benni for (size_t i = 0; i < nvar; ++i) {
626 2228b436 2024-02-10 benni const struct definition *var = &vars[i];
627 2228b436 2024-02-10 benni fprintf (outfile,
628 2228b436 2024-02-10 benni "\t%sstatic final TokenFragment %c%s = \n\t\t",
629 2228b436 2024-02-10 benni vis,
630 2228b436 2024-02-10 benni tolower (var->name[0]),
631 2228b436 2024-02-10 benni var->name + 1);
632 2228b436 2024-02-10 benni print_syntax (outfile, var->syn, 2);
633 2228b436 2024-02-10 benni fprintf (outfile, ";\n");
634 2228b436 2024-02-10 benni }
635 2228b436 2024-02-10 benni fputc ('\n', outfile);
636 2228b436 2024-02-10 benni }
637 2228b436 2024-02-10 benni
638 2228b436 2024-02-10 benni
639 2228b436 2024-02-10 benni fprintf (outfile, "\t// Token Definitions\n");
640 2228b436 2024-02-10 benni for (size_t i = 0; i < ntok; ++i) {
641 2228b436 2024-02-10 benni const struct definition *tok = &tokens[i];
642 2228b436 2024-02-10 benni fprintf (outfile,
643 2228b436 2024-02-10 benni "\t%sstatic final TokenRule<TokenType> %c%s = new TokenRule<>(TokenType.%s,\n\t\t",
644 2228b436 2024-02-10 benni vis,
645 2228b436 2024-02-10 benni tolower (tok->name[0]),
646 2228b436 2024-02-10 benni tok->name + 1,
647 2228b436 2024-02-10 benni tok->name);
648 2228b436 2024-02-10 benni print_syntax (outfile, tok->syn, 2);
649 2228b436 2024-02-10 benni fprintf (outfile, "\n\t);\n");
650 2228b436 2024-02-10 benni }
651 2228b436 2024-02-10 benni fputc ('\n', outfile);
652 2228b436 2024-02-10 benni
653 2228b436 2024-02-10 benni fprintf (outfile, "\t%sstatic final Lexer construct() {\n", vis);
654 2228b436 2024-02-10 benni fprintf (outfile, "\t\tfinal var rules = new java.util.ArrayList<TokenRule<TokenType>>();\n");
655 2228b436 2024-02-10 benni for (size_t i = 0; i < ntok; ++i) {
656 2228b436 2024-02-10 benni const struct definition *tok = &tokens[i];
657 2228b436 2024-02-10 benni fprintf (outfile, "\t\trules.add(%c%s);\n", tolower (tok->name[0]), tok->name + 1);
658 2228b436 2024-02-10 benni }
659 2228b436 2024-02-10 benni
660 2228b436 2024-02-10 benni fprintf (outfile, "\t\treturn new Lexer(new TokenRuleSet(rules))");
661 2228b436 2024-02-10 benni if (eof)
662 2228b436 2024-02-10 benni fprintf (outfile, "\n\t\t\t.setEndType(TokenType.%s)", eof);
663 2228b436 2024-02-10 benni if (error)
664 2228b436 2024-02-10 benni fprintf (outfile, "\n\t\t\t.setErrorType(TokenType.%s)", error);
665 2228b436 2024-02-10 benni fprintf (outfile, ";\n\t}\n\n");
666 2228b436 2024-02-10 benni
667 2228b436 2024-02-10 benni while (fgets (line, sizeof (line), infile) != NULL) {
668 2228b436 2024-02-10 benni fputc ('\t', outfile);
669 2228b436 2024-02-10 benni fputs (line, outfile);
670 2228b436 2024-02-10 benni }
671 2228b436 2024-02-10 benni
672 2228b436 2024-02-10 benni fputs ("}\n", outfile);
673 2228b436 2024-02-10 benni
674 2228b436 2024-02-10 benni return 0;
675 2228b436 2024-02-10 benni }
676 2228b436 2024-02-10 benni
677 2228b436 2024-02-10 benni
678 2228b436 2024-02-10 benni
679 2228b436 2024-02-10 benni
/* Print the command-line synopsis to stderr and return 1. */
static int usage (void)
{
	static const char synopsis[] =
		"usage: lexicgen [-v VISIBILITY] [-p PACKAGE] [-o FILE] file\n";

	fputs (synopsis, stderr);
	return 1;
}
685 2228b436 2024-02-10 benni
/*
 * Derive the default output file name and the Java class name from the
 * path of the input file.
 *
 * The class name is the last path component of `filename` up to (but not
 * including) its first '.'.  The output file name is `filename` with
 * everything from that '.' onwards replaced by ".java".  For example,
 * "dir/Foo.lex" yields "dir/Foo.java" and "Foo".
 *
 * filename:    path of the input file (must not be NULL).
 * outfilename: if non-NULL, receives a malloc'd output file name.
 * classname:   receives a malloc'd class name.
 *
 * Returns 0 on success; the caller owns the returned strings.
 * Returns -1, storing nothing, if the last path component has no
 * characters before its first '.', or if memory allocation fails.
 */
static int gen_names (const char *filename, char **outfilename, char **classname)
{
	static const char suffix[] = ".java";
	const char *slash, *base, *dot;
	size_t stem_len;
	char *cls;

	/* Locate the last path component without ever forming a pointer
	 * that lies before the start of `filename`. */
	slash = strrchr (filename, '/');
	base = (slash != NULL) ? slash + 1 : filename;

	/* The stem ends at the first '.', or at the end of the string. */
	dot = strchr (base, '.');
	if (dot == NULL)
		dot = base + strlen (base);

	stem_len = (size_t)(dot - base);
	if (stem_len == 0)
		return -1;

	cls = malloc (stem_len + 1);
	if (cls == NULL)
		return -1;
	memcpy (cls, base, stem_len);
	cls[stem_len] = '\0';

	if (outfilename != NULL) {
		const size_t prefix_len = (size_t)(dot - filename);
		/* sizeof (suffix) already accounts for the terminating NUL. */
		char *out = malloc (prefix_len + sizeof (suffix));

		if (out == NULL) {
			free (cls);
			return -1;
		}
		memcpy (out, filename, prefix_len);
		memcpy (out + prefix_len, suffix, sizeof (suffix));
		*outfilename = out;
	}

	*classname = cls;
	return 0;
}
706 2228b436 2024-02-10 benni
707 2228b436 2024-02-10 benni
708 2228b436 2024-02-10 benni int main (int argc, char *argv[])
709 2228b436 2024-02-10 benni {
710 2228b436 2024-02-10 benni int option;
711 2228b436 2024-02-10 benni char *package = NULL;
712 2228b436 2024-02-10 benni char *infilename = NULL, *outfilename = NULL, *classname;
713 2228b436 2024-02-10 benni FILE *infile, *outfile;
714 2228b436 2024-02-10 benni enum Visibility vis = V_PRIVATE;
715 2228b436 2024-02-10 benni int ret;
716 2228b436 2024-02-10 benni
717 2228b436 2024-02-10 benni #ifdef __OpenBSD__
718 2228b436 2024-02-10 benni if (pledge ("stdio rpath wpath cpath", NULL) != 0)
719 2228b436 2024-02-10 benni err (1, "pledge()");
720 2228b436 2024-02-10 benni #endif
721 2228b436 2024-02-10 benni
722 2228b436 2024-02-10 benni while ((option = getopt (argc, argv, "p:v:o:")) != -1) {
723 2228b436 2024-02-10 benni switch (option) {
724 2228b436 2024-02-10 benni case 'p':
725 2228b436 2024-02-10 benni package = optarg;
726 2228b436 2024-02-10 benni break;
727 2228b436 2024-02-10 benni case 'v':
728 2228b436 2024-02-10 benni if (strcmp (optarg, "public") == 0) {
729 2228b436 2024-02-10 benni vis = V_PUBLIC;
730 2228b436 2024-02-10 benni } else if (strcmp (optarg, "private") == 0) {
731 2228b436 2024-02-10 benni vis = V_PRIVATE;
732 2228b436 2024-02-10 benni } else if (strcmp (optarg, "package") == 0) {
733 2228b436 2024-02-10 benni vis = V_PACKAGE;
734 2228b436 2024-02-10 benni } else {
735 2228b436 2024-02-10 benni errx (1, "Invalid visibility: '%s', expected any of: private, public, package", optarg);
736 2228b436 2024-02-10 benni }
737 2228b436 2024-02-10 benni break;
738 2228b436 2024-02-10 benni case 'o':
739 2228b436 2024-02-10 benni outfilename = optarg;
740 2228b436 2024-02-10 benni break;
741 2228b436 2024-02-10 benni default:
742 2228b436 2024-02-10 benni return usage ();
743 2228b436 2024-02-10 benni }
744 2228b436 2024-02-10 benni }
745 2228b436 2024-02-10 benni
746 2228b436 2024-02-10 benni argv += optind;
747 2228b436 2024-02-10 benni argc -= optind;
748 2228b436 2024-02-10 benni
749 2228b436 2024-02-10 benni if (argc != 1)
750 2228b436 2024-02-10 benni return usage ();
751 2228b436 2024-02-10 benni
752 2228b436 2024-02-10 benni infilename = argv[0];
753 2228b436 2024-02-10 benni if (gen_names (infilename, outfilename ? NULL : &outfilename, &classname) != 0)
754 2228b436 2024-02-10 benni errx (1, "invalid file name: %s", infilename);
755 2228b436 2024-02-10 benni
756 2228b436 2024-02-10 benni infile = fopen (infilename, "r");
757 2228b436 2024-02-10 benni if (infile == NULL)
758 2228b436 2024-02-10 benni err (1, "fopen(\"%s\")", infilename);
759 2228b436 2024-02-10 benni
760 2228b436 2024-02-10 benni if (strcmp (outfilename, "-") == 0) {
761 2228b436 2024-02-10 benni outfile = stdout;
762 2228b436 2024-02-10 benni } else {
763 2228b436 2024-02-10 benni outfile = fopen (outfilename, "w");
764 2228b436 2024-02-10 benni if (outfile == NULL)
765 2228b436 2024-02-10 benni err (1, "fopen(\"%s\")", outfilename);
766 2228b436 2024-02-10 benni }
767 2228b436 2024-02-10 benni
768 2228b436 2024-02-10 benni #ifdef __OpenBSD__
769 2228b436 2024-02-10 benni if (pledge ("stdio", NULL) != 0)
770 2228b436 2024-02-10 benni err (1, "pledge()");
771 2228b436 2024-02-10 benni #endif
772 2228b436 2024-02-10 benni
773 2228b436 2024-02-10 benni ret = compile (infile, infilename, outfile, visibilities[vis], package, classname);
774 2228b436 2024-02-10 benni
775 2228b436 2024-02-10 benni fclose (outfile);
776 2228b436 2024-02-10 benni fclose (infile);
777 2228b436 2024-02-10 benni if (ret != 0)
778 2228b436 2024-02-10 benni remove (outfilename);
779 2228b436 2024-02-10 benni return ret;
780 2228b436 2024-02-10 benni }
781 2228b436 2024-02-10 benni