3 #elif defined(__GNUC__)
4 # define _DEFAULT_SOURCE
16 #define new(T) ((T *)calloc (1, sizeof (T)))
32 enum syntax_type type;
38 struct syntax *left, *right;
58 static const char *visibilities[] = {
64 static const char *strip_ws (const char *s)
67 while (*s != '\0' && isspace (*s))
72 static bool starts_with (const char *s, const char *prefix)
74 return strncmp (s, prefix, strlen (prefix)) == 0;
77 static char *parse_string (const char **s, char end)
85 str = malloc (cap + 1);
89 if (ch == end && !esc)
94 esc = !esc && (ch == '\\');
98 str = realloc (str, cap + 1);
107 static char *parse_ident (const char **s)
114 str = malloc (cap + 1);
122 str = realloc (str, cap + 1);
124 str[len++] = *(*s)++;
130 static struct syntax *parse_range (struct syntax *syn, const char **s)
136 syn->type = S_EXCEPT;
144 str = malloc (cap + 1);
161 str = realloc (str, cap + 1);
171 if ((int)(cap - len) < (ch - prev)) {
172 cap = cap * 2 + (ch - prev);
173 str = realloc (str, cap + 1);
176 for (char i = prev + 1; i <= ch; ++i)
182 case 'a': ch = '\a'; break;
183 case 'b': ch = '\b'; break;
184 case 'f': ch = '\f'; break;
185 case 'n': ch = '\n'; break;
186 case 'r': ch = '\r'; break;
187 case 't': ch = '\t'; break;
188 case 'v': ch = '\v'; break;
189 case '\\': ch = '\\'; break;
190 case '\'': ch = '\''; break;
191 case '"': ch = '\"'; break;
199 str = realloc (str, cap + 1);
208 syn->string = realloc (str, len);
212 static struct syntax *parse_syn (const char **s);
213 static struct syntax *syn_atom (const char **s)
215 struct syntax *syn = new (struct syntax);
218 for (; isspace (**s); ++*s);
225 syn->syn = parse_syn (s);
226 if (syn->syn == NULL || **s != ')')
231 return parse_range (syn, s);
233 syn->type = S_STRING;
234 syn->string = parse_string (s, '"');
235 if (syn->string == NULL)
242 syn->type = S_EPSILON;
247 warnx ("invalid input: '%c'", ch);
252 syn->string = parse_ident (s);
259 static struct syntax *syn_suffix (const char **s)
261 struct syntax *syn, *n;
268 while (**s == '*' || **s == '+' || **s == '?') {
269 n = new (struct syntax);
289 static struct syntax *syn_andthen (const char **s)
291 struct syntax *syn, *n;
293 syn = syn_suffix (s);
298 for (; isspace (**s); ++*s);
299 if (**s == ';' || **s == '|' || **s == ')' || **s == '\\' || **s == '&')
302 n = new (struct syntax);
306 n->right = syn_suffix (s);
307 if (n->right == NULL)
315 static struct syntax *syn_butonly (const char **s)
317 struct syntax *syn, *n;
319 syn = syn_andthen (s);
324 for (; isspace (**s); ++*s);
329 n = new (struct syntax);
333 n->right = syn_andthen (s);
334 if (n->right == NULL)
342 static struct syntax *syn_butnot (const char **s)
344 struct syntax *syn, *n;
346 syn = syn_butonly (s);
351 for (; isspace (**s); ++*s);
356 n = new (struct syntax);
360 n->right = syn_butonly (s);
361 if (n->right == NULL)
369 static struct syntax *syn_or (const char **s)
371 struct syntax *syn, *n;
373 syn = syn_butnot (s);
378 for (; isspace (**s); ++*s);
383 n = new (struct syntax);
387 n->right = syn_butnot (s);
388 if (n->right == NULL)
396 static struct syntax *parse_syn (const char **s)
401 static void print_char (FILE *file, char ch)
403 char str[3] = { '\\', '\0', '\0' };
405 case '\a': str[1] = 'a'; break;
406 case '\b': str[1] = 'b'; break;
407 case '\f': str[1] = 'f'; break;
408 case '\n': str[1] = 'n'; break;
409 case '\r': str[1] = 'r'; break;
410 case '\t': str[1] = 't'; break;
411 case '\v': str[1] = 'v'; break;
412 case '\\': str[1] = '\\'; break;
413 case '\'': str[1] = '\''; break;
415 fprintf (file, "'%c'", ch);
418 fprintf (file, "'%s'", str);
421 static void print_syntax (FILE *file, const struct syntax *syn, int nesting)
425 fprintf (file, "TokenFragment.epsilon()");
428 print_syntax (file, syn->syn, nesting);
431 fprintf (file, "TokenFragment.of(\"%s\")", syn->string);
435 fprintf (file, "TokenFragment.%s(", syn->type == S_ANYOF ? "anyOf" : "except");
436 print_char (file, syn->string[0]);
437 for (size_t i = 1; syn->string[i] != '\0'; ++i) {
439 print_char (file, syn->string[i]);
444 print_syntax (file, syn->syn, nesting);
445 fprintf (file, ".plus()");
448 print_syntax (file, syn->syn, nesting);
449 fprintf (file, ".star()");
452 print_syntax (file, syn->syn, nesting);
453 fprintf (file, ".optional()");
456 print_syntax (file, syn->left, nesting);
459 for (int i = 0; i < nesting; ++i)
461 fprintf (file, ".%s(", syn->op);
462 print_syntax (file, syn->right, nesting);
466 fprintf (file, "%c%s", tolower (syn->string[0]), syn->string + 1);
471 static int parse_def (const char *s, struct definition *def)
475 for (i = 0; s[i] != '\0' && isalpha (s[i]); ++i);
479 def->name = strndup (s, i);
482 for (; isspace (*s); ++s);
487 def->syn = parse_syn (&s);
488 if (def->syn == NULL)
497 static int parse_vardef (const char *s, struct definition *def)
501 for (i = 0; s[i] != '\0' && isalpha (s[i]); ++i);
505 def->name = strndup (s, i);
508 for (; isspace (*s); ++s);
512 def->syn = parse_syn (&s);
513 if (def->syn == NULL)
522 static int compile (FILE *infile, const char *infilename, FILE *outfile, const char *vis, const char *package, const char *classname)
524 size_t linenum = 0, ntok, tokcap, nvar, varcap;
527 struct definition *tokens, *vars;
532 fprintf (outfile, "package %s;\n\n", package);
536 vars = calloc (varcap, sizeof (struct definition));
538 // Parse declarations
541 if (fgets (line, sizeof (line), infile) == NULL) {
542 warnx ("%s: %zu: unexpected end of file", infilename, linenum);
546 line[strcspn (line, "\n")] = '\0';
548 if (*s == '\0' || starts_with (s, "//"))
551 if (strcmp (s, "%%") == 0)
554 if (starts_with (s, "%import ")) {
555 fprintf (outfile, "%s;\n", s + 1);
556 } else if (starts_with (s, "%eof ")) {
557 s = strip_ws (s + 4);
559 } else if (starts_with (s, "%error ")) {
560 s = strip_ws (s + 6);
563 if (nvar == varcap) {
565 vars = reallocarray (vars, varcap, sizeof (struct definition));
568 if (parse_vardef (s, &vars[nvar++]) != 0) {
569 warnx ("%s: %zu: unrecognized statement: %s", infilename, linenum, s);
576 warnx ("%s: %zu: end-of-file token not defined. Use `%%eof NAME` to define an EOF token.", infilename, linenum);
578 warnx ("%s: %zu: error token not defined. Use `%%error NAME` to define an error token.", infilename, linenum);
580 fprintf (outfile, "\npublic class %s {\n", classname);
584 tokens = calloc (tokcap, sizeof (struct definition));
587 while (fgets (line, sizeof (line), infile) != NULL) {
589 line[strcspn (line, "\n")] = '\0';
591 if (*s == '\0' || starts_with (s, "//"))
594 if (strcmp (s, "%%") == 0)
597 if (ntok == tokcap) {
599 tokens = reallocarray (tokens, tokcap, sizeof (struct definition));
602 if (parse_def (s, &tokens[ntok++]) != 0) {
603 warnx ("%s: %zu: invalid token definition", infilename, linenum);
609 warnx ("%s: %zu: no token definitions", infilename, linenum);
613 fprintf (outfile, "\t%sstatic enum TokenType {\n", vis);
615 fprintf (outfile, "\t\t%s,\n", eof);
617 fprintf (outfile, "\t\t%s,\n", error);
618 for (size_t i = 0; i < ntok; ++i) {
619 fprintf (outfile, "\t\t%s,\n", tokens[i].name);
621 fprintf (outfile, "\t}\n\n");
624 fprintf (outfile, "\t// Variable definitions\n");
625 for (size_t i = 0; i < nvar; ++i) {
626 const struct definition *var = &vars[i];
628 "\t%sstatic final TokenFragment %c%s = \n\t\t",
630 tolower (var->name[0]),
632 print_syntax (outfile, var->syn, 2);
633 fprintf (outfile, ";\n");
635 fputc ('\n', outfile);
639 fprintf (outfile, "\t// Token Definitions\n");
640 for (size_t i = 0; i < ntok; ++i) {
641 const struct definition *tok = &tokens[i];
643 "\t%sstatic final TokenRule<TokenType> %c%s = new TokenRule<>(TokenType.%s,\n\t\t",
645 tolower (tok->name[0]),
648 print_syntax (outfile, tok->syn, 2);
649 fprintf (outfile, "\n\t);\n");
651 fputc ('\n', outfile);
653 fprintf (outfile, "\t%sstatic final Lexer construct() {\n", vis);
654 fprintf (outfile, "\t\tfinal var rules = new java.util.ArrayList<TokenRule<TokenType>>();\n");
655 for (size_t i = 0; i < ntok; ++i) {
656 const struct definition *tok = &tokens[i];
657 fprintf (outfile, "\t\trules.add(%c%s);\n", tolower (tok->name[0]), tok->name + 1);
660 fprintf (outfile, "\t\treturn new Lexer(new TokenRuleSet(rules))");
662 fprintf (outfile, "\n\t\t\t.setEndType(TokenType.%s)", eof);
664 fprintf (outfile, "\n\t\t\t.setErrorType(TokenType.%s)", error);
665 fprintf (outfile, ";\n\t}\n\n");
667 while (fgets (line, sizeof (line), infile) != NULL) {
668 fputc ('\t', outfile);
669 fputs (line, outfile);
672 fputs ("}\n", outfile);
680 static int usage (void)
682 fputs ("usage: lexicgen [-v VISIBILITY] [-p PACKAGE] [-o FILE] file\n", stderr);
686 static int gen_names (const char *filename, char **outfilename, char **classname)
688 const char *slash, *dot;
690 slash = strrchr (filename, '/');
692 slash = filename - 1;
694 dot = strchr (slash + 1, '.');
696 dot = slash + 1 + strlen (slash + 1);
698 if ((dot - slash) < 2)
701 if (outfilename != NULL)
702 asprintf (outfilename, "%.*s.java", (int)(dot - filename), filename);
703 asprintf (classname, "%.*s", (int)(dot - slash - 1), slash + 1);
708 int main (int argc, char *argv[])
711 char *package = NULL;
712 char *infilename = NULL, *outfilename = NULL, *classname;
713 FILE *infile, *outfile;
714 enum Visibility vis = V_PRIVATE;
718 if (pledge ("stdio rpath wpath cpath", NULL) != 0)
722 while ((option = getopt (argc, argv, "p:v:o:")) != -1) {
728 if (strcmp (optarg, "public") == 0) {
730 } else if (strcmp (optarg, "private") == 0) {
732 } else if (strcmp (optarg, "package") == 0) {
735 errx (1, "Invalid visibility: '%s', expected any of: private, public, package", optarg);
739 outfilename = optarg;
752 infilename = argv[0];
753 if (gen_names (infilename, outfilename ? NULL : &outfilename, &classname) != 0)
754 errx (1, "invalid file name: %s", infilename);
756 infile = fopen (infilename, "r");
758 err (1, "fopen(\"%s\")", infilename);
760 if (strcmp (outfilename, "-") == 0) {
763 outfile = fopen (outfilename, "w");
765 err (1, "fopen(\"%s\")", outfilename);
769 if (pledge ("stdio", NULL) != 0)
773 ret = compile (infile, infilename, outfile, visibilities[vis], package, classname);
778 remove (outfilename);