Commit Diff


commit - /dev/null
commit + 2228b436e294002e5f8b47f275655042c1ea6512
blob - /dev/null
blob + db4f930ea0b1dd5b430845e9ee25393b08910c6c (mode 644)
--- /dev/null
+++ Makefile
@@ -0,0 +1,30 @@
+.POSIX:
+
+PREFIX = /usr/local
+MANPREFIX = ${PREFIX}/man
+CFLAGS = -std=c99 -O2 -pipe -Wall -Wextra
+DIST = Makefile lexicgen.c lexicgen.1 example.lx
+
+all: lexicgen
+
+clean:
+	rm -f lexicgen lexicgen.tgz *.core
+
+man:
+	man ./lexicgen.1
+
+run: lexicgen example.lx
+	./lexicgen -o - example.lx
+
+install: lexicgen
+	mkdir -p ${DESTDIR}${PREFIX}/bin ${DESTDIR}${MANPREFIX}/man1
+	cp -f lexicgen ${DESTDIR}${PREFIX}/bin/
+	cp -f lexicgen.1 ${DESTDIR}${MANPREFIX}/man1/
+
+dist:
+	rm -rf _dist
+	mkdir -p _dist/lexicgen
+	cp -f ${DIST} _dist/lexicgen/
+	tar -C _dist -czf lexicgen.tgz lexicgen
+	rm -rf _dist
+
blob - /dev/null
blob + d709235c3aeb1d28adba8813efebff0e99bfe54e (mode 644)
--- /dev/null
+++ example.lx
@@ -0,0 +1,41 @@
+// Declarations
+%import eu.bandm.tools.lexic.*
+%import java.io.IOException
+%import java.io.InputStreamReader
+%import java.nio.charset.StandardCharsets
+
+// Declare an end-of-file token called EOF.
+%eof EOF
+
+// Declare an error token called Error.
+%error Error
+
+Integer = "0" | [1-9][0-9]*;
+Ident = [a-zA-Z_][a-zA-Z_0-9]*;
+
+%%
+// Definitions
+
+Whitespace	: [ \n\t\r\f]+;
+// A File can be either an integer, or an identifier.
+File		: Integer | Ident;
+
+%%
+// Code
+
+public static void main(String[] args) {
+	// The construct() function is generated by lexicgen and returns a Lexer.
+	var lexer = construct();
+
+	try (var rdr = new InputStreamReader(System.in, StandardCharsets.UTF_8)) {
+		TokenSource<String, TokenType> tokens = lexer
+			.lex(CodePointSource.read(rdr, e -> {}))
+			.removeTypes(TokenType.Whitespace);
+		while (true) {
+			var token = tokens.get();
+			if (token.getType() == TokenType.EOF)
+				break;
+			System.out.println(token);
+		}
+	} catch (IOException e) {}
+}
blob - /dev/null
blob + 8d93f8228b1cff5be3a6ff158972b4d59b460fdb (mode 644)
--- /dev/null
+++ lexicgen.1
@@ -0,0 +1,197 @@
+.\"
+.\" Copyright (c) 2023 Benjamin Stürz
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate$
+.Dt LEXICGEN 1
+.Os
+.Sh NAME
+.Nm lexicgen
+.Nd lexic generator
+.Sh SYNOPSIS
+.Nm
+.Op Fl v Ar visibility
+.Op Fl p Ar package
+.Op Fl o Ar output-file
+.Ar input-file
+.Sh DESCRIPTION
+.Nm
+is a clone of lex(1) for generating lexers using Lexic.
+See the FORMAT section for information about the format.
+.Pp
+The options are as follows:
+.Bl -tag -width -Ds
+.It Fl v Ar visibility
+Change the visibility of the lexer-internal data structures and functions. Valid values are private, public and package. (default: private)
+.It Fl p Ar package
+Set a Java package for the generated class.
+.It Fl o Ar output-file
+Write the output to
+.Ar output-file
+instead of a generated path.
+If
+.Ar output-file
+is '-', then write to the standard output.
+.El
+.Sh FORMAT
+A valid file consists of three sections:
+.Bl -bullet -compact
+.It
+Declarations
+.It
+Definitions
+.It
+and Code (optional)
+.El
+
+The declaration section can contain the following statements:
+.Bl -tag -width -Ds
+.It Sy %import Ar package
+Import
+.Ar package .
+.It Sy %eof Ar name
+Declare an end of file token called
+.Ar name .
+
+Note: Should only be specified once.
+.It Sy %error Ar name
+Declare an error token called
+.Ar name .
+
+Note: Should only be specified once.
+.It Sy name No = Sy syntax No ;
+Define a variable called
+.Ar name
+containing
+.Ar syntax
+that can be referred to from token definitions.
+.El
+
+The definition section contains token definitions.
+A token definition consists of a
+.Ar name
+and a
+.Ar syntax expression .
+The name must consist of alphabetic characters.
+
+The following syntax expressions are supported:
+.Bl -tag -width -Ds
+.It Sy ident
+An identifier is a sequence of one or more alphabetic characters.
+Match if the rule identified by
+.Ar ident
+matches.
+.It Qq Sy string
+Match if the input matches all characters of
+.Ar string .
+.It Bq Sy range
+Match, if any of the fragments match.
+If
+.Ar range
+starts with the
+.Dq ^
+character, then negate the matching result.
+The range consists of fragments.
+A fragment can be a single character, an escape sequence or a character range.
+Only single-character escape sequences are supported.
+A character range has the syntax a-b,
+where a is the first character and b is the last.
+.It Pq expr
+Grouping.
+.It expr?
+Zero or one occurrence of
+.Ar expr .
+.It expr+
+One or more occurrences of
+.Ar expr .
+.It expr*
+Zero or more occurrences of
+.Ar expr .
+.It expr1 expr2
+.Ar expr1
+followed by
+.Ar expr2 .
+.It expr1 & expr2
+Only match if both
+.Ar expr1
+and
+.Ar expr2
+match.
+.It expr1 \e expr2
+Match only if
+.Ar expr1
+matches, and
+.Ar expr2
+doesn't.
+.It expr1 | expr2
+Match if either or both of
+.Ar expr1
+or
+.Ar expr2
+match.
+.El
+
+The code section contains arbitrary code that is copied into the generated file.
+
+.Sh EXAMPLES
+The following code is an example of a lexicgen file:
+.Bd -literal
+// Declarations
+%import eu.bandm.tools.lexic.*
+%import java.io.IOException
+%import java.io.InputStreamReader
+%import java.nio.charset.StandardCharsets
+
+// Declare an end-of-file token called EOF.
+%eof EOF
+
+// Declare an error token called Error.
+%error Error
+
+Integer = "0" | [1-9][0-9]*;
+Ident = [a-zA-Z_][a-zA-Z_0-9]*;
+
+%%
+// Definitions
+
+Whitespace	: [ \en\et\er\ef]+;
+// A File can be either an integer, or an identifier.
+File		: Integer | Ident;
+
+%%
+// Code
+
+public static void main(String[] args) {
+	// The construct() function is generated by lexicgen and returns a Lexer.
+	var lexer = construct();
+
+	try (var rdr = new InputStreamReader(System.in, StandardCharsets.UTF_8)) {
+		TokenSource<String, TokenType> tokens = lexer
+			.lex(CodePointSource.read(rdr, e -> {}))
+			.removeTypes(TokenType.Whitespace);
+		while (true) {
+			var token = tokens.get();
+			if (token.getType() == TokenType.EOF)
+				break;
+			System.out.println(token);
+		}
+	} catch (IOException e) {}
+}
+.Ed
+.Sh EXIT STATUS
+.Ex -std
+.Sh SEE ALSO
+.Xr lex 1
+.Sh AUTHORS
+.An Benjamin Stürz Aq Mt benni@stuerz.xyz
blob - /dev/null
blob + ae4d18076db38c922ca23c7c2b616c476942ce81 (mode 644)
--- /dev/null
+++ lexicgen.c
@@ -0,0 +1,781 @@
+#ifdef __OpenBSD__
+# define _BSD_SOURCE
+#elif defined(__GNUC__)
+# define _DEFAULT_SOURCE
+# define _GNU_SOURCE
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <err.h>
+
+#define new(T) ((T *)calloc (1, sizeof (T)))
+
+enum syntax_type {	/* kinds of syntax-tree nodes built by the parser */
+	S_SUB,		/* parenthesized sub-expression: ( expr ) */
+	S_STRING,	/* double-quoted string literal */
+	S_PLUS,		/* postfix '+', printed as .plus() */
+	S_STAR,		/* postfix '*', printed as .star() */
+	S_OPT,		/* postfix '?', printed as .optional() */
+	S_BINARY,	/* two operands joined by the method named in `op` */
+	S_ANYOF,	/* [...] range, printed as TokenFragment.anyOf(...) */
+	S_EXCEPT,	/* [^...] range, printed as TokenFragment.except(...) */
+	S_EPSILON,	/* empty expression, printed as TokenFragment.epsilon() */
+	S_IDENT,	/* reference to another rule or variable by name */
+};
+
+struct syntax {	/* one node of a parsed syntax expression */
+	enum syntax_type type;	/* selects which union member below is in use */
+	union {
+		struct syntax *syn;	/* operand of S_SUB, S_PLUS, S_STAR and S_OPT */
+		char *string;	/* S_STRING text, S_IDENT name, or S_ANYOF/S_EXCEPT character list */
+		struct {
+			const char *op;	/* S_BINARY: method name emitted by print_syntax */
+			struct syntax *left, *right;	/* S_BINARY operands */
+		};
+		struct {
+			char begin, end;	/* NOTE(review): not referenced by any code visible in this file */
+		};
+
+	};
+};
+
+struct definition {	/* a named syntax expression: a variable or a token rule */
+	char *name;	/* heap-allocated by parse_def / parse_vardef via strndup */
+	struct syntax *syn;	/* parsed right-hand side */
+};
+
+enum Visibility {	/* values selectable with the -v option; used as index into visibilities[] */
+	V_PRIVATE,	/* -v private (the default chosen in main) */
+	V_PUBLIC,	/* -v public */
+	V_PACKAGE,	/* -v package */
+};
+
+static const char *visibilities[] = {	/* modifier text written in front of generated members, indexed by enum Visibility */
+	"private ",
+	"public ",
+	"",	/* V_PACKAGE: no modifier keyword is emitted */
+};
+
+/*
+ * Skip leading whitespace.
+ *
+ * Returns a pointer to the first non-whitespace character of s, or to
+ * its terminating NUL if s consists only of whitespace.
+ */
+static const char *strip_ws (const char *s)
+{
+	/* <ctype.h> functions are only defined for unsigned char values and EOF. */
+	while (*s != '\0' && isspace ((unsigned char)*s))
+		++s;
+	return s;
+}
+
+/* Report whether the string s begins with the string prefix. */
+static bool starts_with (const char *s, const char *prefix)
+{
+	for (; *prefix != '\0'; ++s, ++prefix) {
+		/* Also covers s being shorter than prefix: its NUL never equals a prefix char. */
+		if (*s != *prefix)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Read the body of a string literal.
+ *
+ * Copies characters from *s up to, but not including, the first unescaped
+ * `end` character and advances *s past that terminator.  Backslashes are
+ * copied verbatim together with the character they escape.
+ *
+ * Returns a newly allocated NUL-terminated string owned by the caller, or
+ * NULL if the input ends before the terminator is found.  Exits via err()
+ * if memory cannot be allocated.
+ */
+static char *parse_string (const char **s, char end)
+{
+	char ch, *str, *tmp;
+	size_t len, cap;
+	bool esc = false;
+
+	len = 0;
+	cap = 10;
+	str = malloc (cap + 1);
+	if (str == NULL)
+		err (1, NULL);
+
+	while (1) {
+		ch = *(*s)++;
+		if (ch == end && !esc)
+			break;
+		if (ch == '\0') {
+			/* Unterminated literal: do not leak the partial copy. */
+			free (str);
+			return NULL;
+		}
+
+		/*
+		 * A backslash only escapes the next character when it is not
+		 * escaped itself, so that "\\" is a complete literal.
+		 */
+		esc = !esc && ch == '\\';
+
+		if (len == cap) {
+			cap *= 2;
+			tmp = realloc (str, cap + 1);
+			if (tmp == NULL)
+				err (1, NULL);
+			str = tmp;
+		}
+
+		str[len++] = ch;
+	}
+	str[len] = '\0';
+	return str;
+}
+
+/*
+ * Read an identifier: the longest run of alphabetic characters at *s.
+ *
+ * Advances *s past the identifier and returns it as a newly allocated
+ * NUL-terminated string owned by the caller.  The result is the empty
+ * string when *s does not start with an alphabetic character.  Exits via
+ * err() if memory cannot be allocated.
+ */
+static char *parse_ident (const char **s)
+{
+	char *str, *tmp;
+	size_t len, cap;
+
+	len = 0;
+	cap = 10;
+	str = malloc (cap + 1);
+	if (str == NULL)
+		err (1, NULL);
+
+	/* <ctype.h> functions are only defined for unsigned char values and EOF. */
+	while (isalpha ((unsigned char)**s)) {
+		if (len == cap) {
+			cap *= 2;
+			tmp = realloc (str, cap + 1);
+			if (tmp == NULL)
+				err (1, NULL);
+			str = tmp;
+		}
+		str[len++] = *(*s)++;
+	}
+	str[len] = '\0';
+	return str;
+}
+
+/*
+ * Append ch to the growable buffer *str (used length *len, capacity *cap).
+ * One spare byte beyond the capacity is always kept for the final NUL.
+ * Exits via err() if memory cannot be allocated.
+ */
+static void range_push (char **str, size_t *len, size_t *cap, char ch)
+{
+	if (*len == *cap) {
+		char *tmp;
+
+		*cap *= 2;
+		tmp = realloc (*str, *cap + 1);
+		if (tmp == NULL)
+			err (1, NULL);
+		*str = tmp;
+	}
+	(*str)[(*len)++] = ch;
+}
+
+/*
+ * Parse the inside of a [...] range; *s points just past the '['.
+ *
+ * A leading '^' makes the node S_EXCEPT, otherwise it is S_ANYOF.  The
+ * range is expanded into syn->string as a flat list of its characters:
+ * "a-c" becomes "abc", single-character escapes such as \n are decoded,
+ * and a '-' directly before the closing ']' is taken literally.
+ *
+ * On success *s is left just past the ']' and syn is returned.  Returns
+ * NULL if the range is unterminated, starts with '-', or has a reversed
+ * a-b pair.
+ */
+static struct syntax *parse_range (struct syntax *syn, const char **s)
+{
+	size_t len, cap;
+	char ch, prev, *str;
+
+	if (**s == '^') {
+		syn->type = S_EXCEPT;
+		++*s;
+	} else {
+		syn->type = S_ANYOF;
+	}
+
+	len = 0;
+	cap = 10;
+	str = malloc (cap + 1);
+	if (str == NULL)
+		err (1, NULL);
+
+	while (1) {
+		ch = *(*s)++;
+		switch (ch) {
+		case '\0':
+			goto fail;
+		case ']':
+			goto end;
+		case '-':
+			if (len == 0)
+				goto fail;
+			ch = *(*s)++;
+			if (ch == '\0')
+				goto fail;
+
+			if (ch == ']') {
+				range_push (&str, &len, &cap, '-');
+				goto end;
+			}
+
+			prev = str[len - 1];
+			if (prev > ch)
+				goto fail;
+
+			/* An int counter cannot wrap when ch is the largest char value. */
+			for (int c = prev + 1; c <= ch; ++c)
+				range_push (&str, &len, &cap, (char)c);
+			break;
+		case '\\':
+			ch = *(*s)++;
+			switch (ch) {
+			case '\0':
+				/* Backslash at end of input: nothing left to escape. */
+				goto fail;
+			case 'a':	ch = '\a';	break;
+			case 'b':	ch = '\b';	break;
+			case 'f':	ch = '\f';	break;
+			case 'n':	ch = '\n';	break;
+			case 'r':	ch = '\r';	break;
+			case 't':	ch = '\t';	break;
+			case 'v':	ch = '\v';	break;
+			case '\\':	ch = '\\';	break;
+			case '\'':	ch = '\'';	break;
+			case '"':	ch = '\"';	break;
+			default:			break;
+			}
+
+			/* FALLTHROUGH */
+		default:
+			range_push (&str, &len, &cap, ch);
+			break;
+		}
+
+	}
+end:
+	str[len] = '\0';
+	syn->string = str;
+	return syn;
+fail:
+	free (str);
+	return NULL;
+}
+
+static struct syntax *parse_syn (const char **s);
+
+/*
+ * Parse one atom at *s, after skipping leading whitespace:
+ *   ( expr )   -> S_SUB
+ *   [ range ]  -> S_ANYOF / S_EXCEPT (see parse_range)
+ *   "string"   -> S_STRING
+ *   identifier -> S_IDENT
+ * A ';', ')', '|' or '&' at this position yields an S_EPSILON node and is
+ * left unconsumed for the caller.
+ *
+ * Returns the new node, or NULL on a syntax error.
+ */
+static struct syntax *syn_atom (const char **s)
+{
+	struct syntax *syn;
+	char ch;
+
+	/* <ctype.h> functions are only defined for unsigned char values and EOF. */
+	while (isspace ((unsigned char)**s))
+		++*s;
+
+	syn = new (struct syntax);
+	if (syn == NULL)
+		err (1, NULL);
+
+	ch = *(*s)++;
+
+	switch (ch) {
+	case '(':
+		syn->type = S_SUB;
+		syn->syn = parse_syn (s);
+		if (syn->syn == NULL || **s != ')')
+			goto fail;
+		++*s;
+		break;
+	case '[':
+		if (parse_range (syn, s) == NULL)
+			goto fail;
+		break;
+	case '"':
+		syn->type = S_STRING;
+		syn->string = parse_string (s, '"');
+		if (syn->string == NULL)
+			goto fail;
+		break;
+	case ';':
+	case ')':
+	case '|':
+	case '&':
+		syn->type = S_EPSILON;
+		--*s;
+		break;
+	default:
+		if (!isalpha ((unsigned char)ch)) {
+			warnx ("invalid input: '%c'", ch);
+			goto fail;
+		}
+		--*s;
+		syn->type = S_IDENT;
+		syn->string = parse_ident (s);
+		break;
+	}
+
+	return syn;
+fail:
+	/* Release the node allocated here; callers only see NULL. */
+	free (syn);
+	return NULL;
+}
+
+/*
+ * Parse an atom followed by any number of postfix operators.
+ * Each '*', '+' or '?' directly after the atom wraps the expression parsed
+ * so far in an S_STAR, S_PLUS or S_OPT node respectively.
+ *
+ * Returns the resulting node, or NULL if the atom could not be parsed.
+ */
+static struct syntax *syn_suffix (const char **s)
+{
+	struct syntax *syn, *n;
+
+	syn = syn_atom (s);
+	if (syn == NULL)
+		return NULL;
+
+	while (**s == '*' || **s == '+' || **s == '?') {
+		n = new (struct syntax);
+		if (n == NULL)
+			err (1, NULL);
+
+		switch (*(*s)++) {
+		case '*':
+			n->type = S_STAR;
+			break;
+		case '+':
+			n->type = S_PLUS;
+			break;
+		default:
+			/* Only '?' is left, per the loop condition. */
+			n->type = S_OPT;
+			break;
+		}
+		n->syn = syn;
+		syn = n;
+	}
+
+	return syn;
+}
+
+/*
+ * Parse a sequence of suffixed atoms ("expr1 expr2 ...").
+ * Adjacent operands are joined left to right into S_BINARY nodes whose
+ * operator is "andThen".
+ *
+ * The sequence ends at ';', ')' or at one of the binary operators '|',
+ * '&' and '\'; the terminator is left unconsumed for the caller.
+ *
+ * Returns the resulting node, or NULL on a syntax error.
+ */
+static struct syntax *syn_andthen (const char **s)
+{
+	struct syntax *syn, *n;
+
+	syn = syn_suffix (s);
+	if (syn == NULL)
+		return NULL;
+
+	while (1) {
+		/* <ctype.h> functions are only defined for unsigned char values and EOF. */
+		while (isspace ((unsigned char)**s))
+			++*s;
+
+		/*
+		 * '&' and '\' must end the sequence as well.  syn_atom() turns an
+		 * unconsumed '&' into an epsilon node without advancing, so
+		 * continuing here would loop forever, and '\' would be rejected
+		 * as invalid input before syn_butnot() could see it.
+		 */
+		if (**s == ';' || **s == '|' || **s == ')' ||
+		    **s == '&' || **s == '\\')
+			break;
+
+		n = new (struct syntax);
+		if (n == NULL)
+			err (1, NULL);
+		n->type = S_BINARY;
+		n->op = "andThen";
+		n->left = syn;
+		n->right = syn_suffix (s);
+		if (n->right == NULL)
+			return NULL;
+		syn = n;
+	}
+
+	return syn;
+}
+
+/*
+ * Parse "expr1 & expr2 & ...".
+ * Operands are sequences parsed by syn_andthen(); they are joined left to
+ * right into S_BINARY nodes whose operator is "butOnly".
+ *
+ * Returns the resulting node, or NULL on a syntax error.
+ */
+static struct syntax *syn_butonly (const char **s)
+{
+	struct syntax *syn, *n;
+
+	syn = syn_andthen (s);
+	if (syn == NULL)
+		return NULL;
+
+	while (1) {
+		/* <ctype.h> functions are only defined for unsigned char values and EOF. */
+		while (isspace ((unsigned char)**s))
+			++*s;
+		if (**s != '&')
+			break;
+		++*s;
+
+		n = new (struct syntax);
+		if (n == NULL)
+			err (1, NULL);
+		n->type = S_BINARY;
+		n->op = "butOnly";
+		n->left = syn;
+		n->right = syn_andthen (s);
+		if (n->right == NULL)
+			return NULL;
+		syn = n;
+	}
+
+	return syn;
+}
+
+/*
+ * Parse "expr1 \ expr2 \ ...".
+ * Operands are parsed by syn_butonly(); they are joined left to right
+ * into S_BINARY nodes whose operator is "butNot".
+ *
+ * Returns the resulting node, or NULL on a syntax error.
+ */
+static struct syntax *syn_butnot (const char **s)
+{
+	struct syntax *syn, *n;
+
+	syn = syn_butonly (s);
+	if (syn == NULL)
+		return NULL;
+
+	while (1) {
+		/* <ctype.h> functions are only defined for unsigned char values and EOF. */
+		while (isspace ((unsigned char)**s))
+			++*s;
+		if (**s != '\\')
+			break;
+		++*s;
+
+		n = new (struct syntax);
+		if (n == NULL)
+			err (1, NULL);
+		n->type = S_BINARY;
+		n->op = "butNot";
+		n->left = syn;
+		n->right = syn_butonly (s);
+		if (n->right == NULL)
+			return NULL;
+		syn = n;
+	}
+
+	return syn;
+}
+
+/*
+ * Parse "expr1 | expr2 | ...", the lowest-precedence operator.
+ * Operands are parsed by syn_butnot(); they are joined left to right into
+ * S_BINARY nodes whose operator is "orElse".
+ *
+ * Returns the resulting node, or NULL on a syntax error.
+ */
+static struct syntax *syn_or (const char **s)
+{
+	struct syntax *syn, *n;
+
+	syn = syn_butnot (s);
+	if (syn == NULL)
+		return NULL;
+
+	while (1) {
+		/* <ctype.h> functions are only defined for unsigned char values and EOF. */
+		while (isspace ((unsigned char)**s))
+			++*s;
+		if (**s != '|')
+			break;
+		++*s;
+
+		n = new (struct syntax);
+		if (n == NULL)
+			err (1, NULL);
+		n->type = S_BINARY;
+		n->op = "orElse";
+		n->left = syn;
+		n->right = syn_butnot (s);
+		if (n->right == NULL)
+			return NULL;
+		syn = n;
+	}
+
+	return syn;
+}
+
+static struct syntax *parse_syn (const char **s)	/* entry point of the expression parser; forward-declared for syn_atom's '(' case */
+{
+	return syn_or (s);	/* syn_or is the outermost parsing level */
+}
+
+/*
+ * Write ch to file as a Java character literal, including the quotes.
+ *
+ * Characters with a Java escape sequence (\b \f \n \r \t \\ \') use it.
+ * Any other control character, including \a and \v which Java does not
+ * define, is written as an octal escape such as '\7'.  Everything else is
+ * written unchanged.
+ */
+static void print_char (FILE *file, char ch)
+{
+	const char *esc = NULL;
+
+	switch (ch) {
+	case '\b':	esc = "\\b";	break;
+	case '\f':	esc = "\\f";	break;
+	case '\n':	esc = "\\n";	break;
+	case '\r':	esc = "\\r";	break;
+	case '\t':	esc = "\\t";	break;
+	case '\\':	esc = "\\\\";	break;
+	case '\'':	esc = "\\'";	break;
+	default:			break;
+	}
+
+	if (esc != NULL)
+		fprintf (file, "'%s'", esc);
+	else if (iscntrl ((unsigned char)ch))
+		fprintf (file, "'\\%o'", (unsigned int)(unsigned char)ch);
+	else
+		fprintf (file, "'%c'", ch);
+}
+
+/*
+ * Write the Java expression that builds syn to file.
+ *
+ * nesting is the current indentation depth.  It is only used for S_BINARY
+ * nodes, whose ".op(...)" call is placed on a new line indented by
+ * nesting + 1 tabs.
+ */
+static void print_syntax (FILE *file, const struct syntax *syn, int nesting)
+{
+	switch (syn->type) {
+	case S_EPSILON:
+		fprintf (file, "TokenFragment.epsilon()");
+		break;
+	case S_SUB:
+		print_syntax (file, syn->syn, nesting);
+		break;
+	case S_STRING:
+		/* The text still carries its source escapes, so it is emitted verbatim. */
+		fprintf (file, "TokenFragment.of(\"%s\")", syn->string);
+		break;
+	case S_ANYOF:
+	case S_EXCEPT:
+		fprintf (file, "TokenFragment.%s(", syn->type == S_ANYOF ? "anyOf" : "except");
+		/*
+		 * Start at index 0 and test before every read: an empty range
+		 * ("[]") has a 1-byte string, so index 1 must not be touched.
+		 */
+		for (size_t i = 0; syn->string[i] != '\0'; ++i) {
+			if (i > 0)
+				fputs (", ", file);
+			print_char (file, syn->string[i]);
+		}
+		fputc (')', file);
+		break;
+	case S_PLUS:
+		print_syntax (file, syn->syn, nesting);
+		fprintf (file, ".plus()");
+		break;
+	case S_STAR:
+		print_syntax (file, syn->syn, nesting);
+		fprintf (file, ".star()");
+		break;
+	case S_OPT:
+		print_syntax (file, syn->syn, nesting);
+		fprintf (file, ".optional()");
+		break;
+	case S_BINARY:
+		print_syntax (file, syn->left, nesting);
+		++nesting;
+		fputc ('\n', file);
+		for (int i = 0; i < nesting; ++i)
+			fputc ('\t', file);
+		fprintf (file, ".%s(", syn->op);
+		print_syntax (file, syn->right, nesting);
+		fprintf (file, ")");
+		break;
+	case S_IDENT:
+		/* Referenced rules are emitted as Java fields with a lower-case first letter. */
+		fprintf (file, "%c%s", tolower ((unsigned char)syn->string[0]), syn->string + 1);
+		break;
+	}
+}
+
+/*
+ * Parse a token definition of the form "Name : syntax ;".
+ *
+ * Name is a non-empty run of alphabetic characters at the start of s.
+ * On success def->name holds a heap-allocated copy of the name and
+ * def->syn the parsed expression.  Anything after the ';' is ignored.
+ *
+ * Returns 0 on success and 1 on a syntax error.
+ */
+static int parse_def (const char *s, struct definition *def)
+{
+	size_t i;
+
+	/* <ctype.h> functions are only defined for unsigned char values and EOF. */
+	for (i = 0; isalpha ((unsigned char)s[i]); ++i);
+	if (i == 0)
+		return 1;
+
+	def->name = strndup (s, i);
+	if (def->name == NULL)
+		err (1, NULL);
+	s += i;
+
+	for (; isspace ((unsigned char)*s); ++s);
+
+	if (*s++ != ':')
+		return 1;
+
+	def->syn = parse_syn (&s);
+	if (def->syn == NULL)
+		return 1;
+
+	if (*s != ';')
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Parse a variable definition of the form "Name = syntax ;".
+ *
+ * Name is a non-empty run of alphabetic characters at the start of s.
+ * On success def->name holds a heap-allocated copy of the name and
+ * def->syn the parsed expression.  Anything after the ';' is ignored.
+ *
+ * Returns 0 on success and 1 on a syntax error.
+ */
+static int parse_vardef (const char *s, struct definition *def)
+{
+	size_t i;
+
+	/* <ctype.h> functions are only defined for unsigned char values and EOF. */
+	for (i = 0; isalpha ((unsigned char)s[i]); ++i);
+	if (i == 0)
+		return 1;
+
+	def->name = strndup (s, i);
+	if (def->name == NULL)
+		err (1, NULL);
+	s += i;
+
+	for (; isspace ((unsigned char)*s); ++s);
+	if (*s++ != '=')
+		return 1;
+
+	def->syn = parse_syn (&s);
+	if (def->syn == NULL)
+		return 1;
+
+	if (*s != ';')
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Translate the description read from infile into a Java class written
+ * to outfile.
+ *
+ * The input is read line by line in three sections separated by "%%":
+ *   1. declarations: %import, %eof, %error and "Name = syntax;" variables
+ *   2. token definitions: "Name : syntax;"
+ *   3. code, copied into the class body with one tab of indentation
+ * Blank lines and lines starting with "//" are skipped in sections 1 and 2.
+ *
+ * infilename is only used in diagnostics.  vis is the modifier text put in
+ * front of generated members, package (may be NULL) the Java package and
+ * classname the name of the generated class.
+ *
+ * Returns 0 on success and 1 after printing a diagnostic on a parse
+ * error; output may already have been partially written in that case.
+ * Exits via err() if memory cannot be allocated.
+ */
+static int compile (FILE *infile, const char *infilename, FILE *outfile, const char *vis, const char *package, const char *classname)
+{
+	size_t linenum = 0, ntok, tokcap, nvar, varcap;
+	char line[1024];
+	const char *s;
+	struct definition *tokens, *vars, *grown;
+	char *eof = NULL;
+	char *error = NULL;
+
+	if (package != NULL)
+		fprintf (outfile, "package %s;\n\n", package);
+
+	nvar = 0;
+	varcap = 10;
+	vars = calloc (varcap, sizeof (struct definition));
+	if (vars == NULL)
+		err (1, NULL);
+
+	// Parse declarations
+	while (1) {
+		++linenum;
+		if (fgets (line, sizeof (line), infile) == NULL) {
+			warnx ("%s: %zu: unexpected end of file", infilename, linenum);
+			return 1;
+		}
+
+		line[strcspn (line, "\n")] = '\0';
+		s = strip_ws (line);
+		if (*s == '\0' || starts_with (s, "//"))
+			continue;
+
+		if (strcmp (s, "%%") == 0)
+			break;
+
+		if (starts_with (s, "%import ")) {
+			// Skip the '%': the rest already reads "import <package>".
+			fprintf (outfile, "%s;\n", s + 1);
+		} else if (starts_with (s, "%eof ")) {
+			s = strip_ws (s + 4);
+			eof = strdup (s);
+		} else if (starts_with (s, "%error ")) {
+			s = strip_ws (s + 6);
+			error = strdup (s);
+		} else {
+			if (nvar == varcap) {
+				varcap *= 2;
+				grown = reallocarray (vars, varcap, sizeof (struct definition));
+				if (grown == NULL)
+					err (1, NULL);
+				vars = grown;
+			}
+
+			if (parse_vardef (s, &vars[nvar++]) != 0) {
+				warnx ("%s: %zu: unrecognized statement: %s", infilename, linenum, s);
+				return 1;
+			}
+		}
+	}
+
+	// Missing %eof / %error declarations are reported but not fatal.
+	if (eof == NULL)
+		warnx ("%s: %zu: end-of-file token not defined. Use `%%eof NAME` to define an EOF token.", infilename, linenum);
+	if (error == NULL)
+		warnx ("%s: %zu: error token not defined. Use `%%error NAME` to define an error token.", infilename, linenum);
+
+	fprintf (outfile, "\npublic class %s {\n", classname);
+
+	ntok = 0;
+	tokcap = 10;
+	tokens = calloc (tokcap, sizeof (struct definition));
+	if (tokens == NULL)
+		err (1, NULL);
+
+	// Parse definitions
+	while (fgets (line, sizeof (line), infile) != NULL) {
+		++linenum;
+		line[strcspn (line, "\n")] = '\0';
+		s = strip_ws (line);
+		if (*s == '\0' || starts_with (s, "//"))
+			continue;
+
+		if (strcmp (s, "%%") == 0)
+			break;
+
+		if (ntok == tokcap) {
+			tokcap *= 2;
+			grown = reallocarray (tokens, tokcap, sizeof (struct definition));
+			if (grown == NULL)
+				err (1, NULL);
+			tokens = grown;
+		}
+
+		if (parse_def (s, &tokens[ntok++]) != 0) {
+			warnx ("%s: %zu: invalid token definition", infilename, linenum);
+			return 1;
+		}
+	}
+
+	if (ntok == 0) {
+		warnx ("%s: %zu: no token definitions", infilename, linenum);
+		return 1;
+	}
+
+	fprintf (outfile, "\t%sstatic enum TokenType {\n", vis);
+	if (eof != NULL)
+		fprintf (outfile, "\t\t%s,\n", eof);
+	if (error != NULL)
+		fprintf (outfile, "\t\t%s,\n", error);
+	for (size_t i = 0; i < ntok; ++i) {
+		fprintf (outfile, "\t\t%s,\n", tokens[i].name);
+	}
+	fprintf (outfile, "\t}\n\n");
+
+	if (nvar > 0) {
+		fprintf (outfile, "\t// Variable definitions\n");
+		for (size_t i = 0; i < nvar; ++i) {
+			const struct definition *var = &vars[i];
+			// Java field names use the definition name with a lower-case first letter.
+			fprintf (outfile,
+					"\t%sstatic final TokenFragment %c%s = \n\t\t",
+					vis,
+					tolower ((unsigned char)var->name[0]),
+					var->name + 1);
+			print_syntax (outfile, var->syn, 2);
+			fprintf (outfile, ";\n");
+		}
+		fputc ('\n', outfile);
+	}
+
+
+	fprintf (outfile, "\t// Token Definitions\n");
+	for (size_t i = 0; i < ntok; ++i) {
+		const struct definition *tok = &tokens[i];
+		fprintf (outfile,
+			 "\t%sstatic final TokenRule<TokenType> %c%s = new TokenRule<>(TokenType.%s,\n\t\t",
+			 vis,
+			 tolower ((unsigned char)tok->name[0]),
+			 tok->name + 1,
+			 tok->name);
+		print_syntax (outfile, tok->syn, 2);
+		fprintf (outfile, "\n\t);\n");
+	}
+	fputc ('\n', outfile);
+
+	fprintf (outfile, "\t%sstatic final Lexer construct() {\n", vis);
+	fprintf (outfile, "\t\tfinal var rules = new java.util.ArrayList<TokenRule<TokenType>>();\n");
+	for (size_t i = 0; i < ntok; ++i) {
+		const struct definition *tok = &tokens[i];
+		fprintf (outfile, "\t\trules.add(%c%s);\n", tolower ((unsigned char)tok->name[0]), tok->name + 1);
+	}
+
+	fprintf (outfile, "\t\treturn new Lexer(new TokenRuleSet(rules))");
+	if (eof)
+		fprintf (outfile, "\n\t\t\t.setEndType(TokenType.%s)", eof);
+	if (error)
+		fprintf (outfile, "\n\t\t\t.setErrorType(TokenType.%s)", error);
+	fprintf (outfile, ";\n\t}\n\n");
+
+	// Copy the code section into the class body, indented by one tab.
+	while (fgets (line, sizeof (line), infile) != NULL) {
+		fputc ('\t', outfile);
+		fputs (line, outfile);
+	}
+
+	fputs ("}\n", outfile);
+
+	return 0;
+}
+
+
+
+
+/*
+ * Print the command-line synopsis to the standard error stream.
+ * Always returns 1 so that main can write "return usage ();".
+ */
+static int usage (void)
+{
+	static const char synopsis[] =
+	    "usage: lexicgen [-v VISIBILITY] [-p PACKAGE] [-o FILE] file\n";
+
+	fwrite (synopsis, 1, sizeof (synopsis) - 1, stderr);
+	return 1;
+}
+
+/*
+ * Derive the Java class name, and optionally the output path, from the
+ * input file name.
+ *
+ * The class name is the part of the last path component before its first
+ * '.', e.g. "dir/example.lx" gives "example".  If outfilename is not NULL
+ * it receives the input path up to that '.' followed by ".java", e.g.
+ * "dir/example.java".  Both results are heap-allocated and owned by the
+ * caller.
+ *
+ * Returns 0 on success, or -1 (storing nothing) if the class name would
+ * be empty.  Exits via err() if memory cannot be allocated.
+ */
+static int gen_names (const char *filename, char **outfilename, char **classname)
+{
+	static const char suffix[] = ".java";
+	const char *slash, *base, *dot;
+	size_t stemlen, pathlen;
+	char *out, *cls;
+
+	/* Locate the last path component without stepping before the string. */
+	slash = strrchr (filename, '/');
+	base = slash != NULL ? slash + 1 : filename;
+
+	dot = strchr (base, '.');
+	if (dot == NULL)
+		dot = base + strlen (base);
+
+	stemlen = (size_t)(dot - base);
+	if (stemlen == 0)
+		return -1;
+
+	cls = malloc (stemlen + 1);
+	if (cls == NULL)
+		err (1, NULL);
+	memcpy (cls, base, stemlen);
+	cls[stemlen] = '\0';
+
+	if (outfilename != NULL) {
+		pathlen = (size_t)(dot - filename);
+		out = malloc (pathlen + sizeof (suffix));
+		if (out == NULL)
+			err (1, NULL);
+		memcpy (out, filename, pathlen);
+		/* sizeof (suffix) includes the terminating NUL. */
+		memcpy (out + pathlen, suffix, sizeof (suffix));
+		*outfilename = out;
+	}
+	*classname = cls;
+	return 0;
+}
+
+
+/*
+ * Entry point: lexicgen [-v visibility] [-p package] [-o output-file] input-file
+ *
+ * Parses the options, derives the class name (and the output path unless
+ * -o was given) from the input file name, opens both files and runs
+ * compile().  An output file name of "-" selects the standard output.
+ *
+ * Returns 0 on success and 1 on failure.  A file created for the output
+ * is removed again when compilation or writing fails.
+ */
+int main (int argc, char *argv[])
+{
+	int option;
+	char *package = NULL;
+	char *infilename = NULL, *outfilename = NULL, *classname;
+	FILE *infile, *outfile;
+	enum Visibility vis = V_PRIVATE;
+	bool to_stdout;
+	int ret;
+
+#ifdef __OpenBSD__
+	if (pledge ("stdio rpath wpath cpath", NULL) != 0)
+		err (1, "pledge()");
+#endif
+
+	while ((option = getopt (argc, argv, "p:v:o:")) != -1) {
+		switch (option) {
+		case 'p':
+			package = optarg;
+			break;
+		case 'v':
+			if (strcmp (optarg, "public") == 0) {
+				vis = V_PUBLIC;
+			} else if (strcmp (optarg, "private") == 0) {
+				vis = V_PRIVATE;
+			} else if (strcmp (optarg, "package") == 0) {
+				vis = V_PACKAGE;
+			} else {
+				errx (1, "Invalid visibility: '%s', expected any of: private, public, package", optarg);
+			}
+			break;
+		case 'o':
+			outfilename = optarg;
+			break;
+		default:
+			return usage ();
+		}
+	}
+
+	argv += optind;
+	argc -= optind;
+
+	if (argc != 1)
+		return usage ();
+
+	infilename = argv[0];
+	/* Only let gen_names pick the output path when -o was not given. */
+	if (gen_names (infilename, outfilename ? NULL : &outfilename, &classname) != 0)
+		errx (1, "invalid file name: %s", infilename);
+
+	infile = fopen (infilename, "r");
+	if (infile == NULL)
+		err (1, "fopen(\"%s\")", infilename);
+
+	to_stdout = strcmp (outfilename, "-") == 0;
+	if (to_stdout) {
+		outfile = stdout;
+	} else {
+		outfile = fopen (outfilename, "w");
+		if (outfile == NULL)
+			err (1, "fopen(\"%s\")", outfilename);
+	}
+
+#ifdef __OpenBSD__
+	/*
+	 * Both files are open, so drop the path promises.  "cpath" is kept
+	 * when writing to a file because the remove() below needs it.
+	 */
+	if (pledge (to_stdout ? "stdio" : "stdio cpath", NULL) != 0)
+		err (1, "pledge()");
+#endif
+
+	ret = compile (infile, infilename, outfile, visibilities[vis], package, classname);
+
+	/* fclose() flushes buffered output, so this is where write errors surface. */
+	if (fclose (outfile) != 0) {
+		warn ("%s", outfilename);
+		ret = 1;
+	}
+	fclose (infile);
+
+	/* Never try to delete a file called "-" when the output went to stdout. */
+	if (ret != 0 && !to_stdout)
+		remove (outfilename);
+	return ret;
+}
+