commit 780459fb4692655b2d9c001d8f0f992c21c9bf46
from: Benjamin Stürz
date: Fri Jul 05 08:54:53 2024 UTC

import

commit - /dev/null
commit + 780459fb4692655b2d9c001d8f0f992c21c9bf46
blob - /dev/null
blob + fa63cc7bf930d8d95f48b8331ce6579de841d6f9 (mode 644)
--- /dev/null
+++ Makefile
@@ -0,0 +1,25 @@
+OBJ = parse.o lex.o main.o
+
+.PHONY: all run clean
+
+all: destruct
+
+run: destruct
+	./destruct < test.txt
+
+clean:
+	rm -f destruct destruct.inc y.tab.h *.o
+
+destruct: destruct.inc ${OBJ}
+	${CC} -o $@ ${OBJ}
+
+# main.c textually includes destruct.inc, so it has to exist before main.o is
+# built (the link rule alone does not guarantee that in a parallel build).
+main.o: destruct.inc ast.h
+parse.o: ast.h
+
+# Turn destruct.h into a single C string literal: line ends become an escaped
+# newline and tabs an escaped tab.  Double quotes and backslashes are not
+# escaped, so destruct.h must not contain any.
+destruct.inc: destruct.h
+	sed "s/\$$/\\\\n/; s/$$(printf '\t')/\\\\t/g" < destruct.h | tr -d '\r\n' | sed 's/^/"/; s/$$/"/' > $@
blob - /dev/null
blob + 339db3a7c4e04375fbde0643a28935bcecba749d (mode 644)
--- /dev/null
+++ ast.h
@@ -0,0 +1,84 @@
+#ifndef FILE_AST_H
+#define FILE_AST_H
+
+#include <stddef.h>	// size_t
+
+// Discriminator for struct type.
+enum type_type {
+	T_I8,
+	T_I16,
+	T_I32,
+	T_I64,
+	T_U8,
+	T_U16,
+	T_U32,
+	T_U64,
+	T_ARRAY,
+	T_NAME,
+	T_STRUCT,
+	T_UNION,
+};
+
+// A type expression; which union member is valid depends on `type`.
+struct type {
+	enum type_type type;
+	union {
+		struct {			// T_ARRAY
+			struct type *inner;
+			size_t len;
+		};
+		char *name;			// T_NAME: name of a top-level struct
+		struct structure *st;		// T_STRUCT: inline struct
+		struct uitem **un;		// T_UNION: NULL-terminated list
+	};
+};
+
+// A named struct member.
+struct item {
+	char *name;
+	struct type *type;
+};
+
+// A struct declaration; name is NULL when the source gave none.
+struct structure {
+	char *name;
+	struct item **items;			// NULL-terminated list
+};
+
+enum value_type {
+	V_NAME,
+	V_INT,
+};
+
+// Right-hand side of a condition.
+struct value {
+	enum value_type type;
+	union {
+		char *s;			// V_NAME
+		int i;				// V_INT
+	};
+};
+
+enum cond_type {
+	C_EQ,
+};
+
+// Condition attached to a union member.
+struct cond {
+	enum cond_type type;
+	union {
+		struct {
+			char *left;
+			struct value *right;
+		} cmp;				// C_EQ: left == right
+	};
+};
+
+// A union member together with the condition under which it is used.
+struct uitem {
+	char *name;
+	struct type *type;
+	struct cond *cond;
+};
+
+#endif // FILE_AST_H
blob - /dev/null
blob + 4e1153c2c0c8857bed664e941eeb7e86f0952791 (mode 644)
--- /dev/null
+++ destruct.h
@@ -0,0 +1,134 @@
+// Byte-order helpers, emitted verbatim at the top of the generated code.
+// DST_FROM_ENDIAN (used when reading) must be DST_LITTLE_ENDIAN or
+// DST_BIG_ENDIAN; DST_TO_ENDIAN (used when writing) falls back to big endian.
+
+#include <stdint.h>
+
+#define DST_LITTLE_ENDIAN 1234
+#define DST_BIG_ENDIAN 4321
+
+static uint8_t dst_read8 (const uint8_t *ptr)
+{
+	return *ptr;
+}
+
+static uint16_t dst_read16 (const uint8_t *ptr)
+{
+	const uint16_t b0 = ptr[0];
+	const uint16_t b1 = ptr[1];
+
+#if DST_FROM_ENDIAN == DST_LITTLE_ENDIAN
+	return b0 | (b1 << 8);
+#elif DST_FROM_ENDIAN == DST_BIG_ENDIAN
+	return b1 | (b0 << 8);
+#else
+#error DST_FROM_ENDIAN must be DST_LITTLE_ENDIAN or DST_BIG_ENDIAN
+#endif
+}
+
+static uint32_t dst_read32 (const uint8_t *ptr)
+{
+	const uint32_t b0 = ptr[0];
+	const uint32_t b1 = ptr[1];
+	const uint32_t b2 = ptr[2];
+	const uint32_t b3 = ptr[3];
+
+#if DST_FROM_ENDIAN == DST_LITTLE_ENDIAN
+	return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+#elif DST_FROM_ENDIAN == DST_BIG_ENDIAN
+	return b3 | (b2 << 8) | (b1 << 16) | (b0 << 24);
+#else
+#error DST_FROM_ENDIAN must be DST_LITTLE_ENDIAN or DST_BIG_ENDIAN
+#endif
+}
+
+static uint64_t dst_read64 (const uint8_t *ptr)
+{
+	const uint64_t b0 = ptr[0];
+	const uint64_t b1 = ptr[1];
+	const uint64_t b2 = ptr[2];
+	const uint64_t b3 = ptr[3];
+	const uint64_t b4 = ptr[4];
+	const uint64_t b5 = ptr[5];
+	const uint64_t b6 = ptr[6];
+	const uint64_t b7 = ptr[7];
+
+#if DST_FROM_ENDIAN == DST_LITTLE_ENDIAN
+	return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24) | (b4 << 32) | (b5 << 40) | (b6 << 48) | (b7 << 56);
+#elif DST_FROM_ENDIAN == DST_BIG_ENDIAN
+	return b7 | (b6 << 8) | (b5 << 16) | (b4 << 24) | (b3 << 32) | (b2 << 40) | (b1 << 48) | (b0 << 56);
+#else
+#error DST_FROM_ENDIAN must be DST_LITTLE_ENDIAN or DST_BIG_ENDIAN
+#endif
+}
+
+static void dst_write8 (uint8_t *ptr, uint8_t x)
+{
+	*ptr = x;
+}
+
+static void dst_write16 (uint8_t *ptr, uint16_t x)
+{
+	const uint8_t b0 = x & 0xff;
+	const uint8_t b1 = x >> 8;
+
+#if DST_TO_ENDIAN == DST_LITTLE_ENDIAN
+	ptr[0] = b0;
+	ptr[1] = b1;
+#else
+	ptr[0] = b1;
+	ptr[1] = b0;
+#endif
+}
+
+static void dst_write32 (uint8_t *ptr, uint32_t x)
+{
+	const uint8_t b0 = x & 0xff;
+	const uint8_t b1 = (x >> 8) & 0xff;
+	const uint8_t b2 = (x >> 16) & 0xff;
+	const uint8_t b3 = (x >> 24) & 0xff;
+
+#if DST_TO_ENDIAN == DST_LITTLE_ENDIAN
+	ptr[0] = b0;
+	ptr[1] = b1;
+	ptr[2] = b2;
+	ptr[3] = b3;
+#else
+	ptr[0] = b3;
+	ptr[1] = b2;
+	ptr[2] = b1;
+	ptr[3] = b0;
+#endif
+}
+
+static void dst_write64 (uint8_t *ptr, uint64_t x)
+{
+	const uint8_t b0 = x & 0xff;
+	const uint8_t b1 = (x >> 8) & 0xff;
+	const uint8_t b2 = (x >> 16) & 0xff;
+	const uint8_t b3 = (x >> 24) & 0xff;
+	const uint8_t b4 = (x >> 32) & 0xff;
+	const uint8_t b5 = (x >> 40) & 0xff;
+	const uint8_t b6 = (x >> 48) & 0xff;
+	const uint8_t b7 = (x >> 56) & 0xff;
+
+#if DST_TO_ENDIAN == DST_LITTLE_ENDIAN
+	ptr[0] = b0;
+	ptr[1] = b1;
+	ptr[2] = b2;
+	ptr[3] = b3;
+	ptr[4] = b4;
+	ptr[5] = b5;
+	ptr[6] = b6;
+	ptr[7] = b7;
+#else
+	ptr[0] = b7;
+	ptr[1] = b6;
+	ptr[2] = b5;
+	ptr[3] = b4;
+	ptr[4] = b3;
+	ptr[5] = b2;
+	ptr[6] = b1;
+	ptr[7] = b0;
+#endif
+}
blob - /dev/null
blob + c22c130e9304ebbc5923980b54a497b522baa3e4 (mode 644)
--- /dev/null
+++ lex.l
@@ -0,0 +1,72 @@
+%{
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "y.tab.h"
+
+void yyerror (const char *, ...);
+%}
+
+%option yylineno
+%option noyywrap
+
+IDENT	[a-zA-Z_][a-zA-Z0-9_]*
+INT	0|[1-9][0-9]*
+
+%%
+
+[ \t\r\n\f]+	;
+[:;,{}\[\]]	return *yytext;
+
+struct		return STRUCT;
+union		return UNION;
+if		return IF;
+i8		return I8;
+i16		return I16;
+i32		return I32;
+i64		return I64;
+u8		return U8;
+u16		return U16;
+u32		return U32;
+u64		return U64;
+
+==		return EQ;
+
+{IDENT}		{
+			yylval.s = strdup (yytext);
+			if (yylval.s == NULL)
+				yyerror ("out of memory");
+			return IDENT;
+		}
+{INT}		{
+			/* strtol rather than atoi, so that overflow is detected */
+			long v;
+
+			errno = 0;
+			v = strtol (yytext, NULL, 10);
+			if (errno == ERANGE || v > INT_MAX)
+				yyerror ("integer out of range: %s", yytext);
+			yylval.i = (int)v;
+			return INT;
+		}
+
+.		yyerror ("invalid input: '%c'", *yytext);
+
+%%
+
+// Print a printf-style message, prefixed with the current line number, to
+// stderr and exit with status 1.
+void yyerror (const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start (ap, fmt);
+	fprintf (stderr, "%d: error: ", yylineno);
+	vfprintf (stderr, fmt, ap);
+	va_end (ap);
+	fputc ('\n', stderr);
+	exit (1);
+}
blob - /dev/null
blob + 21b050e2a21535b5b02681de5aa6b48b6305eb1b (mode 644)
--- /dev/null
+++ main.c
@@ -0,0 +1,490 @@
+#define _GNU_SOURCE	// for vasprintf on glibc
+#include <err.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "ast.h"
+
+static struct structure **file;
+struct structure **parse (void);
+
+// printf into a freshly allocated string that the caller must free.
+// Exits if the string cannot be allocated.
+static char *xasprintf (const char *fmt, ...)
+{
+	va_list ap;
+	char *s;
+
+	va_start (ap, fmt);
+	if (vasprintf (&s, fmt, ap) == -1)
+		err (1, "vasprintf");
+	va_end (ap);
+	return s;
+}
+
+// Look up a top-level struct by name; exits if there is none.
+struct structure *get_struct (const char *name)
+{
+	for (struct structure **i = file; *i != NULL; ++i) {
+		struct structure *st = *i;
+		if (st->name != NULL && strcmp (name, st->name) == 0)
+			return st;
+	}
+	errx (1, "invalid struct: %s", name);
+}
+
+size_t align (size_t offset, size_t alignment);
+size_t align_of (struct type *ty);
+size_t align_of_struct (struct structure *st);
+size_t align_of_union (struct uitem **u);
+size_t size_of_struct (struct structure *st);
+size_t size_of_union (struct uitem **u);
+
+// Number of bytes a value of type ty occupies in the encoded buffer.
+size_t size_of (struct type *ty)
+{
+	switch (ty->type) {
+	case T_I8:
+	case T_U8:
+		return 1;
+	case T_I16:
+	case T_U16:
+		return 2;
+	case T_I32:
+	case T_U32:
+		return 4;
+	case T_I64:
+	case T_U64:
+		return 8;
+	case T_ARRAY:
+		return ty->len * size_of (ty->inner);
+	case T_NAME:
+		return size_of_struct (get_struct (ty->name));
+	case T_STRUCT:
+		return size_of_struct (ty->st);
+	case T_UNION:
+		return size_of_union (ty->un);
+	}
+	errx (1, "invalid type: %d", (int)ty->type);
+}
+
+// Encoded size of a struct.  Members are laid out the way encode() and
+// decode() place them, each at the next offset that satisfies its alignment.
+// The result is rounded up to the alignment of the struct, so that whatever
+// follows it (including the next array element) cannot overlap it.
+size_t size_of_struct (struct structure *st)
+{
+	size_t sz = 0;
+
+	for (struct item **i = st->items; *i != NULL; ++i) {
+		sz = align (sz, align_of ((*i)->type));
+		sz += size_of ((*i)->type);
+	}
+
+	return align (sz, align_of_struct (st));
+}
+
+// Encoded size of a union: the largest member, rounded up to the alignment
+// of the union.
+size_t size_of_union (struct uitem **u)
+{
+	size_t sz = 0;
+
+	for (struct uitem **i = u; *i != NULL; ++i) {
+		size_t s = size_of ((*i)->type);
+		if (s > sz)
+			sz = s;
+	}
+
+	return align (sz, align_of_union (u));
+}
+
+// Alignment of a type: scalars align to their size, aggregates to their
+// most strictly aligned member.
+size_t align_of (struct type *ty)
+{
+	switch (ty->type) {
+	case T_I8:
+	case T_U8:
+	case T_I16:
+	case T_U16:
+	case T_I32:
+	case T_U32:
+	case T_I64:
+	case T_U64:
+		return size_of (ty);
+	case T_ARRAY:
+		return align_of (ty->inner);
+	case T_NAME:
+		return align_of_struct (get_struct (ty->name));
+	case T_STRUCT:
+		return align_of_struct (ty->st);
+	case T_UNION:
+		return align_of_union (ty->un);
+	}
+	errx (1, "invalid type: %d", (int)ty->type);
+}
+
+size_t align_of_struct (struct structure *st)
+{
+	size_t al = 0;
+
+	for (struct item **i = st->items; *i != NULL; ++i) {
+		size_t a = align_of ((*i)->type);
+		if (a > al)
+			al = a;
+	}
+
+	return al;
+}
+
+size_t align_of_union (struct uitem **u)
+{
+	size_t al = 0;
+
+	for (struct uitem **i = u; *i != NULL; ++i) {
+		size_t a = align_of ((*i)->type);
+		if (a > al)
+			al = a;
+	}
+
+	return al;
+}
+
+// Round offset up to a multiple of alignment, which must be a power of two.
+// An alignment of 0 (aggregate without members) leaves offset unchanged.
+size_t align (size_t offset, size_t alignment)
+{
+	if (alignment == 0)
+		return offset;
+	return (offset + alignment - 1) & -alignment;
+}
+
+void prindent (int ind)
+{
+	for (int i = 0; i < ind; ++i)
+		putchar ('\t');
+}
+
+void print_value (struct value *v)
+{
+	switch (v->type) {
+	case V_NAME:
+		printf ("%s", v->s);
+		break;
+	case V_INT:
+		printf ("%d", v->i);
+		break;
+	}
+}
+
+// Print a condition as a C expression; prefix is put in front of the
+// left-hand member name.
+void print_cond (struct cond *co, const char *prefix)
+{
+	switch (co->type) {
+	case C_EQ:
+		printf ("%s%s == ", prefix, co->cmp.left);
+		print_value (co->cmp.right);
+		break;
+	}
+}
+
+// Print the C declaration of a member of type ty called name, without the
+// trailing semicolon.  indent is the nesting depth for inline aggregates.
+void print_item2 (struct type *ty, const char *name, int indent)
+{
+	switch (ty->type) {
+	case T_I8:
+		printf ("int8_t %s", name);
+		break;
+	case T_I16:
+		printf ("int16_t %s", name);
+		break;
+	case T_I32:
+		printf ("int32_t %s", name);
+		break;
+	case T_I64:
+		printf ("int64_t %s", name);
+		break;
+	case T_U8:
+		printf ("uint8_t %s", name);
+		break;
+	case T_U16:
+		printf ("uint16_t %s", name);
+		break;
+	case T_U32:
+		printf ("uint32_t %s", name);
+		break;
+	case T_U64:
+		printf ("uint64_t %s", name);
+		break;
+	case T_ARRAY: {
+		// C lists array dimensions outermost first
+		struct type *base = ty;
+
+		while (base->type == T_ARRAY)
+			base = base->inner;
+		print_item2 (base, name, indent);
+		for (struct type *t = ty; t->type == T_ARRAY; t = t->inner)
+			printf ("[%zu]", t->len);
+		break;
+	}
+	case T_NAME:
+		printf ("struct %s %s", ty->name, name);
+		break;
+	case T_STRUCT:
+		printf ("struct {\n");
+
+		for (struct item **i = ty->st->items; *i != NULL; ++i) {
+			struct item *it = *i;
+			prindent (indent + 1);
+			print_item2 (it->type, it->name, indent + 1);
+			puts (";");
+		}
+
+		prindent (indent);
+		printf ("} %s", name);
+		break;
+	case T_UNION:
+		printf ("union {\n");
+
+		for (struct uitem **i = ty->un; *i != NULL; ++i) {
+			struct uitem *ui = *i;
+			prindent (indent + 1);
+			print_item2 (ui->type, ui->name, indent + 1);
+			// the semicolon has to precede the comment, not end up in it
+			printf ("; // if ");
+			print_cond (ui->cond, "");
+			putchar ('\n');
+		}
+
+		prindent (indent);
+		printf ("} %s", name);
+		break;
+	}
+}
+
+void print_item (struct item *it)
+{
+	putchar ('\t');
+	print_item2 (it->type, it->name, 1);
+	puts (";");
+}
+
+void print_structure (struct structure *st)
+{
+	if (st->name == NULL)
+		errx (1, "a top-level structure was declared without a name");
+
+	printf ("struct %s {\n", st->name);
+	for (struct item **it = st->items; *it != NULL; ++it) {
+		print_item (*it);
+	}
+	puts ("};\n");
+}
+
+// Print the statements that store the expression `from` of type ty into the
+// generated function's `out` buffer at *offset, and advance *offset past it.
+// parent is the expression of the enclosing aggregate; union conditions are
+// evaluated against its members.
+void encode (size_t *offset, struct type *ty, const char *from, const char *parent, int indent)
+{
+	size_t sz, off2;
+	char *s;
+
+	*offset = align (*offset, align_of (ty));
+	sz = size_of (ty);
+
+	switch (ty->type) {
+	case T_I8:
+	case T_I16:
+	case T_I32:
+	case T_I64:
+	case T_U8:
+	case T_U16:
+	case T_U32:
+	case T_U64:
+		prindent (indent);
+		printf ("dst_write%zu (out + %zu, %s);\n", sz * 8, *offset, from);
+		*offset += sz;
+		break;
+	case T_ARRAY:
+		for (size_t i = 0; i < ty->len; ++i) {
+			s = xasprintf ("%s[%zu]", from, i);
+			encode (offset, ty->inner, s, parent, indent);
+			free (s);
+		}
+		break;
+	case T_NAME:
+		prindent (indent);
+		printf ("encode_%s (out + %zu, &%s);\n", ty->name, *offset, from);
+		*offset += sz;
+		break;
+	case T_STRUCT:
+		for (struct item **i = ty->st->items; *i != NULL; ++i) {
+			struct item *it = *i;
+			s = xasprintf ("%s.%s", from, it->name);
+			encode (offset, it->type, s, from, indent);
+			free (s);
+		}
+		break;
+	case T_UNION:
+		prindent (indent);
+		for (struct uitem **i = ty->un; *i != NULL; ++i) {
+			struct uitem *ui = *i;
+			char *prefix = xasprintf ("%s.", parent);
+
+			printf ("if (");
+			print_cond (ui->cond, prefix);
+			printf (") {\n");
+			free (prefix);
+
+			off2 = *offset;
+			s = xasprintf ("%s.%s", from, ui->name);
+			encode (&off2, ui->type, s, parent, indent + 1);
+			free (s);
+			prindent (indent);
+			printf ("} else ");
+		}
+		puts (";");
+		*offset += size_of_union (ty->un);
+		break;
+	}
+}
+
+// Print the encode_<name>() function for a top-level struct.
+void print_encode (struct structure *st)
+{
+	printf ("static void encode_%s (uint8_t *out, const struct %s *in)\n", st->name, st->name);
+	puts ("{");
+
+	size_t offset = 0;
+
+	for (struct item **i = st->items; *i != NULL; ++i) {
+		struct item *it = *i;
+		char *from = xasprintf ("in->%s", it->name);
+
+		encode (&offset, it->type, from, "(*in)", 1);
+		free (from);
+	}
+
+	puts ("}\n");
+}
+
+// Counterpart of encode(): print the statements that load the lvalue `into`
+// of type ty from the generated function's `in` buffer at *offset.
+void decode (size_t *offset, struct type *ty, const char *into, const char *parent, int indent)
+{
+	size_t sz, off2;
+	char *s;
+
+	*offset = align (*offset, align_of (ty));
+	sz = size_of (ty);
+
+	switch (ty->type) {
+	case T_I8:
+	case T_I16:
+	case T_I32:
+	case T_I64:
+	case T_U8:
+	case T_U16:
+	case T_U32:
+	case T_U64:
+		prindent (indent);
+		printf ("%s = dst_read%zu (in + %zu);\n", into, sz * 8, *offset);
+		*offset += sz;
+		break;
+	case T_ARRAY:
+		for (size_t i = 0; i < ty->len; ++i) {
+			s = xasprintf ("%s[%zu]", into, i);
+			decode (offset, ty->inner, s, parent, indent);
+			free (s);
+		}
+		break;
+	case T_NAME:
+		prindent (indent);
+		printf ("decode_%s (&%s, in + %zu);\n", ty->name, into, *offset);
+		*offset += sz;
+		break;
+	case T_STRUCT:
+		for (struct item **i = ty->st->items; *i != NULL; ++i) {
+			struct item *it = *i;
+			s = xasprintf ("%s.%s", into, it->name);
+			decode (offset, it->type, s, into, indent);
+			free (s);
+		}
+		break;
+	case T_UNION:
+		prindent (indent);
+		for (struct uitem **i = ty->un; *i != NULL; ++i) {
+			struct uitem *ui = *i;
+			char *prefix = xasprintf ("%s.", parent);
+
+			printf ("if (");
+			print_cond (ui->cond, prefix);
+			printf (") {\n");
+			free (prefix);
+
+			off2 = *offset;
+			s = xasprintf ("%s.%s", into, ui->name);
+			decode (&off2, ui->type, s, parent, indent + 1);
+			free (s);
+			prindent (indent);
+			printf ("} else ");
+		}
+		puts (";");
+		*offset += size_of_union (ty->un);
+		break;
+	}
+}
+
+// Print the decode_<name>() function for a top-level struct.
+void print_decode (struct structure *st)
+{
+	printf ("static void decode_%s (struct %s *out, const uint8_t *in)\n", st->name, st->name);
+	puts ("{");
+
+	size_t offset = 0;
+
+	for (struct item **i = st->items; *i != NULL; ++i) {
+		struct item *it = *i;
+		char *into = xasprintf ("out->%s", it->name);
+
+		decode (&offset, it->type, into, "(*out)", 1);
+		free (into);
+	}
+
+	puts ("}\n");
+}
+
+static const char *header =
+#include "destruct.inc"
+;
+
+int main (void)
+{
+	file = parse ();
+
+	if (file == NULL) {
+		// stdout carries the generated code, so report on stderr
+		fputs ("parsing failed\n", stderr);
+		return 1;
+	}
+
+	puts (header);
+
+	// Declare the structures
+	for (struct structure **st = file; *st != NULL; ++st) {
+		print_structure (*st);
+	}
+
+	// Define functions
+	for (struct structure **st = file; *st != NULL; ++st) {
+		print_encode (*st);
+		print_decode (*st);
+	}
+
+	return 0;
+}
blob - /dev/null
blob + edb2587993b036674a776948e01030a5710189ea (mode 644)
--- /dev/null
+++ parse.y
@@ -0,0 +1,249 @@
+%{
+#include <err.h>
+#include <stdlib.h>
+#include "ast.h"
+
+int yylex (void);
+void yyerror (const char *, ...);
+
+// Result of the parse, stored by the `input` rule.
+static struct structure **parsed;
+%}
+
+%token IDENT INT
+%token STRUCT UNION IF EQ
+%token I8 I16 I32 I64 U8 U16 U32 U64
+
+%union {
+	struct structure *st;
+	struct value *val;
+	struct uitem *ui;
+	struct item *it;
+	struct type *ty;
+	struct cond *co;
+	char *s;
+	int i;
+
+	struct {
+		void **ptr;
+		size_t len, cap;
+	} list;
+};
+
+%{
+// The helpers called from the actions take YYSTYPE, so they can only be
+// declared after the %union above.
+void type_simple (YYSTYPE *, enum type_type);
+void type_array (YYSTYPE *, struct type *, size_t);
+void type_name (YYSTYPE *, char *);
+void type_struct (YYSTYPE *, struct structure *);
+void type_union (YYSTYPE *, YYSTYPE *);
+void item (YYSTYPE *, char *, struct type *);
+void uitem (YYSTYPE *, char *, struct type *, struct cond *);
+void structure (YYSTYPE *, char *, YYSTYPE *);
+void val_name (YYSTYPE *, char *);
+void val_int (YYSTYPE *, int);
+void cond_cmp (YYSTYPE *, char *, struct value *);
+void list_new (YYSTYPE *);
+void list_new_with (YYSTYPE *, void *);
+void list_push (YYSTYPE *, YYSTYPE *, void *);
+void *list_fin (YYSTYPE *);
+%}
+
+%start input
+
+%%
+
+input	: file					{ parsed = list_fin (&$1); }
+	;
+
+file	: file struct ';'			{ list_push (&$$, &$1, $2.st); }
+	|					{ list_new (&$$); }
+	;
+
+struct	: STRUCT ident '{' items comma '}'	{ structure (&$$, $2.s, &$4); }
+	;
+
+ident	: IDENT					{ $$.s = $1.s; }
+	|					{ $$.s = NULL; }
+	;
+
+comma	: ',' | ;
+
+items	: items ',' item			{ list_push (&$$, &$1, $3.it); }
+	| item					{ list_new_with (&$$, $1.it); }
+	;
+
+item	: IDENT ':' type			{ item (&$$, $1.s, $3.ty); }
+	;
+
+type	: I8					{ type_simple (&$$, T_I8); }
+	| I16					{ type_simple (&$$, T_I16); }
+	| I32					{ type_simple (&$$, T_I32); }
+	| I64					{ type_simple (&$$, T_I64); }
+	| U8					{ type_simple (&$$, T_U8); }
+	| U16					{ type_simple (&$$, T_U16); }
+	| U32					{ type_simple (&$$, T_U32); }
+	| U64					{ type_simple (&$$, T_U64); }
+	| '[' type ';' INT ']'			{ type_array (&$$, $2.ty, $4.i); }
+	| STRUCT IDENT				{ type_name (&$$, $2.s); }
+	| struct				{ type_struct (&$$, $1.st); }
+	| UNION '{' uitems comma '}'		{ type_union (&$$, &$3); }
+	;
+
+uitems	: uitems ',' uitem			{ list_push (&$$, &$1, $3.ui); }
+	| uitem					{ list_new_with (&$$, $1.ui); }
+	;
+
+uitem	: IDENT ':' type IF cond		{ uitem (&$$, $1.s, $3.ty, $5.co); }
+	;
+
+value	: IDENT					{ val_name (&$$, $1.s); }
+	| INT					{ val_int (&$$, $1.i); }
+	;
+
+cond	: IDENT EQ value			{ cond_cmp (&$$, $1.s, $3.val); }
+	;
+
+%%
+
+// malloc that exits instead of returning NULL.
+static void *xmalloc (size_t n)
+{
+	void *p = malloc (n);
+
+	if (p == NULL)
+		err (1, "malloc");
+	return p;
+}
+
+#define new(T) ((T *)xmalloc (sizeof (T)))
+
+void type_simple (YYSTYPE *y, enum type_type type)
+{
+	y->ty = new (struct type);
+	y->ty->type = type;
+}
+
+void type_array (YYSTYPE *y, struct type *inner, size_t len)
+{
+	y->ty = new (struct type);
+	y->ty->type = T_ARRAY;
+	y->ty->inner = inner;
+	y->ty->len = len;
+}
+
+void type_name (YYSTYPE *y, char *s)
+{
+	y->ty = new (struct type);
+	y->ty->type = T_NAME;
+	y->ty->name = s;
+}
+
+void type_struct (YYSTYPE *y, struct structure *st)
+{
+	y->ty = new (struct type);
+	y->ty->type = T_STRUCT;
+	y->ty->st = st;
+}
+
+void item (YYSTYPE *y, char *name, struct type *type)
+{
+	y->it = new (struct item);
+	y->it->name = name;
+	y->it->type = type;
+}
+
+// Start an empty list; one slot beyond cap is kept for the NULL terminator.
+void list_new (YYSTYPE *y)
+{
+	y->list.len = 0;
+	y->list.cap = 10;
+	y->list.ptr = calloc (y->list.cap + 1, sizeof (void *));
+	if (y->list.ptr == NULL)
+		err (1, "calloc");
+}
+
+void list_new_with (YYSTYPE *y, void *e)
+{
+	list_new (y);
+	y->list.len = 1;
+	y->list.ptr[0] = e;
+}
+
+// Make y the list o with e appended, growing the storage when it is full.
+void list_push (YYSTYPE *y, YYSTYPE *o, void *e)
+{
+	*y = *o;
+	if (y->list.len == y->list.cap) {
+		const size_t cap = y->list.cap * 2;
+		void **ptr = reallocarray (y->list.ptr, cap + 1, sizeof (void *));
+
+		if (ptr == NULL)
+			err (1, "reallocarray");
+		y->list.ptr = ptr;
+		y->list.cap = cap;
+	}
+
+	y->list.ptr[y->list.len++] = e;
+}
+
+// NULL-terminate the list and return its element array.
+void *list_fin (YYSTYPE *y)
+{
+	y->list.ptr[y->list.len] = NULL;
+	return y->list.ptr;
+}
+
+void structure (YYSTYPE *y, char *name, YYSTYPE *items)
+{
+	y->st = new (struct structure);
+	y->st->name = name;
+	y->st->items = list_fin (items);
+}
+
+// Parse stdin; returns the NULL-terminated list of structs, or NULL on error.
+struct structure **parse (void)
+{
+	if (yyparse () != 0)
+		return NULL;
+
+	return parsed;
+}
+
+void val_name (YYSTYPE *y, char *name)
+{
+	y->val = new (struct value);
+	y->val->type = V_NAME;
+	y->val->s = name;
+}
+
+void val_int (YYSTYPE *y, int i)
+{
+	y->val = new (struct value);
+	y->val->type = V_INT;
+	y->val->i = i;
+}
+
+void cond_cmp (YYSTYPE *y, char *l, struct value *r)
+{
+	y->co = new (struct cond);
+	y->co->type = C_EQ;
+	y->co->cmp.left = l;
+	y->co->cmp.right = r;
+}
+
+void uitem (YYSTYPE *y, char *name, struct type *type, struct cond *co)
+{
+	y->ui = new (struct uitem);
+	y->ui->name = name;
+	y->ui->type = type;
+	y->ui->cond = co;
+}
+
+void type_union (YYSTYPE *y, YYSTYPE *i)
+{
+	y->ty = new (struct type);
+	y->ty->type = T_UNION;
+	y->ty->un = list_fin (i);
+}
blob - /dev/null
blob + 0765b0168719e313ce4f1f1df97423ee10cb54cb (mode 644)
--- /dev/null
+++ test.txt
@@ -0,0 +1,25 @@
+struct test {
+	x: u8,
+	y: u32,
+};
+
+struct asdf {
+	x: u32,
+	y: u8,
+	z: u64,
+	a: [u64; 4],
+	t: struct test,
+	s: struct {
+		a: u8,
+		arr: [struct test; 3],
+		u: union {
+			a: u8 if a == 0,
+			b: u16 if a == 1,
+		},
+	},
+	u: union {
+		a: u8 if x == 0,
+		b: u8 if x == 0,
+	},
+	w: i8,
+};