From bddf6f9d6b0fda8f4f0667e936b77d68bb0c007c Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 24 Nov 2019 22:18:19 +0100 Subject: [PATCH 1/8] pprint a null AST as "(null)" and TT_NONE as "none" --- src/pprint.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/pprint.c b/src/pprint.c index 52f42eb6..c178eb72 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -31,9 +31,13 @@ typedef struct pp_state { } pp_state_t; void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { + if (tok == NULL) { + fprintf(stream, "%*s(null)\n", indent, ""); + return; + } switch (tok->token_type) { case TT_NONE: - fprintf(stream, "%*snull\n", indent, ""); + fprintf(stream, "%*snone\n", indent, ""); break; case TT_BYTES: if (tok->bytes.len == 0) @@ -54,7 +58,6 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { fprintf(stream, "%*ss -%#" PRIx64 "\n", indent, "", -tok->sint); else fprintf(stream, "%*ss %#" PRIx64 "\n", indent, "", tok->sint); - break; case TT_UINT: fprintf(stream, "%*su %#" PRIx64 "\n", indent, "", tok->uint); From 0757f5f5a00051c7e0fae61f1a63203f65f31c4b Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 24 Nov 2019 22:19:55 +0100 Subject: [PATCH 2/8] pprint TT_BYTES like C strings --- src/pprint.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/pprint.c b/src/pprint.c index c178eb72..06e15c46 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -23,6 +23,7 @@ #include "internal.h" #include #include +#include typedef struct pp_state { int delta; @@ -40,18 +41,15 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { fprintf(stream, "%*snone\n", indent, ""); break; case TT_BYTES: - if (tok->bytes.len == 0) - fprintf(stream, "%*s<>\n", indent, ""); - else { - fprintf(stream, "%*s", indent, ""); - for (size_t i = 0; i < tok->bytes.len; i++) { - fprintf(stream, - "%c%02hhx", - (i == 0) ? 
'<' : '.', - tok->bytes.token[i]); - } - fprintf(stream, ">\n"); + fprintf(stream, "%*s\"", indent, ""); + for (size_t i = 0; i < tok->bytes.len; i++) { + uint8_t c = tok->bytes.token[i]; + if (isprint(c)) + fputc(c, stream); + else + fprintf(stream, "\\%03hho", c); } + fprintf(stream, "\"\n"); break; case TT_SINT: if (tok->sint < 0) From 7806a848045b844db0be186ee07869c75810b61b Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 24 Nov 2019 22:21:09 +0100 Subject: [PATCH 3/8] code cosmetics --- src/pprint.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/pprint.c b/src/pprint.c index 06e15c46..15c14f83 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -60,20 +60,18 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { case TT_UINT: fprintf(stream, "%*su %#" PRIx64 "\n", indent, "", tok->uint); break; - case TT_SEQUENCE: { + case TT_SEQUENCE: fprintf(stream, "%*s[\n", indent, ""); for (size_t i = 0; i < tok->seq->used; i++) { h_pprint(stream, tok->seq->elements[i], indent + delta, delta); } fprintf(stream, "%*s]\n", indent, ""); - } - break; - case TT_USER: - fprintf(stream, "%*sUSER:%s\n", indent, "", h_get_token_type_name(tok->token_type)); break; default: - if(tok->token_type > TT_USER) { - fprintf(stream, "%*sUSER:%s %d\n", indent, "", h_get_token_type_name(tok->token_type), tok->token_type-TT_USER); + if(tok->token_type >= TT_USER) { + const char *name = h_get_token_type_name(tok->token_type); + int num = tok->token_type-TT_USER; + fprintf(stream, "%*sUSER:%s %d\n", indent, "", name, num); } else { assert_message(0, "Should not reach here."); } From 0f1c0deb1f13afdc1d91a7662e4b0da3b966644f Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 24 Nov 2019 23:20:48 +0100 Subject: [PATCH 4/8] add a pprint function pointer to HTTEntry --- src/internal.h | 1 + src/pprint.c | 6 ++++-- src/registry.c | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/internal.h b/src/internal.h index 0e92e99e..79d6c978 100644 --- a/src/internal.h +++ b/src/internal.h @@ -428,6 +428,7 @@ typedef struct HTTEntry_ { const char* name; HTokenType value; void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf); + void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta); } HTTEntry; const HTTEntry* h_get_token_type_entry(HTokenType token_type); diff --git a/src/pprint.c b/src/pprint.c index 15c14f83..2c9fbc2e 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -69,9 +69,11 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { break; default: if(tok->token_type >= TT_USER) { - const char *name = h_get_token_type_name(tok->token_type); + const HTTEntry *e = h_get_token_type_entry(tok->token_type); int num = tok->token_type-TT_USER; - fprintf(stream, "%*sUSER:%s %d\n", indent, "", name, num); + fprintf(stream, "%*sUSER:%s %d\n", indent, "", e->name, num); + if (e->pprint) + e->pprint(stream, tok, indent + delta, delta); } else { assert_message(0, "Should not reach here."); } diff --git a/src/registry.c b/src/registry.c index 00486db4..f0201c61 100644 --- a/src/registry.c +++ b/src/registry.c @@ -60,6 +60,7 @@ HTokenType h_allocate_token_new( new_entry->name = name; new_entry->value = 0; new_entry->unamb_sub = unamb_sub; + new_entry->pprint = NULL; HTTEntry* probe = *(HTTEntry**)tsearch(new_entry, &tt_registry, compare_entries); if (probe->value != 0) { // Token type already exists... From 93ab0d723171d2d5ca56d16d0e6b2f997932e3ad Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Mon, 25 Nov 2019 00:10:43 +0100 Subject: [PATCH 5/8] a bit more pprint prettification --- src/pprint.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/pprint.c b/src/pprint.c index 2c9fbc2e..6747a2a8 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -32,16 +32,17 @@ typedef struct pp_state { } pp_state_t; void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { + fprintf(stream, "%*s", indent, ""); if (tok == NULL) { - fprintf(stream, "%*s(null)\n", indent, ""); + fprintf(stream, "(null)\n"); return; } switch (tok->token_type) { case TT_NONE: - fprintf(stream, "%*snone\n", indent, ""); + fprintf(stream, "none"); break; case TT_BYTES: - fprintf(stream, "%*s\"", indent, ""); + fprintf(stream, "\""); for (size_t i = 0; i < tok->bytes.len; i++) { uint8_t c = tok->bytes.token[i]; if (isprint(c)) @@ -49,35 +50,38 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { else fprintf(stream, "\\%03hho", c); } - fprintf(stream, "\"\n"); + fprintf(stream, "\""); break; case TT_SINT: if (tok->sint < 0) - fprintf(stream, "%*ss -%#" PRIx64 "\n", indent, "", -tok->sint); + fprintf(stream, "s -%#" PRIx64, -tok->sint); else - fprintf(stream, "%*ss %#" PRIx64 "\n", indent, "", tok->sint); + fprintf(stream, "s %#" PRIx64, tok->sint); break; case TT_UINT: - fprintf(stream, "%*su %#" PRIx64 "\n", indent, "", tok->uint); + fprintf(stream, "u %#" PRIx64, tok->uint); break; case TT_SEQUENCE: - fprintf(stream, "%*s[\n", indent, ""); - for (size_t i = 0; i < tok->seq->used; i++) { - h_pprint(stream, tok->seq->elements[i], indent + delta, delta); + if (tok->seq->used == 0) + fprintf(stream, "[]"); + else { + fprintf(stream, "[\n"); + for (size_t i = 0; i < tok->seq->used; i++) + h_pprint(stream, tok->seq->elements[i], indent + delta, delta); + fprintf(stream, "%*s]", indent, ""); } - fprintf(stream, "%*s]\n", indent, ""); break; default: if(tok->token_type >= TT_USER) { const 
HTTEntry *e = h_get_token_type_entry(tok->token_type); - int num = tok->token_type-TT_USER; - fprintf(stream, "%*sUSER:%s %d\n", indent, "", e->name, num); + fprintf(stream, "USER %d (%s) ", e->value - TT_USER, e->name); if (e->pprint) e->pprint(stream, tok, indent + delta, delta); } else { assert_message(0, "Should not reach here."); } } + fputc('\n', stream); } From a1a872682fff39fb8c86997b1352c69c3aa6926b Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Mon, 25 Nov 2019 00:39:00 +0100 Subject: [PATCH 6/8] add pprint argument to h_allocate_token_new() also allow NULL argument for unamb_sub to mean default --- src/hammer.h | 3 ++- src/registry.c | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/hammer.h b/src/hammer.h index ad44fee9..5774b2dd 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -795,7 +795,8 @@ HTokenType h_allocate_token_type(const char* name); /// Allocate a new token type with an unambiguous print function. HTokenType h_allocate_token_new( const char* name, - void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf)); + void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf), + void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta)); /// Get the token type associated with name. 
Returns -1 if name is unkown HTokenType h_get_token_type_number(const char* name); diff --git a/src/registry.c b/src/registry.c index f0201c61..5486fd7b 100644 --- a/src/registry.c +++ b/src/registry.c @@ -54,13 +54,14 @@ static void default_unamb_sub(const HParsedToken* tok, HTokenType h_allocate_token_new( const char* name, - void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf)) { + void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf), + void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta)) { HTTEntry* new_entry = h_alloc(&system_allocator, sizeof(*new_entry)); assert(new_entry != NULL); new_entry->name = name; new_entry->value = 0; - new_entry->unamb_sub = unamb_sub; - new_entry->pprint = NULL; + new_entry->unamb_sub = unamb_sub ? unamb_sub : default_unamb_sub; + new_entry->pprint = pprint; HTTEntry* probe = *(HTTEntry**)tsearch(new_entry, &tt_registry, compare_entries); if (probe->value != 0) { // Token type already exists... @@ -87,7 +88,7 @@ HTokenType h_allocate_token_new( } } HTokenType h_allocate_token_type(const char* name) { - return h_allocate_token_new(name, default_unamb_sub); + return h_allocate_token_new(name, NULL, NULL); } HTokenType h_get_token_type_number(const char* name) { HTTEntry e; From 33a98aef9dc02946fd86098511d21b119518fa3a Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Mon, 25 Nov 2019 12:56:08 +0100 Subject: [PATCH 7/8] condense h_pprint() output. add h_pprintln(). --- src/hammer.h | 16 ++++++++++++++-- src/pprint.c | 28 ++++++++++++++++++---------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/hammer.h b/src/hammer.h index 5774b2dd..d983f2c5 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -728,10 +728,22 @@ HAMMER_FN_DECL(void, h_parse_result_free, HParseResult *result); */ char* h_write_result_unamb(const HParsedToken* tok); /** - * Format token to the given output stream. Indent starting at - * [indent] spaces, with [delta] spaces between levels. 
+ * Format token to the given output stream. Indent starting at [indent] spaces, + * with [delta] spaces between levels. + * + * Note: This function does not print a trailing newline. It also does not + * print any spaces to indent the initial line of output. This makes it + * suitable for recursive use in the condensed output of larger structures. */ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta); +/** + * Format token to the given output. Print a trailing newline. + * + * This function assumes an initial indentation of 0 and uses 2 spaces between + * indentation levels. It is equivalent to 'h_pprint(stream, tok, 0, 2)' + * followed by 'fputc('\n', stream)' and is provided for convenience. + */ +void h_pprintln(FILE* stream, const HParsedToken* tok); /** * Build parse tables for the given parser backend. See the diff --git a/src/pprint.c b/src/pprint.c index 6747a2a8..85a9f7db 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -32,9 +32,8 @@ typedef struct pp_state { } pp_state_t; void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { - fprintf(stream, "%*s", indent, ""); if (tok == NULL) { - fprintf(stream, "(null)\n"); + fprintf(stream, "(null)"); return; } switch (tok->token_type) { @@ -54,21 +53,26 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { break; case TT_SINT: if (tok->sint < 0) - fprintf(stream, "s -%#" PRIx64, -tok->sint); + fprintf(stream, "-%#" PRIx64, -tok->sint); else - fprintf(stream, "s %#" PRIx64, tok->sint); + fprintf(stream, "+%#" PRIx64, tok->sint); break; case TT_UINT: - fprintf(stream, "u %#" PRIx64, tok->uint); + fprintf(stream, "%#" PRIx64, tok->uint); break; case TT_SEQUENCE: if (tok->seq->used == 0) - fprintf(stream, "[]"); + fprintf(stream, "[ ]"); else { - fprintf(stream, "[\n"); - for (size_t i = 0; i < tok->seq->used; i++) + fprintf(stream, "[%*s", delta - 1, ""); + for (size_t i = 0; i < tok->seq->used; i++) { + if (i > 0) fprintf(stream, "\n%*s,%*s", 
indent, "", delta - 1, ""); h_pprint(stream, tok->seq->elements[i], indent + delta, delta); - fprintf(stream, "%*s]", indent, ""); + } + if (tok->seq->used > 2) + fprintf(stream, "\n%*s]", indent, ""); + else + fprintf(stream, " ]"); } break; default: @@ -76,11 +80,15 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { const HTTEntry *e = h_get_token_type_entry(tok->token_type); fprintf(stream, "USER %d (%s) ", e->value - TT_USER, e->name); if (e->pprint) - e->pprint(stream, tok, indent + delta, delta); + e->pprint(stream, tok, indent, delta); } else { assert_message(0, "Should not reach here."); } } +} + +void h_pprintln(FILE* stream, const HParsedToken* tok) { + h_pprint(stream, tok, 0, 2); fputc('\n', stream); } From cbeee9d142a61d0ea536d0d924c69ea177d36b22 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Mon, 25 Nov 2019 12:57:48 +0100 Subject: [PATCH 8/8] add examples/ttuser - show custom tokens/pprint --- examples/SConscript | 3 +- examples/ttuser.c | 140 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 examples/ttuser.c diff --git a/examples/SConscript b/examples/SConscript index b34b85a1..8504b4bb 100644 --- a/examples/SConscript +++ b/examples/SConscript @@ -6,8 +6,9 @@ example = env.Clone() example.Append(LIBS="hammer", LIBPATH="../src") dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c']) +ttuser = example.Program('ttuser', 'ttuser.c') base64 = example.Program('base64', 'base64.c') base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c') base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c') ties = example.Program('ties', ['ties.c', 'grammar.c']) -env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties]) \ No newline at end of file +env.Alias("examples", [dns, ttuser, base64, base64_sem1, base64_sem2, ties]) diff --git a/examples/ttuser.c b/examples/ttuser.c new file mode 100644 index 00000000..4e83356c --- /dev/null +++ 
b/examples/ttuser.c @@ -0,0 +1,140 @@ +/* + * Example parser that demonstrates the use of user-defined token types. + * + * Note the custom printer function that hooks into h_pprint(). + */ + +#include "../src/hammer.h" +#include "../src/glue.h" + + +/* + * custom tokens + */ + +HTokenType TT_SUBJ, TT_PRED, TT_OBJ, TT_ADJ, TT_ADVC; + +void +pprint(FILE *stream, const HParsedToken *tok, int indent, int delta) +{ + /* + * Pretty-printer rules: + * + * - Output 'indent' spaces after every newline you produce. + * - Do not add indent on the first line of output. + * - Do not add a trailing newline. + * - Indent sub-objects by adding 'delta' to 'indent'. + */ + + if (((HParsedToken *)tok->user)->token_type == TT_SEQUENCE) + fprintf(stream, "\n%*s", indent, ""); + h_pprint(stream, tok->user, indent, delta); +} + +/* XXX define unamb_sub as well */ + +void +init(void) +{ + TT_SUBJ = h_allocate_token_new("subject", NULL, pprint); + TT_PRED = h_allocate_token_new("predicate", NULL, pprint); + TT_OBJ = h_allocate_token_new("object", NULL, pprint); + TT_ADJ = h_allocate_token_new("adjective", NULL, pprint); + TT_ADVC = h_allocate_token_new("adverbial clause", NULL, pprint); +} + + +/* + * semantic actions + * + * Normally these would be more interesting, but for this example, we just wrap + * our tokens in their intended types. 
+ */ +HParsedToken *act_subj(const HParseResult *p, void *u) { + return H_MAKE(SUBJ, (void *)p->ast); +} +HParsedToken *act_pred(const HParseResult *p, void *u) { + return H_MAKE(PRED, (void *)p->ast); +} +HParsedToken *act_obj(const HParseResult *p, void *u) { + return H_MAKE(OBJ, (void *)p->ast); +} +HParsedToken *act_adj(const HParseResult *p, void *u) { + return H_MAKE(ADJ, (void *)p->ast); +} +HParsedToken *act_advc(const HParseResult *p, void *u) { + return H_MAKE(ADVC, (void *)p->ast); +} + + +/* + * grammar + */ + +HParser * +build_parser(void) +{ + /* words */ + #define W(X) h_whitespace(h_literal(#X)) + H_RULE(art, h_choice(W(a), W(the), NULL)); + H_RULE(noun, h_choice(W(cat), W(dog), W(fox), W(tiger), W(lion), + W(bear), W(fence), W(tree), W(car), W(cow), NULL)); + H_RULE(verb, h_choice(W(eats), W(jumps), W(falls), NULL)); + H_ARULE(adj, h_choice(W(quick), W(slow), W(happy), W(lazy), W(cyan), + W(magenta), W(yellow), W(black), W(brown), NULL)); + H_RULE(adverb, h_choice(W(with), W(over), W(after), NULL)); + #undef W + + /* phrases */ + H_RULE(nphrase, h_sequence(art, h_many(adj), noun, NULL)); + + /* sentence structure */ + H_ARULE(subj, nphrase); + H_ARULE(pred, verb); + H_ARULE(obj, nphrase); + H_ARULE(advc, h_sequence(adverb, nphrase, NULL)); + H_RULE(sentnc, h_sequence(subj, pred, + h_optional(obj), h_optional(advc), NULL)); + + return sentnc; +} + + +/* + * main routine: read, parse, print + * + * input e.g.: + * "the quick brown fox jumps the fence with a cyan lion" + */ + +#include +#include + +int +main(int argc, char **argv) +{ + uint8_t input[1024]; + size_t sz; + const HParser *parser; + const HParseResult *result; + + init(); + parser = build_parser(); + + sz = fread(input, 1, sizeof(input), stdin); + if (!feof(stdin)) { + fprintf(stderr, "too much input\n"); + return 1; + } + + result = h_parse(parser, input, sz); + if (!result) { + fprintf(stderr, "no parse\n"); + return 1; + } + + h_pprintln(stdout, result->ast); + fprintf(stderr, 
"consumed %" PRId64 "/%zu bytes.\n", + result->bit_length / 8, sz); + return 0; +}