Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion examples/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ example = env.Clone()
example.Append(LIBS="hammer", LIBPATH="../src")

dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c'])
ttuser = example.Program('ttuser', 'ttuser.c')
base64 = example.Program('base64', 'base64.c')
base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c')
base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c')
ties = example.Program('ties', ['ties.c', 'grammar.c'])
env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties])
env.Alias("examples", [dns, ttuser, base64, base64_sem1, base64_sem2, ties])
140 changes: 140 additions & 0 deletions examples/ttuser.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* Example parser that demonstrates the use of user-defined token types.
*
* Note the custom printer function that hooks into h_pprint().
*/

#include "../src/hammer.h"
#include "../src/glue.h"


/*
* custom tokens
*/

HTokenType TT_SUBJ, TT_PRED, TT_OBJ, TT_ADJ, TT_ADVC;

void
pprint(FILE *stream, const HParsedToken *tok, int indent, int delta)
{
/*
* Pretty-printer rules:
*
* - Output 'indent' spaces after every newline you produce.
* - Do not add indent on the first line of output.
* - Do not add a trailing newline.
* - Indent sub-objects by adding 'delta' to 'indent'.
*/

if (((HParsedToken *)tok->user)->token_type == TT_SEQUENCE)
fprintf(stream, "\n%*s", indent, "");
h_pprint(stream, tok->user, indent, delta);
}

/* XXX define unamb_sub as well */

void
init(void)
{
TT_SUBJ = h_allocate_token_new("subject", NULL, pprint);
TT_PRED = h_allocate_token_new("predicate", NULL, pprint);
TT_OBJ = h_allocate_token_new("object", NULL, pprint);
TT_ADJ = h_allocate_token_new("adjective", NULL, pprint);
TT_ADVC = h_allocate_token_new("adverbial clause", NULL, pprint);
}


/*
* semantic actions
*
* Normally these would be more interesting, but for this example, we just wrap
* our tokens in their intended types.
*/
/* Semantic action: wrap the parsed AST in a SUBJ ("subject") token. */
HParsedToken *act_subj(const HParseResult *p, void *u) {
    void *wrapped = (void *)p->ast;  /* drop const so the AST can be handed to H_MAKE */

    (void)u;  /* user data is not used by this action */
    return H_MAKE(SUBJ, wrapped);
}
/* Semantic action: wrap the parsed AST in a PRED ("predicate") token. */
HParsedToken *act_pred(const HParseResult *p, void *u) {
    void *wrapped = (void *)p->ast;  /* drop const so the AST can be handed to H_MAKE */

    (void)u;  /* user data is not used by this action */
    return H_MAKE(PRED, wrapped);
}
/* Semantic action: wrap the parsed AST in an OBJ ("object") token. */
HParsedToken *act_obj(const HParseResult *p, void *u) {
    void *wrapped = (void *)p->ast;  /* drop const so the AST can be handed to H_MAKE */

    (void)u;  /* user data is not used by this action */
    return H_MAKE(OBJ, wrapped);
}
/* Semantic action: wrap the parsed AST in an ADJ ("adjective") token. */
HParsedToken *act_adj(const HParseResult *p, void *u) {
    void *wrapped = (void *)p->ast;  /* drop const so the AST can be handed to H_MAKE */

    (void)u;  /* user data is not used by this action */
    return H_MAKE(ADJ, wrapped);
}
/* Semantic action: wrap the parsed AST in an ADVC ("adverbial clause") token. */
HParsedToken *act_advc(const HParseResult *p, void *u) {
    void *wrapped = (void *)p->ast;  /* drop const so the AST can be handed to H_MAKE */

    (void)u;  /* user data is not used by this action */
    return H_MAKE(ADVC, wrapped);
}


/*
* grammar
*/

/*
 * Build the parser for the example's toy sentence grammar.
 *
 * Returns the top-level 'sentnc' parser: a subject followed by a predicate,
 * then an optional object and an optional adverbial clause.
 *
 * NOTE(review): H_RULE/H_ARULE come from glue.h, which is not shown here;
 * H_ARULE(x, ...) presumably attaches the act_x semantic action defined
 * above -- confirm against glue.h.
 */
HParser *
build_parser(void)
{
/* words: W(X) turns the bare word X into h_whitespace(h_literal("X")) */
#define W(X) h_whitespace(h_literal(#X))
H_RULE(art, h_choice(W(a), W(the), NULL));
H_RULE(noun, h_choice(W(cat), W(dog), W(fox), W(tiger), W(lion),
W(bear), W(fence), W(tree), W(car), W(cow), NULL));
H_RULE(verb, h_choice(W(eats), W(jumps), W(falls), NULL));
H_ARULE(adj, h_choice(W(quick), W(slow), W(happy), W(lazy), W(cyan),
W(magenta), W(yellow), W(black), W(brown), NULL));
H_RULE(adverb, h_choice(W(with), W(over), W(after), NULL));
#undef W

/* phrases: a noun phrase is an article, any number of adjectives, a noun */
H_RULE(nphrase, h_sequence(art, h_many(adj), noun, NULL));

/* sentence structure: subj and obj are both built on the same nphrase rule */
H_ARULE(subj, nphrase);
H_ARULE(pred, verb);
H_ARULE(obj, nphrase);
H_ARULE(advc, h_sequence(adverb, nphrase, NULL));
H_RULE(sentnc, h_sequence(subj, pred,
h_optional(obj), h_optional(advc), NULL));

return sentnc;
}


/*
* main routine: read, parse, print
*
* input e.g.:
* "the quick brown fox jumps the fence with a cyan lion"
*/

#include <stdio.h>
#include <inttypes.h>

/*
 * Read all of stdin (at most sizeof(input) bytes), parse it with the sentence
 * grammar and pretty-print the resulting AST to stdout.
 *
 * Returns 0 on success, 1 if the input could not be read, is too long, or
 * does not parse.  Diagnostics and the consumed-byte count go to stderr.
 */
int
main(int argc, char **argv)
{
    uint8_t input[1024];
    size_t sz;
    const HParser *parser;
    const HParseResult *result;

    (void)argc;
    (void)argv;

    init();
    parser = build_parser();

    sz = fread(input, 1, sizeof(input), stdin);

    /*
     * A completely filled buffer does not set the EOF indicator, even when
     * the input ends exactly at that point.  Probe for one more byte so that
     * input of exactly sizeof(input) bytes is accepted.
     */
    if (sz == sizeof(input) && fgetc(stdin) != EOF) {
        fprintf(stderr, "too much input\n");
        return 1;
    }
    /* a short read caused by an I/O error is not "too much input" */
    if (ferror(stdin)) {
        fprintf(stderr, "read error\n");
        return 1;
    }

    result = h_parse(parser, input, sz);
    if (!result) {
        fprintf(stderr, "no parse\n");
        return 1;
    }

    h_pprintln(stdout, result->ast);
    fprintf(stderr, "consumed %" PRId64 "/%zu bytes.\n",
            result->bit_length / 8, sz);
    return 0;
}
19 changes: 16 additions & 3 deletions src/hammer.h
Original file line number Diff line number Diff line change
Expand Up @@ -728,10 +728,22 @@ HAMMER_FN_DECL(void, h_parse_result_free, HParseResult *result);
*/
char* h_write_result_unamb(const HParsedToken* tok);
/**
* Format token to the given output stream. Indent starting at
* [indent] spaces, with [delta] spaces between levels.
* Format token to the given output stream. Indent starting at [indent] spaces,
* with [delta] spaces between levels.
*
* Note: This function does not print a trailing newline. It also does not
* print any spaces to indent the initial line of output. This makes it
* suitable for recursive use in the condensed output of larger structures.
*/
void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);
/**
* Format token to the given output. Print a trailing newline.
*
* This function assumes an initial indentation of 0 and uses 2 spaces between
* indentation levels. It is equivalent to 'h_pprint(stream, tok, 0, 2)'
* followed by 'fputc('\n', stream)' and is provided for convenience.
*/
void h_pprintln(FILE* stream, const HParsedToken* tok);

/**
* Build parse tables for the given parser backend. See the
Expand Down Expand Up @@ -795,7 +807,8 @@ HTokenType h_allocate_token_type(const char* name);
/// Allocate a new token type with an unambiguous print function.
HTokenType h_allocate_token_new(
const char* name,
void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf));
void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf),
void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta));

/// Get the token type associated with name. Returns -1 if name is unknown
HTokenType h_get_token_type_number(const char* name);
Expand Down
1 change: 1 addition & 0 deletions src/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ typedef struct HTTEntry_ {
const char* name;
HTokenType value;
void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf);
void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta);
} HTTEntry;

const HTTEntry* h_get_token_type_entry(HTokenType token_type);
Expand Down
67 changes: 40 additions & 27 deletions src/pprint.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "internal.h"
#include <stdlib.h>
#include <inttypes.h>
#include <ctype.h>

typedef struct pp_state {
int delta;
Expand All @@ -31,54 +32,66 @@ typedef struct pp_state {
} pp_state_t;

void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
if (tok == NULL) {
fprintf(stream, "(null)");
return;
}
switch (tok->token_type) {
case TT_NONE:
fprintf(stream, "%*snull\n", indent, "");
fprintf(stream, "none");
break;
case TT_BYTES:
if (tok->bytes.len == 0)
fprintf(stream, "%*s<>\n", indent, "");
else {
fprintf(stream, "%*s", indent, "");
for (size_t i = 0; i < tok->bytes.len; i++) {
fprintf(stream,
"%c%02hhx",
(i == 0) ? '<' : '.',
tok->bytes.token[i]);
}
fprintf(stream, ">\n");
fprintf(stream, "\"");
for (size_t i = 0; i < tok->bytes.len; i++) {
uint8_t c = tok->bytes.token[i];
if (isprint(c))
fputc(c, stream);
else
fprintf(stream, "\\%03hho", c);
}
fprintf(stream, "\"");
break;
case TT_SINT:
if (tok->sint < 0)
fprintf(stream, "%*ss -%#" PRIx64 "\n", indent, "", -tok->sint);
fprintf(stream, "-%#" PRIx64, -tok->sint);
else
fprintf(stream, "%*ss %#" PRIx64 "\n", indent, "", tok->sint);

fprintf(stream, "+%#" PRIx64, tok->sint);
break;
case TT_UINT:
fprintf(stream, "%*su %#" PRIx64 "\n", indent, "", tok->uint);
fprintf(stream, "%#" PRIx64, tok->uint);
break;
case TT_SEQUENCE: {
fprintf(stream, "%*s[\n", indent, "");
for (size_t i = 0; i < tok->seq->used; i++) {
h_pprint(stream, tok->seq->elements[i], indent + delta, delta);
case TT_SEQUENCE:
if (tok->seq->used == 0)
fprintf(stream, "[ ]");
else {
fprintf(stream, "[%*s", delta - 1, "");
for (size_t i = 0; i < tok->seq->used; i++) {
if (i > 0) fprintf(stream, "\n%*s,%*s", indent, "", delta - 1, "");
h_pprint(stream, tok->seq->elements[i], indent + delta, delta);
}
if (tok->seq->used > 2)
fprintf(stream, "\n%*s]", indent, "");
else
fprintf(stream, " ]");
}
fprintf(stream, "%*s]\n", indent, "");
}
break;
case TT_USER:
fprintf(stream, "%*sUSER:%s\n", indent, "", h_get_token_type_name(tok->token_type));
break;
default:
if(tok->token_type > TT_USER) {
fprintf(stream, "%*sUSER:%s %d\n", indent, "", h_get_token_type_name(tok->token_type), tok->token_type-TT_USER);
if(tok->token_type >= TT_USER) {
const HTTEntry *e = h_get_token_type_entry(tok->token_type);
fprintf(stream, "USER %d (%s) ", e->value - TT_USER, e->name);
if (e->pprint)
e->pprint(stream, tok, indent, delta);
} else {
assert_message(0, "Should not reach here.");
}
}
}

/*
 * Convenience wrapper around h_pprint(): print tok to stream starting at
 * indentation 0 with 2 spaces per nesting level, then emit a newline.
 */
void h_pprintln(FILE* stream, const HParsedToken* tok) {
  enum { START_INDENT = 0, INDENT_STEP = 2 };

  h_pprint(stream, tok, START_INDENT, INDENT_STEP);
  putc('\n', stream);
}


struct result_buf {
char* output;
Expand Down
8 changes: 5 additions & 3 deletions src/registry.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,14 @@ static void default_unamb_sub(const HParsedToken* tok,

HTokenType h_allocate_token_new(
const char* name,
void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf)) {
void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf),
void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta)) {
HTTEntry* new_entry = h_alloc(&system_allocator, sizeof(*new_entry));
assert(new_entry != NULL);
new_entry->name = name;
new_entry->value = 0;
new_entry->unamb_sub = unamb_sub;
new_entry->unamb_sub = unamb_sub ? unamb_sub : default_unamb_sub;
new_entry->pprint = pprint;
HTTEntry* probe = *(HTTEntry**)tsearch(new_entry, &tt_registry, compare_entries);
if (probe->value != 0) {
// Token type already exists...
Expand All @@ -86,7 +88,7 @@ HTokenType h_allocate_token_new(
}
}
HTokenType h_allocate_token_type(const char* name) {
return h_allocate_token_new(name, default_unamb_sub);
return h_allocate_token_new(name, NULL, NULL);
}
HTokenType h_get_token_type_number(const char* name) {
HTTEntry e;
Expand Down