From bddf6f9d6b0fda8f4f0667e936b77d68bb0c007c Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Sun, 24 Nov 2019 22:18:19 +0100
Subject: [PATCH 1/8] pprint a null AST as "(null)" and TT_NONE as "none"

---
 src/pprint.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/pprint.c b/src/pprint.c
index 52f42eb6..c178eb72 100644
--- a/src/pprint.c
+++ b/src/pprint.c
@@ -31,9 +31,13 @@ typedef struct pp_state {
 } pp_state_t;
 
 void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
+  if (tok == NULL) {
+    fprintf(stream, "%*s(null)\n", indent, "");
+    return;
+  }
   switch (tok->token_type) {
   case TT_NONE:
-    fprintf(stream, "%*snull\n", indent, "");
+    fprintf(stream, "%*snone\n", indent, "");
     break;
   case TT_BYTES:
     if (tok->bytes.len == 0)
@@ -54,7 +58,6 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
       fprintf(stream, "%*ss -%#" PRIx64 "\n", indent, "", -tok->sint);
     else
       fprintf(stream, "%*ss %#" PRIx64 "\n", indent, "", tok->sint);
-
     break;
   case TT_UINT:
     fprintf(stream, "%*su %#" PRIx64 "\n", indent, "", tok->uint);

From 0757f5f5a00051c7e0fae61f1a63203f65f31c4b Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Sun, 24 Nov 2019 22:19:55 +0100
Subject: [PATCH 2/8] pprint TT_BYTES like C strings

---
 src/pprint.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/pprint.c b/src/pprint.c
index c178eb72..06e15c46 100644
--- a/src/pprint.c
+++ b/src/pprint.c
@@ -23,6 +23,7 @@
 #include "internal.h"
 #include <stdlib.h>
 #include <inttypes.h>
+#include <ctype.h>
 
 typedef struct pp_state {
   int delta;
@@ -40,18 +41,15 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
     fprintf(stream, "%*snone\n", indent, "");
     break;
   case TT_BYTES:
-    if (tok->bytes.len == 0)
-      fprintf(stream, "%*s<>\n", indent, "");
-    else {
-      fprintf(stream, "%*s", indent, "");
-      for (size_t i = 0; i < tok->bytes.len; i++) {
-        fprintf(stream,
-                "%c%02hhx",
-                (i == 0) ? '<' : '.',
-                tok->bytes.token[i]);
-      }
-      fprintf(stream, ">\n");
+    fprintf(stream, "%*s\"", indent, "");
+    for (size_t i = 0; i < tok->bytes.len; i++) {
+      uint8_t c = tok->bytes.token[i];
+      if (isprint(c))
+        fputc(c, stream);
+      else
+        fprintf(stream, "\\%03hho", c);
     }
+    fprintf(stream, "\"\n");
     break;
   case TT_SINT:
     if (tok->sint < 0)

From 7806a848045b844db0be186ee07869c75810b61b Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Sun, 24 Nov 2019 22:21:09 +0100
Subject: [PATCH 3/8] code cosmetics

---
 src/pprint.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/pprint.c b/src/pprint.c
index 06e15c46..15c14f83 100644
--- a/src/pprint.c
+++ b/src/pprint.c
@@ -60,20 +60,18 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
   case TT_UINT:
     fprintf(stream, "%*su %#" PRIx64 "\n", indent, "", tok->uint);
     break;
-  case TT_SEQUENCE: {
+  case TT_SEQUENCE:
     fprintf(stream, "%*s[\n", indent, "");
     for (size_t i = 0; i < tok->seq->used; i++) {
       h_pprint(stream, tok->seq->elements[i], indent + delta, delta);
     }
     fprintf(stream, "%*s]\n", indent, "");
-  }
-    break;
-  case TT_USER:
-    fprintf(stream, "%*sUSER:%s\n", indent, "", h_get_token_type_name(tok->token_type));
     break;
   default:
-    if(tok->token_type > TT_USER) {
-      fprintf(stream, "%*sUSER:%s %d\n", indent, "", h_get_token_type_name(tok->token_type), tok->token_type-TT_USER);
+    if(tok->token_type >= TT_USER) {
+      const char *name = h_get_token_type_name(tok->token_type);
+      int num = tok->token_type-TT_USER;
+      fprintf(stream, "%*sUSER:%s %d\n", indent, "", name, num);
     } else {
       assert_message(0, "Should not reach here.");
     }

From 0f1c0deb1f13afdc1d91a7662e4b0da3b966644f Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Sun, 24 Nov 2019 23:20:48 +0100
Subject: [PATCH 4/8] add a pprint function pointer to HTTEntry

---
 src/internal.h | 1 +
 src/pprint.c   | 6 ++++--
 src/registry.c | 1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/internal.h b/src/internal.h
index 0e92e99e..79d6c978 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -428,6 +428,7 @@ typedef struct HTTEntry_ {
   const char* name;
   HTokenType value;
   void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf);
+  void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta);
 } HTTEntry;
 
 const HTTEntry* h_get_token_type_entry(HTokenType token_type);
diff --git a/src/pprint.c b/src/pprint.c
index 15c14f83..2c9fbc2e 100644
--- a/src/pprint.c
+++ b/src/pprint.c
@@ -69,9 +69,11 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
     break;
   default:
     if(tok->token_type >= TT_USER) {
-      const char *name = h_get_token_type_name(tok->token_type);
+      const HTTEntry *e = h_get_token_type_entry(tok->token_type);
       int num = tok->token_type-TT_USER;
-      fprintf(stream, "%*sUSER:%s %d\n", indent, "", name, num);
+      fprintf(stream, "%*sUSER:%s %d\n", indent, "", e->name, num);
+      if (e->pprint)
+        e->pprint(stream, tok, indent + delta, delta);
     } else {
       assert_message(0, "Should not reach here.");
     }
diff --git a/src/registry.c b/src/registry.c
index 00486db4..f0201c61 100644
--- a/src/registry.c
+++ b/src/registry.c
@@ -60,6 +60,7 @@ HTokenType h_allocate_token_new(
   new_entry->name = name;
   new_entry->value = 0;
   new_entry->unamb_sub = unamb_sub;
+  new_entry->pprint = NULL;
   HTTEntry* probe = *(HTTEntry**)tsearch(new_entry, &tt_registry, compare_entries);
   if (probe->value != 0) {
     // Token type already exists...

From 93ab0d723171d2d5ca56d16d0e6b2f997932e3ad Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Mon, 25 Nov 2019 00:10:43 +0100
Subject: [PATCH 5/8] a bit more pprint prettification

---
 src/pprint.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/pprint.c b/src/pprint.c
index 2c9fbc2e..6747a2a8 100644
--- a/src/pprint.c
+++ b/src/pprint.c
@@ -32,16 +32,17 @@ typedef struct pp_state {
 } pp_state_t;
 
 void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
+  fprintf(stream, "%*s", indent, ""); 
   if (tok == NULL) {
-    fprintf(stream, "%*s(null)\n", indent, "");
+    fprintf(stream, "(null)\n");
     return;
   }
   switch (tok->token_type) {
   case TT_NONE:
-    fprintf(stream, "%*snone\n", indent, "");
+    fprintf(stream, "none");
     break;
   case TT_BYTES:
-    fprintf(stream, "%*s\"", indent, "");
+    fprintf(stream, "\"");
     for (size_t i = 0; i < tok->bytes.len; i++) {
       uint8_t c = tok->bytes.token[i];
       if (isprint(c))
@@ -49,35 +50,38 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
       else
         fprintf(stream, "\\%03hho", c);
     }
-    fprintf(stream, "\"\n");
+    fprintf(stream, "\"");
     break;
   case TT_SINT:
     if (tok->sint < 0)
-      fprintf(stream, "%*ss -%#" PRIx64 "\n", indent, "", -tok->sint);
+      fprintf(stream, "s -%#" PRIx64, -tok->sint);
     else
-      fprintf(stream, "%*ss %#" PRIx64 "\n", indent, "", tok->sint);
+      fprintf(stream, "s %#" PRIx64, tok->sint);
     break;
   case TT_UINT:
-    fprintf(stream, "%*su %#" PRIx64 "\n", indent, "", tok->uint);
+    fprintf(stream, "u %#" PRIx64, tok->uint);
     break;
   case TT_SEQUENCE:
-    fprintf(stream, "%*s[\n", indent, "");
-    for (size_t i = 0; i < tok->seq->used; i++) {
-      h_pprint(stream, tok->seq->elements[i], indent + delta, delta);
+    if (tok->seq->used == 0)
+      fprintf(stream, "[]");
+    else {
+      fprintf(stream, "[\n");
+      for (size_t i = 0; i < tok->seq->used; i++)
+        h_pprint(stream, tok->seq->elements[i], indent + delta, delta);
+      fprintf(stream, "%*s]", indent, "");
     }
-    fprintf(stream, "%*s]\n", indent, "");
     break;
   default:
     if(tok->token_type >= TT_USER) {
       const HTTEntry *e = h_get_token_type_entry(tok->token_type);
-      int num = tok->token_type-TT_USER;
-      fprintf(stream, "%*sUSER:%s %d\n", indent, "", e->name, num);
+      fprintf(stream, "USER %d (%s) ", e->value - TT_USER, e->name);
       if (e->pprint)
         e->pprint(stream, tok, indent + delta, delta);
     } else {
       assert_message(0, "Should not reach here.");
     }
   }
+  fputc('\n', stream);
 }
 
 

From a1a872682fff39fb8c86997b1352c69c3aa6926b Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Mon, 25 Nov 2019 00:39:00 +0100
Subject: [PATCH 6/8] add pprint argument to h_allocate_token_new()

also allow NULL argument for unamb_sub to mean default
---
 src/hammer.h   | 3 ++-
 src/registry.c | 9 +++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/hammer.h b/src/hammer.h
index ad44fee9..5774b2dd 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -795,7 +795,8 @@ HTokenType h_allocate_token_type(const char* name);
 /// Allocate a new token type with an unambiguous print function.
 HTokenType h_allocate_token_new(
     const char* name,
-    void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf));
+    void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf),
+    void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta));
 
 /// Get the token type associated with name. Returns -1 if name is unkown
 HTokenType h_get_token_type_number(const char* name);
diff --git a/src/registry.c b/src/registry.c
index f0201c61..5486fd7b 100644
--- a/src/registry.c
+++ b/src/registry.c
@@ -54,13 +54,14 @@ static void default_unamb_sub(const HParsedToken* tok,
 
 HTokenType h_allocate_token_new(
     const char* name,
-    void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf)) {
+    void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf),
+    void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta)) {
   HTTEntry* new_entry = h_alloc(&system_allocator, sizeof(*new_entry));
   assert(new_entry != NULL);
   new_entry->name = name;
   new_entry->value = 0;
-  new_entry->unamb_sub = unamb_sub;
-  new_entry->pprint = NULL;
+  new_entry->unamb_sub = unamb_sub ? unamb_sub : default_unamb_sub;
+  new_entry->pprint = pprint;
   HTTEntry* probe = *(HTTEntry**)tsearch(new_entry, &tt_registry, compare_entries);
   if (probe->value != 0) {
     // Token type already exists...
@@ -87,7 +88,7 @@ HTokenType h_allocate_token_new(
   }
 }
 HTokenType h_allocate_token_type(const char* name) {
-  return h_allocate_token_new(name, default_unamb_sub);
+  return h_allocate_token_new(name, NULL, NULL);
 }
 HTokenType h_get_token_type_number(const char* name) {
   HTTEntry e;

From 33a98aef9dc02946fd86098511d21b119518fa3a Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Mon, 25 Nov 2019 12:56:08 +0100
Subject: [PATCH 7/8] condense h_pprint() output. add h_pprintln().

---
 src/hammer.h | 16 ++++++++++++++--
 src/pprint.c | 28 ++++++++++++++++++----------
 2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/src/hammer.h b/src/hammer.h
index 5774b2dd..d983f2c5 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -728,10 +728,22 @@ HAMMER_FN_DECL(void, h_parse_result_free, HParseResult *result);
  */
 char* h_write_result_unamb(const HParsedToken* tok);
 /**
- * Format token to the given output stream. Indent starting at
- * [indent] spaces, with [delta] spaces between levels.
+ * Format token to the given output stream. Indent starting at [indent] spaces,
+ * with [delta] spaces between levels.
+ *
+ * Note: This function does not print a trailing newline. It also does not
+ * print any spaces to indent the initial line of output. This makes it
+ * suitable for recursive use in the condensed output of larger structures.
  */
 void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);
+/**
+ * Format token to the given output. Print a trailing newline.
+ *
+ * This function assumes an initial indentation of 0 and uses 2 spaces between
+ * indentation levels. It is equivalent to 'h_pprint(stream, tok, 0, 2)'
+ * followed by 'fputc('\n', stream)' and is provided for convenience.
+ */
+void h_pprintln(FILE* stream, const HParsedToken* tok);
 
 /**
  * Build parse tables for the given parser backend. See the
diff --git a/src/pprint.c b/src/pprint.c
index 6747a2a8..85a9f7db 100644
--- a/src/pprint.c
+++ b/src/pprint.c
@@ -32,9 +32,8 @@ typedef struct pp_state {
 } pp_state_t;
 
 void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
-  fprintf(stream, "%*s", indent, ""); 
   if (tok == NULL) {
-    fprintf(stream, "(null)\n");
+    fprintf(stream, "(null)");
     return;
   }
   switch (tok->token_type) {
@@ -54,21 +53,26 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
     break;
   case TT_SINT:
     if (tok->sint < 0)
-      fprintf(stream, "s -%#" PRIx64, -tok->sint);
+      fprintf(stream, "-%#" PRIx64, -tok->sint);
     else
-      fprintf(stream, "s %#" PRIx64, tok->sint);
+      fprintf(stream, "+%#" PRIx64, tok->sint);
     break;
   case TT_UINT:
-    fprintf(stream, "u %#" PRIx64, tok->uint);
+    fprintf(stream, "%#" PRIx64, tok->uint);
     break;
   case TT_SEQUENCE:
     if (tok->seq->used == 0)
-      fprintf(stream, "[]");
+      fprintf(stream, "[ ]");
     else {
-      fprintf(stream, "[\n");
-      for (size_t i = 0; i < tok->seq->used; i++)
+      fprintf(stream, "[%*s", delta - 1, "");
+      for (size_t i = 0; i < tok->seq->used; i++) {
+	if (i > 0) fprintf(stream, "\n%*s,%*s", indent, "", delta - 1, "");
         h_pprint(stream, tok->seq->elements[i], indent + delta, delta);
-      fprintf(stream, "%*s]", indent, "");
+      }
+      if (tok->seq->used > 2)
+        fprintf(stream, "\n%*s]", indent, "");
+      else
+        fprintf(stream, " ]");
     }
     break;
   default:
@@ -76,11 +80,15 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
       const HTTEntry *e = h_get_token_type_entry(tok->token_type);
       fprintf(stream, "USER %d (%s) ", e->value - TT_USER, e->name);
       if (e->pprint)
-        e->pprint(stream, tok, indent + delta, delta);
+        e->pprint(stream, tok, indent, delta);
     } else {
       assert_message(0, "Should not reach here.");
     }
   }
+}
+
+void h_pprintln(FILE* stream, const HParsedToken* tok) {
+  h_pprint(stream, tok, 0, 2);
   fputc('\n', stream);
 }
 

From cbeee9d142a61d0ea536d0d924c69ea177d36b22 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Mon, 25 Nov 2019 12:57:48 +0100
Subject: [PATCH 8/8] add examples/ttuser - show custom tokens/pprint

---
 examples/SConscript |   3 +-
 examples/ttuser.c   | 140 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 examples/ttuser.c

diff --git a/examples/SConscript b/examples/SConscript
index b34b85a1..8504b4bb 100644
--- a/examples/SConscript
+++ b/examples/SConscript
@@ -6,8 +6,9 @@ example = env.Clone()
 example.Append(LIBS="hammer", LIBPATH="../src")
 
 dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c'])
+ttuser = example.Program('ttuser', 'ttuser.c')
 base64 = example.Program('base64', 'base64.c')
 base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c')
 base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c')
 ties = example.Program('ties', ['ties.c', 'grammar.c'])
-env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties])
\ No newline at end of file
+env.Alias("examples", [dns, ttuser, base64, base64_sem1, base64_sem2, ties])
diff --git a/examples/ttuser.c b/examples/ttuser.c
new file mode 100644
index 00000000..4e83356c
--- /dev/null
+++ b/examples/ttuser.c
@@ -0,0 +1,140 @@
+/*
+ * Example parser that demonstrates the use of user-defined token types.
+ *
+ * Note the custom printer function that hooks into h_pprint().
+ */
+
+#include "../src/hammer.h"
+#include "../src/glue.h"
+
+
+/*
+ * custom tokens
+ */
+
+HTokenType TT_SUBJ, TT_PRED, TT_OBJ, TT_ADJ, TT_ADVC;
+
+void
+pprint(FILE *stream, const HParsedToken *tok, int indent, int delta)
+{
+	/*
+	 * Pretty-printer rules:
+	 *  - Output 'indent' spaces after every newline you produce.
+	 *  - Do not add indent on the first line of output.
+	 *  - Do not add a trailing newline.
+	 *  - Indent sub-objects by adding 'delta' to 'indent'.
+	 */
+	const HParsedToken *inner = tok->user;	/* the wrapped AST */
+	/* h_pprint() prints "(null)" itself; just don't dereference NULL. */
+	if (inner != NULL && inner->token_type == TT_SEQUENCE)
+		fprintf(stream, "\n%*s", indent, "");
+	h_pprint(stream, inner, indent, delta);
+}
+
+/* Register the custom token types; they all share the pprint hook above. */
+/* XXX define unamb_sub as well; NULL selects the library default for now. */
+void
+init(void)
+{
+	TT_SUBJ = h_allocate_token_new("subject", NULL, pprint);
+	TT_PRED = h_allocate_token_new("predicate", NULL, pprint);
+	TT_OBJ  = h_allocate_token_new("object", NULL, pprint);
+	TT_ADJ  = h_allocate_token_new("adjective", NULL, pprint);
+	TT_ADVC = h_allocate_token_new("adverbial clause", NULL, pprint);
+}
+
+
+/*
+ * semantic actions
+ *
+ * Normally these would be more interesting, but for this example each one just
+ * wraps p->ast in a token of its custom type; pprint() expects it in tok->user.
+ */
+HParsedToken *act_subj(const HParseResult *p, void *u) {
+	return H_MAKE(SUBJ, (void *)p->ast);
+}
+HParsedToken *act_pred(const HParseResult *p, void *u) {
+	return H_MAKE(PRED, (void *)p->ast);
+}
+HParsedToken *act_obj(const HParseResult *p, void *u) {
+	return H_MAKE(OBJ, (void *)p->ast);
+}
+HParsedToken *act_adj(const HParseResult *p, void *u) {
+	return H_MAKE(ADJ, (void *)p->ast);
+}
+HParsedToken *act_advc(const HParseResult *p, void *u) {
+	return H_MAKE(ADVC, (void *)p->ast);
+}
+
+
+/*
+ * grammar
+ */
+
+HParser *
+build_parser(void)
+{
+	/* words: W(X) is the literal "X" wrapped in h_whitespace() */
+	#define W(X)	h_whitespace(h_literal(#X))
+	H_RULE(art,	h_choice(W(a), W(the), NULL));
+	H_RULE(noun,	h_choice(W(cat), W(dog), W(fox), W(tiger), W(lion),
+			    W(bear), W(fence), W(tree), W(car), W(cow), NULL));
+	H_RULE(verb,	h_choice(W(eats), W(jumps), W(falls), NULL));
+	H_ARULE(adj,	h_choice(W(quick), W(slow), W(happy), W(lazy), W(cyan),
+			    W(magenta), W(yellow), W(black), W(brown), NULL));
+	H_RULE(adverb,	h_choice(W(with), W(over), W(after), NULL));
+	#undef W
+
+	/* phrases: article, h_many() adjectives, noun */
+	H_RULE(nphrase,	h_sequence(art, h_many(adj), noun, NULL));
+
+	/* sentence structure: subject, predicate, optional object and clause */
+	H_ARULE(subj,	nphrase);
+	H_ARULE(pred,	verb);
+	H_ARULE(obj,	nphrase);
+	H_ARULE(advc,	h_sequence(adverb, nphrase, NULL));
+	H_RULE(sentnc,	h_sequence(subj, pred,
+			    h_optional(obj), h_optional(advc), NULL));
+
+	return sentnc;
+}
+
+
+/*
+ * main routine: read, parse, print
+ *
+ * input e.g.:
+ * "the quick brown fox jumps the fence with a cyan lion"
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+
+int
+main(int argc, char **argv)
+{
+	uint8_t input[1024];
+	size_t sz;
+	const HParser *parser;
+	const HParseResult *result;
+
+	init();
+	parser = build_parser();
+	/* Reject read errors; a full buffer is fine unless more input follows. */
+	sz = fread(input, 1, sizeof(input), stdin);
+	if (ferror(stdin) || (sz == sizeof(input) && fgetc(stdin) != EOF)) {
+		fprintf(stderr, "too much input\n");
+		return 1;
+	}
+
+	result = h_parse(parser, input, sz);
+	if (!result) {
+		fprintf(stderr, "no parse\n");
+		return 1;
+	}
+
+	h_pprintln(stdout, result->ast);
+	fprintf(stderr, "consumed %" PRId64 "/%zu bytes.\n",
+	    result->bit_length / 8, sz);
+	return 0;
+}