Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ parsers = ['parsers/%s.c'%s for s in
'unimplemented',
'whitespace',
'xor',
'value']]
'value',
'seek']]

# Source files for the parsing backends: each name in the list below is
# expanded to 'backends/<name>.c'.
backends = ['backends/%s.c' % s for s in
['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
Expand Down
74 changes: 74 additions & 0 deletions src/bitreader.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,77 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
out <<= final_shift;
return (out ^ msb) - msb; // perform sign extension
}

/*
 * Advance 'stream' by 'count' bits without producing a value.
 *
 * Does nothing when 'count' is zero or the stream has already overrun.
 * If the skip would run past the end of the input, stream->overrun is set;
 * the position is then either left at, or moved to, the end of the input
 * depending on where the shortage was detected.
 */
void h_skip_bits(HInputStream* stream, size_t count) {
  /* Empty skips and already-overrun streams are left untouched. */
  if (count == 0 || stream->overrun) {
    return;
  }

  /* Any non-empty skip starting at the end of input is an overrun. */
  if (stream->index == stream->length) {
    stream->overrun = true;
    return;
  }

  /* Step 1: bits of the current byte not covered by bit_offset/margin. */
  size_t in_byte = 8 - stream->bit_offset - stream->margin;
  if (count < in_byte) {
    /* The whole skip fits inside the current byte. */
    stream->bit_offset += count;
    return;
  }
  if (in_byte < 8) {
    /* Finish the partially consumed byte and move to the next one. */
    count -= in_byte;
    stream->index += 1;
    stream->bit_offset = 0;
    stream->margin = 0;
  }
  assert(stream->bit_offset == 0);
  assert(stream->margin == 0);

  /* Step 2: whole bytes. */
  size_t whole_bytes = count / 8;
  size_t bytes_remaining = stream->length - stream->index;
  if (whole_bytes > bytes_remaining) {
    stream->index = stream->length;
    stream->overrun = true;
    return;
  }
  stream->index += whole_bytes;
  count %= 8;
  assert(count < 8);

  /* Step 3: leftover bits land in the next byte, which must exist. */
  if (count > 0 && stream->index == stream->length) {
    stream->overrun = true;
  } else {
    stream->bit_offset = count;
  }
}

/*
 * Move 'stream' to the absolute bit position 'pos'.
 *
 * The last valid position is exactly the end of the input
 * (index == length, bit offset 0). Any target beyond that sets
 * stream->overrun and parks the stream at the end of the input.
 *
 * A seek that stays inside the current byte only changes bit_offset and
 * leaves margin as it is; any other successful seek resets margin to 0.
 * An overrun flag that is already set is never cleared here.
 */
void h_seek_bits(HInputStream* stream, size_t pos) {
  size_t pos_index = pos / 8;
  size_t pos_offset = pos % 8;

  /*
   * seek past the end?
   * This must be tested before the same-byte shortcut: when the stream
   * already sits at index == length, a non-zero bit offset there would
   * otherwise be accepted without flagging the overrun.
   */
  if ((pos_index > stream->length) ||
      (pos_index == stream->length && pos_offset > 0)) {
    stream->index = stream->length;
    stream->bit_offset = 0;
    stream->margin = 0;
    stream->overrun = true;
    return;
  }

  /* seek within the current byte? */
  if (pos_index == stream->index) {
    stream->bit_offset = pos_offset;
    return;
  }

  stream->index = pos_index;
  stream->bit_offset = pos_offset;
  stream->margin = 0;
}
26 changes: 26 additions & 0 deletions src/hammer.h
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,32 @@ HAMMER_FN_DECL(HParser*, h_get_value, const char* name);
*/
HAMMER_FN_DECL(HParser*, h_bind, const HParser *p, HContinuation k, void *env);

/**
 * This parser skips 'n' bits of input.
 *
 * Skipping beyond the end of the input marks the input stream as overrun.
 *
 * Result: None. The HParseResult exists but its AST is NULL.
 */
HAMMER_FN_DECL(HParser*, h_skip, size_t n);

/**
 * The HParser equivalent of fseek(), 'h_seek' modifies the parser's input
 * position. Note that contrary to 'fseek', offsets are in bits, not bytes.
 * The 'whence' argument uses the same values and semantics as for 'fseek':
 * SEEK_SET (offset from the start of input), SEEK_CUR (offset from the
 * current position) and SEEK_END (offset from the end of input).
 *
 * Fails if 'whence' is none of these values, or if the new input position
 * would be negative or past the end of input. Seeking exactly to the end of
 * input is allowed.
 *
 * Result: TT_UINT. The new input position, in bits.
 */
HAMMER_FN_DECL(HParser*, h_seek, ssize_t offset, int whence);

/**
 * Report the current position in bits. Consumes no input.
 *
 * For example, after two whole bytes have been consumed the result is 16.
 *
 * Result: TT_UINT. The current input position, in bits.
 */
HAMMER_FN_DECL_NOARG(HParser*, h_tell);

/**
* Free the memory allocated to an HParseResult when it is no longer needed.
*/
Expand Down
7 changes: 7 additions & 0 deletions src/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,16 @@ extern HParserBackendVTable h__glr_backend_vtable;
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.

int64_t h_read_bits(HInputStream* state, int count, char signed_p);
// Advance the stream by 'count' bits; sets state->overrun if that runs past the end.
void h_skip_bits(HInputStream* state, size_t count);
// Move the stream to absolute bit position 'pos'; sets state->overrun if 'pos' is past the end.
void h_seek_bits(HInputStream* state, size_t pos);
/*
 * Current position of the stream in bits: eight bits per byte of 'index',
 * plus the sub-byte 'bit_offset' and 'margin' fields.
 */
static inline size_t h_input_stream_pos(HInputStream* state) {
  /* Guard the multiplication (and the small additions) against wrapping. */
  assert(state->index < SIZE_MAX / 8);
  size_t whole_byte_bits = state->index * 8;
  return whole_byte_bits + state->bit_offset + state->margin;
}
/* Total length of the stream in bits (eight per byte of 'length'). */
static inline size_t h_input_stream_length(HInputStream *state) {
  /* The byte count must be small enough that the bit count fits in size_t. */
  assert(state->length <= SIZE_MAX / 8);
  size_t total_bits = state->length * 8;
  return total_bits;
}
// need to decide if we want to make this public.
HParseResult* h_do_parse(const HParser* parser, HParseState *state);
void put_cached(HParseState *ps, const HParser *p, HParseResult *cached);
Expand Down
118 changes: 118 additions & 0 deletions src/parsers/seek.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#include "parser_internal.h"

/* Environment of an h_seek parser: the arguments given to h_seek(). */
typedef struct {
ssize_t offset; /* signed displacement in bits, relative to 'whence' */
int whence; /* SEEK_SET, SEEK_CUR or SEEK_END; other values make the parse fail */
} HSeek;

/*
 * Parse function behind h_skip.
 *
 * The number of bits to skip is not pointed to by 'env'; it is stored in the
 * pointer value itself. Always returns a result whose AST is NULL; an overrun
 * is recorded on the input stream by h_skip_bits and is not checked here.
 */
static HParseResult *parse_skip(void *env, HParseState *state)
{
    const uintptr_t packed = (uintptr_t)env;
    size_t bit_count = packed;

    h_skip_bits(&state->input_stream, bit_count);

    return make_result(state->arena, NULL);
}

/*
 * Parse function behind h_seek: reposition the input stream.
 *
 * 'env' points to an HSeek holding the bit offset and the fseek-style
 * 'whence'. On success the result carries a TT_UINT token with the new
 * absolute position in bits.
 *
 * Returns NULL when 'whence' is not SEEK_SET/SEEK_CUR/SEEK_END, when the
 * target position would be negative or would not fit in size_t, or when the
 * input stream is flagged as overrun after the seek.
 */
static HParseResult *parse_seek(void *env, HParseState *state)
{
    HSeek *s = (HSeek *)env;
    HInputStream *stream = &state->input_stream;
    size_t pos;

    /* determine base position */
    switch (s->whence) {
    case SEEK_SET:
        pos = 0;
        break;
    case SEEK_END:
        pos = h_input_stream_length(stream);
        break;
    case SEEK_CUR:
        pos = h_input_stream_pos(stream);
        break;
    default:
        return NULL;    /* invalid argument */
    }

    /* calculate target position with explicit range checks */
    if (s->offset < 0) {
        /*
         * Take the magnitude in unsigned arithmetic. Negating the signed
         * value directly overflows (undefined behaviour) for the most
         * negative ssize_t.
         */
        size_t back = (size_t)0 - (size_t)s->offset;
        if (back > pos)
            return NULL;    /* underflow: target before start of input */
        pos -= back;
    } else {
        size_t fwd = (size_t)s->offset;
        if (fwd > SIZE_MAX - pos)
            return NULL;    /* overflow: target not representable */
        pos += fwd;
    }

    /* perform the seek and check for overrun */
    h_seek_bits(stream, pos);
    if (stream->overrun)
        return NULL;

    HParsedToken *tok = a_new(HParsedToken, 1);
    tok->token_type = TT_UINT;
    tok->uint = pos;
    return make_result(state->arena, tok);
}

/*
 * Parse function behind h_tell: report the current input position.
 *
 * 'env' is unused. The input stream is only read, never advanced. The result
 * carries a TT_UINT token holding the position in bits.
 */
static HParseResult *parse_tell(void *env, HParseState *state)
{
    (void)env;

    const size_t here = h_input_stream_pos(&state->input_stream);

    HParsedToken *tok = a_new(HParsedToken, 1);
    tok->uint = here;
    tok->token_type = TT_UINT;

    return make_result(state->arena, tok);
}

/*
 * Vtable for h_skip parsers. Both validity hooks are h_false and the RVM
 * compile hook is h_not_regular; parsing is done by parse_skip.
 */
static const HParserVtable skip_vt = {
    .higher = false,
    .compile_to_rvm = h_not_regular,
    .isValidCF = h_false,
    .isValidRegular = h_false,
    .parse = parse_skip,
};

/*
 * Vtable for h_seek parsers. Both validity hooks are h_false and the RVM
 * compile hook is h_not_regular; parsing is done by parse_seek.
 */
static const HParserVtable seek_vt = {
    .higher = false,
    .compile_to_rvm = h_not_regular,
    .isValidCF = h_false,
    .isValidRegular = h_false,
    .parse = parse_seek,
};

/*
 * Vtable for h_tell parsers. Both validity hooks are h_false and the RVM
 * compile hook is h_not_regular; parsing is done by parse_tell.
 */
static const HParserVtable tell_vt = {
    .higher = false,
    .compile_to_rvm = h_not_regular,
    .isValidCF = h_false,
    .isValidRegular = h_false,
    .parse = parse_tell,
};

/* Build a parser that skips 'n' bits, allocated with the system allocator. */
HParser* h_skip(size_t n)
{
    HParser *parser = h_skip__m(&system_allocator, n);
    return parser;
}

/*
 * Build a parser that skips 'n' bits, using allocator 'mm__'.
 * The bit count is stored directly in the parser's env pointer value, so no
 * separate environment object is allocated.
 */
HParser *h_skip__m(HAllocator* mm__, size_t n)
{
    void *packed_count = (void *)n;
    return h_new_parser(mm__, &skip_vt, packed_count);
}

/*
 * Build a parser that seeks by 'offset' bits relative to 'whence',
 * allocated with the system allocator.
 */
HParser* h_seek(ssize_t offset, int whence)
{
    HParser *parser = h_seek__m(&system_allocator, offset, whence);
    return parser;
}

/*
 * Build a parser that seeks by 'offset' bits relative to 'whence', using
 * allocator 'mm__'. The two arguments are stored in a newly allocated HSeek
 * that becomes the parser's environment.
 */
HParser *h_seek__m(HAllocator* mm__, ssize_t offset, int whence)
{
    HSeek *params = h_new(HSeek, 1);

    params->whence = whence;
    params->offset = offset;

    return h_new_parser(mm__, &seek_vt, params);
}

/*
 * Build a parser that reports the current input position, allocated with the
 * system allocator.
 *
 * Defined with an explicit (void) parameter list so the definition is a real
 * prototype rather than an old-style declarator with unspecified arguments.
 */
HParser *h_tell(void)
{
    return h_tell__m(&system_allocator);
}

/*
 * Build a parser that reports the current input position, using allocator
 * 'mm__'. The parser needs no environment, so env is NULL.
 */
HParser *h_tell__m(HAllocator* mm__)
{
    HParser *parser = h_new_parser(mm__, &tell_vt, NULL);
    return parser;
}
67 changes: 67 additions & 0 deletions src/t_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,70 @@ static void test_bind(gconstpointer backend) {
g_check_parse_failed(p, be, "272{", 4);
}

/* Tests for h_skip: whole-byte skips, skips ending at end of input, and
 * sub-byte skips under different bit orders. */
static void test_skip(gconstpointer backend) {
HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
const HParser *p, *p_le, *p_be;

// skip 32 bits (the four bytes "bcde") between 'a' and 'f'
p = h_sequence(h_ch('a'), h_skip(32), h_ch('f'), NULL);
g_check_parse_match(p, be, "abcdef", 6, "(u0x61 u0x66)");
g_check_parse_failed(p, be, "abcdex", 6);
g_check_parse_failed(p, be, "abc", 3);

// a skip may end exactly at the end of the input
p = h_sequence(h_ch('a'), h_skip(32), h_end_p(), NULL);
g_check_parse_match(p, be, "abcde", 5, "(u0x61)");

// sub-byte skips: 3 bits, then a byte read across a byte boundary, then 5 bits
p = h_sequence(h_ch('a'), h_skip(3), h_ch('\0'), h_skip(5), h_ch('b'), NULL);
g_check_parse_match(p, be, "a\xe0\x1f\x62", 4, "(u0x61 u0 u0x62)"); // big-endian
p_le = h_with_endianness(BYTE_LITTLE_ENDIAN|BIT_LITTLE_ENDIAN, p);
p_be = h_with_endianness(BYTE_LITTLE_ENDIAN|BIT_BIG_ENDIAN, p);
g_check_parse_match(p_be, be, "a\xe0\x1f\x62", 4, "(u0x61 u0 u0x62)");
// little-endian bit order: the input uses 0x07 0xf8 in place of 0xe0 0x1f
g_check_parse_match(p_le, be, "a\x07\xf8\x62", 4, "(u0x61 u0 u0x62)");

// same sub-byte pattern, with the final skip ending at the end of input
p = h_sequence(h_ch('a'), h_skip(3), h_ch('\0'), h_skip(5), h_end_p(), NULL);
g_check_parse_match(p, be, "a\xe0\x1f", 3, "(u0x61 u0)"); // big-endian
}

/* Tests for h_tell: the reported position is in bits and no input is consumed. */
static void test_tell(gconstpointer backend) {
  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
  const HParser *p;

  p = h_sequence(h_ch('a'), h_ch('b'), h_tell(), h_end_p(), NULL);
  // two bytes consumed -> position 16 (0x10); h_tell itself consumes nothing
  g_check_parse_match(p, be, "ab", 2, "(u0x61 u0x62 u0x10)");
  // trailing input after "ab": h_end_p must fail. The length has to cover the
  // whole string; a length of 1 would only repeat the truncated case below.
  g_check_parse_failed(p, be, "abc", 3);
  // truncated input: h_ch('b') has nothing to read
  g_check_parse_failed(p, be, "a", 1);
}

/* Tests for h_seek with each 'whence' value; offsets and results are in bits. */
static void test_seek(gconstpointer backend) {
HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
const HParser *p;

// absolute seek to bit 40 (0x28), i.e. byte 5, then expect 'f' there
p = h_sequence(h_ch('a'), h_seek(40, SEEK_SET), h_ch('f'), NULL);
g_check_parse_match(p, be, "abcdef", 6, "(u0x61 u0x28 u0x66)");
g_check_parse_failed(p, be, "abcdex", 6);
g_check_parse_failed(p, be, "abc", 3);

// absolute seek that must land exactly on the end of the input
p = h_sequence(h_ch('a'), h_seek(40, SEEK_SET), h_end_p(), NULL);
g_check_parse_match(p, be, "abcde", 5, "(u0x61 u0x28)");
g_check_parse_failed(p, be, "abcdex", 6);
g_check_parse_failed(p, be, "abc", 3);

// offset 0 from the end: the result is the input length in bits
p = h_sequence(h_ch('a'), h_seek(0, SEEK_END), h_end_p(), NULL);
g_check_parse_match(p, be, "abcde", 5, "(u0x61 u0x28)");
g_check_parse_match(p, be, "abc", 3, "(u0x61 u0x18)");

// negative offset from the end: 16 bits (two bytes) before the end
p = h_sequence(h_ch('a'), h_seek(-16, SEEK_END), h_ch('x'), NULL);
g_check_parse_match(p, be, "abcdxy", 6, "(u0x61 u0x20 u0x78)");
g_check_parse_match(p, be, "abxy", 4, "(u0x61 u0x10 u0x78)");
g_check_parse_failed(p, be, "abc", 3);
g_check_parse_failed(p, be, "x", 1);

// relative seek: 32 bits forward from the position after 'a' (bit 8 -> bit 40)
p = h_sequence(h_ch('a'), h_seek(32, SEEK_CUR), h_ch('f'), NULL);
g_check_parse_match(p, be, "abcdef", 6, "(u0x61 u0x28 u0x66)");
g_check_parse_failed(p, be, "xbcdef", 6);
g_check_parse_failed(p, be, "abcdex", 6);
g_check_parse_failed(p, be, "abc", 3);
}

void register_parser_tests(void) {
g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
Expand Down Expand Up @@ -795,6 +859,9 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind);
g_test_add_data_func("/core/parser/packrat/result_length", GINT_TO_POINTER(PB_PACKRAT), test_result_length);
//g_test_add_data_func("/core/parser/packrat/token_position", GINT_TO_POINTER(PB_PACKRAT), test_token_position);
g_test_add_data_func("/core/parser/packrat/skip", GINT_TO_POINTER(PB_PACKRAT), test_skip);
g_test_add_data_func("/core/parser/packrat/seek", GINT_TO_POINTER(PB_PACKRAT), test_seek);
g_test_add_data_func("/core/parser/packrat/tell", GINT_TO_POINTER(PB_PACKRAT), test_tell);

g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
Expand Down