diff --git a/.zigversion b/.zigversion index c39e9c5..a12760e 100644 --- a/.zigversion +++ b/.zigversion @@ -1 +1 @@ -0.14.1 \ No newline at end of file +0.15.2 \ No newline at end of file diff --git a/README.md b/README.md index 525432c..7dea1c6 100644 --- a/README.md +++ b/README.md @@ -4,24 +4,19 @@ main source of knowledge: https://matklad.github.io/2020/04/13/simple-but-powerf ### Run locally ```bash -zig run parser.zig +zig build run -- --help ``` ### Run tests ```bash -zig test parser.zig +zig build test ``` ### Build WASM and run web interface ```bash zig build wasm -cp zig-out/bin/parser.wasm . -python -m http.server 8081 +cd assets +cp ../zig-out/bin/parser.wasm . +python3 -m http.server 8081 ``` Then open `http://localhost:8081` - -## Files - -- `parser.zig` - Main parser with stdout -- `wasm.zig` - Same as the previous one, but this is the WASM version with JSON output -- `index.html`, `style.css`, `script.js` - Web interface \ No newline at end of file diff --git a/index.html b/assets/index.html similarity index 100% rename from index.html rename to assets/index.html diff --git a/assets/parser.wasm b/assets/parser.wasm new file mode 100644 index 0000000..3193518 Binary files /dev/null and b/assets/parser.wasm differ diff --git a/script.js b/assets/script.js similarity index 100% rename from script.js rename to assets/script.js diff --git a/style.css b/assets/style.css similarity index 100% rename from style.css rename to assets/style.css diff --git a/build.zig b/build.zig index da35d99..74c5167 100644 --- a/build.zig +++ b/build.zig @@ -1,17 +1,33 @@ const std = @import("std"); -pub fn build(b: *std.Build) void { +pub fn build(b: *std.Build) !void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); + const tex_mod = b.addModule("tex", .{ + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + + const tex_lib = b.addLibrary(.{ + .name = "tex", + .root_module = tex_mod, + .linkage = .static, + }); + b.installArtifact(tex_lib); + + const exe_mod = b.createModule(.{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + exe_mod.addImport("tex", tex_mod); + // Create the executable const exe = b.addExecutable(.{ - .name = "parser", - .root_module = b.createModule(.{ - .root_source_file = b.path("parser.zig"), - .target = target, - .optimize = optimize, - }), + .name = "zig-tex", + .root_module = exe_mod, }); b.installArtifact(exe); @@ -25,7 +41,7 @@ pub fn build(b: *std.Build) void { const wasm_lib = b.addExecutable(.{ .name = "parser", .root_module = b.createModule(.{ - .root_source_file = b.path("wasm.zig"), + .root_source_file = b.path("src/wasm.zig"), .target = wasm_target, .optimize = optimize, }), @@ -50,7 +66,7 @@ pub fn build(b: *std.Build) void { // Create the test executable const exe_unit_tests = b.addTest(.{ .root_module = b.createModule(.{ - .root_source_file = b.path("parser.zig"), + .root_source_file = b.path("src/parser.zig"), .target = target, .optimize = optimize, }), @@ -60,4 +76,94 @@ pub fn build(b: *std.Build) void { const test_step = b.step("test", "Run unit tests"); test_step.dependOn(&run_exe_unit_tests.step); + try setupSnapshotTesting(b, target, exe); +} + +fn setupSnapshotTesting( + b: *std.Build, + target: std.Build.ResolvedTarget, + zemml_exe: *std.Build.Step.Compile, +) !void { + var arena_allocator = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena_allocator.deinit(); + + const test_step = 
b.step("test-snapshots", "build snapshot tests and diff the results"); + + const camera = b.addExecutable(.{ + .name = "camera", + .root_module = b.createModule(.{ + .root_source_file = b.path("src/build/camera.zig"), + .target = target, + .optimize = .ReleaseFast, + }), + }); + + const diff = b.addSystemCommand(&.{ + "git", + "diff", + "--cached", + "--exit-code", + }); + diff.addDirectoryArg(b.path("tests")); + diff.setName("git diff tests/"); + test_step.dependOn(&diff.step); + + // We need to stage all of tests/ in order for untracked files to show up in + // the diff. It's also not a bad automatism since it avoids the problem of + // forgetting to stage new snapshot files. + const git_add = b.addSystemCommand(&.{ "git", "add" }); + git_add.addDirectoryArg(b.path("tests/")); + git_add.setName("git add tests/"); + diff.step.dependOn(&git_add.step); + + try setupSnapshotTestFolder( + &arena_allocator, + b, + camera, + zemml_exe, + git_add, + "tests/parse_ast", + "--format=ast", + ); +} + +fn setupSnapshotTestFolder( + arena: *std.heap.ArenaAllocator, + b: *std.Build, + camera: *std.Build.Step.Compile, + zemml_exe: *std.Build.Step.Compile, + git_add: *std.Build.Step.Run, + test_path: []const u8, + format_arg: []const u8, +) !void { + const tests_dir = try b.build_root.handle.openDir(test_path, .{ + .iterate = true, + }); + + var it = tests_dir.iterateAssumeFirstIteration(); + while (try it.next()) |entry| { + if (entry.kind != .file) continue; + const src_path = b.pathJoin(&.{ test_path, entry.name }); + + _ = arena.reset(.retain_capacity); + + const snap_name = try std.fmt.allocPrint(arena.allocator(), "{s}.snapshot.txt", .{entry.name}); + const snap_path = b.pathJoin(&.{ test_path, "snapshots", snap_name }); + const input_arg = try std.fmt.allocPrint(arena.allocator(), "--input={s}", .{src_path}); + // const output_arg = try std.fmt.allocPrint(arena.allocator(), "--output={s}", .{snap_path}); + + const run_camera = b.addRunArtifact(camera); + run_camera.addArtifactArg(zemml_exe); + run_camera.addArg(input_arg); + run_camera.addArg(format_arg); + // run_camera.addArg(output_arg); + run_camera.has_side_effects = true; + + const stdout = run_camera.captureStdErr(); + const update_snap = b.addUpdateSourceFiles(); + update_snap.addCopyFileToSource(stdout, snap_path); + + update_snap.step.dependOn(&run_camera.step); + git_add.step.dependOn(&update_snap.step); + } } diff --git a/build.zig.zon b/build.zig.zon new file mode 100644 index 0000000..eee96d1 --- /dev/null +++ b/build.zig.zon @@ -0,0 +1,24 @@ +.{ + // This is the default name used by packages depending on this one. For + // example, when a user runs `zig fetch --save `, this field is used + // as the key in the `dependencies` table. Although the user can choose a + // different name, most users will stick with this provided value. + // + // It is redundant to include "zig" in this name because it is already + // within the Zig package namespace. + .name = .tex, + // This is a [Semantic Version](https://semver.org/). + // In a future version of Zig it will be used for package deduplication. + .version = "0.0.0", + .fingerprint = 0xeaed7f72a51b943e, // Changing this has security and trust implications. + .minimum_zig_version = "0.15.2", + .dependencies = .{}, + .paths = .{ + "build.zig", + "build.zig.zon", + "src", + // For example... 
+ //"LICENSE", + //"README.md", + }, +} diff --git a/parser.wasm b/parser.wasm deleted file mode 100644 index bb28145..0000000 Binary files a/parser.wasm and /dev/null differ diff --git a/parser.zig b/parser.zig deleted file mode 100644 index cf751df..0000000 --- a/parser.zig +++ /dev/null @@ -1,1088 +0,0 @@ -const std = @import("std"); - -const print = std.debug.print; -const expect = std.testing.expect; -const Allocator = std.heap.page_allocator; -const ArrayList = std.ArrayList; - -// a + b -- Add(a,b) -// f = (x) -> x + 1 -- Assignment of lambda function -// f(x) = x + 1 -- FunctionCall -// abc.sin -- lhs / dot / rhs -// a.x -- DotAccess -// (x) + 1 -// (x) => x - 1 -// [a, b, c].join() -// [a, b, c] = f() - -// fn dfdx(k: usize, x: *f32) void { -// x.* += @floatFromInt(k); -// } - -// Lexer Tokens -const TokenType = enum { - Variable, - Constant, - Number, - Integer, - Real, - ImaginaryUnit, - Plus, - Minus, - Caret, - Asterisk, - Identifier, - BinaryOp, - Op, - latex_command, - Slash, - LParen, - RParen, - LBrak, - RBrak, - LBrace, - RBrace, - With, - For, - OperatorName, - Color, - Left, - Right, - VBar, - LVBar, - RVBar, - Comma, - Invalid, - FunctionName, - Fraction, - Eof, -}; -// \frac{}{} -pub const keywords = std.StaticStringMap(TokenType).initComptime(.{ - .{ "left(", .LParen }, - .{ "right)", .RParen }, - .{ "left[", .LBrak }, - .{ "right]", .RBrak }, - .{ "left{", .LBrace }, - .{ "right}", .RBrace }, - .{ "left|", .LVBar }, - .{ "right|", .RVBar }, - .{ "left", .Left }, // latex: \left - .{ "right", .Right }, // latex: \right - .{ "{", .LBrace }, // latex: \{ - .{ "}", .RBrace }, // latex : \} - .{ "frac", .Fraction }, - .{ "operatorname", .OperatorName }, // LaTeX: \operatorname{with} - .{ "with", .With }, // LaTeX: \operatorname{with} - .{ "for", .For }, // LaTeX: \operatorname{for} - .{ "rgb", .Color }, // LaTeX: \operatorname{rgb} - - .{ "sin", .FunctionName }, - .{ "cos", .FunctionName }, - .{ "tan", .FunctionName }, - - .{ "theta", .Variable }, - .{ "alpha", .Variable }, - .{ "gamma", .Variable }, - .{ "pi", .Constant }, -}); - -const Token = struct { type: TokenType, text: ?u8 = null, value: ?i64 = null }; - -pub fn getKeyword(text: []const u8) ?TokenType { - return keywords.get(text); -} - -const Loc = struct { from: usize, to: usize }; -pub const _Token = struct { tag: TokenType, pos: Loc }; - -pub const TokenStream = ArrayList(_Token); - -pub const Tokenizer = struct { - buffer: [:0]const u8, - index: usize, - - const State = enum { - start, - identifier, - variable, - variable_subscript, - latex_command, - operator_name, - builtin, - plus, - minus, - int, - period, - number, // integer - decimal_number, // float - invalid, - unknown, - }; - - pub fn dump(self: *Tokenizer, token: *const _Token) void { - std.debug.print("{s} \"{s}\"\n", .{ @tagName(token.tag), self.buffer[token.pos.from..(token.pos.to)] }); - } - - pub fn init(buffer: [:0]const u8) Tokenizer { - // print("Initial string, {s}, len: {d}\n", .{buffer, buffer.len}); - return .{ - .buffer = buffer, - .index = 0, - }; - } - - pub fn next(self: *Tokenizer) ?_Token { - var result: _Token = .{ .tag = undefined, .pos = .{ - .from = self.index, - .to = undefined, - } }; - - if (self.index >= self.buffer.len) { - // print("Reached end of expression\n", .{}); - return .{ - .tag = .Eof, - .pos = .{ - .from = self.index, - .to = self.index, - }, - }; - } - - state: switch (State.start) { - .start => switch (self.buffer[self.index]) { - '0'...'9' => { - self.index += 1; - continue :state .number; - }, - '.' 
=> { - self.index += 1; - continue :state .period; - }, - '\\' => { - self.index += 1; - result.pos.from = self.index; // ignore the backslash - continue :state .latex_command; - }, - 'a'...'z', 'A'...'Z' => { - result.tag = .Variable; - continue :state .variable; - }, - '+', '-', '*', '/', '^', '=' => { - result.tag = .Op; - self.index += 1; - }, - ',' => { - result.tag = .Comma; - self.index += 1; - }, - '(' => { - result.tag = .LParen; - self.index += 1; - }, - ')' => { - result.tag = .RParen; - self.index += 1; - }, - '[' => { - result.tag = .LBrak; - self.index += 1; - }, - ']' => { - result.tag = .RBrak; - self.index += 1; - }, - '{' => { - result.tag = .LBrace; - self.index += 1; - }, - '}' => { - result.tag = .RBrace; - self.index += 1; - }, - '|' => { - result.tag = .VBar; - self.index += 1; - }, - ' ' => { - self.index += 1; // skip whitespace - result.pos.from = self.index; - continue :state .start; - }, - else => { - result.tag = .Invalid; - self.index += 1; - }, - }, - .number => { - switch (self.buffer[self.index]) { - '0'...'9' => { // this will consume numbers in "123.23" before the decimal - self.index += 1; - continue :state .number; - }, - '.' => continue :state .decimal_number, - else => { - result.tag = .Integer; - }, - } - }, - .decimal_number => { - self.index += 1; - switch (self.buffer[self.index]) { - '0'...'9' => { // this will consume numbers after the decimal point - // self.index += 1; - continue :state .decimal_number; - }, - else => { - result.tag = .Real; - }, - } - }, - .period => { - switch (self.buffer[self.index]) { - '0'...'9' => continue :state .decimal_number, - else => { - result.tag = .Op; - }, - } - }, - .variable => { // If we're here, then we're past the initial letter in a_{123} - self.index += 1; - switch (self.buffer[self.index]) { - '_' => continue :state .variable_subscript, - else => { - result.tag = .Variable; - }, - } - }, - .variable_subscript => { - self.index += 1; - switch (self.buffer[self.index]) { - 'a'...'z', 'A'...'Z', '0'...'9', '{' => continue :state .variable_subscript, - '}' => { - self.index += 1; - result.tag = .Variable; - }, - else => { - result.tag = .Variable; - }, - } - }, - .operator_name => { - switch (self.buffer[self.index]) { - '{' => { - self.index += 1; - result.pos.from = self.index; // ignore the opening brace - continue :state .operator_name; - }, - 'a'...'z', 'A'...'Z' => { - self.index += 1; - continue :state .operator_name; - }, - else => { - const text = self.buffer[result.pos.from..self.index]; - print("Keyword: {s}\n", .{text}); - if (getKeyword(text)) |tag| { - result.tag = tag; - result.pos.to = self.index; - self.index += 1; - return result; - } - result.tag = .Invalid; - result.pos.to = self.index; - return result; - }, - } - }, - .latex_command => { - self.index += 1; - result.tag = .latex_command; - switch (self.buffer[self.index]) { - 'a'...'z', 'A'...'Z' => continue :state .latex_command, - else => { - const text = self.buffer[result.pos.from..self.index]; - if (getKeyword(text)) |tag| { - print("Keyword found: {s} -> {s}\n", .{ text, @tagName(tag) }); - if (tag == .OperatorName) { - result.pos.from = self.index; - continue :state .operator_name; - } - - result.tag = tag; - result.pos.to = self.index; - return result; - } - result.tag = .Invalid; - result.pos.to = self.index; - return result; - }, - } - }, - .invalid => { - result.tag = .Invalid; - }, - else => { - result.tag = .Invalid; - }, - } - - result.pos.to = self.index; - return result; - } -}; - -const Expr = enum { Op, Atom, Invalid 
}; - -const Tag = enum { Number, Variable, BinaryOperation }; - -const ExprType = enum { - Add, - Sub, - Mul, - iMul, - Pow, - Div, - Dot, - Juxt, - Comma, - With, - Assignment, - Paren, - Arguments, - FunctionCall, - FunctionName, - Object, - UnaryMinus, - UnaryPlus, - Number, - Variable, - Invalid, -}; - -pub const infix_operators = std.StaticStringMap(ExprType).initComptime(.{ - .{ "+", .Add }, - .{ "-", .Sub }, - .{ "*", .Mul }, - .{ "/", .Div }, - .{ "^", .Pow }, - .{ ".", .Dot }, - .{ "=", .Assignment }, - .{ "with", .With }, -}); - -pub const prefix_operators = std.StaticStringMap(ExprType).initComptime(.{ - .{ "+", .UnaryPlus }, - .{ "-", .UnaryMinus }, -}); - -pub fn get_infix_operator(text: []const u8, tag: TokenType) ?ExprType { - switch (tag) { - .Variable => { - return .iMul; - }, - .LParen => { - return .Juxt; - }, - else => {}, - } - - return infix_operators.get(text); -} - -pub fn get_prefix_operator(text: []const u8) ?ExprType { - return prefix_operators.get(text); -} - -pub const Expression = struct { - type: ExprType, - value: ?union(enum) { i: i64, f: f64, length: u64 }, - pos: Loc, - children: ?[*]Expression, // Might be null for literals -}; - -fn infix_binding_power(op: ?ExprType) error{InvalidOperator}!struct { i8, i8 } { - if (op == null) return error.InvalidOperator; - switch (op.?) { - .Comma => return .{ -1, -1 }, - .Add, .Sub => return .{ 3, 4 }, - .Mul, .Div => return .{ 5, 6 }, - .iMul => return .{ 5, 6 }, - .Juxt => return .{ 5, 6 }, - .Dot => return .{ 8, 7 }, - .Pow => return .{ 9, 8 }, - .With => return .{ 10, 9 }, - .Assignment => return .{ 2, 1 }, - else => return error.InvalidOperator, - } -} - -fn prefix_binding_power(op: ?ExprType) error{InvalidOperator}!i8 { - if (op == null) return error.InvalidOperator; - switch (op.?) { - .UnaryMinus, .UnaryPlus => return 6, - else => return error.InvalidOperator, - } -} - -pub const Parser = struct { - token_stream: TokenStream, - expr: [:0]const u8, - head: usize = 0, - current: _Token, - allocator: std.mem.Allocator, - - pub fn init(token_stream: TokenStream, expr: [:0]const u8, allocator: std.mem.Allocator) Parser { - return .{ .token_stream = token_stream, .expr = expr, .head = 0, .current = .{ .tag = .Eof, .pos = .{ .from = 0, .to = 0 } }, .allocator = allocator }; - } - - pub fn consume(self: *Parser) void { - if (self.head >= self.token_stream.items.len) { - self.current = .{ .tag = .Eof, .pos = .{ .from = 0, .to = 0 } }; - return; - } - self.current = self.token_stream.items[self.head]; - print("Current token: {s} text: {s}\n", .{ @tagName(self.current.tag), self.expr[self.current.pos.from..self.current.pos.to] }); - self.head += 1; - } - - pub fn peek(self: *Parser) _Token { - if (self.head >= self.token_stream.items.len) { - return .{ .tag = .Eof, .pos = .{ .from = 0, .to = 0 } }; - } - print("Peeking token: {s} text: {s}\n", .{ @tagName(self.token_stream.items[self.head].tag), self.expr[self.token_stream.items[self.head].pos.from..self.token_stream.items[self.head].pos.to] }); - return self.token_stream.items[self.head]; - } - - pub fn expect(self: *Parser, tag: TokenType, err: ParserError) ParserError!void { - // This function consumes the current token and checks if it matches the expected tag. - // Throws ParserError if the tag does not match. 
- self.consume(); - if (self.current.tag != tag) { - return err; - } - } - - pub const ParserError = error{ - UnexpectedToken, - ExpectedSomething, - UnmatchedParentheses, - EmptyParentheses, - InvalidOperator, - OutOfMemory, - Overflow, - InvalidCharacter, - }; - - pub fn parse_prefix(self: *Parser) ParserError!Expression { - const op = self.current; - if (op.tag != .Op) return ParserError.UnexpectedToken; - - const op_text: []const u8 = self.expr[op.pos.from..op.pos.to]; - const op_type: ?ExprType = get_prefix_operator(op_text); - if (op_type == null) return ParserError.InvalidOperator; - - const r_bp = try prefix_binding_power(op_type); - const expr = try self.parse(r_bp); - - const children = try self.allocator.alloc(Expression, 1); // Allocate memory for the children array - children[0] = expr; - - return Expression{ .type = op_type.?, .value = null, .pos = op.pos, .children = children.ptr }; - } - - pub fn parse_paren(self: *Parser) ParserError!Expression { - - // print("Entering paren\n", .{}); - // Paren can be for grouping (has .Comma) or simply to wrap an expression - // Lookahead for commas - var level: i32 = 0; - var commas: u32 = 0; - for (self.token_stream.items[self.head..]) |token| { - switch (token.tag) { - .LParen => { - level -= 1; - }, - .RParen => { - if (level == 0) break; - level += 1; - }, - .Comma => { - if (level == 0) commas += 1; - }, - else => {}, - } - } - - if (level != 0) return ParserError.UnmatchedParentheses; - if (commas == 0) { - var children = try self.allocator.alloc(Expression, 1); - children[0] = try self.parse(0); - return Expression{ .type = .Paren, .value = .{ .length = 1 }, .pos = self.current.pos, .children = children.ptr }; - } // If there are no commas, it's a parenthesized expression - - const len = commas + 1; - var children = try self.allocator.alloc(Expression, len); - for (0..len) |i| { - children[i] = try self.parse(0); - if (i < len - 1) try self.expect(.Comma, ParserError.UnmatchedParentheses); // Comma is expected between expressions - } - - return Expression{ .type = .Object, .value = .{ .length = len }, .pos = self.current.pos, .children = children.ptr }; - } - - pub fn parse_func(self: *Parser) ParserError!Expression { - - // Final type can be FunctionCall for f(x,y) or Juxt for \\sin x - var final_type: ExprType = undefined; - - switch (self.peek().tag) { - .LParen => { - final_type = .FunctionCall; - }, // \\sin(x) - .Eof => { - return ParserError.ExpectedSomething; - }, // \\sin - else => { - final_type = .Juxt; - }, // \\sin abc - } - - const name = Expression{ .type = .FunctionName, .value = null, .pos = self.current.pos, .children = null }; - var args = try self.parse(0); // Will return an .Object - args.type = .Arguments; - args.value = .{ .length = 1 }; - - var children = try self.allocator.alloc(Expression, 2); - children[0] = name; - children[1] = args; - - const func = Expression{ .type = .FunctionCall, .value = null, .pos = self.current.pos, .children = children.ptr }; - - print(">>>>{}\n", .{args}); - - return func; - } - - pub fn parse(self: *Parser, min_bp: i8) ParserError!Expression { - self.consume(); // Consume the first token (likely an atom, but can be an operator too) - // self.current now has that token - - var lhs: Expression = - switch (self.current.tag) { - .Integer => Expression{ .type = .Number, .value = .{ .i = try std.fmt.parseInt(i64, self.expr[self.current.pos.from..self.current.pos.to], 10) }, .pos = self.current.pos, .children = null }, - .Real => Expression{ .type = .Number, .value = .{ .f = try 
std.fmt.parseFloat(f64, self.expr[self.current.pos.from..self.current.pos.to]) }, .pos = self.current.pos, .children = null }, - .Variable => Expression{ .type = .Variable, .value = null, .pos = self.current.pos, .children = null }, - .Op => try self.parse_prefix(), - .FunctionName => try self.parse_func(), - .LParen => paren: { - const expr: Expression = try self.parse_paren(); // Parse the expression inside parentheses - try self.expect(.RParen, ParserError.UnmatchedParentheses); // Consume the ')' token - break :paren expr; - }, - - else => return ParserError.UnexpectedToken, - }; - - // print("Parsed lhs: {s} text: {s}\n", .{ @tagName(lhs.type), self.expr[lhs.pos.from..lhs.pos.to] }); - - while (true) { - const op = self.peek(); - var skip_op: bool = false; - switch (op.tag) { - .Eof => break, - .Op, .With => {}, // Allow these - .Variable => { - skip_op = true; - }, // Implicit multiplication - .LParen => { - skip_op = true; - }, // Juxtapose expression - .Comma => break, // Comma returns the current expression - .RParen => break, // Stop parsing on closing parenthesis - else => return ParserError.UnexpectedToken, - } - - // Convert TokenType to ExprType - const op_text: []const u8 = self.expr[op.pos.from..op.pos.to]; - const op_type: ?ExprType = get_infix_operator(op_text, op.tag); - const l_bp, const r_bp = try infix_binding_power(op_type); - if (l_bp < min_bp) break; - - if (!skip_op) self.consume(); // Consume the operator token - - const rhs: Expression = try self.parse(r_bp); - - // Allocate memory for the children array - const children = try self.allocator.alloc(Expression, 2); - children[0] = lhs; - children[1] = rhs; - - lhs = Expression{ .type = op_type.?, .value = null, .pos = op.pos, .children = children.ptr }; - } - return lhs; - } -}; - -pub fn main() !void { - // Initialize the parsing rules - - var token_stream: TokenStream = TokenStream.init(Allocator); - defer token_stream.deinit(); - - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); - defer arena.deinit(); - const allocator = arena.allocator(); - - const expr: [:0]const u8 = "\\sin x"; // Example expression to parse - var tokenizer = Tokenizer.init(expr); - print("-- start -- : {s}\n", .{expr}); - - while (tokenizer.next()) |token| { - if (token.tag == .Eof) { - print("--eof--\n", .{}); - break; - } - try token_stream.append(token); - tokenizer.dump(&token); - } - - var parser = Parser.init(token_stream, expr, allocator); - print("Token stream length: {d}\n", .{token_stream.items.len}); - const ast = try parser.parse(0); - print("Expr: {}\n", .{ast}); - - print("AST (Tree):\n", .{}); - printAST(&ast, 0); - - print("AST (Polish):\n", .{}); - var polish_buffer = std.ArrayList(u8).init(allocator); - defer polish_buffer.deinit(); - try polishToString(&ast, expr, &polish_buffer); - print("{s}\n", .{polish_buffer.items}); -} - -test "Testing Tokenizer" { - try testTokenize("3", &.{.Integer}); - try testTokenize(".32342", &.{.Real}); - try testTokenize("a_{1}", &.{.Variable}); - try testTokenize("\\sin", &.{.FunctionName}); - try testTokenize("sin", &.{ .Variable, .Variable, .Variable }); - try testTokenize("a_{1}+\\sin*3", &.{ .Variable, .Op, .FunctionName, .Op, .Integer }); - try testTokenize("\\sin", &.{.FunctionName}); - try testTokenize("\\cos", &.{.FunctionName}); - try testTokenize("abc_{123}", &.{ .Variable, .Variable, .Variable }); - try testTokenize("a_{1}+\\sin*3.25-2.2.3.3", &.{ .Variable, .Op, .FunctionName, .Op, .Real, .Op, .Real, .Real, .Real }); - try testTokenize("a_{abc}", 
&.{.Variable}); - try testTokenize("\\frac{a_{1}}{2}", &.{ .Fraction, .LBrace, .Variable, .RBrace, .LBrace, .Integer, .RBrace }); - try testTokenize("\\left\\{1,2,3\\right\\}", &.{ .Left, .LBrace, .Integer, .Comma, .Integer, .Comma, .Integer, .Right, .RBrace }); -} - -test "Integer tokenization" { - try testTokenize("3", &.{.Integer}); -} - -test "Real number tokenization" { - try testTokenize(".32342", &.{.Real}); -} - -test "Variable with subscript" { - try testTokenize("a_{1}", &.{.Variable}); -} - -test "LaTeX function name" { - try testTokenize("\\sin", &.{.FunctionName}); - try testTokenize("\\cos", &.{.FunctionName}); -} - -test "Regular variable tokenization" { - try testTokenize("sin", &.{ .Variable, .Variable, .Variable }); -} - -test "Complex expression with variables and functions" { - try testTokenize("a_{1}+\\sin*3", &.{ .Variable, .Op, .FunctionName, .Op, .Integer }); -} - -test "Multiple character variables" { - try testTokenize("abc_{123}", &.{ .Variable, .Variable, .Variable }); -} - -test "Complex mathematical expression" { - try testTokenize("a_{1}+\\sin*3.25-2.2.3.3", &.{ .Variable, .Op, .FunctionName, .Op, .Real, .Op, .Real, .Real, .Real }); -} - -test "Variable with text subscript" { - try testTokenize("a_{abc}", &.{.Variable}); -} - -test "LaTeX fraction command" { - try testTokenize("\\frac{a_{1}}{2}", &.{ .Fraction, .LBrace, .Variable, .RBrace, .LBrace, .Integer, .RBrace }); -} - -test "LaTeX left-right delimiters" { - try testTokenize("\\left\\{1,2,3\\right\\}", &.{ .Left, .LBrace, .Integer, .Comma, .Integer, .Comma, .Integer, .Right, .RBrace }); -} - -test "Parser Polish notation" { - try testParser("a+b", "(+ a b)"); - try testParser("a*b+c", "(+ (* a b) c)"); - try testParser("a+b*c", "(+ a (* b c))"); - try testParser("2^3", "(^ 2 3)"); - try testParser("-a", "(-u a)"); -} - -test "Parser Advanced Polish notation" { - try testParser("-x^2-y^2", "(- (-u (^ x 2)) (^ y 2))"); - try testParser("-a-b-c-d", "(- (- (- (-u a) b) c) d)"); - try testParser("abc", "(*i (*i a b) c)"); - try testParser("\\sin abc", "(call func (args (*i a b)))"); -} - -test "Parser Complex expressions" { - // Test individual parts first - try testParser("xyz", "(*i (*i x y) z)"); - try testParser("xyz^2", "(*i (*i x y) (^ z 2))"); - try testParser("-xyz^2", "(*i (*i (-u x) y) (^ z 2))"); // Unary minus binds to first variable - - // Test abc^2 part - try testParser("abc^2", "(*i (*i a b) (^ c 2))"); - - // Full complex expression: -xyz^{2}-abc^{2} - try testParser("-xyz^2-abc^2", "(- (*i (*i (-u x) y) (^ z 2)) (*i (*i a b) (^ c 2)))"); -} - -test "Parser Edge cases" { - // Operator precedence tests - try testParser("a+b*c^d", "(+ a (* b (^ c d)))"); - try testParser("a^b+c*d", "(+ (^ a b) (* c d))"); - try testParser("a*b^c+d", "(+ (* a (^ b c)) d)"); - - // Multiple unary operators - try testParser("--a", "(-u (-u a))"); - try testParser("-a+b", "(+ (-u a) b)"); - try testParser("a+-b", "(+ a (-u b))"); - - // Mixed implicit and explicit multiplication - try testParser("2x", "(*i 2 x)"); // Number followed by variable works - try testParser("2*x", "(* 2 x)"); // Explicit multiplication -} - -fn testTokenize(source: [:0]const u8, expected_token_tags: []const TokenType) !void { - var tokenizer = Tokenizer.init(source); - for (expected_token_tags) |expected_token_tag| { - const token = tokenizer.next().?; // Unwrap the optional - tokenizer.dump(&token); - try std.testing.expectEqual(expected_token_tag, token.tag); - } - - const last_token = tokenizer.next().?; // Unwrap the optional - try 
std.testing.expectEqual(TokenType.Eof, last_token.tag); - try std.testing.expectEqual(source.len, last_token.pos.from); - try std.testing.expectEqual(source.len, last_token.pos.to); - - // Print success - print("Success: {s}\n", .{source}); -} - -fn printAST(expr: *const Expression, _: u32) void { - printASTHelper(expr, "", true); -} - -fn testParser(source: [:0]const u8, expected_polish: []const u8) !void { - var token_stream: TokenStream = TokenStream.init(Allocator); - defer token_stream.deinit(); - - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); - defer arena.deinit(); - const allocator = arena.allocator(); - - var tokenizer = Tokenizer.init(source); - - while (tokenizer.next()) |token| { - if (token.tag == .Eof) { - break; - } - try token_stream.append(token); - tokenizer.dump(&token); - } - - var parser = Parser.init(token_stream, source, allocator); - const ast = try parser.parse(0); - - // Convert Polish notation to string - var polish_buffer = std.ArrayList(u8).init(allocator); - defer polish_buffer.deinit(); - - try polishToString(&ast, source, &polish_buffer); - - // Trim trailing whitespace - const actual_polish = std.mem.trim(u8, polish_buffer.items, " "); - - print("Expected: '{s}'\n", .{expected_polish}); - print("Actual: '{s}'\n", .{actual_polish}); - - try std.testing.expectEqualStrings(expected_polish, actual_polish); - - // Print success - print("Success: {s}\n", .{source}); -} - -pub fn polishToString(expr: *const Expression, source: []const u8, buffer: *std.ArrayList(u8)) !void { - const writer = buffer.writer(); - switch (expr.type) { - .Variable => { - try writer.print(" {s}", .{source[expr.pos.from..expr.pos.to]}); - }, - .FunctionName => { - try writer.print(" func", .{}); - }, - .Number => { - if (expr.value) |val| { - switch (val) { - .i => |i| try writer.print(" {d}", .{i}), - .f => |f| try writer.print(" {d}", .{f}), - else => try writer.print(" ?", .{}), - } - } else { - try writer.print("? 
", .{}); - } - }, - .Add => { - try writer.print(" (+", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - try writer.print(")", .{}); - }, - .Sub => { - try writer.print(" (-", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - try writer.print(")", .{}); - }, - .Mul => { - try writer.print(" (*", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - try writer.print(")", .{}); - }, - .iMul => { - try writer.print(" (*i", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - try writer.print(")", .{}); - }, - .Div => { - try writer.print(" (/", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - try writer.print(")", .{}); - }, - .Pow => { - try writer.print(" (^", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - try writer.print(")", .{}); - }, - .Dot => { - try writer.print(" (.", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - try writer.print(")", .{}); - }, - .Assignment => { - try writer.print(" (=", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - try writer.print(")", .{}); - }, - .With => { - try writer.print(" (with ", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - try writer.print(")", .{}); - }, - .UnaryMinus => { - try writer.print(" (-u", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - } - try writer.print(")", .{}); - }, - .UnaryPlus => { - try writer.print(" (+", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - } - try writer.print(")", .{}); - }, - .Object => { - try writer.print(" (obj", .{}); - if (expr.children) |children| { - const length = expr.value.?.length; - for (0..length) |i| { - try polishToString(&children[i], source, buffer); - } - } - try writer.print(")", .{}); - }, - .Arguments => { - try writer.print(" (args", .{}); - if (expr.children) |children| { - const length = expr.value.?.length; - for (0..length) |i| { - try polishToString(&children[i], source, buffer); - } - } - try writer.print(")", .{}); - }, - .Paren => { - try writer.print(" (paren", .{}); - if (expr.children) |children| { - const length = expr.value.?.length; - for (0..length) |i| { - try polishToString(&children[i], source, buffer); - } - } - try writer.print(")", .{}); - }, - .Comma => { - try writer.print(", ", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - }, - .FunctionCall => { - try writer.print(" (call", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - try writer.print(")", .{}); - }, - .Juxt => { - try 
writer.print(" (juxt", .{}); - if (expr.children) |children| { - try polishToString(&children[0], source, buffer); - try polishToString(&children[1], source, buffer); - } - try writer.print(")", .{}); - }, - .Invalid => { - try writer.print("INVALID ", .{}); - }, - } -} - -fn printASTHelper(expr: *const Expression, prefix: []const u8, is_last: bool) void { - // Print current node with appropriate connector - const connector = if (is_last) "+-- " else "|-- "; - - switch (expr.type) { - .Variable => { - print("{s}{s}Var\n", .{ prefix, connector }); - }, - .FunctionName => { - print("{s}{s}FuncName\n", .{ prefix, connector }); - }, - .Number => { - if (expr.value) |val| { - switch (val) { - .i => |i| print("{s}{s}Number: {d}\n", .{ prefix, connector, i }), - .f => |f| print("{s}{s}Number: {d}\n", .{ prefix, connector, f }), - else => unreachable, - } - } else { - print("{s}{s}Number: \n", .{ prefix, connector }); - } - }, - .Add, .Sub, .Mul, .Div, .Dot, .Pow, .Juxt, .Comma, .With, .Assignment => { - print("{s}{s}{s}\n", .{ prefix, connector, @tagName(expr.type) }); - if (expr.children) |children| { - // Create new prefix: extend current with either spaces or vertical bar - var new_prefix: [256]u8 = undefined; - const extension = if (is_last) " " else "| "; - const new_len = @min(prefix.len + 4, 252); // Leave room for extension - @memcpy(new_prefix[0..prefix.len], prefix); - @memcpy(new_prefix[prefix.len..new_len], extension); - - printASTHelper(&children[0], new_prefix[0..new_len], false); - printASTHelper(&children[1], new_prefix[0..new_len], true); - } - }, - .Object, .Arguments, .Paren => { - print("{s}{s}{s}\n", .{ prefix, connector, @tagName(expr.type) }); - if (expr.children) |children| { - var new_prefix: [256]u8 = undefined; - const extension = if (is_last) " " else "| "; - const new_len = @min(prefix.len + 4, 252); - @memcpy(new_prefix[0..prefix.len], prefix); - @memcpy(new_prefix[prefix.len..new_len], extension); - - const length = expr.value.?.length; - for (0..length) |i| { - const is_last_child = (i == length - 1); - printASTHelper(&children[i], new_prefix[0..new_len], is_last_child); - } - } - }, - .iMul => { - print("{s}{s}Mul\n", .{ prefix, connector }); - if (expr.children) |children| { - var new_prefix: [256]u8 = undefined; - const extension = if (is_last) " " else "| "; - const new_len = @min(prefix.len + 4, 252); - @memcpy(new_prefix[0..prefix.len], prefix); - @memcpy(new_prefix[prefix.len..new_len], extension); - - printASTHelper(&children[0], new_prefix[0..new_len], false); - printASTHelper(&children[1], new_prefix[0..new_len], true); - } - }, - .FunctionCall => { - print("{s}{s}Call\n", .{ prefix, connector }); - if (expr.children) |children| { - var new_prefix: [256]u8 = undefined; - const extension = if (is_last) " " else "| "; - const new_len = @min(prefix.len + 4, 252); - @memcpy(new_prefix[0..prefix.len], prefix); - @memcpy(new_prefix[prefix.len..new_len], extension); - - printASTHelper(&children[0], new_prefix[0..new_len], false); - printASTHelper(&children[1], new_prefix[0..new_len], true); - } - }, - .UnaryMinus, .UnaryPlus => { - print("{s}{s}{s}\n", .{ prefix, connector, @tagName(expr.type) }); - if (expr.children) |children| { - var new_prefix: [256]u8 = undefined; - const extension = if (is_last) " " else "| "; - const new_len = @min(prefix.len + 4, 252); - @memcpy(new_prefix[0..prefix.len], prefix); - @memcpy(new_prefix[prefix.len..new_len], extension); - - printASTHelper(&children[0], new_prefix[0..new_len], true); - } - }, - .Invalid => { - 
print("{s}{s}Invalid Expression\n", .{ prefix, connector }); - }, - } -} diff --git a/src/arg_parse.zig b/src/arg_parse.zig new file mode 100644 index 0000000..5c53581 --- /dev/null +++ b/src/arg_parse.zig @@ -0,0 +1,71 @@ +const std = @import("std"); + +const fatal = @import("fatal.zig"); + +pub const OutputFormat = enum { + ast, + polish, +}; + +pub const ArgParser = struct { + input_file_path: ?[]const u8, + output_file_path: ?[]const u8, + output_format: OutputFormat, + + pub fn parse(args: []const []const u8) ArgParser { + var input_file_path: ?[]const u8 = null; + var output_file_path: ?[]const u8 = null; + var output_format: OutputFormat = .ast; + + const eql = std.mem.eql; + const startsWith = std.mem.startsWith; + + var idx: usize = 0; + while (idx < args.len) : (idx += 1) { + const arg = args[idx]; + if (eql(u8, arg, "-h") or eql(u8, arg, "--help")) { + fatal.help(); + } else if (eql(u8, arg, "-i") or eql(u8, arg, "--input")) { + idx += 1; + if (idx >= args.len) fatal.msg( + "error: missing argument to '{s}'", + .{arg}, + ); + input_file_path = args[idx]; + } else if (startsWith(u8, arg, "--input=")) { + input_file_path = arg["--input=".len..]; + } else if (eql(u8, arg, "-o") or eql(u8, arg, "--output")) { + idx += 1; + if (idx >= args.len) fatal.msg( + "error: missing argument to '{s}'", + .{arg}, + ); + output_file_path = args[idx]; + } else if (startsWith(u8, arg, "--output=")) { + output_file_path = arg["--output=".len..]; + } else if (eql(u8, arg, "-f") or eql(u8, arg, "--format")) { + idx += 1; + if (idx >= args.len) fatal.msg( + "error: missing argument to '{s}'", + .{arg}, + ); + const format_arg = args[idx]; + output_format = std.meta.stringToEnum(OutputFormat, format_arg) orelse { + fatal.msg("error: unexpected format '{s}'\n", .{format_arg}); + }; + } else if (startsWith(u8, arg, "--format=")) { + output_format = std.meta.stringToEnum(OutputFormat, arg["--format=".len..]) orelse { + fatal.msg("error: unexpected format '{s}'\n", .{arg["--format=".len..]}); + }; + } else { + fatal.msg("error: unexpected cli argument '{s}'\n", .{arg}); + } + } + + return .{ + .input_file_path = input_file_path, + .output_file_path = output_file_path, + .output_format = output_format, + }; + } +}; diff --git a/src/build/camera.zig b/src/build/camera.zig new file mode 100644 index 0000000..1325235 --- /dev/null +++ b/src/build/camera.zig @@ -0,0 +1,24 @@ +//! Runs a program that might or might not fail and appends to stdout what +//! the actual exit code was, always returning a successful exit code under +//! normal conditions (regardless of the child's exit code). +//! +//! This is useful for snapshot tests where some of which are meant to be +//! successes, while others are meant to be failures. 
+const std = @import("std"); + +pub fn main() !void { + const gpa = std.heap.smp_allocator; + const args = try std.process.argsAlloc(gpa); + + var cmd = std.process.Child.init(args[1..], gpa); + const term = try cmd.spawnAndWait(); + + switch (term) { + .Exited => |code| { + const fmt = "\n\n ----- EXIT CODE: {} -----\n"; + std.debug.print(fmt, .{code}); + // try std.io.getStdOut().writer().print(fmt, .{code}); + }, + else => std.debug.panic("child process crashed: {}\n", .{term}), + } +} diff --git a/src/fatal.zig b/src/fatal.zig new file mode 100644 index 0000000..29e2629 --- /dev/null +++ b/src/fatal.zig @@ -0,0 +1,38 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +pub fn msg(comptime fmt: []const u8, args: anytype) noreturn { + std.debug.print(fmt, args); + if (builtin.mode == .Debug) std.debug.panic("\n\n(zemml debug stack trace)\n", .{}); + std.process.exit(1); +} + +pub fn oom() noreturn { + msg("oom\n", .{}); +} + +pub fn dir(path: []const u8, err: anyerror) noreturn { + msg("error accessing dir '{s}': {s}\n", .{ + path, @errorName(err), + }); +} + +pub fn file(path: []const u8, err: anyerror) noreturn { + msg("error accessing file '{s}': {s}\n", .{ + path, @errorName(err), + }); +} + +pub fn help() noreturn { + std.debug.print( + \\Usage: zig-tex [OPTIONS] + \\ + \\ Options: + \\ --input, -i Input file path (default: stdin) + \\ --output, -o Output file path (default: stdout) + \\ --format, -f Output format [ast, polish] (default: ast) + \\ --help, -h Print this help message + \\ + , .{}); + std.process.exit(0); +} diff --git a/src/main.zig b/src/main.zig new file mode 100644 index 0000000..7cc11a5 --- /dev/null +++ b/src/main.zig @@ -0,0 +1,79 @@ +const std = @import("std"); + +const tex = @import("tex"); + +const arg_parse = @import("arg_parse.zig"); + +pub fn main() !void { + const gpa = std.heap.smp_allocator; + + var arena_impl: std.heap.ArenaAllocator = .init(gpa); + defer arena_impl.deinit(); + const arena = arena_impl.allocator(); + + var ip_buffer: [4096]u8 = undefined; + var op_buffer: [4096]u8 = undefined; + + var token_stream: tex.TokenStream = .empty; + defer token_stream.deinit(gpa); + + const args = try std.process.argsAlloc(gpa); + defer std.process.argsFree(gpa, args); + + const arg_parser = arg_parse.ArgParser.parse(args[1..]); + + std.log.info("Output Format: {s}\n", .{@tagName(arg_parser.output_format)}); + + var expr: [:0]u8 = undefined; + if (arg_parser.input_file_path) |input_file_path| { + const file = try std.fs.cwd().openFile(input_file_path, .{}); + defer file.close(); + var fr = file.reader(&ip_buffer); + var aw: std.Io.Writer.Allocating = .init(gpa); + _ = try fr.interface.streamRemaining(&aw.writer); + expr = try aw.toOwnedSliceSentinel(0); + } else { + var fr = std.fs.File.stdin().reader(&ip_buffer); + var aw: std.Io.Writer.Allocating = .init(gpa); + _ = try fr.interface.streamRemaining(&aw.writer); + expr = try aw.toOwnedSliceSentinel(0); + } + defer gpa.free(expr); + + var tokenizer = tex.Tokenizer.init(expr); + + while (tokenizer.next()) |token| { + if (token.tag == .Eof) { + std.log.debug("--eof--", .{}); + break; + } + try token_stream.append(gpa, token); + tokenizer.dump(&token); + } + + var parser = tex.Parser.init(token_stream, expr, arena); + std.log.info("Token stream length: {d}", .{token_stream.items.len}); + const ast = try parser.parse(0); + std.log.info("input: {}", .{ast}); + + const render: tex.RenderFunctionType = switch (arg_parser.output_format) { + .ast => tex.renderAST, + .polish => tex.renderPolish, + }; 
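+    // Both renderers share the RenderFunctionType signature declared in
+    // src/root.zig (writer, parsed expression, source slice), so the output
+    // handling below is identical for either format.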
+ + if (arg_parser.output_file_path) |output_file_path| { + var file = try std.fs.cwd().createFile(output_file_path, .{}); + defer file.close(); + const writer_impl = file.writer(&op_buffer); + var writer = writer_impl.interface; + + try render(&writer, &ast, expr); + try writer.flush(); + } else { + const stdout_writer = std.fs.File.stdout().writer(&op_buffer); + var stdout = stdout_writer.interface; + + try render(&stdout, &ast, expr); + try stdout.flush(); + } +} diff --git a/src/parse.zig b/src/parse.zig new file mode 100644 index 0000000..8f0339c --- /dev/null +++ b/src/parse.zig @@ -0,0 +1,649 @@ +const std = @import("std"); +const expect = std.testing.expect; +const ArrayList = std.ArrayList; + +const tok = @import("tokenize.zig"); + +const input = enum { Op, Atom, Invalid }; + +const Tag = enum { Number, Variable, BinaryOperation }; + +const ExprType = enum { + Add, + Sub, + Mul, + iMul, + Pow, + Div, + Dot, + Juxt, + Comma, + With, + Assignment, + Paren, + Arguments, + FunctionCall, + FunctionName, + Object, + UnaryMinus, + UnaryPlus, + Number, + Variable, + Invalid, +}; + +pub const infix_operators = std.StaticStringMap(ExprType).initComptime(.{ + .{ "+", .Add }, + .{ "-", .Sub }, + .{ "*", .Mul }, + .{ "/", .Div }, + .{ "^", .Pow }, + .{ ".", .Dot }, + .{ "=", .Assignment }, + .{ "with", .With }, +}); + +pub const prefix_operators = std.StaticStringMap(ExprType).initComptime(.{ + .{ "+", .UnaryPlus }, + .{ "-", .UnaryMinus }, +}); + +pub fn get_infix_operator(text: []const u8, tag: tok.TokenType) ?ExprType { + switch (tag) { + .Variable => { + return .iMul; + }, + .LParen => { + return .Juxt; + }, + else => {}, + } + + return infix_operators.get(text); +} + +pub fn get_prefix_operator(text: []const u8) ?ExprType { + return prefix_operators.get(text); +} + +pub const Expression = struct { + type: ExprType, + value: ?union(enum) { i: i64, f: f64, length: usize }, + pos: tok.Loc, + children: ?[*]Expression, // Might be null for literals +}; + +fn infix_binding_power(op: ?ExprType) error{InvalidOperator}!struct { i8, i8 } { + if (op == null) return error.InvalidOperator; + switch (op.?) { + .Comma => return .{ -1, -1 }, + .Add, .Sub => return .{ 3, 4 }, + .Mul, .Div => return .{ 5, 6 }, + .iMul => return .{ 5, 6 }, + .Juxt => return .{ 5, 6 }, + .Dot => return .{ 8, 7 }, + .Pow => return .{ 9, 8 }, + .With => return .{ 10, 9 }, + .Assignment => return .{ 2, 1 }, + else => return error.InvalidOperator, + } +} + +fn prefix_binding_power(op: ?ExprType) error{InvalidOperator}!i8 { + if (op == null) return error.InvalidOperator; + switch (op.?) 
{ + .UnaryMinus, .UnaryPlus => return 6, + else => return error.InvalidOperator, + } +} + +pub const Parser = struct { + token_stream: tok.TokenStream, + input: [:0]const u8, + head: usize = 0, + current: tok._Token, + allocator: std.mem.Allocator, + + pub fn init(token_stream: tok.TokenStream, expr: [:0]const u8, allocator: std.mem.Allocator) Parser { + return .{ .token_stream = token_stream, .input = expr, .head = 0, .current = .{ .tag = .Eof, .pos = .{ .from = 0, .to = 0 } }, .allocator = allocator }; + } + + pub fn consume(self: *Parser) void { + if (self.head >= self.token_stream.items.len) { + self.current = .{ .tag = .Eof, .pos = .{ .from = 0, .to = 0 } }; + return; + } + self.current = self.token_stream.items[self.head]; + std.log.debug("Current token: {s} text: {s}", .{ @tagName(self.current.tag), self.input[self.current.pos.from..self.current.pos.to] }); + self.head += 1; + } + + pub fn peek(self: *Parser) tok._Token { + if (self.head >= self.token_stream.items.len) { + return .{ .tag = .Eof, .pos = .{ .from = 0, .to = 0 } }; + } + std.log.debug("Peeking token: {s} text: {s}", .{ @tagName(self.token_stream.items[self.head].tag), self.input[self.token_stream.items[self.head].pos.from..self.token_stream.items[self.head].pos.to] }); + return self.token_stream.items[self.head]; + } + + pub fn expect(self: *Parser, tag: tok.TokenType, err: ParserError) ParserError!void { + // This function consumes the current token and checks if it matches the expected tag. + // Throws ParserError if the tag does not match. + self.consume(); + if (self.current.tag != tag) { + return err; + } + } + + pub const ParserError = error{ + UnexpectedToken, + ExpectedSomething, + UnmatchedParentheses, + EmptyParentheses, + InvalidOperator, + OutOfMemory, + Overflow, + InvalidCharacter, + }; + + pub fn parse_prefix(self: *Parser) ParserError!Expression { + const op = self.current; + if (op.tag != .Op) return ParserError.UnexpectedToken; + + const op_text: []const u8 = self.input[op.pos.from..op.pos.to]; + const op_type: ?ExprType = get_prefix_operator(op_text); + if (op_type == null) return ParserError.InvalidOperator; + + const r_bp = try prefix_binding_power(op_type); + const expr = try self.parse(r_bp); + + const children = try self.allocator.alloc(Expression, 1); // Allocate memory for the children array + children[0] = expr; + + return Expression{ .type = op_type.?, .value = null, .pos = op.pos, .children = children.ptr }; + } + + pub fn parse_paren(self: *Parser) ParserError!Expression { + + // writer.print("Entering paren", .{}); + // Paren can be for grouping (has .Comma) or simply to wrap an Expression + // Lookahead for commas + var level: i32 = 0; + var commas: u32 = 0; + for (self.token_stream.items[self.head..]) |token| { + switch (token.tag) { + .LParen => { + level -= 1; + }, + .RParen => { + if (level == 0) break; + level += 1; + }, + .Comma => { + if (level == 0) commas += 1; + }, + else => {}, + } + } + + if (level != 0) return ParserError.UnmatchedParentheses; + if (commas == 0) { + var children = try self.allocator.alloc(Expression, 1); + children[0] = try self.parse(0); + return Expression{ .type = .Paren, .value = .{ .length = 1 }, .pos = self.current.pos, .children = children.ptr }; + } // If there are no commas, it's a parenthesized Expression + + const len = commas + 1; + var children = try self.allocator.alloc(Expression, len); + for (0..len) |i| { + children[i] = try self.parse(0); + if (i < len - 1) try self.expect(.Comma, ParserError.UnmatchedParentheses); // Comma is expected 
between Expressions + } + + return Expression{ .type = .Object, .value = .{ .length = len }, .pos = self.current.pos, .children = children.ptr }; + } + + pub fn parse_func(self: *Parser) ParserError!Expression { + + // Final type can be FunctionCall for f(x,y) or Juxt for \\sin x + var final_type: ExprType = undefined; + + switch (self.peek().tag) { + .LParen => { + final_type = .FunctionCall; + }, // \\sin(x) + .Eof => { + return ParserError.ExpectedSomething; + }, // \\sin + else => { + final_type = .Juxt; + }, // \\sin abc + } + + const name = Expression{ .type = .FunctionName, .value = null, .pos = self.current.pos, .children = null }; + var args = try self.parse(0); // Will return an .Object + args.type = .Arguments; + args.value = .{ .length = 1 }; + + var children = try self.allocator.alloc(Expression, 2); + children[0] = name; + children[1] = args; + + const func = Expression{ .type = .FunctionCall, .value = null, .pos = self.current.pos, .children = children.ptr }; + + std.log.debug(">>>>{}", .{args}); + + return func; + } + + pub fn parse(self: *Parser, min_bp: i8) ParserError!Expression { + self.consume(); // Consume the first token (likely an atom, but can be an operator too) + // self.current now has that token + + var lhs: Expression = + switch (self.current.tag) { + .Integer => Expression{ .type = .Number, .value = .{ .i = try std.fmt.parseInt(i64, self.input[self.current.pos.from..self.current.pos.to], 10) }, .pos = self.current.pos, .children = null }, + .Real => Expression{ .type = .Number, .value = .{ .f = try std.fmt.parseFloat(f64, self.input[self.current.pos.from..self.current.pos.to]) }, .pos = self.current.pos, .children = null }, + .Variable => Expression{ .type = .Variable, .value = null, .pos = self.current.pos, .children = null }, + .Op => try self.parse_prefix(), + .FunctionName => try self.parse_func(), + .LParen => paren: { + const expr: Expression = try self.parse_paren(); // Parse the Expression inside parentheses + try self.expect(.RParen, ParserError.UnmatchedParentheses); // Consume the ')' token + break :paren expr; + }, + + else => return ParserError.UnexpectedToken, + }; + + // writer.print("Parsed lhs: {s} text: {s}", .{ @tagName(lhs.type), self.input[lhs.pos.from..lhs.pos.to] }); + + while (true) { + const op = self.peek(); + var skip_op: bool = false; + switch (op.tag) { + .Eof => break, + .Op, .With => {}, // Allow these + .Variable => { + skip_op = true; + }, // Implicit multiplication + .LParen => { + skip_op = true; + }, // Juxtapose Expression + .Comma => break, // Comma returns the current Expression + .RParen => break, // Stop parsing on closing parenthesis + else => return ParserError.UnexpectedToken, + } + + // Convert TokenType to inputType + const op_text: []const u8 = self.input[op.pos.from..op.pos.to]; + const op_type: ?ExprType = get_infix_operator(op_text, op.tag); + const l_bp, const r_bp = try infix_binding_power(op_type); + if (l_bp < min_bp) break; + + if (!skip_op) self.consume(); // Consume the operator token + + const rhs: Expression = try self.parse(r_bp); + + // Allocate memory for the children array + const children = try self.allocator.alloc(Expression, 2); + children[0] = lhs; + children[1] = rhs; + + lhs = Expression{ .type = op_type.?, .value = null, .pos = op.pos, .children = children.ptr }; + } + return lhs; + } +}; + +test "Parser Polish notation" { + const alloc = std.heap.smp_allocator; + try testParser(alloc, "a+b", "(+ a b)"); + try testParser(alloc, "a*b+c", "(+ (* a b) c)"); + try testParser(alloc, "a+b*c", "(+ 
a (* b c))"); + try testParser(alloc, "2^3", "(^ 2 3)"); + try testParser(alloc, "-a", "(-u a)"); +} + +test "Parser Advanced Polish notation" { + const alloc = std.heap.smp_allocator; + try testParser(alloc, "-x^2-y^2", "(- (-u (^ x 2)) (^ y 2))"); + try testParser(alloc, "-a-b-c-d", "(- (- (- (-u a) b) c) d)"); + try testParser(alloc, "abc", "(*i (*i a b) c)"); + try testParser(alloc, "\\sin abc", "(call func (args (*i a b)))"); +} + +test "Parser Complex Expressions" { + const alloc = std.heap.smp_allocator; + // Test individual parts first + try testParser(alloc, "xyz", "(*i (*i x y) z)"); + try testParser(alloc, "xyz^2", "(*i (*i x y) (^ z 2))"); + try testParser(alloc, "-xyz^2", "(*i (*i (-u x) y) (^ z 2))"); // Unary minus binds to first variable + + // Test abc^2 part + try testParser(alloc, "abc^2", "(*i (*i a b) (^ c 2))"); + + // Full complex Expression: -xyz^{2}-abc^{2} + try testParser(alloc, "-xyz^2-abc^2", "(- (*i (*i (-u x) y) (^ z 2)) (*i (*i a b) (^ c 2)))"); +} + +test "Parser Edge cases" { + const alloc = std.heap.smp_allocator; + // Operator precedence tests + try testParser(alloc, "a+b*c^d", "(+ a (* b (^ c d)))"); + try testParser(alloc, "a^b+c*d", "(+ (^ a b) (* c d))"); + try testParser(alloc, "a*b^c+d", "(+ (* a (^ b c)) d)"); + + // Multiple unary operators + try testParser(alloc, "--a", "(-u (-u a))"); + try testParser(alloc, "-a+b", "(+ (-u a) b)"); + try testParser(alloc, "a+-b", "(+ a (-u b))"); + + // Mixed implicit and explicit multiplication + try testParser(alloc, "2x", "(*i 2 x)"); // Number followed by variable works + try testParser(alloc, "2*x", "(* 2 x)"); // Explicit multiplication +} + +pub fn renderAST(writer: *std.Io.Writer, expr: *const Expression, source: []const u8) std.Io.Writer.Error!void { + _ = source; + try renderASTHelper(writer, expr, "", true); +} + +fn testParser(gpa: std.mem.Allocator, source: [:0]const u8, expected_polish: []const u8) !void { + var token_stream: tok.TokenStream = .empty; + defer token_stream.deinit(gpa); + + var arena_impl = std.heap.ArenaAllocator.init(gpa); + defer arena_impl.deinit(); + const arena = arena_impl.allocator(); + + var tokenizer = tok.Tokenizer.init(source); + + while (tokenizer.next()) |token| { + if (token.tag == .Eof) { + break; + } + try token_stream.append(arena, token); + tokenizer.dump(&token); + } + + var parser = Parser.init(token_stream, source, arena); + const ast = try parser.parse(0); + + // Convert Polish notation to string + var polish_writer: std.Io.Writer.Allocating = .init(arena); + defer polish_writer.deinit(); + + try renderPolish(&polish_writer.writer, &ast, source); + + // Trim trailing whitespace + const actual_polish = std.mem.trim(u8, polish_writer.written(), " "); + + std.log.debug("Expected: '{s}'", .{expected_polish}); + std.log.debug("Actual: '{s}'", .{actual_polish}); + + try std.testing.expectEqualStrings(expected_polish, actual_polish); + + // Print success + std.log.debug("Success: {s}", .{source}); +} + +pub fn renderPolish(writer: *std.Io.Writer, expr: *const Expression, source: []const u8) std.Io.Writer.Error!void { + switch (expr.type) { + .Variable => { + try writer.print(" {s}", .{source[expr.pos.from..expr.pos.to]}); + }, + .FunctionName => { + try writer.print(" func", .{}); + }, + .Number => { + if (expr.value) |val| { + switch (val) { + .i => |i| try writer.print(" {d}", .{i}), + .f => |f| try writer.print(" {d}", .{f}), + else => try writer.print(" ?", .{}), + } + } else { + try writer.print("? 
", .{}); + } + }, + .Add => { + try writer.print(" (+", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Sub => { + try writer.print(" (-", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Mul => { + try writer.print(" (*", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .iMul => { + try writer.print(" (*i", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Div => { + try writer.print(" (/", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Pow => { + try writer.print(" (^", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Dot => { + try writer.print(" (.", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Assignment => { + try writer.print(" (=", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .With => { + try writer.print(" (with ", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .UnaryMinus => { + try writer.print(" (-u", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + } + try writer.print(")", .{}); + }, + .UnaryPlus => { + try writer.print(" (+", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + } + try writer.print(")", .{}); + }, + .Object => { + try writer.print(" (obj", .{}); + if (expr.children) |children| { + const length = expr.value.?.length; + for (0..length) |i| { + try renderPolish(writer, &children[i], source); + } + } + try writer.print(")", .{}); + }, + .Arguments => { + try writer.print(" (args", .{}); + if (expr.children) |children| { + const length = expr.value.?.length; + for (0..length) |i| { + try renderPolish(writer, &children[i], source); + } + } + try writer.print(")", .{}); + }, + .Paren => { + try writer.print(" (paren", .{}); + if (expr.children) |children| { + const length = expr.value.?.length; + for (0..length) |i| { + try renderPolish(writer, &children[i], source); + } + } + try writer.print(")", .{}); + }, + .Comma => { + try writer.print(", ", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + }, + .FunctionCall => { + try writer.print(" (call", .{}); + if (expr.children) |children| { + try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Juxt => { + try writer.print(" (juxt", .{}); + if (expr.children) |children| { + 
try renderPolish(writer, &children[0], source); + try renderPolish(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Invalid => { + try writer.print("INVALID ", .{}); + }, + } +} + +fn renderASTHelper(writer: *std.Io.Writer, expr: *const Expression, prefix: []const u8, is_last: bool) std.Io.Writer.Error!void { + // Print current node with appropriate connector + const connector = if (is_last) "+-- " else "|-- "; + + switch (expr.type) { + .Variable => { + try writer.print("{s}{s}Var\n", .{ prefix, connector }); + }, + .FunctionName => { + try writer.print("{s}{s}FuncName\n", .{ prefix, connector }); + }, + .Number => { + if (expr.value) |val| { + switch (val) { + .i => |i| try writer.print("{s}{s}Number: {d}\n", .{ prefix, connector, i }), + .f => |f| try writer.print("{s}{s}Number: {d}\n", .{ prefix, connector, f }), + else => unreachable, + } + } else { + try writer.print("{s}{s}Number: \n", .{ prefix, connector }); + } + }, + .Add, .Sub, .Mul, .Div, .Dot, .Pow, .Juxt, .Comma, .With, .Assignment => { + try writer.print("{s}{s}{s}\n", .{ prefix, connector, @tagName(expr.type) }); + if (expr.children) |children| { + // Create new prefix: extend current with either spaces or vertical bar + var new_prefix: [256]u8 = undefined; + const extension = if (is_last) " " else "| "; + const new_len = @min(prefix.len + 4, 252); // Leave room for extension + @memcpy(new_prefix[0..prefix.len], prefix); + @memcpy(new_prefix[prefix.len..new_len], extension); + + try renderASTHelper(writer, &children[0], new_prefix[0..new_len], false); + try renderASTHelper(writer, &children[1], new_prefix[0..new_len], true); + } + }, + .Object, .Arguments, .Paren => { + try writer.print("{s}{s}{s}\n", .{ prefix, connector, @tagName(expr.type) }); + if (expr.children) |children| { + var new_prefix: [256]u8 = undefined; + const extension = if (is_last) " " else "| "; + const new_len = @min(prefix.len + 4, 252); + @memcpy(new_prefix[0..prefix.len], prefix); + @memcpy(new_prefix[prefix.len..new_len], extension); + + const length = expr.value.?.length; + for (0..length) |i| { + const is_last_child = (i == length - 1); + try renderASTHelper(writer, &children[i], new_prefix[0..new_len], is_last_child); + } + } + }, + .iMul => { + try writer.print("{s}{s}Mul\n", .{ prefix, connector }); + if (expr.children) |children| { + var new_prefix: [256]u8 = undefined; + const extension = if (is_last) " " else "| "; + const new_len = @min(prefix.len + 4, 252); + @memcpy(new_prefix[0..prefix.len], prefix); + @memcpy(new_prefix[prefix.len..new_len], extension); + + try renderASTHelper(writer, &children[0], new_prefix[0..new_len], false); + try renderASTHelper(writer, &children[1], new_prefix[0..new_len], true); + } + }, + .FunctionCall => { + try writer.print("{s}{s}Call\n", .{ prefix, connector }); + if (expr.children) |children| { + var new_prefix: [256]u8 = undefined; + const extension = if (is_last) " " else "| "; + const new_len = @min(prefix.len + 4, 252); + @memcpy(new_prefix[0..prefix.len], prefix); + @memcpy(new_prefix[prefix.len..new_len], extension); + + try renderASTHelper(writer, &children[0], new_prefix[0..new_len], false); + try renderASTHelper(writer, &children[1], new_prefix[0..new_len], true); + } + }, + .UnaryMinus, .UnaryPlus => { + try writer.print("{s}{s}{s}\n", .{ prefix, connector, @tagName(expr.type) }); + if (expr.children) |children| { + var new_prefix: [256]u8 = undefined; + const extension = if (is_last) " " else "| "; + const new_len = @min(prefix.len + 4, 252); + 
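+ // Extend the tree-drawing prefix for the unary node's single child: copy the parent prefix, append either blank space or a "|" guide, and clamp new_len so the copy stays inside the fixed 256-byte buffer.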
@memcpy(new_prefix[0..prefix.len], prefix); + @memcpy(new_prefix[prefix.len..new_len], extension); + + try renderASTHelper(writer, &children[0], new_prefix[0..new_len], true); + } + }, + .Invalid => { + try writer.print("{s}{s}Invalid Expression\n", .{ prefix, connector }); + }, + } +} diff --git a/src/root.zig b/src/root.zig new file mode 100644 index 0000000..210ff19 --- /dev/null +++ b/src/root.zig @@ -0,0 +1,12 @@ +const Io = @import("std").Io; + +pub const parse = @import("parse.zig"); +pub const Expression = parse.Expression; +pub const Parser = parse.Parser; +pub const renderAST = parse.renderAST; +pub const renderPolish = parse.renderPolish; +pub const tokenize = @import("tokenize.zig"); +pub const Tokenizer = tokenize.Tokenizer; +pub const TokenStream = tokenize.TokenStream; + +pub const RenderFunctionType = *const fn (*Io.Writer, *const Expression, []const u8) Io.Writer.Error!void; diff --git a/src/tokenize.zig b/src/tokenize.zig new file mode 100644 index 0000000..f37c79c --- /dev/null +++ b/src/tokenize.zig @@ -0,0 +1,393 @@ +const std = @import("std"); +const expect = std.testing.expect; +const ArrayList = std.ArrayList; + +// Lexer Tokens +pub const TokenType = enum { + Variable, + Constant, + Number, + Integer, + Real, + ImaginaryUnit, + Plus, + Minus, + Caret, + Asterisk, + Identifier, + BinaryOp, + Op, + latex_command, + Slash, + LParen, + RParen, + LBrak, + RBrak, + LBrace, + RBrace, + With, + For, + OperatorName, + Color, + Left, + Right, + VBar, + LVBar, + RVBar, + Comma, + Invalid, + FunctionName, + Fraction, + Eof, +}; +// \frac{}{} +pub const keywords = std.StaticStringMap(TokenType).initComptime(.{ + .{ "left(", .LParen }, + .{ "right)", .RParen }, + .{ "left[", .LBrak }, + .{ "right]", .RBrak }, + .{ "left{", .LBrace }, + .{ "right}", .RBrace }, + .{ "left|", .LVBar }, + .{ "right|", .RVBar }, + .{ "left", .Left }, // latex: \left + .{ "right", .Right }, // latex: \right + .{ "{", .LBrace }, // latex: \{ + .{ "}", .RBrace }, // latex : \} + .{ "frac", .Fraction }, + .{ "operatorname", .OperatorName }, // LaTeX: \operatorname{with} + .{ "with", .With }, // LaTeX: \operatorname{with} + .{ "for", .For }, // LaTeX: \operatorname{for} + .{ "rgb", .Color }, // LaTeX: \operatorname{rgb} + + .{ "sin", .FunctionName }, + .{ "cos", .FunctionName }, + .{ "tan", .FunctionName }, + + .{ "theta", .Variable }, + .{ "alpha", .Variable }, + .{ "gamma", .Variable }, + .{ "pi", .Constant }, +}); + +const Token = struct { type: TokenType, text: ?u8 = null, value: ?i64 = null }; + +pub fn getKeyword(text: []const u8) ?TokenType { + return keywords.get(text); +} + +pub const Loc = struct { from: usize, to: usize }; +pub const _Token = struct { tag: TokenType, pos: Loc }; + +pub const TokenStream = ArrayList(_Token); + +pub const Tokenizer = struct { + buffer: [:0]const u8, + index: usize, + + const State = enum { + start, + identifier, + variable, + variable_subscript, + latex_command, + operator_name, + builtin, + plus, + minus, + int, + period, + number, // integer + decimal_number, // float + invalid, + unknown, + }; + + pub fn dump(self: *Tokenizer, token: *const _Token) void { + std.log.debug("{s} \"{s}\"", .{ @tagName(token.tag), self.buffer[token.pos.from..(token.pos.to)] }); + } + + pub fn init(buf: [:0]const u8) Tokenizer { + // std.log.debug("Initial string, {s}, len: {d}", .{writer, writer.len}); + return .{ + .buffer = buf, + .index = 0, + }; + } + + pub fn next(self: *Tokenizer) ?_Token { + var result: _Token = .{ .tag = undefined, .pos = .{ + .from = self.index, + .to = 
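+ // .to is left undefined for now; the state machine below sets result.pos.to to self.index once it has scanned past the end of the token.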
undefined, + } }; + + if (self.index >= self.buffer.len) { + // std.log.debug("Reached end of Expression", .{}); + return .{ + .tag = .Eof, + .pos = .{ + .from = self.index, + .to = self.index, + }, + }; + } + + state: switch (State.start) { + .start => switch (self.buffer[self.index]) { + '0'...'9' => { + self.index += 1; + continue :state .number; + }, + '.' => { + self.index += 1; + continue :state .period; + }, + '\\' => { + self.index += 1; + result.pos.from = self.index; // ignore the backslash + continue :state .latex_command; + }, + 'a'...'z', 'A'...'Z' => { + result.tag = .Variable; + continue :state .variable; + }, + '+', '-', '*', '/', '^', '=' => { + result.tag = .Op; + self.index += 1; + }, + ',' => { + result.tag = .Comma; + self.index += 1; + }, + '(' => { + result.tag = .LParen; + self.index += 1; + }, + ')' => { + result.tag = .RParen; + self.index += 1; + }, + '[' => { + result.tag = .LBrak; + self.index += 1; + }, + ']' => { + result.tag = .RBrak; + self.index += 1; + }, + '{' => { + result.tag = .LBrace; + self.index += 1; + }, + '}' => { + result.tag = .RBrace; + self.index += 1; + }, + '|' => { + result.tag = .VBar; + self.index += 1; + }, + ' ' => { + self.index += 1; // skip whitespace + result.pos.from = self.index; + continue :state .start; + }, + else => { + result.tag = .Invalid; + self.index += 1; + }, + }, + .number => { + switch (self.buffer[self.index]) { + '0'...'9' => { // this will consume numbers in "123.23" before the decimal + self.index += 1; + continue :state .number; + }, + '.' => continue :state .decimal_number, + else => { + result.tag = .Integer; + }, + } + }, + .decimal_number => { + self.index += 1; + switch (self.buffer[self.index]) { + '0'...'9' => { // this will consume numbers after the decimal point + // self.index += 1; + continue :state .decimal_number; + }, + else => { + result.tag = .Real; + }, + } + }, + .period => { + switch (self.buffer[self.index]) { + '0'...'9' => continue :state .decimal_number, + else => { + result.tag = .Op; + }, + } + }, + .variable => { // If we're here, then we're past the initial letter in a_{123} + self.index += 1; + switch (self.buffer[self.index]) { + '_' => continue :state .variable_subscript, + else => { + result.tag = .Variable; + }, + } + }, + .variable_subscript => { + self.index += 1; + switch (self.buffer[self.index]) { + 'a'...'z', 'A'...'Z', '0'...'9', '{' => continue :state .variable_subscript, + '}' => { + self.index += 1; + result.tag = .Variable; + }, + else => { + result.tag = .Variable; + }, + } + }, + .operator_name => { + switch (self.buffer[self.index]) { + '{' => { + self.index += 1; + result.pos.from = self.index; // ignore the opening brace + continue :state .operator_name; + }, + 'a'...'z', 'A'...'Z' => { + self.index += 1; + continue :state .operator_name; + }, + else => { + const text = self.buffer[result.pos.from..self.index]; + std.log.debug("Keyword: {s}", .{text}); + if (getKeyword(text)) |tag| { + result.tag = tag; + result.pos.to = self.index; + self.index += 1; + return result; + } + result.tag = .Invalid; + result.pos.to = self.index; + return result; + }, + } + }, + .latex_command => { + self.index += 1; + result.tag = .latex_command; + switch (self.buffer[self.index]) { + 'a'...'z', 'A'...'Z' => continue :state .latex_command, + else => { + const text = self.buffer[result.pos.from..self.index]; + if (getKeyword(text)) |tag| { + std.log.debug("Keyword found: {s} -> {s}", .{ text, @tagName(tag) }); + if (tag == .OperatorName) { + result.pos.from = self.index; + continue 
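+ // \operatorname{...}: hand off to the operator_name state, which skips the opening brace, reads the braced name, and maps it to a keyword such as with, for, or rgb.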
:state .operator_name; + } + + result.tag = tag; + result.pos.to = self.index; + return result; + } + result.tag = .Invalid; + result.pos.to = self.index; + return result; + }, + } + }, + .invalid => { + result.tag = .Invalid; + }, + else => { + result.tag = .Invalid; + }, + } + + result.pos.to = self.index; + return result; + } +}; + +fn testTokenize(source: [:0]const u8, expected_token_tags: []const TokenType) !void { + var tokenizer = Tokenizer.init(source); + for (expected_token_tags) |expected_token_tag| { + const token = tokenizer.next().?; // Unwrap the optional + tokenizer.dump(&token); + try std.testing.expectEqual(expected_token_tag, token.tag); + } + + const last_token = tokenizer.next().?; // Unwrap the optional + try std.testing.expectEqual(TokenType.Eof, last_token.tag); + try std.testing.expectEqual(source.len, last_token.pos.from); + try std.testing.expectEqual(source.len, last_token.pos.to); + + // Print success + std.log.debug("Success: {s}", .{source}); +} + +test "Testing Tokenizer" { + try testTokenize("3", &.{.Integer}); + try testTokenize(".32342", &.{.Real}); + try testTokenize("a_{1}", &.{.Variable}); + try testTokenize("\\sin", &.{.FunctionName}); + try testTokenize("sin", &.{ .Variable, .Variable, .Variable }); + try testTokenize("a_{1}+\\sin*3", &.{ .Variable, .Op, .FunctionName, .Op, .Integer }); + try testTokenize("\\sin", &.{.FunctionName}); + try testTokenize("\\cos", &.{.FunctionName}); + try testTokenize("abc_{123}", &.{ .Variable, .Variable, .Variable }); + try testTokenize("a_{1}+\\sin*3.25-2.2.3.3", &.{ .Variable, .Op, .FunctionName, .Op, .Real, .Op, .Real, .Real, .Real }); + try testTokenize("a_{abc}", &.{.Variable}); + try testTokenize("\\frac{a_{1}}{2}", &.{ .Fraction, .LBrace, .Variable, .RBrace, .LBrace, .Integer, .RBrace }); + try testTokenize("\\left\\{1,2,3\\right\\}", &.{ .Left, .LBrace, .Integer, .Comma, .Integer, .Comma, .Integer, .Right, .RBrace }); +} + +test "Integer tokenization" { + try testTokenize("3", &.{.Integer}); +} + +test "Real number tokenization" { + try testTokenize(".32342", &.{.Real}); +} + +test "Variable with subscript" { + try testTokenize("a_{1}", &.{.Variable}); +} + +test "LaTeX function name" { + try testTokenize("\\sin", &.{.FunctionName}); + try testTokenize("\\cos", &.{.FunctionName}); +} + +test "Regular variable tokenization" { + try testTokenize("sin", &.{ .Variable, .Variable, .Variable }); +} + +test "Complex Expression with variables and functions" { + try testTokenize("a_{1}+\\sin*3", &.{ .Variable, .Op, .FunctionName, .Op, .Integer }); +} + +test "Multiple character variables" { + try testTokenize("abc_{123}", &.{ .Variable, .Variable, .Variable }); +} + +test "Complex mathematical Expression" { + try testTokenize("a_{1}+\\sin*3.25-2.2.3.3", &.{ .Variable, .Op, .FunctionName, .Op, .Real, .Op, .Real, .Real, .Real }); +} + +test "Variable with text subscript" { + try testTokenize("a_{abc}", &.{.Variable}); +} + +test "LaTeX fraction command" { + try testTokenize("\\frac{a_{1}}{2}", &.{ .Fraction, .LBrace, .Variable, .RBrace, .LBrace, .Integer, .RBrace }); +} + +test "LaTeX left-right delimiters" { + try testTokenize("\\left\\{1,2,3\\right\\}", &.{ .Left, .LBrace, .Integer, .Comma, .Integer, .Comma, .Integer, .Right, .RBrace }); +} diff --git a/src/wasm.zig b/src/wasm.zig new file mode 100644 index 0000000..0d45813 --- /dev/null +++ b/src/wasm.zig @@ -0,0 +1,428 @@ +const std = @import("std"); +const ArrayList = std.ArrayList; + +const tex = @import("root.zig"); + +pub const std_options: std.Options = .{ + 
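+ // Route all std.log calls through the no-op logFn below so the WASM module produces no log output.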
.logFn = logFn, +}; + +pub fn logFn( + comptime level: std.log.Level, + comptime scope: @Type(.enum_literal), + comptime format: []const u8, + args: anytype, +) void { + _ = level; + _ = scope; + _ = format; + _ = args; + // No-op for WASM +} + +// WASM memory for string exchange +var output_writer: [2048]u8 = undefined; +var output_len: usize = 0; + +// Simple test function to verify WASM loading +export fn testFunction() i32 { + return 42; +} + +// Simple memory allocator for WASM +var memory_pool: [4096]u8 = undefined; +var memory_offset: usize = 0; + +// Export function to allocate memory +export fn malloc(size: usize) usize { + if (memory_offset + size > memory_pool.len) { + return 0; // Return 0 if out of memory + } + const offset = memory_offset; + memory_offset += size; + return @intFromPtr(&memory_pool[offset]); +} + +export fn free(ptr: usize) void { + _ = ptr; +} + +// Export function to get the output writer pointer +export fn getOutputPtr() [*]u8 { + return &output_writer; +} + +// Export function to get the output length +export fn getOutputLen() usize { + return output_len; +} + +// Simplified polishToString for WASM +fn wasmPolishToString(writer: *std.Io.Writer, expr: *const tex.Expression, source: []const u8) !void { + switch (expr.type) { + .Variable => { + try writer.print(" {s}", .{source[expr.pos.from..expr.pos.to]}); + }, + .FunctionName => { + try writer.print(" func", .{}); + }, + .Number => { + if (expr.value) |val| { + switch (val) { + .i => |i| try writer.print(" {d}", .{i}), + .f => |f| try writer.print(" {d}", .{f}), + else => try writer.print(" ?", .{}), + } + } else { + try writer.print("? ", .{}); + } + }, + .Add => { + try writer.print(" (+", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Sub => { + try writer.print(" (-", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Mul => { + try writer.print(" (*", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .iMul => { + try writer.print(" (*i", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Div => { + try writer.print(" (/", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Pow => { + try writer.print(" (^", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Dot => { + try writer.print(" (.", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Assignment => { + try writer.print(" (=", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .With => { + try writer.print(" (with ", 
.{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .UnaryMinus => { + try writer.print(" (-u", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + } + try writer.print(")", .{}); + }, + .UnaryPlus => { + try writer.print(" (+", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + } + try writer.print(")", .{}); + }, + .Object => { + try writer.print(" (obj", .{}); + if (expr.children) |children| { + const length = expr.value.?.length; + for (0..length) |i| { + try wasmPolishToString(writer, &children[i], source); + } + } + try writer.print(")", .{}); + }, + .Arguments => { + try writer.print(" (args", .{}); + if (expr.children) |children| { + const length = expr.value.?.length; + for (0..length) |i| { + try wasmPolishToString(writer, &children[i], source); + } + } + try writer.print(")", .{}); + }, + .Paren => { + try writer.print(" (paren", .{}); + if (expr.children) |children| { + const length = expr.value.?.length; + for (0..length) |i| { + try wasmPolishToString(writer, &children[i], source); + } + } + try writer.print(")", .{}); + }, + .Comma => { + try writer.print(", ", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + }, + .FunctionCall => { + try writer.print(" (call", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Juxt => { + try writer.print(" (juxt", .{}); + if (expr.children) |children| { + try wasmPolishToString(writer, &children[0], source); + try wasmPolishToString(writer, &children[1], source); + } + try writer.print(")", .{}); + }, + .Invalid => { + try writer.print("INVALID ", .{}); + }, + } +} + +// JSON tree serialization for WASM +fn wasmTreeToJson(expr: *const tex.Expression, source: []const u8, writer: *std.Io.Writer) !void { + try writer.print("{{\"type\":\"{s}\"", .{@tagName(expr.type)}); + + // Add position info + try writer.print(",\"pos\":{{\"from\":{d},\"to\":{d}}}", .{ expr.pos.from, expr.pos.to }); + + // Add value if present + if (expr.value) |val| { + switch (val) { + .i => |i| try writer.print(",\"value\":{d}", .{i}), + .f => |f| try writer.print(",\"value\":{d}", .{f}), + .length => |len| try writer.print(",\"length\":{d}", .{len}), + } + } + + // Add text for variables/operators/functions + if (expr.type == .Variable or expr.type == .FunctionName) { + try writer.print(",\"text\":\"{s}\"", .{source[expr.pos.from..expr.pos.to]}); + } + + // Add children recursively + if (expr.children) |children| { + try writer.print(",\"children\":[", .{}); + const length = switch (expr.type) { + .Object, .Arguments, .Paren => expr.value.?.length, + .UnaryMinus, .UnaryPlus => 1, + else => 2, // Binary operations + }; + + for (0..length) |i| { + if (i > 0) try writer.print(",", .{}); + try wasmTreeToJson(&children[i], source, writer); + } + try writer.print("]", .{}); + } + + try writer.print("}}", .{}); +} + +export fn parseExpression(input_ptr: [*]const u8, input_len: usize) bool { + // Reset output + output_len = 0; + + // Create a null-terminated string from the input + if (input_len >= output_writer.len - 1) { + // Input too long, copy error message + const error_msg = "Error: Input too 
long"; + @memcpy(output_writer[0..error_msg.len], error_msg); + output_len = error_msg.len; + return false; + } + + // Copy input and null-terminate + var input_writer: [2048]u8 = undefined; + @memcpy(input_writer[0..input_len], input_ptr[0..input_len]); + input_writer[input_len] = 0; + const expr: [:0]const u8 = input_writer[0..input_len :0]; + + // Create arena allocator + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + + // Parse the expression + const result = parseExpressionInternal(expr, allocator) catch |err| { + const error_msg = switch (err) { + error.UnexpectedToken => "Error: Unexpected token", + error.ExpectedSomething => "Error: Expected something", + error.UnmatchedParentheses => "Error: Unmatched parentheses", + error.EmptyParentheses => "Error: Empty parentheses", + error.InvalidOperator => "Error: Invalid operator", + error.OutOfMemory => "Error: Out of memory", + error.Overflow => "Error: Overflow", + error.InvalidCharacter => "Error: Invalid character", + error.WriteFailed => "Error: Write failed", + }; + @memcpy(output_writer[0..error_msg.len], error_msg); + output_len = error_msg.len; + return false; + }; + + // Copy result to output writer + if (result.len >= output_writer.len) { + const error_msg = "Error: Result too long"; + @memcpy(output_writer[0..error_msg.len], error_msg); + output_len = error_msg.len; + return false; + } + + @memcpy(output_writer[0..result.len], result); + output_len = result.len; + return true; +} + +// New export function for tree JSON output +export fn parseExpressionToTree(input_ptr: [*]const u8, input_len: usize) bool { + // Reset output + output_len = 0; + + // Create a null-terminated string from the input + if (input_len >= output_writer.len - 1) { + // Input too long, copy error message + const error_msg = "Error: Input too long"; + @memcpy(output_writer[0..error_msg.len], error_msg); + output_len = error_msg.len; + return false; + } + + // Copy input and null-terminate + var input_writer: [2048]u8 = undefined; + @memcpy(input_writer[0..input_len], input_ptr[0..input_len]); + input_writer[input_len] = 0; + const expr: [:0]const u8 = input_writer[0..input_len :0]; + + // Create arena allocator + var arena_impl = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena_impl.deinit(); + const arena = arena_impl.allocator(); + + // Parse the expression + const result = parseExpressionToTreeInternal(expr, arena) catch |err| { + const error_msg = switch (err) { + error.UnexpectedToken => "Error: Unexpected token", + error.ExpectedSomething => "Error: Expected something", + error.UnmatchedParentheses => "Error: Unmatched parentheses", + error.EmptyParentheses => "Error: Empty parentheses", + error.InvalidOperator => "Error: Invalid operator", + error.OutOfMemory => "Error: Out of memory", + error.Overflow => "Error: Overflow", + error.InvalidCharacter => "Error: Invalid character", + error.WriteFailed => "Error: Write failed", + }; + @memcpy(output_writer[0..error_msg.len], error_msg); + output_len = error_msg.len; + return false; + }; + + // Copy result to output writer + if (result.len >= output_writer.len) { + const error_msg = "Error: Result too long"; + @memcpy(output_writer[0..error_msg.len], error_msg); + output_len = error_msg.len; + return false; + } + + @memcpy(output_writer[0..result.len], result); + output_len = result.len; + return true; +} + +fn parseExpressionInternal(expr: [:0]const u8, allocator: std.mem.Allocator) ![]const u8 { + 
var token_stream: tex.TokenStream = .empty; + defer token_stream.deinit(allocator); + + var tokenizer = tex.Tokenizer.init(expr); + + while (tokenizer.next()) |token| { + if (token.tag == .Eof) { + break; + } + try token_stream.append(allocator, token); + } + + var parser = tex.Parser.init(token_stream, expr, allocator); + const ast = try parser.parse(0); + + var writer_impl: std.Io.Writer.Allocating = .init(allocator); + defer writer_impl.deinit(); + + try wasmPolishToString(&writer_impl.writer, &ast, expr); + + const result = std.mem.trim(u8, writer_impl.written(), " "); + + const result_copy = try allocator.dupe(u8, result); + return result_copy; +} + +fn parseExpressionToTreeInternal(expr: [:0]const u8, allocator: std.mem.Allocator) ![]const u8 { + var token_stream: tex.TokenStream = .empty; + defer token_stream.deinit(allocator); + + var tokenizer = tex.Tokenizer.init(expr); + + while (tokenizer.next()) |token| { + if (token.tag == .Eof) { + break; + } + try token_stream.append(allocator, token); + } + + var parser = tex.Parser.init(token_stream, expr, allocator); + const ast = try parser.parse(0); + + var writer_impl: std.Io.Writer.Allocating = .init(allocator); + defer writer_impl.deinit(); + + try wasmTreeToJson(&ast, expr, &writer_impl.writer); + + const result = std.mem.trim(u8, writer_impl.written(), " "); + + const result_copy = try allocator.dupe(u8, result); + return result_copy; +} diff --git a/tests/parse_ast/comments_with_whitespace.tex b/tests/parse_ast/comments_with_whitespace.tex new file mode 100644 index 0000000..c35d4f6 --- /dev/null +++ b/tests/parse_ast/comments_with_whitespace.tex @@ -0,0 +1,2 @@ +% this is a comment +a + b % inline comment diff --git a/tests/parse_ast/complex_symbol_use.tex b/tests/parse_ast/complex_symbol_use.tex new file mode 100644 index 0000000..703a0d0 --- /dev/null +++ b/tests/parse_ast/complex_symbol_use.tex @@ -0,0 +1 @@ +x ~ y \approx z \neq a diff --git a/tests/parse_ast/display_math.tex b/tests/parse_ast/display_math.tex new file mode 100644 index 0000000..0d5a586 --- /dev/null +++ b/tests/parse_ast/display_math.tex @@ -0,0 +1 @@ +$$x^2 + y^2 = z^2$$ diff --git a/tests/parse_ast/empty.tex b/tests/parse_ast/empty.tex new file mode 100644 index 0000000..e69de29 diff --git a/tests/parse_ast/environment_block.tex b/tests/parse_ast/environment_block.tex new file mode 100644 index 0000000..1bfac37 --- /dev/null +++ b/tests/parse_ast/environment_block.tex @@ -0,0 +1,5 @@ +\begin{aligned} + a &= b + c \\\\ + d &= e + f +\end{aligned} + \ No newline at end of file diff --git a/tests/parse_ast/escaped_newlines.tex b/tests/parse_ast/escaped_newlines.tex new file mode 100644 index 0000000..20b4058 --- /dev/null +++ b/tests/parse_ast/escaped_newlines.tex @@ -0,0 +1 @@ +x = 1 \\\\ y = 2 diff --git a/tests/parse_ast/lorenz_equations.tex b/tests/parse_ast/lorenz_equations.tex new file mode 100644 index 0000000..4e62fd7 --- /dev/null +++ b/tests/parse_ast/lorenz_equations.tex @@ -0,0 +1,5 @@ +\begin{aligned} +\dot{x} & = \sigma(y-x) \\ +\dot{y} & = \rho x - y - xz \\ +\dot{z} & = -\beta z + xy +\end{aligned} diff --git a/tests/parse_ast/mixed_scripts.tex b/tests/parse_ast/mixed_scripts.tex new file mode 100644 index 0000000..693d17a --- /dev/null +++ b/tests/parse_ast/mixed_scripts.tex @@ -0,0 +1 @@ +x_i^2 + y_j^3 diff --git a/tests/parse_ast/nested_fractions.tex b/tests/parse_ast/nested_fractions.tex new file mode 100644 index 0000000..ec18524 --- /dev/null +++ b/tests/parse_ast/nested_fractions.tex @@ -0,0 +1 @@ +\frac{1}{\frac{2}{3}} diff --git 
a/tests/parse_ast/optional_args.tex b/tests/parse_ast/optional_args.tex new file mode 100644 index 0000000..eab71ae --- /dev/null +++ b/tests/parse_ast/optional_args.tex @@ -0,0 +1 @@ +\sqrt[3]{x} diff --git a/tests/parse_ast/simple_inline_math.tex b/tests/parse_ast/simple_inline_math.tex new file mode 100644 index 0000000..dfad857 --- /dev/null +++ b/tests/parse_ast/simple_inline_math.tex @@ -0,0 +1 @@ +$a + b = c$ \ No newline at end of file diff --git a/tests/parse_ast/single_char_commands.tex b/tests/parse_ast/single_char_commands.tex new file mode 100644 index 0000000..1ad023a --- /dev/null +++ b/tests/parse_ast/single_char_commands.tex @@ -0,0 +1 @@ +\\ \\% \\$ \\& \\_ \\{ \\} diff --git a/tests/parse_ast/snapshots/comments_with_whitespace.tex.snapshot.txt b/tests/parse_ast/snapshots/comments_with_whitespace.tex.snapshot.txt new file mode 100644 index 0000000..4c5a7d0 --- /dev/null +++ b/tests/parse_ast/snapshots/comments_with_whitespace.tex.snapshot.txt @@ -0,0 +1,51 @@ +info: Output Format: ast + +debug: Invalid "%" +debug: Variable "t" +debug: Variable "h" +debug: Variable "i" +debug: Variable "s" +debug: Variable "i" +debug: Variable "s" +debug: Variable "a" +debug: Variable "c" +debug: Variable "o" +debug: Variable "m" +debug: Variable "m" +debug: Variable "e" +debug: Variable "n" +debug: Variable "t" +debug: Invalid " +" +debug: Variable "a" +debug: Op "+" +debug: Variable "b" +debug: Invalid "%" +debug: Variable "i" +debug: Variable "n" +debug: Variable "l" +debug: Variable "i" +debug: Variable "n" +debug: Variable "e" +debug: Variable "c" +debug: Variable "o" +debug: Variable "m" +debug: Variable "m" +debug: Variable "e" +debug: Variable "n" +debug: Variable "t" +debug: Invalid " +" +debug: --eof-- +info: Token stream length: 34 +debug: Current token: Invalid text: % +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:256:25: 0x10067a193 in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x100676dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/complex_symbol_use.tex.snapshot.txt b/tests/parse_ast/snapshots/complex_symbol_use.tex.snapshot.txt new file mode 100644 index 0000000..d20d5a8 --- /dev/null +++ b/tests/parse_ast/snapshots/complex_symbol_use.tex.snapshot.txt @@ -0,0 +1,25 @@ +info: Output Format: ast + +debug: Variable "x" +debug: Invalid "~" +debug: Variable "y" +debug: Invalid "approx" +debug: Variable "z" +debug: Invalid "neq" +debug: Variable "a" +debug: Invalid " +" +debug: --eof-- +info: Token stream length: 8 +debug: Current token: Variable text: x +debug: Peeking token: Invalid text: ~ +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:275:25: 0x100a9a3bb in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x100a96dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/display_math.tex.snapshot.txt b/tests/parse_ast/snapshots/display_math.tex.snapshot.txt new file mode 100644 index 0000000..bfb0e29 --- /dev/null +++ b/tests/parse_ast/snapshots/display_math.tex.snapshot.txt @@ -0,0 +1,32 @@ +info: Output Format: ast + +debug: Invalid "$" +debug: Invalid "$" +debug: Variable "x" +debug: Op "^" +debug: Integer "2" +debug: Op "+" +debug: Variable "y" +debug: Op "^" +debug: Integer "2" +debug: Op "=" +debug: Variable "z" +debug: Op "^" +debug: 
Integer "2" +debug: Invalid "$" +debug: Invalid "$" +debug: Invalid " +" +debug: --eof-- +info: Token stream length: 16 +debug: Current token: Invalid text: $ +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:256:25: 0x100716193 in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x100712dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/empty.tex.snapshot.txt b/tests/parse_ast/snapshots/empty.tex.snapshot.txt new file mode 100644 index 0000000..848c0e6 --- /dev/null +++ b/tests/parse_ast/snapshots/empty.tex.snapshot.txt @@ -0,0 +1,14 @@ +info: Output Format: ast + +debug: --eof-- +info: Token stream length: 0 +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:256:25: 0x102f9e193 in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x102f9add7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/environment_block.tex.snapshot.txt b/tests/parse_ast/snapshots/environment_block.tex.snapshot.txt new file mode 100644 index 0000000..e3a60ac Binary files /dev/null and b/tests/parse_ast/snapshots/environment_block.tex.snapshot.txt differ diff --git a/tests/parse_ast/snapshots/escaped_newlines.tex.snapshot.txt b/tests/parse_ast/snapshots/escaped_newlines.tex.snapshot.txt new file mode 100644 index 0000000..d86f245 --- /dev/null +++ b/tests/parse_ast/snapshots/escaped_newlines.tex.snapshot.txt @@ -0,0 +1,32 @@ +info: Output Format: ast + +debug: Variable "x" +debug: Op "=" +debug: Integer "1" +debug: Invalid "\" +debug: Invalid "\" +debug: Variable "y" +debug: Op "=" +debug: Integer "2" +debug: Invalid " +" +debug: --eof-- +info: Token stream length: 9 +debug: Current token: Variable text: x +debug: Peeking token: Op text: = +debug: Current token: Op text: = +debug: Current token: Integer text: 1 +debug: Peeking token: Invalid text: \ +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:275:25: 0x102fba3bb in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/parser.zig:286:37: 0x102fba5d7 in parse (zig-tex) + const rhs: Expression = try self.parse(r_bp); + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x102fb6dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/lorenz_equations.tex.snapshot.txt b/tests/parse_ast/snapshots/lorenz_equations.tex.snapshot.txt new file mode 100644 index 0000000..f82f61d --- /dev/null +++ b/tests/parse_ast/snapshots/lorenz_equations.tex.snapshot.txt @@ -0,0 +1,84 @@ +info: Output Format: ast + +debug: Invalid "begin" +debug: LBrace "{" +debug: Variable "a" +debug: Variable "l" +debug: Variable "i" +debug: Variable "g" +debug: Variable "n" +debug: Variable "e" +debug: Variable "d" +debug: RBrace "}" +debug: Invalid " +" +debug: Invalid "dot" +debug: LBrace "{" +debug: Variable "x" +debug: RBrace "}" +debug: Invalid "&" +debug: Op "=" +debug: Invalid "sigma" +debug: LParen "(" +debug: Variable "y" +debug: Op "-" +debug: Variable "x" +debug: RParen ")" +debug: Invalid "\" +debug: Invalid " +" +debug: Invalid "dot" +debug: LBrace "{" +debug: Variable "y" +debug: RBrace "}" +debug: Invalid "&" +debug: Op "=" +debug: Invalid "rho" +debug: Variable "x" +debug: Op "-" +debug: Variable "y" +debug: Op "-" +debug: Variable 
"x" +debug: Variable "z" +debug: Invalid "\" +debug: Invalid " +" +debug: Invalid "dot" +debug: LBrace "{" +debug: Variable "z" +debug: RBrace "}" +debug: Invalid "&" +debug: Op "=" +debug: Op "-" +debug: Invalid "beta" +debug: Variable "z" +debug: Op "+" +debug: Variable "x" +debug: Variable "y" +debug: Invalid " +" +debug: Invalid "end" +debug: LBrace "{" +debug: Variable "a" +debug: Variable "l" +debug: Variable "i" +debug: Variable "g" +debug: Variable "n" +debug: Variable "e" +debug: Variable "d" +debug: RBrace "}" +debug: Invalid " +" +debug: --eof-- +info: Token stream length: 64 +debug: Current token: Invalid text: begin +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:256:25: 0x104c76193 in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x104c72dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/mixed_scripts.tex.snapshot.txt b/tests/parse_ast/snapshots/mixed_scripts.tex.snapshot.txt new file mode 100644 index 0000000..fb2882a --- /dev/null +++ b/tests/parse_ast/snapshots/mixed_scripts.tex.snapshot.txt @@ -0,0 +1,42 @@ +info: Output Format: ast + +debug: Variable "x_i" +debug: Op "^" +debug: Integer "2" +debug: Op "+" +debug: Variable "y_j" +debug: Op "^" +debug: Integer "3" +debug: Invalid " +" +debug: --eof-- +info: Token stream length: 8 +debug: Current token: Variable text: x_i +debug: Peeking token: Op text: ^ +debug: Current token: Op text: ^ +debug: Current token: Integer text: 2 +debug: Peeking token: Op text: + +debug: Peeking token: Op text: + +debug: Current token: Op text: + +debug: Current token: Variable text: y_j +debug: Peeking token: Op text: ^ +debug: Current token: Op text: ^ +debug: Current token: Integer text: 3 +debug: Peeking token: Invalid text: + +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:275:25: 0x1002863bb in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/parser.zig:286:37: 0x1002865d7 in parse (zig-tex) + const rhs: Expression = try self.parse(r_bp); + ^ +/Users/jan/projects/zig-tex/src/parser.zig:286:37: 0x1002865d7 in parse (zig-tex) + const rhs: Expression = try self.parse(r_bp); + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x100282dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/nested_fractions.tex.snapshot.txt b/tests/parse_ast/snapshots/nested_fractions.tex.snapshot.txt new file mode 100644 index 0000000..db709fb --- /dev/null +++ b/tests/parse_ast/snapshots/nested_fractions.tex.snapshot.txt @@ -0,0 +1,32 @@ +info: Output Format: ast + +debug: Keyword found: frac -> Fraction +debug: Fraction "frac" +debug: LBrace "{" +debug: Integer "1" +debug: RBrace "}" +debug: LBrace "{" +debug: Keyword found: frac -> Fraction +debug: Fraction "frac" +debug: LBrace "{" +debug: Integer "2" +debug: RBrace "}" +debug: LBrace "{" +debug: Integer "3" +debug: RBrace "}" +debug: RBrace "}" +debug: Invalid " +" +debug: --eof-- +info: Token stream length: 14 +debug: Current token: Fraction text: frac +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:256:25: 0x10210a193 in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x102106dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git 
a/tests/parse_ast/snapshots/optional_args.tex.snapshot.txt b/tests/parse_ast/snapshots/optional_args.tex.snapshot.txt new file mode 100644 index 0000000..2f917ca --- /dev/null +++ b/tests/parse_ast/snapshots/optional_args.tex.snapshot.txt @@ -0,0 +1,24 @@ +info: Output Format: ast + +debug: Invalid "sqrt" +debug: LBrak "[" +debug: Integer "3" +debug: RBrak "]" +debug: LBrace "{" +debug: Variable "x" +debug: RBrace "}" +debug: Invalid " +" +debug: --eof-- +info: Token stream length: 8 +debug: Current token: Invalid text: sqrt +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:256:25: 0x10482a193 in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x104826dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/simple_inline_math.tex.snapshot.txt b/tests/parse_ast/snapshots/simple_inline_math.tex.snapshot.txt new file mode 100644 index 0000000..4cb66b5 --- /dev/null +++ b/tests/parse_ast/snapshots/simple_inline_math.tex.snapshot.txt @@ -0,0 +1,22 @@ +info: Output Format: ast + +debug: Invalid "$" +debug: Variable "a" +debug: Op "+" +debug: Variable "b" +debug: Op "=" +debug: Variable "c" +debug: Invalid "$" +debug: --eof-- +info: Token stream length: 7 +debug: Current token: Invalid text: $ +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:256:25: 0x10219a193 in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x102196dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/single_char_commands.tex.snapshot.txt b/tests/parse_ast/snapshots/single_char_commands.tex.snapshot.txt new file mode 100644 index 0000000..d8948c4 --- /dev/null +++ b/tests/parse_ast/snapshots/single_char_commands.tex.snapshot.txt @@ -0,0 +1,30 @@ +info: Output Format: ast + +debug: Invalid "\" +debug: Invalid "\" +debug: Invalid "%" +debug: Invalid "\" +debug: Invalid "$" +debug: Invalid "\" +debug: Invalid "&" +debug: Invalid "\" +debug: Invalid "_" +debug: Invalid "\" +debug: LBrace "{" +debug: Invalid "\" +debug: RBrace "}" +debug: Invalid " +" +debug: --eof-- +info: Token stream length: 14 +debug: Current token: Invalid text: \ +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:256:25: 0x102f0a193 in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x102f06dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/temml_sample.tex.snapshot.txt b/tests/parse_ast/snapshots/temml_sample.tex.snapshot.txt new file mode 100644 index 0000000..4dd309b --- /dev/null +++ b/tests/parse_ast/snapshots/temml_sample.tex.snapshot.txt @@ -0,0 +1,88 @@ +info: Output Format: ast + +debug: Invalid "def" +debug: Invalid "d" +debug: LBrace "{" +debug: Invalid "mathrm" +debug: LBrace "{" +debug: Variable "d" +debug: RBrace "}" +debug: RBrace "}" +debug: Invalid " +" +debug: Invalid " +" +debug: Invalid "oint" +debug: Invalid "_" +debug: Variable "C" +debug: Invalid "vec" +debug: LBrace "{" +debug: Variable "B" +debug: RBrace "}" +debug: Invalid "circ" +debug: Invalid "d" +debug: Invalid "vec" +debug: LBrace "{" +debug: Variable "l" +debug: RBrace "}" +debug: Op "=" +debug: Invalid "mu" +debug: Invalid "_" +debug: Integer "0" +debug: Keyword found: left -> Left +debug: Left 
"left" +debug: LParen "(" +debug: Variable "I_{" +debug: Invalid "text" +debug: LBrace "{" +debug: Variable "e" +debug: Variable "n" +debug: Variable "c" +debug: RBrace "}" +debug: RBrace "}" +debug: Op "+" +debug: Invalid "varepsilon" +debug: Invalid "_" +debug: Integer "0" +debug: Keyword found: frac -> Fraction +debug: Fraction "frac" +debug: LBrace "{" +debug: Invalid "d" +debug: RBrace "}" +debug: LBrace "{" +debug: Invalid "d" +debug: Variable "t" +debug: RBrace "}" +debug: Invalid "int" +debug: Invalid "_" +debug: Variable "S" +debug: LBrace "{" +debug: Invalid "vec" +debug: LBrace "{" +debug: Variable "E" +debug: RBrace "}" +debug: Invalid "circ" +debug: Invalid "hat" +debug: LBrace "{" +debug: Variable "n" +debug: RBrace "}" +debug: RBrace "}" +debug: Invalid ";" +debug: Invalid "d" +debug: Variable "a" +debug: Keyword found: right -> Right +debug: Right "right" +debug: RParen ")" +debug: --eof-- +info: Token stream length: 68 +debug: Current token: Invalid text: def +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:256:25: 0x10218a193 in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x102186dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/text_in_math_mode.tex.snapshot.txt b/tests/parse_ast/snapshots/text_in_math_mode.tex.snapshot.txt new file mode 100644 index 0000000..efb95b0 --- /dev/null +++ b/tests/parse_ast/snapshots/text_in_math_mode.tex.snapshot.txt @@ -0,0 +1,37 @@ +info: Output Format: ast + +debug: Invalid "text" +debug: LBrace "{" +debug: Variable "v" +debug: Variable "e" +debug: Variable "l" +debug: Variable "o" +debug: Variable "c" +debug: Variable "i" +debug: Variable "t" +debug: Variable "y" +debug: RBrace "}" +debug: Op "=" +debug: Keyword found: frac -> Fraction +debug: Fraction "frac" +debug: LBrace "{" +debug: Variable "d" +debug: RBrace "}" +debug: LBrace "{" +debug: Variable "t" +debug: RBrace "}" +debug: Invalid " +" +debug: --eof-- +info: Token stream length: 20 +debug: Current token: Invalid text: text +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:256:25: 0x104486193 in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x104482dd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/snapshots/unterminated_group.tex.snapshot.txt b/tests/parse_ast/snapshots/unterminated_group.tex.snapshot.txt new file mode 100644 index 0000000..cb68e78 --- /dev/null +++ b/tests/parse_ast/snapshots/unterminated_group.tex.snapshot.txt @@ -0,0 +1,24 @@ +info: Output Format: ast + +debug: Keyword found: frac -> Fraction +debug: Fraction "frac" +debug: LBrace "{" +debug: Variable "a" +debug: RBrace "}" +debug: LBrace "{" +debug: Variable "b" +debug: Invalid " +" +debug: --eof-- +info: Token stream length: 7 +debug: Current token: Fraction text: frac +error: UnexpectedToken +/Users/jan/projects/zig-tex/src/parser.zig:256:25: 0x105082193 in parse (zig-tex) + else => return ParserError.UnexpectedToken, + ^ +/Users/jan/projects/zig-tex/src/main.zig:56:17: 0x10507edd7 in main (zig-tex) + const ast = try parser.parse(0); + ^ + + + ----- EXIT CODE: 1 ----- diff --git a/tests/parse_ast/temml_sample.tex b/tests/parse_ast/temml_sample.tex new file mode 100644 index 0000000..2ed3186 --- /dev/null +++ b/tests/parse_ast/temml_sample.tex @@ -0,0 +1,3 @@ +\def\d{\mathrm{d}} + 
+\oint_C \vec{B}\circ \d\vec{l} = \mu_0 \left( I_{\text{enc}} + \varepsilon_0 \frac{\d}{\d t} \int_S {\vec{E} \circ \hat{n}}\; \d a \right) \ No newline at end of file diff --git a/tests/parse_ast/text_in_math_mode.tex b/tests/parse_ast/text_in_math_mode.tex new file mode 100644 index 0000000..ad9f11b --- /dev/null +++ b/tests/parse_ast/text_in_math_mode.tex @@ -0,0 +1 @@ +\text{velocity} = \frac{d}{t} diff --git a/tests/parse_ast/unterminated_group.tex b/tests/parse_ast/unterminated_group.tex new file mode 100644 index 0000000..e5a5eaa --- /dev/null +++ b/tests/parse_ast/unterminated_group.tex @@ -0,0 +1 @@ +\frac{a}{b diff --git a/wasm.zig b/wasm.zig deleted file mode 100644 index 4376324..0000000 --- a/wasm.zig +++ /dev/null @@ -1,966 +0,0 @@ -const std = @import("std"); -const ArrayList = std.ArrayList; - -const TokenType = enum { - Variable, - Constant, - Number, - Integer, - Real, - ImaginaryUnit, - Plus, - Minus, - Caret, - Asterisk, - Identifier, - BinaryOp, - Op, - latex_command, - Slash, - LParen, - RParen, - LBrak, - RBrak, - LBrace, - RBrace, - With, - For, - OperatorName, - Color, - Left, - Right, - VBar, - LVBar, - RVBar, - Comma, - Invalid, - FunctionName, - Fraction, - Eof, -}; - -pub const keywords = std.StaticStringMap(TokenType).initComptime(.{ - .{ "left(", .LParen }, - .{ "right)", .RParen }, - .{ "left[", .LBrak }, - .{ "right]", .RBrak }, - .{ "left{", .LBrace }, - .{ "right}", .RBrace }, - .{ "left|", .LVBar }, - .{ "right|", .RVBar }, - .{ "left", .Left }, - .{ "right", .Right }, - .{ "{", .LBrace }, - .{ "}", .RBrace }, - .{ "frac", .Fraction }, - .{ "operatorname", .OperatorName }, - .{ "with", .With }, - .{ "for", .For }, - .{ "rgb", .Color }, - .{ "sin", .FunctionName }, - .{ "cos", .FunctionName }, - .{ "tan", .FunctionName }, - .{ "theta", .Variable }, - .{ "alpha", .Variable }, - .{ "gamma", .Variable }, - .{ "pi", .Constant }, -}); - -pub fn getKeyword(text: []const u8) ?TokenType { - return keywords.get(text); -} - -const Loc = struct { from: usize, to: usize }; -const Token = struct { tag: TokenType, pos: Loc }; -const TokenStream = ArrayList(Token); - -const ExprType = enum { - Add, - Sub, - Mul, - iMul, - Pow, - Div, - Dot, - Juxt, - Comma, - With, - Assignment, - Paren, - Arguments, - FunctionCall, - FunctionName, - Object, - UnaryMinus, - UnaryPlus, - Number, - Variable, - Invalid, -}; - -const Expression = struct { - type: ExprType, - value: ?union(enum) { i: i64, f: f64, length: u32 }, // Changed to u32 for wasm32 - pos: Loc, - children: ?[*]Expression, -}; - -pub const infix_operators = std.StaticStringMap(ExprType).initComptime(.{ - .{ "+", .Add }, - .{ "-", .Sub }, - .{ "*", .Mul }, - .{ "/", .Div }, - .{ "^", .Pow }, - .{ ".", .Dot }, - .{ "=", .Assignment }, - .{ "with", .With }, -}); - -pub const prefix_operators = std.StaticStringMap(ExprType).initComptime(.{ - .{ "+", .UnaryPlus }, - .{ "-", .UnaryMinus }, -}); - -pub fn get_infix_operator(text: []const u8, tag: TokenType) ?ExprType { - switch (tag) { - .Variable => return .iMul, - .LParen => return .Juxt, - else => {}, - } - return infix_operators.get(text); -} - -pub fn get_prefix_operator(text: []const u8) ?ExprType { - return prefix_operators.get(text); -} - -fn infix_binding_power(op: ?ExprType) error{InvalidOperator}!struct { i8, i8 } { - if (op == null) return error.InvalidOperator; - switch (op.?) 
{ - .Comma => return .{ -1, -1 }, - .Add, .Sub => return .{ 3, 4 }, - .Mul, .Div => return .{ 5, 6 }, - .iMul => return .{ 5, 6 }, - .Juxt => return .{ 5, 6 }, - .Dot => return .{ 8, 7 }, - .Pow => return .{ 9, 8 }, - .With => return .{ 10, 9 }, - .Assignment => return .{ 2, 1 }, - else => return error.InvalidOperator, - } -} - -fn prefix_binding_power(op: ?ExprType) error{InvalidOperator}!i8 { - if (op == null) return error.InvalidOperator; - switch (op.?) { - .UnaryMinus, .UnaryPlus => return 6, - else => return error.InvalidOperator, - } -} - -const Tokenizer = struct { - buffer: [:0]const u8, - index: usize, - - const State = enum { - start, - identifier, - variable, - variable_subscript, - latex_command, - operator_name, - builtin, - plus, - minus, - int, - period, - number, - decimal_number, - invalid, - unknown, - }; - - pub fn init(buffer: [:0]const u8) Tokenizer { - return .{ - .buffer = buffer, - .index = 0, - }; - } - - pub fn next(self: *Tokenizer) ?Token { - var result: Token = .{ .tag = undefined, .pos = .{ - .from = self.index, - .to = undefined, - } }; - - if (self.index >= self.buffer.len) { - return .{ - .tag = .Eof, - .pos = .{ - .from = self.index, - .to = self.index, - }, - }; - } - - state: switch (State.start) { - .start => switch (self.buffer[self.index]) { - '0'...'9' => { - self.index += 1; - continue :state .number; - }, - '.' => { - self.index += 1; - continue :state .period; - }, - '\\' => { - self.index += 1; - result.pos.from = self.index; - continue :state .latex_command; - }, - 'a'...'z', 'A'...'Z' => { - result.tag = .Variable; - continue :state .variable; - }, - '+', '-', '*', '/', '^', '=' => { - result.tag = .Op; - self.index += 1; - }, - ',' => { - result.tag = .Comma; - self.index += 1; - }, - '(' => { - result.tag = .LParen; - self.index += 1; - }, - ')' => { - result.tag = .RParen; - self.index += 1; - }, - '[' => { - result.tag = .LBrak; - self.index += 1; - }, - ']' => { - result.tag = .RBrak; - self.index += 1; - }, - '{' => { - result.tag = .LBrace; - self.index += 1; - }, - '}' => { - result.tag = .RBrace; - self.index += 1; - }, - '|' => { - result.tag = .VBar; - self.index += 1; - }, - ' ' => { - self.index += 1; - result.pos.from = self.index; - continue :state .start; - }, - else => { - result.tag = .Invalid; - self.index += 1; - }, - }, - .number => { - switch (self.buffer[self.index]) { - '0'...'9' => { - self.index += 1; - continue :state .number; - }, - '.' 
=> continue :state .decimal_number,
-                    else => {
-                        result.tag = .Integer;
-                    },
-                },
-            },
-            .decimal_number => {
-                self.index += 1;
-                switch (self.buffer[self.index]) {
-                    '0'...'9' => {
-                        continue :state .decimal_number;
-                    },
-                    else => {
-                        result.tag = .Real;
-                    },
-                }
-            },
-            .period => {
-                switch (self.buffer[self.index]) {
-                    '0'...'9' => continue :state .decimal_number,
-                    else => {
-                        result.tag = .Op;
-                    },
-                }
-            },
-            .variable => {
-                self.index += 1;
-                switch (self.buffer[self.index]) {
-                    '_' => continue :state .variable_subscript,
-                    else => {
-                        result.tag = .Variable;
-                    },
-                }
-            },
-            .variable_subscript => {
-                self.index += 1;
-                switch (self.buffer[self.index]) {
-                    'a'...'z', 'A'...'Z', '0'...'9', '{' => continue :state .variable_subscript,
-                    '}' => {
-                        self.index += 1;
-                        result.tag = .Variable;
-                    },
-                    else => {
-                        result.tag = .Variable;
-                    },
-                }
-            },
-            .operator_name => {
-                switch (self.buffer[self.index]) {
-                    '{' => {
-                        self.index += 1;
-                        result.pos.from = self.index;
-                        continue :state .operator_name;
-                    },
-                    'a'...'z', 'A'...'Z' => {
-                        self.index += 1;
-                        continue :state .operator_name;
-                    },
-                    else => {
-                        const text = self.buffer[result.pos.from..self.index];
-                        if (getKeyword(text)) |tag| {
-                            result.tag = tag;
-                            result.pos.to = self.index;
-                            self.index += 1;
-                            return result;
-                        }
-                        result.tag = .Invalid;
-                        result.pos.to = self.index;
-                        return result;
-                    },
-                }
-            },
-            .latex_command => {
-                self.index += 1;
-                result.tag = .latex_command;
-                switch (self.buffer[self.index]) {
-                    'a'...'z', 'A'...'Z' => continue :state .latex_command,
-                    else => {
-                        const text = self.buffer[result.pos.from..self.index];
-                        if (getKeyword(text)) |tag| {
-                            if (tag == .OperatorName) {
-                                result.pos.from = self.index;
-                                continue :state .operator_name;
-                            }
-                            result.tag = tag;
-                            result.pos.to = self.index;
-                            return result;
-                        }
-                        result.tag = .Invalid;
-                        result.pos.to = self.index;
-                        return result;
-                    },
-                }
-            },
-            .invalid => {
-                result.tag = .Invalid;
-            },
-            else => {
-                result.tag = .Invalid;
-            },
-        }
-
-        result.pos.to = self.index;
-        return result;
-    }
-};
-
-const Parser = struct {
-    token_stream: TokenStream,
-    expr: [:0]const u8,
-    head: usize = 0,
-    current: Token,
-    allocator: std.mem.Allocator,
-
-    pub fn init(token_stream: TokenStream, expr: [:0]const u8, allocator: std.mem.Allocator) Parser {
-        return .{ .token_stream = token_stream, .expr = expr, .head = 0, .current = .{ .tag = .Eof, .pos = .{ .from = 0, .to = 0 } }, .allocator = allocator };
-    }
-
-    pub fn consume(self: *Parser) void {
-        if (self.head >= self.token_stream.items.len) {
-            self.current = .{ .tag = .Eof, .pos = .{ .from = 0, .to = 0 } };
-            return;
-        }
-        self.current = self.token_stream.items[self.head];
-        self.head += 1;
-    }
-
-    pub fn peek(self: *Parser) Token {
-        if (self.head >= self.token_stream.items.len) {
-            return .{ .tag = .Eof, .pos = .{ .from = 0, .to = 0 } };
-        }
-        return self.token_stream.items[self.head];
-    }
-
-    pub fn expect(self: *Parser, tag: TokenType, err: ParserError) ParserError!void {
-        self.consume();
-        if (self.current.tag != tag) {
-            return err;
-        }
-    }
-
-    pub const ParserError = error{
-        UnexpectedToken,
-        ExpectedSomething,
-        UnmatchedParentheses,
-        EmptyParentheses,
-        InvalidOperator,
-        OutOfMemory,
-        Overflow,
-        InvalidCharacter,
-    };
-
-    pub fn parse_prefix(self: *Parser) ParserError!Expression {
-        const op = self.current;
-        if (op.tag != .Op) return ParserError.UnexpectedToken;
-
-        const op_text: []const u8 = self.expr[op.pos.from..op.pos.to];
-        const op_type: ?ExprType = get_prefix_operator(op_text);
-        if (op_type == null) return ParserError.InvalidOperator;
-
-        const r_bp = try prefix_binding_power(op_type);
-        const expr = try self.parse(r_bp);
-
-        const children = try self.allocator.alloc(Expression, 1);
-        children[0] = expr;
-
-        return Expression{ .type = op_type.?, .value = null, .pos = op.pos, .children = children.ptr };
-    }
-
-    pub fn parse_paren(self: *Parser) ParserError!Expression {
-        var level: i32 = 0;
-        var commas: u32 = 0;
-        for (self.token_stream.items[self.head..]) |token| {
-            switch (token.tag) {
-                .LParen => {
-                    level -= 1;
-                },
-                .RParen => {
-                    if (level == 0) break;
-                    level += 1;
-                },
-                .Comma => {
-                    if (level == 0) commas += 1;
-                },
-                else => {},
-            }
-        }
-
-        if (level != 0) return ParserError.UnmatchedParentheses;
-        if (commas == 0) {
-            var children = try self.allocator.alloc(Expression, 1);
-            children[0] = try self.parse(0);
-            return Expression{ .type = .Paren, .value = .{ .length = 1 }, .pos = self.current.pos, .children = children.ptr };
-        }
-
-        const len = commas + 1;
-        var children = try self.allocator.alloc(Expression, len);
-        for (0..len) |i| {
-            children[i] = try self.parse(0);
-            if (i < len - 1) try self.expect(.Comma, ParserError.UnmatchedParentheses);
-        }
-
-        return Expression{ .type = .Object, .value = .{ .length = len }, .pos = self.current.pos, .children = children.ptr };
-    }
-
-    pub fn parse_func(self: *Parser) ParserError!Expression {
-        var final_type: ExprType = undefined;
-
-        switch (self.peek().tag) {
-            .LParen => {
-                final_type = .FunctionCall;
-            },
-            .Eof => {
-                return ParserError.ExpectedSomething;
-            },
-            else => {
-                final_type = .Juxt;
-            },
-        }
-
-        const name = Expression{ .type = .FunctionName, .value = null, .pos = self.current.pos, .children = null };
-        var args = try self.parse(0);
-        args.type = .Arguments;
-        args.value = .{ .length = 1 };
-
-        var children = try self.allocator.alloc(Expression, 2);
-        children[0] = name;
-        children[1] = args;
-
-        const func = Expression{ .type = .FunctionCall, .value = null, .pos = self.current.pos, .children = children.ptr };
-
-        return func;
-    }
-
-    pub fn parse(self: *Parser, min_bp: i8) ParserError!Expression {
-        self.consume();
-
-        var lhs: Expression =
-            switch (self.current.tag) {
-            .Integer => Expression{ .type = .Number, .value = .{ .i = try std.fmt.parseInt(i64, self.expr[self.current.pos.from..self.current.pos.to], 10) }, .pos = self.current.pos, .children = null },
-            .Real => Expression{ .type = .Number, .value = .{ .f = try std.fmt.parseFloat(f64, self.expr[self.current.pos.from..self.current.pos.to]) }, .pos = self.current.pos, .children = null },
-            .Variable => Expression{ .type = .Variable, .value = null, .pos = self.current.pos, .children = null },
-            .Op => try self.parse_prefix(),
-            .FunctionName => try self.parse_func(),
-            .LParen => paren: {
-                const expr: Expression = try self.parse_paren();
-                try self.expect(.RParen, ParserError.UnmatchedParentheses);
-                break :paren expr;
-            },
-            else => return ParserError.UnexpectedToken,
-        };
-
-        while (true) {
-            const op = self.peek();
-            var skip_op: bool = false;
-            switch (op.tag) {
-                .Eof => break,
-                .Op, .With => {},
-                .Variable => {
-                    skip_op = true;
-                },
-                .LParen => {
-                    skip_op = true;
-                },
-                .Comma => break,
-                .RParen => break,
-                else => return ParserError.UnexpectedToken,
-            }
-
-            const op_text: []const u8 = self.expr[op.pos.from..op.pos.to];
-            const op_type: ?ExprType = get_infix_operator(op_text, op.tag);
-            const l_bp, const r_bp = try infix_binding_power(op_type);
-            if (l_bp < min_bp) break;
-
-            if (!skip_op) self.consume();
-
-            const rhs: Expression = try self.parse(r_bp);
-
-            const children = try self.allocator.alloc(Expression, 2);
-            children[0] = lhs;
-            children[1] = rhs;
-
-            lhs = Expression{ .type = op_type.?, .value = null, .pos = op.pos, .children = children.ptr };
-        }
-        return lhs;
-    }
-};
-
-// WASM memory for string exchange
-var output_buffer: [2048]u8 = undefined;
-var output_len: usize = 0;
-
-// Simple test function to verify WASM loading
-export fn testFunction() i32 {
-    return 42;
-}
-
-// Simple memory allocator for WASM
-var memory_pool: [4096]u8 = undefined;
-var memory_offset: usize = 0;
-
-// Export function to allocate memory
-export fn malloc(size: usize) usize {
-    if (memory_offset + size > memory_pool.len) {
-        return 0; // Return 0 if out of memory
-    }
-    const offset = memory_offset;
-    memory_offset += size;
-    return @intFromPtr(&memory_pool[offset]);
-}
-
-export fn free(ptr: usize) void {
-    _ = ptr;
-}
-
-// Export function to get the output buffer pointer
-export fn getOutputPtr() [*]u8 {
-    return &output_buffer;
-}
-
-// Export function to get the output length
-export fn getOutputLen() usize {
-    return output_len;
-}
-
-// Simplified polishToString for WASM
-fn wasmPolishToString(expr: *const Expression, source: []const u8, buffer: *std.ArrayList(u8)) !void {
-    const writer = buffer.writer();
-    switch (expr.type) {
-        .Variable => {
-            try writer.print(" {s}", .{source[expr.pos.from..expr.pos.to]});
-        },
-        .FunctionName => {
-            try writer.print(" func", .{});
-        },
-        .Number => {
-            if (expr.value) |val| {
-                switch (val) {
-                    .i => |i| try writer.print(" {d}", .{i}),
-                    .f => |f| try writer.print(" {d}", .{f}),
-                    else => try writer.print(" ?", .{}),
-                }
-            } else {
-                try writer.print("? ", .{});
-            }
-        },
-        .Add => {
-            try writer.print(" (+", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .Sub => {
-            try writer.print(" (-", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .Mul => {
-            try writer.print(" (*", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .iMul => {
-            try writer.print(" (*i", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .Div => {
-            try writer.print(" (/", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .Pow => {
-            try writer.print(" (^", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .Dot => {
-            try writer.print(" (.", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .Assignment => {
-            try writer.print(" (=", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .With => {
-            try writer.print(" (with ", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .UnaryMinus => {
-            try writer.print(" (-u", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .UnaryPlus => {
-            try writer.print(" (+", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .Object => {
-            try writer.print(" (obj", .{});
-            if (expr.children) |children| {
-                const length = expr.value.?.length;
-                for (0..length) |i| {
-                    try wasmPolishToString(&children[i], source, buffer);
-                }
-            }
-            try writer.print(")", .{});
-        },
-        .Arguments => {
-            try writer.print(" (args", .{});
-            if (expr.children) |children| {
-                const length = expr.value.?.length;
-                for (0..length) |i| {
-                    try wasmPolishToString(&children[i], source, buffer);
-                }
-            }
-            try writer.print(")", .{});
-        },
-        .Paren => {
-            try writer.print(" (paren", .{});
-            if (expr.children) |children| {
-                const length = expr.value.?.length;
-                for (0..length) |i| {
-                    try wasmPolishToString(&children[i], source, buffer);
-                }
-            }
-            try writer.print(")", .{});
-        },
-        .Comma => {
-            try writer.print(", ", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-        },
-        .FunctionCall => {
-            try writer.print(" (call", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .Juxt => {
-            try writer.print(" (juxt", .{});
-            if (expr.children) |children| {
-                try wasmPolishToString(&children[0], source, buffer);
-                try wasmPolishToString(&children[1], source, buffer);
-            }
-            try writer.print(")", .{});
-        },
-        .Invalid => {
-            try writer.print("INVALID ", .{});
-        },
-    }
-}
-
-// JSON tree serialization for WASM
-fn wasmTreeToJson(expr: *const Expression, source: []const u8, buffer: *std.ArrayList(u8)) !void {
-    const writer = buffer.writer();
-    try writer.print("{{\"type\":\"{s}\"", .{@tagName(expr.type)});
-
-    // Add position info
-    try writer.print(",\"pos\":{{\"from\":{d},\"to\":{d}}}", .{expr.pos.from, expr.pos.to});
-
-    // Add value if present
-    if (expr.value) |val| {
-        switch (val) {
-            .i => |i| try writer.print(",\"value\":{d}", .{i}),
-            .f => |f| try writer.print(",\"value\":{d}", .{f}),
-            .length => |len| try writer.print(",\"length\":{d}", .{len}),
-        }
-    }
-
-    // Add text for variables/operators/functions
-    if (expr.type == .Variable or expr.type == .FunctionName) {
-        try writer.print(",\"text\":\"{s}\"", .{source[expr.pos.from..expr.pos.to]});
-    }
-
-    // Add children recursively
-    if (expr.children) |children| {
-        try writer.print(",\"children\":[", .{});
-        const length = switch (expr.type) {
-            .Object, .Arguments, .Paren => expr.value.?.length,
-            .UnaryMinus, .UnaryPlus => 1,
-            else => 2, // Binary operations
-        };
-
-        for (0..length) |i| {
-            if (i > 0) try writer.print(",", .{});
-            try wasmTreeToJson(&children[i], source, buffer);
-        }
-        try writer.print("]", .{});
-    }
-
-    try writer.print("}}", .{});
-}
-
-export fn parseExpression(input_ptr: [*]const u8, input_len: usize) bool {
-    // Reset output
-    output_len = 0;
-
-    // Create a null-terminated string from the input
-    if (input_len >= output_buffer.len - 1) {
-        // Input too long, copy error message
-        const error_msg = "Error: Input too long";
-        @memcpy(output_buffer[0..error_msg.len], error_msg);
-        output_len = error_msg.len;
-        return false;
-    }
-
-    // Copy input and null-terminate
-    var input_buffer: [2048]u8 = undefined;
-    @memcpy(input_buffer[0..input_len], input_ptr[0..input_len]);
-    input_buffer[input_len] = 0;
-    const expr: [:0]const u8 = input_buffer[0..input_len :0];
-
-    // Create arena allocator
-    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
-    defer arena.deinit();
-    const allocator = arena.allocator();
-
-    // Parse the expression
-    const result = parseExpressionInternal(expr, allocator) catch |err| {
-        const error_msg = switch (err) {
-            error.UnexpectedToken => "Error: Unexpected token",
-            error.ExpectedSomething => "Error: Expected something",
-            error.UnmatchedParentheses => "Error: Unmatched parentheses",
-            error.EmptyParentheses => "Error: Empty parentheses",
-            error.InvalidOperator => "Error: Invalid operator",
-            error.OutOfMemory => "Error: Out of memory",
-            error.Overflow => "Error: Overflow",
-            error.InvalidCharacter => "Error: Invalid character",
-        };
-        @memcpy(output_buffer[0..error_msg.len], error_msg);
-        output_len = error_msg.len;
-        return false;
-    };
-
-    // Copy result to output buffer
-    if (result.len >= output_buffer.len) {
-        const error_msg = "Error: Result too long";
-        @memcpy(output_buffer[0..error_msg.len], error_msg);
-        output_len = error_msg.len;
-        return false;
-    }
-
-    @memcpy(output_buffer[0..result.len], result);
-    output_len = result.len;
-    return true;
-}
-
-// New export function for tree JSON output
-export fn parseExpressionToTree(input_ptr: [*]const u8, input_len: usize) bool {
-    // Reset output
-    output_len = 0;
-
-    // Create a null-terminated string from the input
-    if (input_len >= output_buffer.len - 1) {
-        // Input too long, copy error message
-        const error_msg = "Error: Input too long";
-        @memcpy(output_buffer[0..error_msg.len], error_msg);
-        output_len = error_msg.len;
-        return false;
-    }
-
-    // Copy input and null-terminate
-    var input_buffer: [2048]u8 = undefined;
-    @memcpy(input_buffer[0..input_len], input_ptr[0..input_len]);
-    input_buffer[input_len] = 0;
-    const expr: [:0]const u8 = input_buffer[0..input_len :0];
-
-    // Create arena allocator
-    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
-    defer arena.deinit();
-    const allocator = arena.allocator();
-
-    // Parse the expression
-    const result = parseExpressionToTreeInternal(expr, allocator) catch |err| {
-        const error_msg = switch (err) {
-            error.UnexpectedToken => "Error: Unexpected token",
-            error.ExpectedSomething => "Error: Expected something",
-            error.UnmatchedParentheses => "Error: Unmatched parentheses",
-            error.EmptyParentheses => "Error: Empty parentheses",
-            error.InvalidOperator => "Error: Invalid operator",
-            error.OutOfMemory => "Error: Out of memory",
-            error.Overflow => "Error: Overflow",
-            error.InvalidCharacter => "Error: Invalid character",
-        };
-        @memcpy(output_buffer[0..error_msg.len], error_msg);
-        output_len = error_msg.len;
-        return false;
-    };
-
-    // Copy result to output buffer
-    if (result.len >= output_buffer.len) {
-        const error_msg = "Error: Result too long";
-        @memcpy(output_buffer[0..error_msg.len], error_msg);
-        output_len = error_msg.len;
-        return false;
-    }
-
-    @memcpy(output_buffer[0..result.len], result);
-    output_len = result.len;
-    return true;
-}
-
-fn parseExpressionInternal(expr: [:0]const u8, allocator: std.mem.Allocator) ![]const u8 {
-    var token_stream: TokenStream = TokenStream.init(allocator);
-    defer token_stream.deinit();
-
-    var tokenizer = Tokenizer.init(expr);
-
-    while (tokenizer.next()) |token| {
-        if (token.tag == .Eof) {
-            break;
-        }
-        try token_stream.append(token);
-    }
-
-    var parser = Parser.init(token_stream, expr, allocator);
-    const ast = try parser.parse(0);
-
-    var polish_buffer = ArrayList(u8).init(allocator);
-    try wasmPolishToString(&ast, expr, &polish_buffer);
-
-    const result = std.mem.trim(u8, polish_buffer.items, " ");
-
-    const result_copy = try allocator.dupe(u8, result);
-    return result_copy;
-}
-
-fn parseExpressionToTreeInternal(expr: [:0]const u8, allocator: std.mem.Allocator) ![]const u8 {
-    var token_stream: TokenStream = TokenStream.init(allocator);
-    defer token_stream.deinit();
-
-    var tokenizer = Tokenizer.init(expr);
-
-    while (tokenizer.next()) |token| {
-        if (token.tag == .Eof) {
-            break;
-        }
-        try token_stream.append(token);
-    }
-
-    var parser = Parser.init(token_stream, expr, allocator);
-    const ast = try parser.parse(0);
-
-    var json_buffer = ArrayList(u8).init(allocator);
-    try wasmTreeToJson(&ast, expr, &json_buffer);
-
-    const result = std.mem.trim(u8, json_buffer.items, " ");
-
-    const result_copy = try allocator.dupe(u8, result);
-    return result_copy;
-}