From 713ea054a998f58cf828e35f542888ed94c1e718 Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Thu, 6 Mar 2025 20:04:23 +0100
Subject: [PATCH 01/13] define types to allow building a stateful lexer

---
 packages/ts-parsec/src/Lexer.ts | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/packages/ts-parsec/src/Lexer.ts b/packages/ts-parsec/src/Lexer.ts
index 8f82ed4..6261a03 100644
--- a/packages/ts-parsec/src/Lexer.ts
+++ b/packages/ts-parsec/src/Lexer.ts
@@ -83,8 +83,11 @@ class TokenImpl<T> implements Token<T> {
     }
 }
 
+type LexerState<T> = [boolean, RegExp, T, LexerState<T> | "pop"][];
+type TopLevelLexerRule<T> = [boolean, RegExp, T, LexerState<T>?];
+
 class LexerImpl<T> implements Lexer<T> {
-    constructor(public rules: [boolean, RegExp, T][]) {
+    constructor(public rules: TopLevelLexerRule<T>[]) {
         for (const rule of this.rules) {
             if (rule[1].source[0] !== '^') {
                 throw new Error(`Regular expression patterns for a tokenizer should start with "^": ${rule[1].source}`);
@@ -155,6 +158,6 @@ class LexerImpl<T> implements Lexer<T> {
     }
 }
 
-export function buildLexer<T>(rules: [boolean, RegExp, T][]): Lexer<T> {
+export function buildLexer<T>(rules: TopLevelLexerRule<T>[]): Lexer<T> {
     return new LexerImpl<T>(rules);
 }

From 218a1f7ab054d03845916e433e78d326e94449c8 Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Thu, 6 Mar 2025 20:26:26 +0100
Subject: [PATCH 02/13] analyze the rules in nested lexer states

---
 packages/ts-parsec/src/Lexer.ts | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/packages/ts-parsec/src/Lexer.ts b/packages/ts-parsec/src/Lexer.ts
index 6261a03..fd57000 100644
--- a/packages/ts-parsec/src/Lexer.ts
+++ b/packages/ts-parsec/src/Lexer.ts
@@ -83,19 +83,27 @@ class TokenImpl<T> implements Token<T> {
     }
 }
 
-type LexerState<T> = [boolean, RegExp, T, LexerState<T> | "pop"][];
+type LexerState<T> = [boolean, RegExp, T, (LexerState<T> | "pop")?][];
 type TopLevelLexerRule<T> = [boolean, RegExp, T, LexerState<T>?];
 
+function analyzeLexerRules<T>(rules: LexerState<T>) {
+    for (const [_, regex, _, state] of rules) {
+        if (regex.source[0] !== "^") {
+            throw new Error(`Regular expression patterns for a tokenizer should start with "^": ${regex.source}`);
+        }
+        if (!regex.global) {
+            throw new Error(`Regular expression patterns for a tokenizer should be global: ${regex.source}`);
+        }
+        if (state !== undefined && state !== "pop") {
+            analyzeLexerRules(state);
+        }
+    }
+}
+
 class LexerImpl<T> implements Lexer<T> {
     constructor(public rules: TopLevelLexerRule<T>[]) {
-        for (const rule of this.rules) {
-            if (rule[1].source[0] !== '^') {
-                throw new Error(`Regular expression patterns for a tokenizer should start with "^": ${rule[1].source}`);
-            }
-            if (!rule[1].global) {
-                throw new Error(`Regular expression patterns for a tokenizer should be global: ${rule[1].source}`);
-            }
-        }
+        // Casting `rules` to `LexerState` is safe because `LexerState` is a superset of `TopLevelLexerRule`
+        analyzeLexerRules(rules as LexerState<T>);
     }
 
     public parse(input: string): TokenImpl<T> | undefined {

From ccba6f6b9a3188f7dd902f6d4c0fb81f1684bd96 Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Thu, 6 Mar 2025 21:01:05 +0100
Subject: [PATCH 03/13] Implement the stateful lexer

---
 packages/ts-parsec/src/Lexer.ts | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/packages/ts-parsec/src/Lexer.ts b/packages/ts-parsec/src/Lexer.ts
index fd57000..758fe8a 100644
--- a/packages/ts-parsec/src/Lexer.ts
+++ b/packages/ts-parsec/src/Lexer.ts
@@ -101,6 +101,8 @@ class LexerImpl<T> implements Lexer<T> {
+    private states: LexerState<T>[] = [this.rules];
+
     constructor(public rules: TopLevelLexerRule<T>[]) {
         // Casting `rules` to `LexerState` is safe because `LexerState` is a superset of `TopLevelLexerRule`
         analyzeLexerRules(rules as LexerState<T>);
     }
 
     public parse(input: string): TokenImpl<T> | undefined {
@@ -117,7 +119,9 @@ class LexerImpl<T> implements Lexer<T> {
 
         const subString = input.substr(indexStart);
         let result: TokenImpl<T> | undefined;
-        for (const [keep, regexp, kind] of this.rules) {
+        const currentRuleset = this.states[this.states.length - 1];
+        let nextState: LexerState<T> | "pop" | undefined = undefined;
+        for (const [keep, regexp, kind, next] of currentRuleset) {
             regexp.lastIndex = 0;
             if (regexp.test(subString)) {
                 const text = subString.substr(0, regexp.lastIndex);
@@ -134,6 +138,7 @@ class LexerImpl<T> implements Lexer<T> {
                 const newResult = new TokenImpl<T>(this, input, kind, text, { index: indexStart, rowBegin, columnBegin, rowEnd, columnEnd }, keep);
                 if (result === undefined || result.text.length < newResult.text.length) {
                     result = newResult;
+                    nextState = next;
                 }
             }
         }
@@ -144,6 +149,11 @@ class LexerImpl<T> implements Lexer<T> {
                 `Unable to tokenize the rest of the input: ${input.substr(indexStart)}`
             );
         } else {
+            if (nextState === "pop") {
+                this.states.pop();
+            } else if (nextState !== undefined) {
+                this.states.push(nextState);
+            }
             return result;
         }
     }

From 52941079f9f2d50e477c5aa8044f3f535bd06bfa Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Thu, 6 Mar 2025 21:02:19 +0100
Subject: [PATCH 04/13] Resolve naming conflict

---
 packages/ts-parsec/src/Lexer.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/ts-parsec/src/Lexer.ts b/packages/ts-parsec/src/Lexer.ts
index 758fe8a..36a8ea5 100644
--- a/packages/ts-parsec/src/Lexer.ts
+++ b/packages/ts-parsec/src/Lexer.ts
@@ -87,7 +87,7 @@ type LexerState<T> = [boolean, RegExp, T, (LexerState<T> | "pop")?][];
 type TopLevelLexerRule<T> = [boolean, RegExp, T, LexerState<T>?];
 
 function analyzeLexerRules<T>(rules: LexerState<T>) {
-    for (const [_, regex, _, state] of rules) {
+    for (const [_keep, regex, _kind, state] of rules) {
         if (regex.source[0] !== "^") {
             throw new Error(`Regular expression patterns for a tokenizer should start with "^": ${regex.source}`);
         }

From c44c98ef875f3888adf64457093a19929494b8e9 Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Thu, 6 Mar 2025 21:17:44 +0100
Subject: [PATCH 05/13] apply tslint suggestions to follow project rules

---
 packages/ts-parsec/src/Lexer.ts | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/packages/ts-parsec/src/Lexer.ts b/packages/ts-parsec/src/Lexer.ts
index 36a8ea5..40688bb 100644
--- a/packages/ts-parsec/src/Lexer.ts
+++ b/packages/ts-parsec/src/Lexer.ts
@@ -83,18 +83,18 @@ class TokenImpl<T> implements Token<T> {
     }
 }
 
-type LexerState<T> = [boolean, RegExp, T, (LexerState<T> | "pop")?][];
+type LexerState<T> = [boolean, RegExp, T, (LexerState<T> | 'pop')?][];
 type TopLevelLexerRule<T> = [boolean, RegExp, T, LexerState<T>?];
 
-function analyzeLexerRules<T>(rules: LexerState<T>) {
-    for (const [_keep, regex, _kind, state] of rules) {
-        if (regex.source[0] !== "^") {
-            throw new Error(`Regular expression patterns for a tokenizer should start with "^": ${regex.source}`);
+function analyzeLexerRules<T>(rules: LexerState<T>): void {
+    for (const [, regex, , state] of rules) {
+        if (regex.source[0] !== '^') {
+            throw new Error(`Regular expression patterns for a tokenizer should start with '^': ${regex.source}`);
         }
         if (!regex.global) {
             throw new Error(`Regular expression patterns for a tokenizer should be global: ${regex.source}`);
         }
-        if (state !== undefined && state !== "pop") {
+        if (state !== undefined && state !== 'pop') {
             analyzeLexerRules(state);
         }
     }
@@ -105,7 +105,7 @@ class LexerImpl<T> implements Lexer<T> {
 
     constructor(public rules: TopLevelLexerRule<T>[]) {
         // Casting `rules` to `LexerState` is safe because `LexerState` is a superset of `TopLevelLexerRule`
-        analyzeLexerRules(rules as LexerState<T>);
+        analyzeLexerRules(rules);
     }
 
     public parse(input: string): TokenImpl<T> | undefined {
@@ -120,7 +120,7 @@ class LexerImpl<T> implements Lexer<T> {
         const subString = input.substr(indexStart);
         let result: TokenImpl<T> | undefined;
         const currentRuleset = this.states[this.states.length - 1];
-        let nextState: LexerState<T> | "pop" | undefined = undefined;
+        let nextState: LexerState<T> | 'pop' | undefined;
         for (const [keep, regexp, kind, next] of currentRuleset) {
             regexp.lastIndex = 0;
             if (regexp.test(subString)) {
@@ -149,7 +149,7 @@ class LexerImpl<T> implements Lexer<T> {
                 `Unable to tokenize the rest of the input: ${input.substr(indexStart)}`
             );
         } else {
-            if (nextState === "pop") {
+            if (nextState === 'pop') {
                 this.states.pop();
             } else if (nextState !== undefined) {
                 this.states.push(nextState);

From 4c174acc66a9f04164d39bd6a8464f04997a0d68 Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Fri, 7 Mar 2025 00:45:15 +0100
Subject: [PATCH 06/13] Test stateful tokenization on c-style block comments

---
 packages/ts-parsec/src/Lexer.ts     |  4 +++
 packages/tspc-test/src/TestLexer.ts | 44 +++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/packages/ts-parsec/src/Lexer.ts b/packages/ts-parsec/src/Lexer.ts
index 40688bb..6cb941b 100644
--- a/packages/ts-parsec/src/Lexer.ts
+++ b/packages/ts-parsec/src/Lexer.ts
@@ -176,6 +176,10 @@ class LexerImpl<T> implements Lexer<T> {
     }
 }
 
+export function buildLexerState<T>(rules: LexerState<T>): LexerState<T> {
+    return rules;
+}
+
 export function buildLexer<T>(rules: TopLevelLexerRule<T>[]): Lexer<T> {
     return new LexerImpl<T>(rules);
 }
diff --git a/packages/tspc-test/src/TestLexer.ts b/packages/tspc-test/src/TestLexer.ts
index 59f65ef..d89dce5 100644
--- a/packages/tspc-test/src/TestLexer.ts
+++ b/packages/tspc-test/src/TestLexer.ts
@@ -133,3 +133,47 @@ test(`Lexer: identifiers and numbers with discardable commas and spaces`, () =>
 
     assert.strictEqual(token, undefined);
 });
+
+test(`Lexer: C-style block comments via lexer states`, () => {
+    enum TokenKind {
+        CommentBegin,
+        CommentEnd,
+        CommentContents,
+        Number,
+        Identifier,
+        Comma,
+        Space,
+    }
+
+    const BlockComment = buildLexerState([
+        [false, /^\*\//g, TokenKind.CommentEnd, 'pop'],
+        [true, /^[^*]+/g, TokenKind.CommentContents],
+    ]);
+
+    const lexer = buildLexer([
+        [false, /^\/\*/g, TokenKind.CommentBegin, BlockComment],
+        [true, /^\d+/g, TokenKind.Number],
+        [true, /^[a-zA-Z]\w*/g, TokenKind.Identifier],
+        [false, /^,/g, TokenKind.Comma],
+        [false, /^\s+/g, TokenKind.Space]
+    ]);
+
+    let token = lexer.parse(`123 /* abc */ def`);
+
+    token = notUndefined(token);
+    assert.strictEqual(token.kind, TokenKind.Number);
+    assert.strictEqual(token.text, '123');
+    token = token.next;
+
+    token = notUndefined(token);
+    assert.strictEqual(token.kind, TokenKind.CommentContents);
+    assert.strictEqual(token.text, ' abc ');
+    token = token.next;
+
+    token = notUndefined(token);
+    assert.strictEqual(token.kind, TokenKind.Identifier);
+    assert.strictEqual(token.text, 'def');
+    token = token.next;
+
+    assert.strictEqual(token, undefined);
+});

From 44255c249b32acdb443231d9f3e6281e9ba4a3bc Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Fri, 7 Mar 2025 01:04:59 +0100
Subject: [PATCH 07/13] Don't differentiate between top-level rules and states on the type level

---
 packages/ts-parsec/src/Lexer.ts     | 27 ++++++++++++++-------------
 packages/tspc-test/src/TestLexer.ts |  6 +++---
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/packages/ts-parsec/src/Lexer.ts b/packages/ts-parsec/src/Lexer.ts
index 6cb941b..597b524 100644
--- a/packages/ts-parsec/src/Lexer.ts
+++ b/packages/ts-parsec/src/Lexer.ts
@@ -83,10 +83,10 @@ class TokenImpl<T> implements Token<T> {
     }
 }
 
-type LexerState<T> = [boolean, RegExp, T, (LexerState<T> | 'pop')?][];
-type TopLevelLexerRule<T> = [boolean, RegExp, T, LexerState<T>?];
+export type LexerRule<T> = [boolean, RegExp, T, (LexerRule<T>[] | 'pop')?];
+export type LexerState<T> = LexerRule<T>[];
 
-function analyzeLexerRules<T>(rules: LexerState<T>): void {
+function analyzeLexerRules<T>(rules: LexerState<T>, topLevel: boolean): void {
     for (const [, regex, , state] of rules) {
         if (regex.source[0] !== '^') {
             throw new Error(`Regular expression patterns for a tokenizer should start with '^': ${regex.source}`);
@@ -94,8 +94,14 @@ function analyzeLexerRules<T>(rules: LexerState<T>): void {
         if (!regex.global) {
             throw new Error(`Regular expression patterns for a tokenizer should be global: ${regex.source}`);
         }
-        if (state !== undefined && state !== 'pop') {
-            analyzeLexerRules(state);
+        if (state !== undefined) {
+            if (state === 'pop') {
+                if (topLevel) {
+                    throw new Error(`The 'pop' directive is not allowed in the top-level lexer state`);
+                }
+            } else {
+                analyzeLexerRules(state, false);
+            }
         }
     }
 }
@@ -103,9 +109,8 @@ function analyzeLexerRules<T>(rules: LexerState<T>): void {
 class LexerImpl<T> implements Lexer<T> {
     private states: LexerState<T>[] = [this.rules];
 
-    constructor(public rules: TopLevelLexerRule<T>[]) {
-        // Casting `rules` to `LexerState` is safe because `LexerState` is a superset of `TopLevelLexerRule`
-        analyzeLexerRules(rules);
+    constructor(public rules: LexerState<T>) {
+        analyzeLexerRules(rules, true);
     }
 
     public parse(input: string): TokenImpl<T> | undefined {
@@ -176,10 +181,6 @@ class LexerImpl<T> implements Lexer<T> {
     }
 }
 
-export function buildLexerState<T>(rules: LexerState<T>): LexerState<T> {
-    return rules;
-}
-
-export function buildLexer<T>(rules: TopLevelLexerRule<T>[]): Lexer<T> {
+export function buildLexer<T>(rules: LexerState<T>): Lexer<T> {
     return new LexerImpl<T>(rules);
 }
diff --git a/packages/tspc-test/src/TestLexer.ts b/packages/tspc-test/src/TestLexer.ts
index d89dce5..672615b 100644
--- a/packages/tspc-test/src/TestLexer.ts
+++ b/packages/tspc-test/src/TestLexer.ts
@@ -4,7 +4,7 @@
 // tslint:disable:trailing-comma
 
 import * as assert from 'assert';
-import { buildLexer } from 'typescript-parsec';
+import { buildLexer, LexerState } from 'typescript-parsec';
 
 function notUndefined<T>(t: T | undefined): T {
     assert.notStrictEqual(t, undefined);
@@ -145,10 +145,10 @@ test(`Lexer: C-style block comments via lexer states`, () => {
         Comma,
         Space,
     }
 
-    const BlockComment = buildLexerState([
+    const BlockComment: LexerState<TokenKind> = [
         [false, /^\*\//g, TokenKind.CommentEnd, 'pop'],
         [true, /^[^*]+/g, TokenKind.CommentContents],
-    ]);
+    ];
 
     const lexer = buildLexer([

From 429cd450ec1fb0f1ab188866259b17754342d30d Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Fri, 7 Mar 2025 13:06:48 +0100
Subject: [PATCH 08/13] Test the stateful lexer with nested block comments

---
 packages/tspc-test/src/TestLexer.ts | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/packages/tspc-test/src/TestLexer.ts b/packages/tspc-test/src/TestLexer.ts
index 672615b..2c7de11 100644
--- a/packages/tspc-test/src/TestLexer.ts
+++ b/packages/tspc-test/src/TestLexer.ts
@@ -134,7 +134,7 @@ test(`Lexer: identifiers and numbers with discardable commas and spaces`, () =>
     assert.strictEqual(token, undefined);
 });
 
-test(`Lexer: C-style block comments via lexer states`, () => {
+test(`Lexer: C-style nested block comments via lexer states`, () => {
     enum TokenKind {
         CommentBegin,
         CommentEnd,
@@ -146,8 +146,9 @@ test(`Lexer: C-style block comments via lexer states`, () => {
     }
 
     const BlockComment: LexerState<TokenKind> = [
+        [false, /^\/\*/g, TokenKind.CommentBegin, 'push'], // nested comment
         [false, /^\*\//g, TokenKind.CommentEnd, 'pop'],
-        [true, /^[^*]+/g, TokenKind.CommentContents],
+        [true, /^(?:(?!\/\*|\*\/).)+/g, TokenKind.CommentContents],
     ];
 
     const lexer = buildLexer([
@@ -155,10 +156,10 @@ test(`Lexer: C-style block comments via lexer states`, () => {
         [true, /^\d+/g, TokenKind.Number],
         [true, /^[a-zA-Z]\w*/g, TokenKind.Identifier],
         [false, /^,/g, TokenKind.Comma],
-        [false, /^\s+/g, TokenKind.Space]
+        [false, /^\s+/g, TokenKind.Space],
     ]);
 
-    let token = lexer.parse(`123 /* abc */ def`);
+    let token = lexer.parse(`123 /* abc /*456*/*/ def`);
 
     token = notUndefined(token);
     assert.strictEqual(token.kind, TokenKind.Number);
@@ -170,6 +171,11 @@ test(`Lexer: C-style block comments via lexer states`, () => {
     assert.strictEqual(token.text, ' abc ');
     token = token.next;
 
+    token = notUndefined(token);
+    assert.strictEqual(token.kind, TokenKind.CommentContents);
+    assert.strictEqual(token.text, '456');
+    token = token.next;
+
     token = notUndefined(token);
     assert.strictEqual(token.kind, TokenKind.Identifier);
     assert.strictEqual(token.text, 'def');

From a519209633549d775040ba434653cc0372ccf797 Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Fri, 7 Mar 2025 13:07:28 +0100
Subject: [PATCH 09/13] Support pushing the current state to the stack again

---
 packages/ts-parsec/src/Lexer.ts | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/packages/ts-parsec/src/Lexer.ts b/packages/ts-parsec/src/Lexer.ts
index 597b524..1e4e7ac 100644
--- a/packages/ts-parsec/src/Lexer.ts
+++ b/packages/ts-parsec/src/Lexer.ts
@@ -83,7 +83,7 @@ class TokenImpl<T> implements Token<T> {
     }
 }
 
-export type LexerRule<T> = [boolean, RegExp, T, (LexerRule<T>[] | 'pop')?];
+export type LexerRule<T> = [boolean, RegExp, T, (LexerRule<T>[] | 'push' | 'pop')?];
 export type LexerState<T> = LexerRule<T>[];
 
 function analyzeLexerRules<T>(rules: LexerState<T>, topLevel: boolean): void {
@@ -95,9 +95,9 @@ function analyzeLexerRules<T>(rules: LexerState<T>, topLevel: boolean): void {
             throw new Error(`Regular expression patterns for a tokenizer should be global: ${regex.source}`);
         }
         if (state !== undefined) {
-            if (state === 'pop') {
+            if (state === 'pop' || state === 'push') {
                 if (topLevel) {
-                    throw new Error(`The 'pop' directive is not allowed in the top-level lexer state`);
+                    throw new Error(`The 'push' and 'pop' directives are not allowed in the top-level lexer state`);
                 }
             } else {
                 analyzeLexerRules(state, false);
@@ -125,7 +125,7 @@ class LexerImpl<T> implements Lexer<T> {
         const subString = input.substr(indexStart);
         let result: TokenImpl<T> | undefined;
         const currentRuleset = this.states[this.states.length - 1];
-        let nextState: LexerState<T> | 'pop' | undefined;
+        let nextState: LexerState<T> | 'push' | 'pop' | undefined;
         for (const [keep, regexp, kind, next] of currentRuleset) {
             regexp.lastIndex = 0;
             if (regexp.test(subString)) {
@@ -156,6 +156,8 @@ class LexerImpl<T> implements Lexer<T> {
         } else {
             if (nextState === 'pop') {
                 this.states.pop();
+            } else if(nextState === 'push') {
+                this.states.push(currentRuleset);
             } else if (nextState !== undefined) {
                 this.states.push(nextState);
             }

From 1b81b3e2baa952d69ccbe64d72750601ca456986 Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Fri, 7 Mar 2025 13:09:24 +0100
Subject: [PATCH 10/13] apply tslint suggestions

---
 doc/Tokenizer.md                    | 44 +++++++++++++++++++++++++++++
 packages/ts-parsec/src/Lexer.ts     |  2 +-
 packages/tspc-test/src/TestLexer.ts |  4 +--
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/doc/Tokenizer.md b/doc/Tokenizer.md
index 4b7b5b7..8151500 100644
--- a/doc/Tokenizer.md
+++ b/doc/Tokenizer.md
@@ -51,3 +51,47 @@ For some languages, like VB.NET, it has a context sensitive tokenizer. You could
 ## NOTE
 
 `buildLexer` only accepts regular expressions like this: `/^xxx/g`.
+
+## Stateful tokenization
+
+Internally, the lexer maintains a stack of states that you can grow. A state is
+defined as the set of rules that the lexer uses to tokenize the input. For
+instance, in the examples shown above, `buildLexer` was used to create a lexer
+with a single state with three rules each. Stateful tokenization is useful if
+you want to provide different rules to the lexer based on previously matched
+tokens.
+
+The following example shows a lexer that tokenizes nested block comments. Start
+by looking at the set of top-level rules defined by `buildLexer`. These rules
+look standard, except for the rule that recognizes a `TokenKind.CommentBegin`.
+When a rule contains a fourth element, and the rule is matched, it means that
+the lexer will switch to a different state. In this case, the fourth element
+tells us that the lexer will switch to the `BlockComment` state by pushing the
+state to its internal stack. The definition of a state works almost analogously
+to the definition of the top-level state using `buildLexer`. When the tokenizer
+switches to another state, only the rules defined inside that state apply
+until the tokenizer leaves the state again. To leave a state, the fourth element
+of a rule can be set to `'pop'`, which pops the state off of the lexer's
+internal stack. In case you wish to push the same state to the stack that you
+are already in, use the `'push'` directive. When the fourth element of a rule is
+omitted, the lexer will remain in its current state.
+
+```typescript
+const blockComment: LexerState<TokenKind> = [
+    [false, /^\/\*/g, TokenKind.CommentBegin, "push"], // nested comment
+    [false, /^\*\//g, TokenKind.CommentEnd, "pop"],
+    [true, /^(?:(?!\/\*|\*\/).)+/g, TokenKind.CommentContents],
+];
+
+const tokenizer = buildLexer([
+    [false, /^\/\*/g, TokenKind.CommentBegin, blockComment],
+    [true, /^\d+/g, TokenKind.Number],
+    [true, /^[a-zA-Z]\w*/g, TokenKind.Identifier],
+    [false, /^,/g, TokenKind.Comma],
+    [false, /^\s+/g, TokenKind.Space],
+]);
+```
+
+Note: Using `'push'` or `'pop'` is not allowed in the top-level state. If you
+wish to switch states from there, you need to provide a concrete instance of the
+new state that should be pushed.
diff --git a/packages/ts-parsec/src/Lexer.ts b/packages/ts-parsec/src/Lexer.ts
index 1e4e7ac..b807d1f 100644
--- a/packages/ts-parsec/src/Lexer.ts
+++ b/packages/ts-parsec/src/Lexer.ts
@@ -156,7 +156,7 @@ class LexerImpl<T> implements Lexer<T> {
         } else {
             if (nextState === 'pop') {
                 this.states.pop();
-            } else if(nextState === 'push') {
+            } else if (nextState === 'push') {
                 this.states.push(currentRuleset);
             } else if (nextState !== undefined) {
                 this.states.push(nextState);
diff --git a/packages/tspc-test/src/TestLexer.ts b/packages/tspc-test/src/TestLexer.ts
index 2c7de11..653e4d9 100644
--- a/packages/tspc-test/src/TestLexer.ts
+++ b/packages/tspc-test/src/TestLexer.ts
@@ -145,14 +145,14 @@ test(`Lexer: C-style nested block comments via lexer states`, () => {
         Space,
     }
 
-    const BlockComment: LexerState<TokenKind> = [
+    const blockComment: LexerState<TokenKind> = [
         [false, /^\/\*/g, TokenKind.CommentBegin, 'push'], // nested comment
         [false, /^\*\//g, TokenKind.CommentEnd, 'pop'],
         [true, /^(?:(?!\/\*|\*\/).)+/g, TokenKind.CommentContents],
     ];
 
     const lexer = buildLexer([
-        [false, /^\/\*/g, TokenKind.CommentBegin, BlockComment],
+        [false, /^\/\*/g, TokenKind.CommentBegin, blockComment],
         [true, /^\d+/g, TokenKind.Number],
         [true, /^[a-zA-Z]\w*/g, TokenKind.Identifier],
         [false, /^,/g, TokenKind.Comma],

From c67989647546305d2f5e8b2ef6643776bd9f88f8 Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Tue, 1 Apr 2025 16:38:30 +0200
Subject: [PATCH 11/13] safely analyze recursively dependent lexer states

---
 packages/ts-parsec/src/Lexer.ts | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/packages/ts-parsec/src/Lexer.ts b/packages/ts-parsec/src/Lexer.ts
index b807d1f..e36d10c 100644
--- a/packages/ts-parsec/src/Lexer.ts
+++ b/packages/ts-parsec/src/Lexer.ts
@@ -86,7 +86,12 @@ class TokenImpl<T> implements Token<T> {
 export type LexerRule<T> = [boolean, RegExp, T, (LexerRule<T>[] | 'push' | 'pop')?];
 export type LexerState<T> = LexerRule<T>[];
 
-function analyzeLexerRules<T>(rules: LexerState<T>, topLevel: boolean): void {
+function analyzeLexerRules<T>(
+    rules: LexerState<T>,
+    topLevel: boolean,
+    memo: Set<LexerState<T>> = new Set(),
+): void {
+    memo.add(rules);
     for (const [, regex, , state] of rules) {
         if (regex.source[0] !== '^') {
             throw new Error(`Regular expression patterns for a tokenizer should start with '^': ${regex.source}`);
@@ -100,7 +105,10 @@ function analyzeLexerRules<T>(rules: LexerState<T>, topLevel: boolean): void {
                     throw new Error(`The 'push' and 'pop' directives are not allowed in the top-level lexer state`);
                 }
             } else {
-                analyzeLexerRules(state, false);
+                if (memo.has(state)) {
+                    return;
+                }
+                analyzeLexerRules(state, false, memo);
             }
         }
     }
@@ -186,3 +194,18 @@ class LexerImpl<T> implements Lexer<T> {
 export function buildLexer<T>(rules: LexerState<T>): Lexer<T> {
     return new LexerImpl<T>(rules);
 }
+
+// TESTING
+
+const statements: LexerState<string> = [];
+const stringLiteral: LexerState<string> = [];
+
+statements.push(
+    [true, /^"/g, "stringDelimiter", stringLiteral],
+);
+
+stringLiteral.push(
+    [true, /^\${/g, "stringInterpolationDelimiter", statements],
+);
+
+buildLexer(statements);

From 23e8f781c2c26c2ca2ea2a4466b502f8117b046a Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Tue, 1 Apr 2025 16:56:51 +0200
Subject: [PATCH 12/13] allow push and pop directives in the top-level state

---
 packages/ts-parsec/src/Lexer.ts | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/packages/ts-parsec/src/Lexer.ts b/packages/ts-parsec/src/Lexer.ts
index e36d10c..b9decee 100644
--- a/packages/ts-parsec/src/Lexer.ts
+++ b/packages/ts-parsec/src/Lexer.ts
@@ -88,7 +88,6 @@ export type LexerState<T> = LexerRule<T>[];
 
 function analyzeLexerRules<T>(
     rules: LexerState<T>,
-    topLevel: boolean,
     memo: Set<LexerState<T>> = new Set(),
 ): void {
     memo.add(rules);
@@ -100,15 +99,11 @@ function analyzeLexerRules<T>(
             throw new Error(`Regular expression patterns for a tokenizer should be global: ${regex.source}`);
         }
         if (state !== undefined) {
-            if (state === 'pop' || state === 'push') {
-                if (topLevel) {
-                    throw new Error(`The 'push' and 'pop' directives are not allowed in the top-level lexer state`);
-                }
-            } else {
+            if (state !== 'pop' && state !== 'push') {
                 if (memo.has(state)) {
                     return;
                 }
-                analyzeLexerRules(state, false, memo);
+                analyzeLexerRules(state, memo);
             }
         }
     }
@@ -118,7 +113,7 @@ class LexerImpl<T> implements Lexer<T> {
     private states: LexerState<T>[] = [this.rules];
 
     constructor(public rules: LexerState<T>) {
-        analyzeLexerRules(rules, true);
+        analyzeLexerRules(rules);
     }
 
     public parse(input: string): TokenImpl<T> | undefined {

From 6a4c0841096d3cc9454a1dae155d2928f153b210 Mon Sep 17 00:00:00 2001
From: Leon Puchinger
Date: Wed, 2 Apr 2025 23:39:23 +0200
Subject: [PATCH 13/13] update documentation

---
 doc/Tokenizer.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/doc/Tokenizer.md b/doc/Tokenizer.md
index 8151500..d8022d6 100644
--- a/doc/Tokenizer.md
+++ b/doc/Tokenizer.md
@@ -91,7 +91,3 @@ const tokenizer = buildLexer([
     [false, /^\s+/g, TokenKind.Space],
 ]);
 ```
-
-Note: Using `'push'` or `'pop'` is not allowed in the top-level state. If you
-wish to switch states from there, you need to provide a concrete instance of the
-new state that should be pushed.
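
As a closing illustration, here is a minimal sketch of the mutually recursive states that patches 11 and 12 make possible together, in the spirit of the `// TESTING` snippet left in patch 11: a top-level statement state and a string-literal state that push each other to tokenize `${...}` interpolation. The token kinds, regular expressions, and sample input below are illustrative assumptions rather than code from the library or its tests, and the sketch does not guard against a stray `}` popping the base state off the stack.

```typescript
import { buildLexer, LexerState } from 'typescript-parsec';

enum TokenKind {
    StringDelimiter,
    StringContents,
    InterpolationBegin,
    InterpolationEnd,
    Identifier,
    Space,
}

// Two mutually recursive states; `statements` doubles as the top-level state.
const statements: LexerState<TokenKind> = [];
const stringLiteral: LexerState<TokenKind> = [];

statements.push(
    // An opening quote pushes the string-literal state.
    [false, /^"/g, TokenKind.StringDelimiter, stringLiteral],
    // A closing brace pops back into the enclosing string literal.
    // Using 'pop' here, in the top-level state, is allowed since patch 12.
    [false, /^\}/g, TokenKind.InterpolationEnd, 'pop'],
    [true, /^[a-zA-Z]\w*/g, TokenKind.Identifier],
    [false, /^\s+/g, TokenKind.Space],
);

stringLiteral.push(
    // `${` re-enters the statement state inside the literal.
    [false, /^\$\{/g, TokenKind.InterpolationBegin, statements],
    // The closing quote leaves the string literal.
    [false, /^"/g, TokenKind.StringDelimiter, 'pop'],
    [true, /^(?:(?!\$\{|").)+/g, TokenKind.StringContents],
);

// The memoized analysis added in patch 11 keeps buildLexer from recursing
// forever over the statements <-> stringLiteral cycle.
const lexer = buildLexer(statements);
lexer.parse('greet "hello ${name}"');
```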