From e014e64a2547403393212e06beb32d71a538d061 Mon Sep 17 00:00:00 2001 From: hrb-hub Date: Tue, 6 May 2025 13:12:22 +0200 Subject: [PATCH] Fix tokenizing of comments starting or ending with more than two -- Comments ending with more than two dashes are not terminated correctly, resulting in markup following these comments to be included. `
<!--Comment--->
<p>Markup</p>
` is tokenized as a comment of `Comment--->
<p>Markup</p>
`. Comments starting with three dashes have their first character clipped. `<!---Comment-->` is tokenized as a comment of `omment`. --- src/evented-tokenizer.ts | 6 ++++-- tests/tokenizer-tests.ts | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/evented-tokenizer.ts b/src/evented-tokenizer.ts index d387df7..4227ba4 100644 --- a/src/evented-tokenizer.ts +++ b/src/evented-tokenizer.ts @@ -379,7 +379,7 @@ export default class EventedTokenizer { commentStart() { let char = this.consume(); - if (char === '-') { + if (char === '-' && this.peek() === '-') { this.transitionTo(TokenizerState.commentStartDash); } else if (char === '>') { this.delegate.finishComment(); @@ -417,7 +417,9 @@ export default class EventedTokenizer { commentEndDash() { let char = this.consume(); - if (char === '-') { + if (char === '-' && this.peek() === '-') { + this.delegate.appendToCommentData(char); + } else if (char === '-') { this.transitionTo(TokenizerState.commentEnd); } else { this.delegate.appendToCommentData('-' + char); diff --git a/tests/tokenizer-tests.ts b/tests/tokenizer-tests.ts index 58fe989..54f423d 100644 --- a/tests/tokenizer-tests.ts +++ b/tests/tokenizer-tests.ts @@ -324,11 +324,48 @@ QUnit.test('A comment that immediately closes', function(assert) { QUnit.test('A comment that contains a -', function(assert) { let tokens = tokenize(''); assert.deepEqual(tokens, [comment(' A perfectly legal - appears ')]); + + tokens = tokenize(''); + assert.deepEqual(tokens, [comment(' A perfectly legal - ')]); + + tokens = tokenize(''); + assert.deepEqual(tokens, [comment(' A perfectly legal- ')]); }); QUnit.test('A (buggy) comment that contains two --', function(assert) { let tokens = tokenize(''); assert.deepEqual(tokens, [comment(' A questionable -- appears ')]); + + tokens = tokenize(''); + assert.deepEqual(tokens, [comment(' A questionable -- ')]); + + tokens = tokenize(''); + assert.deepEqual(tokens, [comment(' A questionable-- ')]); +}); + 
+QUnit.test('A (buggy) comment ending with more than two --', function(assert) { + let tokens = tokenize(''); + assert.deepEqual(tokens, [comment(' A questionable but legal comment -')]); + + tokens = tokenize(''); + assert.deepEqual(tokens, [comment(' A questionable but legal comment-')]); + + tokens = tokenize(''); + assert.deepEqual(tokens, [comment(' A questionable but legal comment - -')]); + + tokens = tokenize(''); + assert.deepEqual(tokens, [comment(' A questionable but legal comment -- -')]); + + tokens = tokenize(''); + assert.deepEqual(tokens, [comment(' A questionable but legal comment ----')]); +}); + +QUnit.test('A (buggy) comment starting with more than two --', function(assert) { + let tokens = tokenize(''); + assert.deepEqual(tokens, [comment('- Questionable but legal ')]); + + tokens = tokenize(''); + assert.deepEqual(tokens, [comment('-Questionable but legal ')]); }); QUnit.test('Character references are expanded', function(assert) {