From 0bd6838d3e3f13becd7c0bbfd43cc55b1ead3923 Mon Sep 17 00:00:00 2001 From: Kevin Gibbons Date: Thu, 18 Dec 2025 17:19:20 -0800 Subject: [PATCH 1/5] remove use of eval --- src/lexer.js | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 164 insertions(+), 1 deletion(-) diff --git a/src/lexer.js b/src/lexer.js index 1fc557d..30bca50 100755 --- a/src/lexer.js +++ b/src/lexer.js @@ -47,7 +47,7 @@ export function parse (source, name = '@') { function decode (str) { if (str[0] === '"' || str[0] === '\'') { try { - const decoded = (0, eval)(str); + const decoded = scanStringLiteral(str); // Filter to exclude non-matching UTF-16 surrogate strings for (let i = 0; i < decoded.length; i++) { const surrogatePrefix = decoded.charCodeAt(i) & 0xFC00; @@ -74,6 +74,169 @@ function decode (str) { } } + +function scanStringLiteral (source) { + const quote = source[0]; + + // try JSON.parse first for performance + if (quote === '"') { + try { + return JSON.parse(source); + } catch { + // ignored + } + } else if (quote === "'" && source.length > 1 && source[source.length - 1] === "'" && source.indexOf('"') === -1) { + try { + return JSON.parse('"' + source.slice(1, -1) + '"'); + } catch { + // ignored + } + } + + // fall back to doing it the hard way + let parsed = ''; + let index = { v: 1 }; + + while (index.v < source.length) { + const char = source[index.v]; + switch (char) { + case quote: { + return parsed; + } + case '\\': { + ++index.v; + parsed += scanEscapeSequence(source, index); + break; + } + case '\r': + case '\n': { + throw new SyntaxError(); + } + default: { + ++index.v; + parsed += char; + } + } + } + + throw new SyntaxError(); +} + +function scanEscapeSequence (source, index) { + if (index.v === source.length) { + throw new SyntaxError(); + } + const char = source[index.v]; + ++index.v; + switch (char) { + case '\r': { + if (source[index.v] === '\n') { + ++index.v; + } + // fall through + } + case '\n': + case '\u2028': + case '\u2029': { + 
return ''; + } + case 'r': { + return '\r'; + } + case 'n': { + return '\n'; + } + case 't': { + return '\t'; + } + case 'b': { + return '\b'; + } + case 'f': { + return '\f'; + } + case 'v': { + return '\v'; + } + case 'x': { + return scanHexEscapeSequence(source, index); + } + case 'u': { + return scanUnicodeEscapeSequence(source, index); + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { + return scanOctalEscapeSequence(char, source, index); + } + default: { + return char; + } + } +} + +function scanHexEscapeSequence (source, index) { + const a = readHex(source[index.v]); + ++index.v; + const b = readHex(source[index.v]); + ++index.v; + return String.fromCodePoint(a * 16 + b); +} + +function scanUnicodeEscapeSequence (source, index) { + let result = 0; + if (source[index.v] === '{') { + ++index.v; + do { + result = result * 16 + readHex(source[index.v]); + if (result > 0x10FFFF) { + throw new SyntaxError(); + } + ++index.v; + } while (source[index.v] !== '}'); + ++index.v; + } else { + for (let i = 0; i < 4; ++i) { + result = result * 16 + readHex(source[index.v]); + ++index.v; + } + } + return String.fromCodePoint(result); +} + +function scanOctalEscapeSequence (char, source, index) { + let toRead = char <= '3' ? 
2 : 1; + let result = +char; + do { + char = source[index.v]; + if (char < '0' || char > '7') { + break; + } + result = result * 8 + (+char); + ++index.v; + --toRead; + } while (toRead > 0); + return String.fromCodePoint(result); +} + +function readHex (char) { + if (char >= '0' && char <= '9') { + return +char; + } else if (char >= 'a' && char <= 'f') { + return char.charCodeAt(0) - 87; + } else if (char >= 'A' && char <= 'F') { + return char.charCodeAt(0) - 55; + } + throw new SyntaxError(); +} + + + function copyBE (src, outBuf16) { const len = src.length; let i = 0; From b9a6596c4cc7780936fc79a12c07392824448262 Mon Sep 17 00:00:00 2001 From: Kevin Gibbons Date: Mon, 22 Dec 2025 21:17:09 -0800 Subject: [PATCH 2/5] also add string scanner to pure-JS impl --- lexer.js | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 163 insertions(+), 1 deletion(-) diff --git a/lexer.js b/lexer.js index 6807f0b..1614fd9 100755 --- a/lexer.js +++ b/lexer.js @@ -55,7 +55,7 @@ function parseCJS (source, name = '@') { function decode (str) { if (str[0] === '"' || str[0] === '\'') { try { - const decoded = (0, eval)(str); + const decoded = scanStringLiteral(str); // Filter to exclude non-matching UTF-16 surrogate strings for (let i = 0; i < decoded.length; i++) { const surrogatePrefix = decoded.charCodeAt(i) & 0xFC00; @@ -1027,6 +1027,168 @@ function tryParseLiteralExports () { } } +// This function and its callees are duplicated in src/lexer.js +function scanStringLiteral (source) { + const quote = source[0]; + + // try JSON.parse first for performance + if (quote === '"') { + try { + return JSON.parse(source); + } catch { + // ignored + } + } else if (quote === "'" && source.length > 1 && source[source.length - 1] === "'" && source.indexOf('"') === -1) { + try { + return JSON.parse('"' + source.slice(1, -1) + '"'); + } catch { + // ignored + } + } + + // fall back to doing it the hard way + let parsed = ''; + let index = { v: 1 }; + + while (index.v < 
source.length) { + const char = source[index.v]; + switch (char) { + case quote: { + return parsed; + } + case '\\': { + ++index.v; + parsed += scanEscapeSequence(source, index); + break; + } + case '\r': + case '\n': { + throw new SyntaxError(); + } + default: { + ++index.v; + parsed += char; + } + } + } + + throw new SyntaxError(); +} + +function scanEscapeSequence (source, index) { + if (index.v === source.length) { + throw new SyntaxError(); + } + const char = source[index.v]; + ++index.v; + switch (char) { + case '\r': { + if (source[index.v] === '\n') { + ++index.v; + } + // fall through + } + case '\n': + case '\u2028': + case '\u2029': { + return ''; + } + case 'r': { + return '\r'; + } + case 'n': { + return '\n'; + } + case 't': { + return '\t'; + } + case 'b': { + return '\b'; + } + case 'f': { + return '\f'; + } + case 'v': { + return '\v'; + } + case 'x': { + return scanHexEscapeSequence(source, index); + } + case 'u': { + return scanUnicodeEscapeSequence(source, index); + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { + return scanOctalEscapeSequence(char, source, index); + } + default: { + return char; + } + } +} + +function scanHexEscapeSequence (source, index) { + const a = readHex(source[index.v]); + ++index.v; + const b = readHex(source[index.v]); + ++index.v; + return String.fromCodePoint(a * 16 + b); +} + +function scanUnicodeEscapeSequence (source, index) { + let result = 0; + if (source[index.v] === '{') { + ++index.v; + do { + result = result * 16 + readHex(source[index.v]); + if (result > 0x10FFFF) { + throw new SyntaxError(); + } + ++index.v; + } while (source[index.v] !== '}'); + ++index.v; + } else { + for (let i = 0; i < 4; ++i) { + result = result * 16 + readHex(source[index.v]); + ++index.v; + } + } + return String.fromCodePoint(result); +} + +function scanOctalEscapeSequence (char, source, index) { + let toRead = char <= '3' ? 
2 : 1; + let result = +char; + do { + char = source[index.v]; + if (char < '0' || char > '7') { + break; + } + result = result * 8 + (+char); + ++index.v; + --toRead; + } while (toRead > 0); + return String.fromCodePoint(result); +} + +function readHex (char) { + if (char >= '0' && char <= '9') { + return +char; + } else if (char >= 'a' && char <= 'f') { + return char.charCodeAt(0) - 87; + } else if (char >= 'A' && char <= 'F') { + return char.charCodeAt(0) - 55; + } + throw new SyntaxError(); +} + + // --- Extracted from AcornJS --- //(https://github.com/acornjs/acorn/blob/master/acorn/src/identifier.js#L23 // From 6bcf40eda95bc3b8e2b3b6fae8c882d78e95c231 Mon Sep 17 00:00:00 2001 From: Kevin Gibbons Date: Mon, 22 Dec 2025 21:17:46 -0800 Subject: [PATCH 3/5] add some more string cases to tests --- test/_unit.js | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/_unit.js b/test/_unit.js index b167093..0458f17 100755 --- a/test/_unit.js +++ b/test/_unit.js @@ -536,6 +536,11 @@ suite('Lexer', () => { exports['not identifier'] = 'asdf'; exports['\\u{D83C}\\u{DF10}'] = 1; exports['\\u{D83C}'] = 1; + exports['\\u58b8'] = 1; + exports['\\n'] = 1; + exports['\\xFF'] = 1; + exports['\\011'] = 1; + exports['\\3z'] = 1; exports['\\''] = 1; exports['@notidentifier'] = 'asdf'; Object.defineProperty(exports, "%notidentifier", { value: x }); @@ -549,6 +554,11 @@ suite('Lexer', () => { 'ab cd', 'not identifier', '\u{D83C}\u{DF10}', + '\u58B8', + '\n', + '\xFF', + '\011', + '\3z', '\'', '@notidentifier', '%notidentifier', From fb45a0b87666e4b903f69fad0249015bf426595e Mon Sep 17 00:00:00 2001 From: Kevin Gibbons Date: Mon, 22 Dec 2025 21:19:06 -0800 Subject: [PATCH 4/5] make tests actually fail on failure --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index d6c84bd..6c40993 100755 --- a/package.json +++ b/package.json @@ -15,7 +15,7 @@ "test-js": "mocha -b -u tdd test/*.js", "test-wasm": "cross-env 
WASM=1 mocha -b -u tdd test/*.js", "test-wasm-sync": "cross-env WASM_SYNC=1 mocha -b -u tdd test/*.js", - "test": "npm run test-wasm ; npm run test-wasm-sync ; npm run test-js", + "test": "npm run test-wasm && npm run test-wasm-sync && npm run test-js", "bench": "node --expose-gc bench/index.mjs", "build": "node build.js ; babel dist/lexer.mjs -o dist/lexer.js ; terser dist/lexer.js -o dist/lexer.js", "build-wasm": "make lib/lexer.wasm ; node build.js", From 4f032cda395678f57c909629dd339d0ae429224b Mon Sep 17 00:00:00 2001 From: Kevin Gibbons Date: Mon, 22 Dec 2025 21:19:50 -0800 Subject: [PATCH 5/5] add disallow-code-generation-from-strings to tests --- package.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index 6c40993..21084b8 100755 --- a/package.json +++ b/package.json @@ -12,9 +12,9 @@ }, "types": "lexer.d.ts", "scripts": { - "test-js": "mocha -b -u tdd test/*.js", - "test-wasm": "cross-env WASM=1 mocha -b -u tdd test/*.js", - "test-wasm-sync": "cross-env WASM_SYNC=1 mocha -b -u tdd test/*.js", + "test-js": "cross-env NODE_OPTIONS=--disallow-code-generation-from-strings mocha -b -u tdd test/*.js", + "test-wasm": "cross-env WASM=1 NODE_OPTIONS=--disallow-code-generation-from-strings mocha -b -u tdd test/*.js", + "test-wasm-sync": "cross-env WASM_SYNC=1 NODE_OPTIONS=--disallow-code-generation-from-strings mocha -b -u tdd test/*.js", "test": "npm run test-wasm && npm run test-wasm-sync && npm run test-js", "bench": "node --expose-gc bench/index.mjs", "build": "node build.js ; babel dist/lexer.mjs -o dist/lexer.js ; terser dist/lexer.js -o dist/lexer.js",