From 0bd6838d3e3f13becd7c0bbfd43cc55b1ead3923 Mon Sep 17 00:00:00 2001
From: Kevin Gibbons <bakkot@gmail.com>
Date: Thu, 18 Dec 2025 17:19:20 -0800
Subject: [PATCH 1/5] remove use of eval

---
 src/lexer.js | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 164 insertions(+), 1 deletion(-)

diff --git a/src/lexer.js b/src/lexer.js
index 1fc557d..30bca50 100755
--- a/src/lexer.js
+++ b/src/lexer.js
@@ -47,7 +47,7 @@ export function parse (source, name = '@') {
 function decode (str) {
   if (str[0] === '"' || str[0] === '\'') {
     try {
-      const decoded = (0, eval)(str);
+      const decoded = scanStringLiteral(str);
       // Filter to exclude non-matching UTF-16 surrogate strings
       for (let i = 0; i < decoded.length; i++) {
         const surrogatePrefix = decoded.charCodeAt(i) & 0xFC00;
@@ -74,6 +74,169 @@ function decode (str) {
   }
 }
 
+// Decodes a quoted string literal (quotes included) into its string value without eval; throws if it is malformed.
+function scanStringLiteral (source) {
+  const quote = source[0];
+
+  // try JSON.parse first for performance; when it throws we fall through to the manual scanner
+  if (quote === '"') {
+    try {
+      return JSON.parse(source);
+    } catch {
+      // deliberately ignored: not valid JSON, so the manual scanner below handles it
+    }
+  } else if (quote === "'" && source.length > 1 && source[source.length - 1] === "'" && source.indexOf('"') === -1) {
+    try { // no '"' anywhere in the literal, so its body can be re-wrapped in double quotes for JSON
+      return JSON.parse('"' + source.slice(1, -1) + '"');
+    } catch {
+      // deliberately ignored: not valid JSON, so the manual scanner below handles it
+    }
+  }
+
+  // fall back to doing it the hard way
+  let parsed = '';
+  let index = { v: 1 }; // boxed cursor so the helpers can advance it; starts just after the opening quote
+
+  while (index.v < source.length) {
+    const char = source[index.v];
+    switch (char) {
+      case quote: { // unescaped closing quote: done (anything after it is not examined)
+        return parsed;
+      }
+      case '\\': {
+        ++index.v; // step past the backslash; the helper consumes the rest of the escape
+        parsed += scanEscapeSequence(source, index);
+        break;
+      }
+      case '\r':
+      case '\n': { // a raw CR or LF inside the literal is rejected
+        throw new SyntaxError();
+      }
+      default: {
+        ++index.v;
+        parsed += char;
+      }
+    }
+  }
+
+  throw new SyntaxError(); // ran off the end without finding the closing quote
+}
+
+function scanEscapeSequence (source, index) { // decodes one escape; index.v is just past the backslash on entry and past the escape on return
+  if (index.v === source.length) { // the backslash was the last character of the source
+    throw new SyntaxError();
+  }
+  const char = source[index.v];
+  ++index.v;
+  switch (char) {
+    case '\r': { // escaped line terminator: consume a following \n so \r\n counts once
+      if (source[index.v] === '\n') {
+        ++index.v;
+      }
+      // fall through
+    }
+    case '\n':
+    case '\u2028':
+    case '\u2029': {
+      return ''; // an escaped line terminator contributes nothing to the value
+    }
+    case 'r': {
+      return '\r';
+    }
+    case 'n': {
+      return '\n';
+    }
+    case 't': {
+      return '\t';
+    }
+    case 'b': {
+      return '\b';
+    }
+    case 'f': {
+      return '\f';
+    }
+    case 'v': {
+      return '\v';
+    }
+    case 'x': {
+      return scanHexEscapeSequence(source, index);
+    }
+    case 'u': {
+      return scanUnicodeEscapeSequence(source, index);
+    }
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7': {
+      return scanOctalEscapeSequence(char, source, index); // the first digit is passed along because it is already consumed
+    }
+    default: { // any other character stands for itself (e.g. \' \" \\ \8)
+      return char;
+    }
+  }
+}
+
+function scanHexEscapeSequence (source, index) { // \xHH: reads exactly two hex digits starting at index.v
+  const a = readHex(source[index.v]); // readHex throws SyntaxError for a non-hex or missing character
+  ++index.v;
+  const b = readHex(source[index.v]);
+  ++index.v;
+  return String.fromCodePoint(a * 16 + b);
+}
+
+function scanUnicodeEscapeSequence (source, index) { // \uXXXX or \u{X...}; index.v is just past the 'u' on entry
+  let result = 0;
+  if (source[index.v] === '{') { // braced form: one or more hex digits terminated by '}'
+    ++index.v;
+    do {
+      result = result * 16 + readHex(source[index.v]); // readHex throws on '}' too, so an empty \u{} is rejected
+      if (result > 0x10FFFF) { // checked every digit, so the accumulator never exceeds the code point range
+        throw new SyntaxError();
+      }
+      ++index.v;
+    } while (source[index.v] !== '}');
+    ++index.v; // skip the closing '}'
+  } else {
+    for (let i = 0; i < 4; ++i) { // unbraced form: exactly four hex digits
+      result = result * 16 + readHex(source[index.v]);
+      ++index.v;
+    }
+  }
+  return String.fromCodePoint(result);
+}
+
+// Decodes an octal escape whose first digit `char` is already consumed: up to 2 more digits after 0-3, 1 more after 4-7.
+function scanOctalEscapeSequence (char, source, index) {
+  let result = +char;
+  for (let toRead = char <= '3' ? 2 : 1; toRead > 0; --toRead) {
+    const next = source[index.v];
+    // negated range test so end of input (next === undefined) also stops the scan instead of producing NaN
+    if (!(next >= '0' && next <= '7')) {
+      break;
+    }
+    result = result * 8 + (+next);
+    ++index.v;
+  }
+  return String.fromCodePoint(result);
+}
+
+function readHex (char) { // returns the numeric value (0-15) of a single hex digit character
+  if (char >= '0' && char <= '9') {
+    return +char;
+  } else if (char >= 'a' && char <= 'f') {
+    return char.charCodeAt(0) - 87; // 'a' is char code 97, so 'a' maps to 10
+  } else if (char >= 'A' && char <= 'F') {
+    return char.charCodeAt(0) - 55; // 'A' is char code 65, so 'A' maps to 10
+  }
+  throw new SyntaxError(); // also reached for undefined, i.e. a read past the end of the source
+}
+
+
+
 function copyBE (src, outBuf16) {
   const len = src.length;
   let i = 0;

From b9a6596c4cc7780936fc79a12c07392824448262 Mon Sep 17 00:00:00 2001
From: Kevin Gibbons <bakkot@gmail.com>
Date: Mon, 22 Dec 2025 21:17:09 -0800
Subject: [PATCH 2/5] also add string scanner to pure-JS impl

---
 lexer.js | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 163 insertions(+), 1 deletion(-)

diff --git a/lexer.js b/lexer.js
index 6807f0b..1614fd9 100755
--- a/lexer.js
+++ b/lexer.js
@@ -55,7 +55,7 @@ function parseCJS (source, name = '@') {
 function decode (str) {
   if (str[0] === '"' || str[0] === '\'') {
     try {
-      const decoded = (0, eval)(str);
+      const decoded = scanStringLiteral(str);
       // Filter to exclude non-matching UTF-16 surrogate strings
       for (let i = 0; i < decoded.length; i++) {
         const surrogatePrefix = decoded.charCodeAt(i) & 0xFC00;
@@ -1027,6 +1027,168 @@ function tryParseLiteralExports () {
   }
 }
 
+// This function and its callees are duplicated in src/lexer.js
+function scanStringLiteral (source) { // decodes a quoted string literal (quotes included) to its value without eval; throws if malformed
+  const quote = source[0];
+
+  // try JSON.parse first for performance; when it throws we fall through to the manual scanner
+  if (quote === '"') {
+    try {
+      return JSON.parse(source);
+    } catch {
+      // deliberately ignored: not valid JSON, so the manual scanner below handles it
+    }
+  } else if (quote === "'" && source.length > 1 && source[source.length - 1] === "'" && source.indexOf('"') === -1) {
+    try { // no '"' anywhere in the literal, so its body can be re-wrapped in double quotes for JSON
+      return JSON.parse('"' + source.slice(1, -1) + '"');
+    } catch {
+      // deliberately ignored: not valid JSON, so the manual scanner below handles it
+    }
+  }
+
+  // fall back to doing it the hard way
+  let parsed = '';
+  let index = { v: 1 }; // boxed cursor so the helpers can advance it; starts just after the opening quote
+
+  while (index.v < source.length) {
+    const char = source[index.v];
+    switch (char) {
+      case quote: { // unescaped closing quote: done (anything after it is not examined)
+        return parsed;
+      }
+      case '\\': {
+        ++index.v; // step past the backslash; the helper consumes the rest of the escape
+        parsed += scanEscapeSequence(source, index);
+        break;
+      }
+      case '\r':
+      case '\n': { // a raw CR or LF inside the literal is rejected
+        throw new SyntaxError();
+      }
+      default: {
+        ++index.v;
+        parsed += char;
+      }
+    }
+  }
+
+  throw new SyntaxError(); // ran off the end without finding the closing quote
+}
+
+function scanEscapeSequence (source, index) { // decodes one escape; index.v is just past the backslash on entry and past the escape on return
+  if (index.v === source.length) { // the backslash was the last character of the source
+    throw new SyntaxError();
+  }
+  const char = source[index.v];
+  ++index.v;
+  switch (char) {
+    case '\r': { // escaped line terminator: consume a following \n so \r\n counts once
+      if (source[index.v] === '\n') {
+        ++index.v;
+      }
+      // fall through
+    }
+    case '\n':
+    case '\u2028':
+    case '\u2029': {
+      return ''; // an escaped line terminator contributes nothing to the value
+    }
+    case 'r': {
+      return '\r';
+    }
+    case 'n': {
+      return '\n';
+    }
+    case 't': {
+      return '\t';
+    }
+    case 'b': {
+      return '\b';
+    }
+    case 'f': {
+      return '\f';
+    }
+    case 'v': {
+      return '\v';
+    }
+    case 'x': {
+      return scanHexEscapeSequence(source, index);
+    }
+    case 'u': {
+      return scanUnicodeEscapeSequence(source, index);
+    }
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7': {
+      return scanOctalEscapeSequence(char, source, index); // the first digit is passed along because it is already consumed
+    }
+    default: { // any other character stands for itself (e.g. \' \" \\ \8)
+      return char;
+    }
+  }
+}
+
+function scanHexEscapeSequence (source, index) { // \xHH: reads exactly two hex digits starting at index.v
+  const a = readHex(source[index.v]); // readHex throws SyntaxError for a non-hex or missing character
+  ++index.v;
+  const b = readHex(source[index.v]);
+  ++index.v;
+  return String.fromCodePoint(a * 16 + b);
+}
+
+function scanUnicodeEscapeSequence (source, index) { // \uXXXX or \u{X...}; index.v is just past the 'u' on entry
+  let result = 0;
+  if (source[index.v] === '{') { // braced form: one or more hex digits terminated by '}'
+    ++index.v;
+    do {
+      result = result * 16 + readHex(source[index.v]); // readHex throws on '}' too, so an empty \u{} is rejected
+      if (result > 0x10FFFF) { // checked every digit, so the accumulator never exceeds the code point range
+        throw new SyntaxError();
+      }
+      ++index.v;
+    } while (source[index.v] !== '}');
+    ++index.v; // skip the closing '}'
+  } else {
+    for (let i = 0; i < 4; ++i) { // unbraced form: exactly four hex digits
+      result = result * 16 + readHex(source[index.v]);
+      ++index.v;
+    }
+  }
+  return String.fromCodePoint(result);
+}
+
+// Decodes an octal escape whose first digit `char` is already consumed: up to 2 more digits after 0-3, 1 more after 4-7.
+function scanOctalEscapeSequence (char, source, index) {
+  let result = +char;
+  for (let toRead = char <= '3' ? 2 : 1; toRead > 0; --toRead) {
+    const next = source[index.v];
+    // negated range test so end of input (next === undefined) also stops the scan instead of producing NaN
+    if (!(next >= '0' && next <= '7')) {
+      break;
+    }
+    result = result * 8 + (+next);
+    ++index.v;
+  }
+  return String.fromCodePoint(result);
+}
+
+function readHex (char) { // returns the numeric value (0-15) of a single hex digit character
+  if (char >= '0' && char <= '9') {
+    return +char;
+  } else if (char >= 'a' && char <= 'f') {
+    return char.charCodeAt(0) - 87; // 'a' is char code 97, so 'a' maps to 10
+  } else if (char >= 'A' && char <= 'F') {
+    return char.charCodeAt(0) - 55; // 'A' is char code 65, so 'A' maps to 10
+  }
+  throw new SyntaxError(); // also reached for undefined, i.e. a read past the end of the source
+}
+
+
 // --- Extracted from AcornJS ---
 //(https://github.com/acornjs/acorn/blob/master/acorn/src/identifier.js#L23
 //

From 6bcf40eda95bc3b8e2b3b6fae8c882d78e95c231 Mon Sep 17 00:00:00 2001
From: Kevin Gibbons <bakkot@gmail.com>
Date: Mon, 22 Dec 2025 21:17:46 -0800
Subject: [PATCH 3/5] add some more string cases to tests

---
 test/_unit.js | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/test/_unit.js b/test/_unit.js
index b167093..0458f17 100755
--- a/test/_unit.js
+++ b/test/_unit.js
@@ -536,6 +536,11 @@ suite('Lexer', () => {
       exports['not identifier'] = 'asdf';
       exports['\\u{D83C}\\u{DF10}'] = 1;
       exports['\\u{D83C}'] = 1;
+      exports['\\u58b8'] = 1;
+      exports['\\n'] = 1;
+      exports['\\xFF'] = 1;
+      exports['\\011'] = 1;
+      exports['\\3z'] = 1;
       exports['\\''] = 1;
       exports['@notidentifier'] = 'asdf';
       Object.defineProperty(exports, "%notidentifier", { value: x });
@@ -549,6 +554,11 @@ suite('Lexer', () => {
       'ab cd',
       'not identifier',
       '\u{D83C}\u{DF10}',
+      '\u58B8',
+      '\n',
+      '\xFF',
+      '\011',
+      '\3z',
       '\'',
       '@notidentifier',
       '%notidentifier',

From fb45a0b87666e4b903f69fad0249015bf426595e Mon Sep 17 00:00:00 2001
From: Kevin Gibbons <bakkot@gmail.com>
Date: Mon, 22 Dec 2025 21:19:06 -0800
Subject: [PATCH 4/5] make tests actually fail on failure

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index d6c84bd..6c40993 100755
--- a/package.json
+++ b/package.json
@@ -15,7 +15,7 @@
     "test-js": "mocha -b -u tdd test/*.js",
     "test-wasm": "cross-env WASM=1 mocha -b -u tdd test/*.js",
     "test-wasm-sync": "cross-env WASM_SYNC=1 mocha -b -u tdd test/*.js",
-    "test": "npm run test-wasm ; npm run test-wasm-sync ; npm run test-js",
+    "test": "npm run test-wasm && npm run test-wasm-sync && npm run test-js",
     "bench": "node --expose-gc bench/index.mjs",
     "build": "node build.js ; babel dist/lexer.mjs -o dist/lexer.js ; terser dist/lexer.js -o dist/lexer.js",
     "build-wasm": "make lib/lexer.wasm ; node build.js",

From 4f032cda395678f57c909629dd339d0ae429224b Mon Sep 17 00:00:00 2001
From: Kevin Gibbons <bakkot@gmail.com>
Date: Mon, 22 Dec 2025 21:19:50 -0800
Subject: [PATCH 5/5] add disallow-code-generation-from-strings to tests

---
 package.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/package.json b/package.json
index 6c40993..21084b8 100755
--- a/package.json
+++ b/package.json
@@ -12,9 +12,9 @@
   },
   "types": "lexer.d.ts",
   "scripts": {
-    "test-js": "mocha -b -u tdd test/*.js",
-    "test-wasm": "cross-env WASM=1 mocha -b -u tdd test/*.js",
-    "test-wasm-sync": "cross-env WASM_SYNC=1 mocha -b -u tdd test/*.js",
+    "test-js": "cross-env NODE_OPTIONS=--disallow-code-generation-from-strings mocha -b -u tdd test/*.js",
+    "test-wasm": "cross-env WASM=1 NODE_OPTIONS=--disallow-code-generation-from-strings mocha -b -u tdd test/*.js",
+    "test-wasm-sync": "cross-env WASM_SYNC=1 NODE_OPTIONS=--disallow-code-generation-from-strings mocha -b -u tdd test/*.js",
     "test": "npm run test-wasm && npm run test-wasm-sync && npm run test-js",
     "bench": "node --expose-gc bench/index.mjs",
     "build": "node build.js ; babel dist/lexer.mjs -o dist/lexer.js ; terser dist/lexer.js -o dist/lexer.js",