Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 163 additions & 1 deletion lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ function parseCJS (source, name = '@') {
function decode (str) {
if (str[0] === '"' || str[0] === '\'') {
try {
const decoded = (0, eval)(str);
const decoded = scanStringLiteral(str);
// Filter to exclude non-matching UTF-16 surrogate strings
for (let i = 0; i < decoded.length; i++) {
const surrogatePrefix = decoded.charCodeAt(i) & 0xFC00;
Expand Down Expand Up @@ -1027,6 +1027,168 @@ function tryParseLiteralExports () {
}
}

// This function and its callees are duplicated in src/lexer.js
/**
 * Decodes the text of a quoted JavaScript string literal into its string
 * value without evaluating it as code.
 *
 * @param {string} source - Literal text including its opening quote.
 * @returns {string} The decoded string value.
 * @throws {SyntaxError} If the literal contains a raw CR/LF or has no
 *   closing quote.
 */
function scanStringLiteral (source) {
  const delimiter = source[0];

  // Fast path: let JSON.parse do the decoding when the literal can be
  // presented as a JSON string. A single-quoted literal qualifies only if it
  // is closed by a single quote and holds no double quote, so swapping the
  // delimiters cannot change its meaning.
  const isDoubleQuoted = delimiter === '"';
  const isSwappableSingleQuoted =
    !isDoubleQuoted &&
    delimiter === "'" &&
    source.length > 1 &&
    source[source.length - 1] === "'" &&
    source.indexOf('"') === -1;

  if (isDoubleQuoted || isSwappableSingleQuoted) {
    try {
      return JSON.parse(isDoubleQuoted ? source : '"' + source.slice(1, -1) + '"');
    } catch {
      // Not valid JSON (e.g. \x, \', octal escapes); use the manual scanner.
    }
  }

  // Slow path: walk the literal character by character. The cursor is an
  // object so the escape scanners can advance it in place.
  const cursor = { v: 1 };
  let decoded = '';

  while (cursor.v < source.length) {
    const current = source[cursor.v];
    if (current === delimiter) {
      return decoded;
    }
    if (current === '\r' || current === '\n') {
      // Raw line breaks are not allowed inside a string literal.
      throw new SyntaxError();
    }
    cursor.v += 1;
    decoded += current === '\\' ? scanEscapeSequence(source, cursor) : current;
  }

  // Ran off the end without seeing the closing quote.
  throw new SyntaxError();
}

/**
 * Decodes a single escape sequence. On entry `index.v` points at the
 * character immediately after the backslash; on return it points past the
 * whole sequence.
 *
 * @param {string} source - The string literal text being scanned.
 * @param {{v: number}} index - Mutable cursor into `source`.
 * @returns {string} The text the escape stands for ('' for a line
 *   continuation).
 * @throws {SyntaxError} If the backslash is the last character of `source`.
 */
function scanEscapeSequence (source, index) {
  if (index.v === source.length) {
    throw new SyntaxError();
  }
  const escaped = source[index.v];
  index.v += 1;

  // Line continuations: backslash + line terminator contributes nothing.
  if (escaped === '\r') {
    // Treat CR LF as one terminator.
    if (source[index.v] === '\n') {
      index.v += 1;
    }
    return '';
  }
  if (escaped === '\n' || escaped === '\u2028' || escaped === '\u2029') {
    return '';
  }

  // Numeric escapes are handled by dedicated scanners.
  if (escaped === 'x') {
    return scanHexEscapeSequence(source, index);
  }
  if (escaped === 'u') {
    return scanUnicodeEscapeSequence(source, index);
  }
  if (escaped >= '0' && escaped <= '7') {
    return scanOctalEscapeSequence(escaped, source, index);
  }

  // Single-character escapes: the two strings below are position-aligned.
  const controlAt = 'rntbfv'.indexOf(escaped);
  if (controlAt !== -1) {
    return '\r\n\t\b\f\v'[controlAt];
  }

  // Any other character simply stands for itself (e.g. \' \" \\ \8).
  return escaped;
}

/**
 * Decodes the two hex digits of a `\xHH` escape. On entry `index.v` points
 * at the first digit; on success it is advanced past both digits.
 *
 * @param {string} source - The string literal text being scanned.
 * @param {{v: number}} index - Mutable cursor into `source`.
 * @returns {string} The character with code 0xHH.
 * @throws {SyntaxError} Propagated from readHex when a digit is not hex.
 */
function scanHexEscapeSequence (source, index) {
  const start = index.v;

  // The cursor only moves past a digit once that digit has been validated.
  const high = readHex(source[start]);
  index.v = start + 1;
  const low = readHex(source[start + 1]);
  index.v = start + 2;

  return String.fromCodePoint((high << 4) | low);
}

/**
 * Decodes a `\uXXXX` or `\u{X...}` escape. On entry `index.v` points at the
 * character right after the `u`; on success it points past the escape.
 *
 * @param {string} source - The string literal text being scanned.
 * @param {{v: number}} index - Mutable cursor into `source`.
 * @returns {string} The string for the decoded code point.
 * @throws {SyntaxError} If the braced value exceeds 0x10FFFF, or (from
 *   readHex) when a non-hex character is met where a digit is required.
 */
function scanUnicodeEscapeSequence (source, index) {
  let codePoint = 0;

  if (source[index.v] !== '{') {
    // Fixed-width form: exactly four hex digits.
    for (let remaining = 4; remaining > 0; --remaining) {
      codePoint = codePoint * 16 + readHex(source[index.v]);
      index.v += 1;
    }
    return String.fromCodePoint(codePoint);
  }

  // Braced form: one or more hex digits up to the closing brace.
  index.v += 1; // skip '{'
  for (;;) {
    codePoint = codePoint * 16 + readHex(source[index.v]);
    if (codePoint > 0x10FFFF) {
      throw new SyntaxError();
    }
    index.v += 1;
    if (source[index.v] === '}') {
      break;
    }
  }
  index.v += 1; // skip '}'

  return String.fromCodePoint(codePoint);
}

/**
 * Decodes a legacy octal escape (`\0` .. `\377`). The first digit has
 * already been consumed and is passed as `char`; `index.v` points at the
 * character after it and is advanced past any further digits consumed.
 *
 * @param {string} char - The first octal digit ('0'..'7').
 * @param {string} source - The string literal text being scanned.
 * @param {{v: number}} index - Mutable cursor into `source`.
 * @returns {string} The character for the decoded octal value.
 */
function scanOctalEscapeSequence (char, source, index) {
  // A leading 0-3 may be followed by up to two more digits (max \377);
  // a leading 4-7 by at most one, so the value never exceeds 0o377.
  const maxExtraDigits = char <= '3' ? 2 : 1;
  let result = +char;

  for (let read = 0; read < maxExtraDigits; ++read) {
    const digit = source[index.v];
    // Written as a positive range test so that `undefined` (end of input)
    // also ends the escape; otherwise it would be folded in as NaN and
    // String.fromCodePoint would throw a RangeError.
    if (!(digit >= '0' && digit <= '7')) {
      break;
    }
    result = result * 8 + (+digit);
    ++index.v;
  }

  return String.fromCodePoint(result);
}

/**
 * Converts one hexadecimal digit character to its numeric value.
 *
 * @param {string|undefined} char - Character to convert; `undefined` (a read
 *   past the end of the input) is rejected like any other non-digit.
 * @returns {number} A value from 0 to 15.
 * @throws {SyntaxError} If `char` is not one of 0-9, a-f or A-F.
 */
function readHex (char) {
  const isDecimalDigit = char >= '0' && char <= '9';
  if (isDecimalDigit) {
    return +char;
  }

  // Distance from the letter's char code to its hex value:
  // 'a' is 97 -> 10, 'A' is 65 -> 10.
  let codeOffset;
  if (char >= 'a' && char <= 'f') {
    codeOffset = 87;
  } else if (char >= 'A' && char <= 'F') {
    codeOffset = 55;
  } else {
    throw new SyntaxError();
  }
  return char.charCodeAt(0) - codeOffset;
}


// --- Extracted from AcornJS ---
//(https://github.com/acornjs/acorn/blob/master/acorn/src/identifier.js#L23
//
Expand Down
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
},
"types": "lexer.d.ts",
"scripts": {
"test-js": "mocha -b -u tdd test/*.js",
"test-wasm": "cross-env WASM=1 mocha -b -u tdd test/*.js",
"test-wasm-sync": "cross-env WASM_SYNC=1 mocha -b -u tdd test/*.js",
"test": "npm run test-wasm ; npm run test-wasm-sync ; npm run test-js",
"test-js": "cross-env NODE_OPTIONS=--disallow-code-generation-from-strings mocha -b -u tdd test/*.js",
"test-wasm": "cross-env WASM=1 NODE_OPTIONS=--disallow-code-generation-from-strings mocha -b -u tdd test/*.js",
"test-wasm-sync": "cross-env WASM_SYNC=1 NODE_OPTIONS=--disallow-code-generation-from-strings mocha -b -u tdd test/*.js",
"test": "npm run test-wasm && npm run test-wasm-sync && npm run test-js",
"bench": "node --expose-gc bench/index.mjs",
"build": "node build.js ; babel dist/lexer.mjs -o dist/lexer.js ; terser dist/lexer.js -o dist/lexer.js",
"build-wasm": "make lib/lexer.wasm ; node build.js",
Expand Down
165 changes: 164 additions & 1 deletion src/lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ export function parse (source, name = '@') {
function decode (str) {
if (str[0] === '"' || str[0] === '\'') {
try {
const decoded = (0, eval)(str);
const decoded = scanStringLiteral(str);
// Filter to exclude non-matching UTF-16 surrogate strings
for (let i = 0; i < decoded.length; i++) {
const surrogatePrefix = decoded.charCodeAt(i) & 0xFC00;
Expand All @@ -74,6 +74,169 @@ function decode (str) {
}
}


/**
 * Decodes the text of a quoted JavaScript string literal into its string
 * value without evaluating it as code.
 *
 * @param {string} source - Literal text including its opening quote.
 * @returns {string} The decoded string value.
 * @throws {SyntaxError} If the literal contains a raw CR/LF or has no
 *   closing quote.
 */
function scanStringLiteral (source) {
  const delimiter = source[0];

  // Fast path: let JSON.parse do the decoding when the literal can be
  // presented as a JSON string. A single-quoted literal qualifies only if it
  // is closed by a single quote and holds no double quote, so swapping the
  // delimiters cannot change its meaning.
  const isDoubleQuoted = delimiter === '"';
  const isSwappableSingleQuoted =
    !isDoubleQuoted &&
    delimiter === "'" &&
    source.length > 1 &&
    source[source.length - 1] === "'" &&
    source.indexOf('"') === -1;

  if (isDoubleQuoted || isSwappableSingleQuoted) {
    try {
      return JSON.parse(isDoubleQuoted ? source : '"' + source.slice(1, -1) + '"');
    } catch {
      // Not valid JSON (e.g. \x, \', octal escapes); use the manual scanner.
    }
  }

  // Slow path: walk the literal character by character. The cursor is an
  // object so the escape scanners can advance it in place.
  const cursor = { v: 1 };
  let decoded = '';

  while (cursor.v < source.length) {
    const current = source[cursor.v];
    if (current === delimiter) {
      return decoded;
    }
    if (current === '\r' || current === '\n') {
      // Raw line breaks are not allowed inside a string literal.
      throw new SyntaxError();
    }
    cursor.v += 1;
    decoded += current === '\\' ? scanEscapeSequence(source, cursor) : current;
  }

  // Ran off the end without seeing the closing quote.
  throw new SyntaxError();
}

/**
 * Decodes a single escape sequence. On entry `index.v` points at the
 * character immediately after the backslash; on return it points past the
 * whole sequence.
 *
 * @param {string} source - The string literal text being scanned.
 * @param {{v: number}} index - Mutable cursor into `source`.
 * @returns {string} The text the escape stands for ('' for a line
 *   continuation).
 * @throws {SyntaxError} If the backslash is the last character of `source`.
 */
function scanEscapeSequence (source, index) {
  if (index.v === source.length) {
    throw new SyntaxError();
  }
  const escaped = source[index.v];
  index.v += 1;

  // Line continuations: backslash + line terminator contributes nothing.
  if (escaped === '\r') {
    // Treat CR LF as one terminator.
    if (source[index.v] === '\n') {
      index.v += 1;
    }
    return '';
  }
  if (escaped === '\n' || escaped === '\u2028' || escaped === '\u2029') {
    return '';
  }

  // Numeric escapes are handled by dedicated scanners.
  if (escaped === 'x') {
    return scanHexEscapeSequence(source, index);
  }
  if (escaped === 'u') {
    return scanUnicodeEscapeSequence(source, index);
  }
  if (escaped >= '0' && escaped <= '7') {
    return scanOctalEscapeSequence(escaped, source, index);
  }

  // Single-character escapes: the two strings below are position-aligned.
  const controlAt = 'rntbfv'.indexOf(escaped);
  if (controlAt !== -1) {
    return '\r\n\t\b\f\v'[controlAt];
  }

  // Any other character simply stands for itself (e.g. \' \" \\ \8).
  return escaped;
}

/**
 * Decodes the two hex digits of a `\xHH` escape. On entry `index.v` points
 * at the first digit; on success it is advanced past both digits.
 *
 * @param {string} source - The string literal text being scanned.
 * @param {{v: number}} index - Mutable cursor into `source`.
 * @returns {string} The character with code 0xHH.
 * @throws {SyntaxError} Propagated from readHex when a digit is not hex.
 */
function scanHexEscapeSequence (source, index) {
  const start = index.v;

  // The cursor only moves past a digit once that digit has been validated.
  const high = readHex(source[start]);
  index.v = start + 1;
  const low = readHex(source[start + 1]);
  index.v = start + 2;

  return String.fromCodePoint((high << 4) | low);
}

/**
 * Decodes a `\uXXXX` or `\u{X...}` escape. On entry `index.v` points at the
 * character right after the `u`; on success it points past the escape.
 *
 * @param {string} source - The string literal text being scanned.
 * @param {{v: number}} index - Mutable cursor into `source`.
 * @returns {string} The string for the decoded code point.
 * @throws {SyntaxError} If the braced value exceeds 0x10FFFF, or (from
 *   readHex) when a non-hex character is met where a digit is required.
 */
function scanUnicodeEscapeSequence (source, index) {
  let codePoint = 0;

  if (source[index.v] !== '{') {
    // Fixed-width form: exactly four hex digits.
    for (let remaining = 4; remaining > 0; --remaining) {
      codePoint = codePoint * 16 + readHex(source[index.v]);
      index.v += 1;
    }
    return String.fromCodePoint(codePoint);
  }

  // Braced form: one or more hex digits up to the closing brace.
  index.v += 1; // skip '{'
  for (;;) {
    codePoint = codePoint * 16 + readHex(source[index.v]);
    if (codePoint > 0x10FFFF) {
      throw new SyntaxError();
    }
    index.v += 1;
    if (source[index.v] === '}') {
      break;
    }
  }
  index.v += 1; // skip '}'

  return String.fromCodePoint(codePoint);
}

/**
 * Decodes a legacy octal escape (`\0` .. `\377`). The first digit has
 * already been consumed and is passed as `char`; `index.v` points at the
 * character after it and is advanced past any further digits consumed.
 *
 * @param {string} char - The first octal digit ('0'..'7').
 * @param {string} source - The string literal text being scanned.
 * @param {{v: number}} index - Mutable cursor into `source`.
 * @returns {string} The character for the decoded octal value.
 */
function scanOctalEscapeSequence (char, source, index) {
  // A leading 0-3 may be followed by up to two more digits (max \377);
  // a leading 4-7 by at most one, so the value never exceeds 0o377.
  const maxExtraDigits = char <= '3' ? 2 : 1;
  let result = +char;

  for (let read = 0; read < maxExtraDigits; ++read) {
    const digit = source[index.v];
    // Written as a positive range test so that `undefined` (end of input)
    // also ends the escape; otherwise it would be folded in as NaN and
    // String.fromCodePoint would throw a RangeError.
    if (!(digit >= '0' && digit <= '7')) {
      break;
    }
    result = result * 8 + (+digit);
    ++index.v;
  }

  return String.fromCodePoint(result);
}

/**
 * Converts one hexadecimal digit character to its numeric value.
 *
 * @param {string|undefined} char - Character to convert; `undefined` (a read
 *   past the end of the input) is rejected like any other non-digit.
 * @returns {number} A value from 0 to 15.
 * @throws {SyntaxError} If `char` is not one of 0-9, a-f or A-F.
 */
function readHex (char) {
  const isDecimalDigit = char >= '0' && char <= '9';
  if (isDecimalDigit) {
    return +char;
  }

  // Distance from the letter's char code to its hex value:
  // 'a' is 97 -> 10, 'A' is 65 -> 10.
  let codeOffset;
  if (char >= 'a' && char <= 'f') {
    codeOffset = 87;
  } else if (char >= 'A' && char <= 'F') {
    codeOffset = 55;
  } else {
    throw new SyntaxError();
  }
  return char.charCodeAt(0) - codeOffset;
}



function copyBE (src, outBuf16) {
const len = src.length;
let i = 0;
Expand Down
10 changes: 10 additions & 0 deletions test/_unit.js
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,11 @@ suite('Lexer', () => {
exports['not identifier'] = 'asdf';
exports['\\u{D83C}\\u{DF10}'] = 1;
exports['\\u{D83C}'] = 1;
exports['\\u58b8'] = 1;
exports['\\n'] = 1;
exports['\\xFF'] = 1;
exports['\\011'] = 1;
exports['\\3z'] = 1;
exports['\\''] = 1;
exports['@notidentifier'] = 'asdf';
Object.defineProperty(exports, "%notidentifier", { value: x });
Expand All @@ -549,6 +554,11 @@ suite('Lexer', () => {
'ab cd',
'not identifier',
'\u{D83C}\u{DF10}',
'\u58B8',
'\n',
'\xFF',
'\011',
'\3z',
'\'',
'@notidentifier',
'%notidentifier',
Expand Down