diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 21a777e1..add2cc1a 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -1,5 +1,5 @@ import { AiScriptSyntaxError, AiScriptUnexpectedEOFError } from '../error.js'; -import { decodeUnicodeEscapeSequence } from '../utils/characters.js'; +import { decodeUnicodeEscapeSequence, tryDecodeSingleEscapeCharacter } from '../utils/characters.js'; import { CharStream } from './streams/char-stream.js'; import { TOKEN, TokenKind } from './token.js'; import { unexpectedTokenError } from './utils.js'; @@ -463,6 +463,25 @@ export class Scanner implements ITokenStream { return; } + private decodeEscapeSequence(): string { + if (this.stream.eof) { + throw new AiScriptUnexpectedEOFError(this.stream.getPos()); + } + + if (this.stream.char === 'u') { + const unicodeEscapeSequence = this.readUnicodeEscapeSequence(); + return String.fromCharCode(Number.parseInt(unicodeEscapeSequence.slice(1), 16)); + } + + const decodedSingleEscapeCharacter = tryDecodeSingleEscapeCharacter(this.stream.char); + if (decodedSingleEscapeCharacter != null) { + this.stream.next(); + return decodedSingleEscapeCharacter; + } + + throw new AiScriptSyntaxError(`invalid escape character: "${this.stream.char}"`, this.stream.getPos()); + } + private readUnicodeEscapeSequence(): `u${string}` { if (this.stream.eof || (this.stream.char as string) !== 'u') { throw new AiScriptSyntaxError('character "u" expected', this.stream.getPos()); @@ -569,11 +588,7 @@ export class Scanner implements ITokenStream { break; } case 'escape': { - if (this.stream.eof) { - throw new AiScriptUnexpectedEOFError(pos); - } - value += this.stream.char; - this.stream.next(); + value += this.decodeEscapeSequence(); state = 'string'; break; } @@ -632,13 +647,7 @@ export class Scanner implements ITokenStream { break; } case 'escape': { - // エスケープ対象の文字が無いままEOFに達した - if (this.stream.eof) { - throw new AiScriptUnexpectedEOFError(pos); - } - // 普通の文字として取り込み - buf += 
this.stream.char; - this.stream.next(); + buf += this.decodeEscapeSequence(); // 通常の文字列に戻る state = 'string'; break; diff --git a/src/utils/characters.ts b/src/utils/characters.ts index 2c5a170a..b2df24f1 100644 --- a/src/utils/characters.ts +++ b/src/utils/characters.ts @@ -72,3 +72,21 @@ export function decodeUnicodeEscapeSequence(string: string): string { return result; } + +export function tryDecodeSingleEscapeCharacter(s: string): string | null { + switch (s) { + // case 'b': return '\b'; + case 't': return '\t'; + case 'n': return '\n'; + // case 'v': return '\v'; + // case 'f': return '\f'; + case 'r': return '\r'; + case '"': return '"'; + case '\'': return '\''; + case '\\': return '\\'; + case '`': return '`'; + case '{': return '{'; + case '}': return '}'; + default: return null; + } +} diff --git a/test/literals.ts b/test/literals.ts index cb747b7c..a55512d2 100644 --- a/test/literals.ts +++ b/test/literals.ts @@ -341,3 +341,83 @@ describe('Template syntax', () => { }); }); +describe('Escape sequence', () => { + describe('valid', () => { + const cases: [string, string][] = [ + ['\\t', '\t'], // horizontal tab + ['\\n', '\n'], // line feed + ['\\r', '\r'], // carriage return + ['\\"', '"'], + ['\\\'', '\''], + ['\\\\', '\\'], + ['\\`', '`'], + ['\\{', '{'], + ['\\}', '}'], + ['\\u0041', 'A'], + ['\\u85cd', '藍'], + ['\\u85CD', '藍'], + ['\\ud842\\udfb7', '𠮷'], + ['\\uD842\\uDFB7', '𠮷'], + ]; + + describe('double quote', () => { + test.each(cases)('value of escape sequence "%s" must be "%s"', async (char, expected) => { + const res = await exe(` + <: "${char}" + `); + eq(res, STR(expected)); + }); + }); + + describe('single quote', () => { + test.each(cases)('value of escape sequence "%s" must be "%s"', async (char, expected) => { + const res = await exe(` + <: '${char}' + `); + eq(res, STR(expected)); + }); + }); + + describe('template', () => { + test.each(cases)('value of escape sequence "%s" must be "%s"', async (string, expected) => { + const res = 
await exe(` + <: \`${string}\` + `); + eq(res, STR(expected)); + }); + }); + }); + + describe('invalid', () => { + const cases: [string][] = [ + ['\\x'], + ['\\b'], + ['\\v'], + ['\\f'], + ]; + + describe('double quote', () => { + test.each(cases)('value of escape sequence "%s" must not be allowed', async (char) => { + await expect(async () => await exe(` + <: "${char}" + `)).rejects.toThrow(AiScriptSyntaxError); + }); + }); + + describe('single quote', () => { + test.each(cases)('value of escape sequence "%s" must not be allowed', async (char) => { + await expect(async () => await exe(` + <: '${char}' + `)).rejects.toThrow(AiScriptSyntaxError); + }); + }); + + describe('template', () => { + test.each(cases)('value of escape sequence "%s" must not be allowed', async (string) => { + await expect(async () => await exe(` + <: \`${string}\` + `)).rejects.toThrow(AiScriptSyntaxError); + }); + }); + }); +}); diff --git a/unreleased/str_escape_sequnece.md b/unreleased/str_escape_sequnece.md new file mode 100644 index 00000000..4f5ac1f4 --- /dev/null +++ b/unreleased/str_escape_sequnece.md @@ -0,0 +1,8 @@ +- **Breaking change** 文字列リテラルやテンプレートリテラルにおけるエスケープシーケンスの仕様を変更しました。 + - 以下のエスケープシーケンスが追加されました。 + - `\t` - 水平タブ (U+0009) + - `\n` - 改行 (U+000A) + - `\r` - 復帰 (U+000D) + - `\u`とそれに続く4桁の16進数の英数字 - 与えられた値を持つUTF-16コード単位として解釈されます。 + - `\"`, `\'`, `\\`, `` \` ``, `\{`, `\}` - それぞれ、`\`の直後の文字そのものとなります。 + - `\`とそれに続く文字列が上記のいずれにも一致しない場合、文法エラーが発生するようになりました。