diff --git a/Lib/test/test_dstring.py b/Lib/test/test_dstring.py
new file mode 100644
index 00000000000000..5161c21b2a6c26
--- /dev/null
+++ b/Lib/test/test_dstring.py
@@ -0,0 +1,104 @@
+import unittest
+
+
+_dstring_prefixes = "d db df dt dr drb drf drt".split()
+_dstring_prefixes += [p.upper() for p in _dstring_prefixes]
+
+
+def d(s):
+    # Helper function to evaluate d-strings.
+    if '"""' in s:
+        return eval(f"d'''{s}'''")
+    else:
+        return eval(f'd"""{s}"""')
+
+
+class DStringTestCase(unittest.TestCase):
+    def assertAllRaise(self, exception_type, regex, error_strings):
+        for str in error_strings:
+            with self.subTest(str=str):
+                with self.assertRaisesRegex(exception_type, regex):
+                    eval(str)
+
+    def test_single_quote(self):
+        exprs = [
+            f"{p}'hello, world'" for p in _dstring_prefixes
+        ] + [
+            f'{p}"hello, world"' for p in _dstring_prefixes
+        ]
+        self.assertAllRaise(SyntaxError, "d-string must be triple-quoted", exprs)
+
+    def test_empty_dstring(self):
+        exprs = [
+            f"{p}''''''" for p in _dstring_prefixes
+        ] + [
+            f'{p}""""""' for p in _dstring_prefixes
+        ]
+        self.assertAllRaise(SyntaxError, "d-string must start with a newline", exprs)
+
+        for prefix in _dstring_prefixes:
+            expr = f"{prefix}'''\n'''"
+            expr2 = f'{prefix}"""\n"""'
+            with self.subTest(expr=expr, expr2=expr2):
+                v = eval(expr)
+                v2 = eval(expr2)
+                if 't' in prefix.lower():
+                    self.assertEqual(v.strings, ("",))
+                    self.assertEqual(v2.strings, ("",))
+                elif 'b' in prefix.lower():
+                    self.assertEqual(v, b"")
+                    self.assertEqual(v2, b"")
+                else:
+                    self.assertEqual(v, "")
+                    self.assertEqual(v2, "")
+
+    def test_dedent(self):
+        # Basic dedent - remove common leading whitespace
+        result = d("""
+            hello
+            world
+            """)
+        self.assertEqual(result, "hello\nworld\n")
+
+        # Dedent with varying indentation
+        result = d("""
+             line1
+             line2
+            line3
+             """)
+        self.assertEqual(result, " line1\n line2\nline3\n ")
+
+        # Dedent with tabs
+        result = d("""
+\thello
+\tworld
+\t""")
+        self.assertEqual(result, "hello\nworld\n")
+
+        # Mixed spaces and tabs (using common leading whitespace)
+        result = d("""
+\t\t  hello
+\t\t  world
+\t\t """)
+        self.assertEqual(result, " hello\n world\n")
+
+        # Empty lines do not affect the calculation of common leading whitespace
+        result = d("""
+            hello
+
+            world
+            """)
+        self.assertEqual(result, "hello\n\nworld\n")
+
+        # Lines with only whitespace also have their indentation removed.
+        result = d("""
+            hello
+            \n\
+             \n\
+            world
+            """)
+        self.assertEqual(result, "hello\n\n \nworld\n")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index ca67e381958757..2110f4c6a48320 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3420,7 +3420,7 @@ def determine_valid_prefixes():
     # some uppercase-only prefix is added.
     for letter in itertools.chain(string.ascii_lowercase, string.ascii_uppercase):
         try:
-            eval(f'{letter}""')
+            eval(f'{letter}"""\n"""')  # d-string needs multiline
             single_char_valid_prefixes.add(letter.lower())
         except SyntaxError:
             pass
@@ -3444,7 +3444,7 @@ def determine_valid_prefixes():
             # because it's a valid expression: not ""
             continue
         try:
-            eval(f'{p}""')
+            eval(f'{p}"""\n"""')  # d-string needs multiline
 
             # No syntax error, so p is a valid string
             # prefix.
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 11c134482db024..74a709024174b6 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -86,7 +86,8 @@ def _all_string_prefixes():
     # The valid string prefixes. Only contain the lower case versions,
     #  and don't contain any permutations (include 'fr', but not
     #  'rf'). The various permutations will be generated.
-    _valid_string_prefixes = ['b', 'r', 'u', 'f', 't', 'br', 'fr', 'tr']
+    _valid_string_prefixes = ['b', 'r', 'u', 'f', 't', 'd', 'br', 'fr', 'tr',
+                              'bd', 'rd', 'fd', 'td', 'brd', 'frd', 'trd']
     # if we add binary f-strings, add: ['fb', 'fbr']
     result = {''}
     for prefix in _valid_string_prefixes:
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index fdcbcf51cb62c2..412dbfc53b6353 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -13480,8 +13480,8 @@ of all lines in the [src, end). It returns the length
 of the common leading whitespace and sets `output` to point to the
 beginning of the common leading whitespace if length > 0.
 */
-static Py_ssize_t
-search_longest_common_leading_whitespace(
+Py_ssize_t
+_Py_search_longest_common_leading_whitespace(
     const char *const src,
     const char *const end,
     const char **output)
@@ -13576,7 +13576,7 @@ _PyUnicode_Dedent(PyObject *unicode)
     // [whitespace_start, whitespace_start + whitespace_len)
     // describes the current longest common leading whitespace
     const char *whitespace_start = NULL;
-    Py_ssize_t whitespace_len = search_longest_common_leading_whitespace(
+    Py_ssize_t whitespace_len = _Py_search_longest_common_leading_whitespace(
         src, end, &whitespace_start);
 
     if (whitespace_len == 0) {
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index 50856686335a14..567da3475eb098 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -1292,24 +1292,146 @@ _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq
 
 // Fstring stuff
 
+static int
+unicodewriter_write_line(Parser *p, PyUnicodeWriter *w, const char *line_start, const char *line_end,
+                         int is_raw, Token* token)
+{
+    if (is_raw || memchr(line_start, '\\', line_end - line_start) == NULL) {
+        return PyUnicodeWriter_WriteUTF8(w, line_start, line_end - line_start);
+    }
+    else {
+        // decode escape sequences in this line
+        PyObject *line = _PyPegen_decode_string(p, 0, line_start, line_end - line_start, token);
+        if (line == NULL || PyUnicodeWriter_WriteStr(w, line) < 0) {
+            Py_XDECREF(line);
+            return -1;
+        }
+        Py_DECREF(line);
+    }
+    return 0;
+}
+
+static PyObject*
+_PyPegen_dedent_string_part(
+    Parser *p, const char *s, size_t len, const char *indent, Py_ssize_t indent_len,
+    int is_first, int is_raw, expr_ty constant, Token* token)
+{
+    Py_ssize_t lineno = constant->lineno;
+    const char *line_start = s;
+    const char *end = s + len;
+
+    int _prev_call_invalid = p->call_invalid_rules;
+    if (!_prev_call_invalid && !is_raw) {
+        // _PyPegen_decode_string() and decode_bytes_with_escapes() may call
+        // warn_invalid_escape_sequence(), which may emit a warning or raise
+        // SyntaxError for invalid escape sequences.
+        // We need to call it before dedenting since the SyntaxError needs the
+        // exact lineno and col_offset of the invalid escape sequence.
+        PyObject *t = _PyPegen_decode_string(p, 0, s, len, token);
+        if (t == NULL) {
+            return NULL;
+        }
+        Py_DECREF(t);
+        p->call_invalid_rules = 1;
+    }
+
+    PyUnicodeWriter *w = PyUnicodeWriter_Create(len);
+    if (w == NULL) {
+        return NULL;
+    }
+
+    if (is_first) {
+        assert(line_start[0] == '\n');
+        line_start++;  // skip the first newline
+    }
+    else {
+        // Example: df"""
+        //     first part {param} second part
+        //     next line
+        //     """
+        // We don't need to dedent the first line in the non-first parts.
+        const char *line_end = memchr(line_start, '\n', end - line_start);
+        if (line_end) {
+            line_end++;  // include the newline
+        }
+        else {
+            line_end = end;
+        }
+        if (unicodewriter_write_line(p, w, line_start, line_end, is_raw, token) < 0) {
+            goto error;
+        }
+        line_start = line_end;
+    }
+
+    while (line_start < end) {
+        lineno++;
+
+        Py_ssize_t i = 0;
+        while (line_start + i < end && i < indent_len && line_start[i] == indent[i]) {
+            i++;
+        }
+
+        if (line_start[i] == '\0') {  // found an empty line without newline.
+            break;
+        }
+        if (line_start[i] == '\n') {  // found an empty line with newline.
+            if (PyUnicodeWriter_WriteChar(w, '\n') < 0) {
+                goto error;
+            }
+            line_start += i+1;
+            continue;
+        }
+        if (i < indent_len) {  // found an invalid indent.
+            assert(line_start[i] != indent[i]);
+            RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, i, lineno, i+1,
+                                       "d-string line missing valid indentation");
+            goto error;
+        }
+
+        // found an indented line; dedent it.
+        line_start += i;
+        const char *line_end = memchr(line_start, '\n', end - line_start);
+        if (line_end) {
+            line_end++;  // include the newline
+        }
+        else {
+            line_end = end;
+        }
+        if (unicodewriter_write_line(p, w, line_start, line_end, is_raw, token) < 0) {
+            goto error;
+        }
+        line_start = line_end;
+    }
+    p->call_invalid_rules = _prev_call_invalid;
+    return PyUnicodeWriter_Finish(w);
+
+error:
+    p->call_invalid_rules = _prev_call_invalid;
+    PyUnicodeWriter_Discard(w);
+    return NULL;
+}
+
 static expr_ty
-_PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant, Token* token) {
+_PyPegen_decode_fstring_part(Parser* p, int is_first, int is_raw,
+                             const char *indent, Py_ssize_t indent_len,
+                             expr_ty constant, Token* token)
+{
     assert(PyUnicode_CheckExact(constant->v.Constant.value));
 
     const char* bstr = PyUnicode_AsUTF8(constant->v.Constant.value);
     if (bstr == NULL) {
         return NULL;
     }
+    is_raw = is_raw || strchr(bstr, '\\') == NULL;
 
-    size_t len;
-    if (strcmp(bstr, "{{") == 0 || strcmp(bstr, "}}") == 0) {
-        len = 1;
-    } else {
-        len = strlen(bstr);
+    PyObject *str = NULL;
+    if (indent != NULL) {
+        str = _PyPegen_dedent_string_part(p, bstr, strlen(bstr), indent, indent_len,
+                                          is_first, is_raw, constant, token);
+    }
+    else {
+        str = _PyPegen_decode_string(p, is_raw, bstr, strlen(bstr), token);
     }
-
-    is_raw = is_raw || strchr(bstr, '\\') == NULL;
-    PyObject *str = _PyPegen_decode_string(p, is_raw, bstr, len, token);
     if (str == NULL) {
         _Pypegen_raise_decode_error(p);
         return NULL;
@@ -1323,6 +1445,103 @@ _PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant, Tok
             p->arena);
 }
+
+/*
+This function is a customized version of _Py_search_longest_common_leading_whitespace()
+in unicodeobject.c.
+*/
+static void
+search_longest_common_leading_whitespace(
+    const char *const src,
+    const char *const end,
+    const char **indent,
+    Py_ssize_t *indent_len)
+{
+    // [_start, _start + _len)
+    // describes the current longest common leading whitespace
+    const char *_start = *indent;
+    Py_ssize_t _len = *indent_len;
+
+    // skip the first line.  for example:
+    //     s = df"""
+    //         first part
+    //         first part{x}second part
+    //         second part
+    //         """
+    // we don't need the newline after the opening quote,
+    // nor the first line of the second part.
+    const char *iter = memchr(src, '\n', end - src);
+    if (iter == NULL) {
+        // single line string
+        return;
+    }
+
+    for (iter++; iter <= end; iter++) {
+        const char *line_start = iter;
+        const char *leading_whitespace_end = NULL;
+
+        // scan the whole line
+        while (iter < end && *iter != '\n') {
+            if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
+                /* `iter` points to the first non-whitespace character
+                   in this line */
+                if (iter == line_start) {
+                    // some line has no indent, fast exit!
+                    *indent = iter;
+                    *indent_len = 0;
+                    return;
+                }
+                leading_whitespace_end = iter;
+            }
+            ++iter;
+        }
+
+        if (!leading_whitespace_end) {
+            // if this line is all whitespace, skip it
+            if (iter < end) {
+                continue;
+            }
+            leading_whitespace_end = iter;  // last line may not end with '\n'
+        }
+
+        if (!_start) {
+            // update the first leading whitespace
+            _start = line_start;
+            _len = leading_whitespace_end - line_start;
+        }
+        else {
+            /* We then compare with the current longest leading whitespace.
+
+               [line_start, leading_whitespace_end) is the leading
+               whitespace of this line,
+
+               [_start, _start + _len) is the leading whitespace of the
+               current longest leading whitespace. */
+            Py_ssize_t new_len = 0;
+            const char *_iter = _start, *line_iter = line_start;
+
+            while (_iter < _start + _len && line_iter < leading_whitespace_end
+                   && *_iter == *line_iter)
+            {
+                ++_iter;
+                ++line_iter;
+                ++new_len;
+            }
+
+            _len = new_len;
+            if (_len == 0) {
+                // no common prefix remains, fast exit!
+                *indent = _start;
+                *indent_len = 0;
+                return;
+            }
+        }
+    }
+
+    *indent = _start;
+    *indent_len = _len;
+}
+
+
 static asdl_expr_seq *
 _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b, enum string_kind_t string_kind)
 {
@@ -1340,12 +1559,53 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
         return NULL;
     }
     int is_raw = strpbrk(quote_str, "rR") != NULL;
+    int is_dedent = strpbrk(quote_str, "dD") != NULL;
 
     asdl_expr_seq *seq = _Py_asdl_expr_seq_new(total_items, p->arena);
     if (seq == NULL) {
         return NULL;
     }
 
+    const char *indent_start = NULL;
+    Py_ssize_t indent_len = 0;
+
+    if (is_dedent) {
+        if (total_items == 0) {
+            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
+                a,
+                "d-string must start with a newline"
+            );
+            return NULL;
+        }
+        expr_ty first_item = asdl_seq_GET(raw_expressions, 0);
+        if (first_item->kind != Constant_kind
+            || PyUnicode_ReadChar(first_item->v.Constant.value, 0) != '\n') {
+            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
+                first_item,
+                "d-string must start with a newline"
+            );
+            return NULL;
+        }
+
+        for (Py_ssize_t i = 0; i < n_items; i++) {
+            expr_ty item = asdl_seq_GET(raw_expressions, i);
+            if (item->kind == Constant_kind) {
+                Py_ssize_t blen;
+                const char *bstr = PyUnicode_AsUTF8AndSize(item->v.Constant.value, &blen);
+                if (bstr == NULL) {
+                    return NULL;
+                }
+                search_longest_common_leading_whitespace(bstr, bstr + blen, &indent_start, &indent_len);
+            }
+        }
+
+        assert(indent_start != NULL);  // TODO: is this assert true?
+        // search_longest_common_leading_whitespace() may not set indent_start when the string is empty.
+        if (indent_len == 0) {
+            indent_start = "";
+        }
+    }
+
     Py_ssize_t index = 0;
     for (Py_ssize_t i = 0; i < n_items; i++) {
         expr_ty item = asdl_seq_GET(raw_expressions, i);
@@ -1377,7 +1637,7 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
         }
 
         if (item->kind == Constant_kind) {
-            item = _PyPegen_decode_fstring_part(p, is_raw, item, b);
+            item = _PyPegen_decode_fstring_part(p, i == 0, is_raw, indent_start, indent_len, item, b);
             if (item == NULL) {
                 return NULL;
             }
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 7f25afec302c22..07c61f1cb4386b 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -455,7 +455,7 @@ tok_continuation_line(struct tok_state *tok) {
 static int
 maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
                                              int saw_b, int saw_r, int saw_u,
-                                             int saw_f, int saw_t) {
+                                             int saw_f, int saw_t, int saw_d) {
     // Supported: rb, rf, rt (in any order)
     // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
 
@@ -480,6 +480,9 @@ maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
     if (saw_u && saw_t) {
         RETURN_SYNTAX_ERROR("u", "t");
     }
+    if (saw_u && saw_d) {
+        RETURN_SYNTAX_ERROR("u", "d");
+    }
 
     if (saw_b && saw_f) {
         RETURN_SYNTAX_ERROR("b", "f");
@@ -741,8 +744,8 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
     /* Identifier (most frequent token!) */
     nonascii = 0;
     if (is_potential_identifier_start(c)) {
-        /* Process the various legal combinations of b"", r"", u"", and f"". */
-        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
+        /* Process the various legal combinations of b"", r"", u"", f"", t"", and d"". */
+        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0, saw_d = 0;
         while (1) {
             if (!saw_b && (c == 'b' || c == 'B')) {
                 saw_b = 1;
@@ -762,6 +765,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         else if (!saw_t && (c == 't' || c == 'T')) {
             saw_t = 1;
         }
+        else if (!saw_d && (c == 'd' || c == 'D')) {
+            saw_d = 1;
+        }
         else {
             break;
         }
@@ -769,7 +775,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         if (c == '"' || c == '\'') {
             // Raise error on incompatible string prefixes:
             int status = maybe_raise_syntax_error_for_string_prefixes(
-                tok, saw_b, saw_r, saw_u, saw_f, saw_t);
+                tok, saw_b, saw_r, saw_u, saw_f, saw_t, saw_d);
             if (status < 0) {
                 return MAKE_TOKEN(ERRORTOKEN);
             }
@@ -1049,7 +1055,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
     }
 
   f_string_quote:
-    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
+    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't' || Py_TOLOWER(*tok->start) == 'd')
          && (c == '\'' || c == '"'))) {
 
         int quote = c;
@@ -1089,6 +1095,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         the_current_tok->kind = TOK_FSTRING_MODE;
         the_current_tok->quote = quote;
         the_current_tok->quote_size = quote_size;
+        the_current_tok->raw = 0;
         the_current_tok->start = tok->start;
         the_current_tok->multi_line_start = tok->line_start;
         the_current_tok->first_line = tok->lineno;
@@ -1101,25 +1108,28 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         the_current_tok->in_debug = 0;
 
         enum string_kind_t string_kind = FSTRING;
-        switch (*tok->start) {
-            case 'T':
-            case 't':
-                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
-                string_kind = TSTRING;
-                break;
-            case 'F':
-            case 'f':
-                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
-                break;
-            case 'R':
-            case 'r':
-                the_current_tok->raw = 1;
-                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
+        for (const char *p = tok->start; *p != c; p++) {
+            switch (*p) {
+            case 'f':
+            case 'F':
+                break;
+            case 't':
+            case 'T':
                 string_kind = TSTRING;
-                }
-                break;
-            default:
-                Py_UNREACHABLE();
+                break;
+            case 'r':
+            case 'R':
+                the_current_tok->raw = 1;
+                break;
+            case 'd':
+            case 'D':
+                if (quote_size != 3) {
+                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "d-string must be triple-quoted"));
+                }
+                break;
+            default:
+                Py_UNREACHABLE();
+            }
         }
         the_current_tok->string_kind = string_kind;
diff --git a/Parser/string_parser.c b/Parser/string_parser.c
index b164dfbc81a933..3425b856796fe5 100644
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -247,6 +247,67 @@ _PyPegen_decode_string(Parser *p, int raw, const char *s, size_t len, Token *t)
     return decode_unicode_with_escapes(p, s, len, t);
 }
 
+/* defined in unicodeobject.c */
+extern Py_ssize_t
+_Py_search_longest_common_leading_whitespace(
+    const char *const src,
+    const char *const end,
+    const char **output
+    );
+
+// Dedent a d-string and return the result as a bytes object.
+static PyObject*
+_PyPegen_dedent_string(Parser *p, const char *s, Py_ssize_t len,
+                       const char *indent, Py_ssize_t indent_len, int lineno)
+{
+    PyBytesWriter *w = PyBytesWriter_Create(0);
+    if (w == NULL) {
+        return NULL;
+    }
+
+    const char *end = s + len;
+    for (; s < end; lineno++) {
+        Py_ssize_t i;
+        for (i = 0; i < indent_len; i++) {
+            if (s[i] != indent[i]) {
+                if (s[i] == '\n') {
+                    break;  // empty line
+                }
+                PyBytesWriter_Discard(w);
+                RAISE_ERROR_KNOWN_LOCATION(p, PyExc_IndentationError, lineno, i, lineno, i+1,
+                                           "d-string missing valid indentation");
+                return NULL;
+            }
+        }
+
+        if (s[i] == '\n') {  // found an empty line with newline.
+            if (PyBytesWriter_WriteBytes(w, "\n", 1) < 0) {
+                PyBytesWriter_Discard(w);
+                return NULL;
+            }
+            s += i+1;
+            continue;
+        }
+
+        // found an indented line; dedent it.
+        s += i;
+        const char *line_end = memchr(s, '\n', end - s);
+        if (line_end == NULL) {
+            line_end = end;  // last line without newline
+        }
+        else {
+            line_end++;  // include the newline in the line
+        }
+
+        if (PyBytesWriter_WriteBytes(w, s, line_end - s) < 0) {
+            PyBytesWriter_Discard(w);
+            return NULL;
+        }
+        s = line_end;
+    }
+    return PyBytesWriter_Finish(w);
+}
+
 /* s must include the bracketing quote characters, and r, b &/or f prefixes
    (if any), and embedded escape sequences (if any). (f-strings are handled by the parser)
    _PyPegen_parse_string parses it, and returns the decoded Python string object. */
@@ -262,9 +323,10 @@ _PyPegen_parse_string(Parser *p, Token *t)
     int quote = Py_CHARMASK(*s);
     int bytesmode = 0;
     int rawmode = 0;
+    int dedentmode = 0;
 
     if (Py_ISALPHA(quote)) {
-        while (!bytesmode || !rawmode) {
+        while (!bytesmode || !rawmode || !dedentmode) {
             if (quote == 'b' || quote == 'B') {
                 quote =(unsigned char)*++s;
                 bytesmode = 1;
@@ -276,6 +338,10 @@ _PyPegen_parse_string(Parser *p, Token *t)
                 quote = (unsigned char)*++s;
                 rawmode = 1;
             }
+            else if (quote == 'd' || quote == 'D') {
+                quote = (unsigned char)*++s;
+                dedentmode = 1;
+            }
             else {
                 break;
             }
@@ -315,9 +381,64 @@ _PyPegen_parse_string(Parser *p, Token *t)
             return NULL;
         }
     }
+    else if (dedentmode) {
+        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "d-string must be triple-quoted");
+        return NULL;
+    }
 
     /* Avoid invoking escape decoding routines if possible. */
     rawmode = rawmode || strchr(s, '\\') == NULL;
+
+    int _prev_call_invalid = p->call_invalid_rules;
+
+    PyObject *dedent_bytes = NULL;
+    if (dedentmode) {
+        if (len == 0 || s[0] != '\n') {
+            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "d-string must start with a newline");
+            return NULL;
+        }
+
+        // _PyPegen_decode_string() and decode_bytes_with_escapes() emit
+        // a warning for invalid escape sequences.
+        // We need to call them before dedenting, since dedenting shifts the positions.
+        if (!_prev_call_invalid && !rawmode) {
+            PyObject *temp;
+            if (bytesmode) {
+                temp = decode_bytes_with_escapes(p, s, len, t);
+            }
+            else {
+                temp = _PyPegen_decode_string(p, 0, s, len, t);
+            }
+            if (temp == NULL) {
+                return NULL;
+            }
+            Py_DECREF(temp);
+        }
+
+        // We find the common indent in [s, end+1) because we want to include
+        // the last line in the indent calculation.
+        const char *end = s + len;
+        assert(*end == '"' || *end == '\'');  // end[0:3] is the closing quotes
+        const char *indent;
+        Py_ssize_t indent_len = _Py_search_longest_common_leading_whitespace(s+1, end+1, &indent);
+
+        s++; len--;  // skip the first newline
+        if (indent_len > 0) {
+            // dedent the string
+            dedent_bytes = _PyPegen_dedent_string(p, s, len, indent, indent_len, t->lineno + 1);
+            if (dedent_bytes == NULL) {
+                return NULL;
+            }
+            if (PyBytes_AsStringAndSize(dedent_bytes, (char**)&s, (Py_ssize_t*)&len) < 0) {
+                Py_DECREF(dedent_bytes);
+                return NULL;
+            }
+        }
+
+        p->call_invalid_rules = 1;
+    }
+
+    PyObject *result;
     if (bytesmode) {
         /* Disallow non-ASCII characters. */
         const char *ch;
@@ -327,13 +448,23 @@ _PyPegen_parse_string(Parser *p, Token *t)
                 t,
                 "bytes can only contain ASCII "
                 "literal characters");
+            Py_XDECREF(dedent_bytes);
+            p->call_invalid_rules = _prev_call_invalid;
             return NULL;
         }
     }
         if (rawmode) {
-            return PyBytes_FromStringAndSize(s, (Py_ssize_t)len);
+            result = PyBytes_FromStringAndSize(s, (Py_ssize_t)len);
         }
-        return decode_bytes_with_escapes(p, s, (Py_ssize_t)len, t);
+        else {
+            result = decode_bytes_with_escapes(p, s, (Py_ssize_t)len, t);
+        }
+    }
+    else {
+        result = _PyPegen_decode_string(p, rawmode, s, len, t);
     }
-    return _PyPegen_decode_string(p, rawmode, s, len, t);
+    Py_XDECREF(dedent_bytes);
+    p->call_invalid_rules = _prev_call_invalid;
+    return result;
 }
+
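For reviewers, here is a rough pure-Python sketch of the dedent pass that `_PyPegen_dedent_string()` and `_Py_search_longest_common_leading_whitespace()` implement above. It is illustrative only and not part of the patch: `dedent_dstring` and `_common_indent` are hypothetical names, and raw-mode/escape-sequence handling is left out.

```python
import re


def _common_indent(lines):
    # Longest run of leading spaces/tabs shared by all lines, mirroring
    # the C search: whitespace-only lines are skipped, except the final
    # (closing-quote) line, which always participates.
    indent = None
    for i, line in enumerate(lines):
        ws = re.match(r"[ \t]*", line).group()
        if ws == line and i != len(lines) - 1:
            continue  # whitespace-only line: ignore for the calculation
        if indent is None:
            indent = ws
        else:
            n = 0
            while n < min(len(indent), len(ws)) and indent[n] == ws[n]:
                n += 1
            indent = indent[:n]
    return indent or ""


def dedent_dstring(body):
    # `body` is everything between the opening and closing triple quotes.
    if not body.startswith("\n"):
        raise SyntaxError("d-string must start with a newline")
    *lines, last = body[1:].split("\n")
    indent = _common_indent(lines + [last])
    out = []
    for line in lines:
        if line.startswith(indent):
            out.append(line[len(indent):])  # ordinary dedent
        elif line.strip(" \t") == "" and indent.startswith(line):
            out.append("")  # short whitespace-only line becomes empty
        else:
            raise IndentationError("d-string missing valid indentation")
    # The closing-quote line always starts with the common indent (it took
    # part in the calculation); its remainder is kept without a newline.
    return "".join(line + "\n" for line in out) + last[len(indent):]


assert dedent_dstring("\n    hello\n    world\n    ") == "hello\nworld\n"
```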