From 5c4d3481ac827c5f98682c17b523272aed0c3e09 Mon Sep 17 00:00:00 2001 From: Nick Treleaven Date: Tue, 20 Jun 2023 20:26:27 +0100 Subject: [PATCH 1/5] Fix Issue 23999 - literal suffixes dont mix well with template instantiations --- compiler/src/dmd/lexer.d | 19 ++++++++++++++- .../fail_compilation/templatesingleparam.d | 23 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 compiler/test/fail_compilation/templatesingleparam.d diff --git a/compiler/src/dmd/lexer.d b/compiler/src/dmd/lexer.d index 9cce7c567234..f08e55107ccc 100644 --- a/compiler/src/dmd/lexer.d +++ b/compiler/src/dmd/lexer.d @@ -1964,7 +1964,7 @@ class Lexer /*************************************** * Get postfix of string literal. */ - private void stringPostfix(Token* t) pure @nogc + private void stringPostfix(Token* t) { switch (*p) { @@ -1973,6 +1973,13 @@ class Lexer case 'd': t.postfix = *p; p++; + // disallow e.g. `@r"_"dtype var;` + if (isidchar(*p) || *p & 0x80) + { + const loc = loc(); + error(loc, "alphanumeric character cannot follow string literal `%c` postfix without whitespace", + p[-1]); + } break; default: t.postfix = 0; @@ -1994,6 +2001,16 @@ class Lexer { int base = 10; const start = p; + scope (exit) + { + // disallow e.g. `@10Utype var;` + if (p > start && (isalpha(p[-1]) || p[-1] & 0x80) && (isidchar(*p) || *p & 0x80)) + { + const loc = loc(); + error(loc, "alphanumeric character cannot follow numeric literal `%s` without whitespace", + start[0..p-start].xarraydup().ptr); + } + } ulong n = 0; // unsigned >=64 bit integer type int d; bool err = false; diff --git a/compiler/test/fail_compilation/templatesingleparam.d b/compiler/test/fail_compilation/templatesingleparam.d new file mode 100644 index 000000000000..f6b524031b0d --- /dev/null +++ b/compiler/test/fail_compilation/templatesingleparam.d @@ -0,0 +1,23 @@ +/* +REQUIRED_ARGS: -vcolumns +TEST_OUTPUT: +--- +fail_compilation/templatesingleparam.d(17,14): Error: alphanumeric character cannot follow string literal `c` postfix without whitespace +fail_compilation/templatesingleparam.d(18,10): Error: alphanumeric character cannot follow numeric literal `2LU` without whitespace +fail_compilation/templatesingleparam.d(22,6): Error: alphanumeric character cannot follow string literal `d` postfix without whitespace +fail_compilation/templatesingleparam.d(23,8): Error: alphanumeric character cannot follow numeric literal `0xFeed` without whitespace +--- +*/ +class Foo(alias str) { + enum STR = str; +} + +class Bar { + Foo!q{foo}bb; // OK + Foo!q{foo}cc; + Foo!2LUNGS; +} + +@`_`int i; // OK +@`_`dint di; +@0xFeedObject obj; From 188ac5926b33b77202ce9d980c1e11eb4dea4d99 Mon Sep 17 00:00:00 2001 From: Nick Treleaven Date: Wed, 21 Jun 2023 13:26:08 +0100 Subject: [PATCH 2/5] Ignore C files --- compiler/src/dmd/lexer.d | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/compiler/src/dmd/lexer.d b/compiler/src/dmd/lexer.d index f08e55107ccc..880738530447 100644 --- a/compiler/src/dmd/lexer.d +++ b/compiler/src/dmd/lexer.d @@ -1974,7 +1974,7 @@ class Lexer t.postfix = *p; p++; // disallow e.g. `@r"_"dtype var;` - if (isidchar(*p) || *p & 0x80) + if (!Ccompile && (isidchar(*p) || *p & 0x80)) { const loc = loc(); error(loc, "alphanumeric character cannot follow string literal `%c` postfix without whitespace", @@ -2004,7 +2004,9 @@ class Lexer scope (exit) { // disallow e.g. `@10Utype var;` - if (p > start && (isalpha(p[-1]) || p[-1] & 0x80) && (isidchar(*p) || *p & 0x80)) + if (!Ccompile && p > start && + (isalpha(p[-1]) || p[-1] & 0x80) && + (isidchar(*p) || *p & 0x80)) { const loc = loc(); error(loc, "alphanumeric character cannot follow numeric literal `%s` without whitespace", From cf700e6989dc4ce102ceacb78e7c7f8eef3f5a09 Mon Sep 17 00:00:00 2001 From: Nick Treleaven Date: Wed, 21 Jun 2023 13:38:46 +0100 Subject: [PATCH 3/5] Update hex float test --- compiler/test/fail_compilation/fail11751.d | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/compiler/test/fail_compilation/fail11751.d b/compiler/test/fail_compilation/fail11751.d index 36d7f9d8e302..b1fd248a75b2 100644 --- a/compiler/test/fail_compilation/fail11751.d +++ b/compiler/test/fail_compilation/fail11751.d @@ -1,9 +1,10 @@ /* TEST_OUTPUT: --- -fail_compilation/fail11751.d(10): Error: missing exponent -fail_compilation/fail11751.d(10): Error: semicolon expected following auto declaration, not `ABC` -fail_compilation/fail11751.d(10): Error: no identifier for declarator `ABC` +fail_compilation/fail11751.d(11): Error: missing exponent +fail_compilation/fail11751.d(11): Error: alphanumeric character cannot follow numeric literal `0x1.FFFFFFFFFFFFFp` without whitespace +fail_compilation/fail11751.d(11): Error: semicolon expected following auto declaration, not `ABC` +fail_compilation/fail11751.d(11): Error: no identifier for declarator `ABC` --- */ From d55816864d75379da1ae8bf069978dcd5ab3273f Mon Sep 17 00:00:00 2001 From: Nick Treleaven Date: Fri, 23 Jun 2023 16:46:31 +0100 Subject: [PATCH 4/5] Only error after numeric suffix Also allow digit after string postfix or numeric suffix. --- compiler/src/dmd/lexer.d | 62 ++++++++++--------- compiler/test/fail_compilation/fail11751.d | 7 +-- .../fail_compilation/templatesingleparam.d | 11 ++-- 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/compiler/src/dmd/lexer.d b/compiler/src/dmd/lexer.d index 880738530447..72d58d81ac60 100644 --- a/compiler/src/dmd/lexer.d +++ b/compiler/src/dmd/lexer.d @@ -1974,10 +1974,10 @@ class Lexer t.postfix = *p; p++; // disallow e.g. `@r"_"dtype var;` - if (!Ccompile && (isidchar(*p) || *p & 0x80)) + if (!Ccompile && (isalpha(*p) || *p & 0x80)) { const loc = loc(); - error(loc, "alphanumeric character cannot follow string literal `%c` postfix without whitespace", + error(loc, "identifier character cannot follow string `%c` postfix without whitespace", p[-1]); } break; @@ -2001,18 +2001,6 @@ class Lexer { int base = 10; const start = p; - scope (exit) - { - // disallow e.g. `@10Utype var;` - if (!Ccompile && p > start && - (isalpha(p[-1]) || p[-1] & 0x80) && - (isidchar(*p) || *p & 0x80)) - { - const loc = loc(); - error(loc, "alphanumeric character cannot follow numeric literal `%s` without whitespace", - start[0..p-start].xarraydup().ptr); - } - } ulong n = 0; // unsigned >=64 bit integer type int d; bool err = false; @@ -2206,6 +2194,7 @@ class Lexer FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none; // Parse trailing 'u', 'U', 'l' or 'L' in any combination const psuffix = p; +LIntegerSuffix: while (1) { FLAGS f; @@ -2214,26 +2203,31 @@ class Lexer case 'U': case 'u': f = FLAGS.unsigned; - goto L1; + break; case 'l': - f = FLAGS.long_; error("lower case integer suffix 'l' is not allowed. Please use 'L' instead"); - goto L1; + goto case; case 'L': f = FLAGS.long_; - L1: - p++; - if ((flags & f) && !err) + break; + default: + // disallow e.g. `Foo!5Luvar;` + if (!Ccompile && flags >= FLAGS.unsigned && (isalpha(*p) || *p & 0x80)) { - error("repeated integer suffix `%c`", p[-1]); - err = true; + const loc = loc(); + error(loc, "identifier character cannot follow integer `%c` suffix without whitespace", + p[-1]); } - flags = cast(FLAGS)(flags | f); - continue; - default: - break; + break LIntegerSuffix; } - break; + p++; + if ((flags & f) && !err) + { + error("repeated integer suffix `%c`", p[-1]); + err = true; + } + flags = cast(FLAGS)(flags | f); + continue; } if (base == 8 && n >= 8) { @@ -2610,6 +2604,7 @@ class Lexer imaginary = true; } + bool gotSuffix = false; switch (*p) { case 'F': @@ -2623,7 +2618,7 @@ class Lexer if (isWellformedString && !isOutOfRange) isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr); result = TOK.float64Literal; - break; + goto LcheckI; case 'l': if (!Ccompile) error("use 'L' suffix instead of 'l'"); @@ -2635,13 +2630,22 @@ class Lexer result = TOK.float80Literal; break; } - + gotSuffix = true; +LcheckI: if ((*p == 'i' || *p == 'I') && !Ccompile) { if (*p == 'I') error("use 'i' suffix instead of 'I'"); p++; imaginary = true; + gotSuffix = true; + } + // disallow e.g. `Foo!5fvar;` + if (!Ccompile && gotSuffix && (isalpha(*p) || *p & 0x80)) + { + const loc = loc(); + error(loc, "identifier character cannot follow float `%c` suffix without whitespace", + p[-1]); } if (imaginary) diff --git a/compiler/test/fail_compilation/fail11751.d b/compiler/test/fail_compilation/fail11751.d index b1fd248a75b2..36d7f9d8e302 100644 --- a/compiler/test/fail_compilation/fail11751.d +++ b/compiler/test/fail_compilation/fail11751.d @@ -1,10 +1,9 @@ /* TEST_OUTPUT: --- -fail_compilation/fail11751.d(11): Error: missing exponent -fail_compilation/fail11751.d(11): Error: alphanumeric character cannot follow numeric literal `0x1.FFFFFFFFFFFFFp` without whitespace -fail_compilation/fail11751.d(11): Error: semicolon expected following auto declaration, not `ABC` -fail_compilation/fail11751.d(11): Error: no identifier for declarator `ABC` +fail_compilation/fail11751.d(10): Error: missing exponent +fail_compilation/fail11751.d(10): Error: semicolon expected following auto declaration, not `ABC` +fail_compilation/fail11751.d(10): Error: no identifier for declarator `ABC` --- */ diff --git a/compiler/test/fail_compilation/templatesingleparam.d b/compiler/test/fail_compilation/templatesingleparam.d index f6b524031b0d..7e5a9368585b 100644 --- a/compiler/test/fail_compilation/templatesingleparam.d +++ b/compiler/test/fail_compilation/templatesingleparam.d @@ -2,10 +2,10 @@ REQUIRED_ARGS: -vcolumns TEST_OUTPUT: --- -fail_compilation/templatesingleparam.d(17,14): Error: alphanumeric character cannot follow string literal `c` postfix without whitespace -fail_compilation/templatesingleparam.d(18,10): Error: alphanumeric character cannot follow numeric literal `2LU` without whitespace -fail_compilation/templatesingleparam.d(22,6): Error: alphanumeric character cannot follow string literal `d` postfix without whitespace -fail_compilation/templatesingleparam.d(23,8): Error: alphanumeric character cannot follow numeric literal `0xFeed` without whitespace +fail_compilation/templatesingleparam.d(17,14): Error: identifier character cannot follow string `c` postfix without whitespace +fail_compilation/templatesingleparam.d(18,10): Error: identifier character cannot follow integer `U` suffix without whitespace +fail_compilation/templatesingleparam.d(22,6): Error: identifier character cannot follow string `d` postfix without whitespace +fail_compilation/templatesingleparam.d(23,4): Error: identifier character cannot follow float `f` suffix without whitespace --- */ class Foo(alias str) { @@ -20,4 +20,5 @@ class Bar { @`_`int i; // OK @`_`dint di; -@0xFeedObject obj; +@2flong fi; +@0xFeedObject obj; // not caught From ce69adf5152e91a2e8f034c1d934e788f36965cc Mon Sep 17 00:00:00 2001 From: Nick Treleaven Date: Mon, 10 Jul 2023 17:47:09 +0100 Subject: [PATCH 5/5] Remove unicode detection This could cause a false positive for unicode line endings. --- compiler/src/dmd/lexer.d | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/src/dmd/lexer.d b/compiler/src/dmd/lexer.d index 72d58d81ac60..c9c3ed18d7a9 100644 --- a/compiler/src/dmd/lexer.d +++ b/compiler/src/dmd/lexer.d @@ -1974,7 +1974,7 @@ class Lexer t.postfix = *p; p++; // disallow e.g. `@r"_"dtype var;` - if (!Ccompile && (isalpha(*p) || *p & 0x80)) + if (!Ccompile && isalpha(*p)) { const loc = loc(); error(loc, "identifier character cannot follow string `%c` postfix without whitespace", @@ -2212,7 +2212,7 @@ LIntegerSuffix: break; default: // disallow e.g. `Foo!5Luvar;` - if (!Ccompile && flags >= FLAGS.unsigned && (isalpha(*p) || *p & 0x80)) + if (!Ccompile && flags >= FLAGS.unsigned && isalpha(*p)) { const loc = loc(); error(loc, "identifier character cannot follow integer `%c` suffix without whitespace", @@ -2641,7 +2641,7 @@ LcheckI: gotSuffix = true; } // disallow e.g. `Foo!5fvar;` - if (!Ccompile && gotSuffix && (isalpha(*p) || *p & 0x80)) + if (!Ccompile && gotSuffix && isalpha(*p)) { const loc = loc(); error(loc, "identifier character cannot follow float `%c` suffix without whitespace",