From b77e9e41f5c7db0bcf4a16aed807a3387fa0bd67 Mon Sep 17 00:00:00 2001 From: Ben Gribaudo Date: Fri, 2 Feb 2024 10:33:37 -0500 Subject: [PATCH 1/4] Tying generalized-identifier to lexical-element --- query-languages/m/m-spec-consolidated-grammar.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/query-languages/m/m-spec-consolidated-grammar.md b/query-languages/m/m-spec-consolidated-grammar.md index cc21b7dd..645161f8 100644 --- a/query-languages/m/m-spec-consolidated-grammar.md +++ b/query-languages/m/m-spec-consolidated-grammar.md @@ -2,7 +2,7 @@ title: M Language Consolidated Grammar description: Describes all of the grammar associated with the Power Query M formula language ms.topic: conceptual -ms.date: 9/15/2023 +ms.date: 2/2/2024 ms.custom: "nonautomated-date" --- @@ -145,6 +145,7 @@ _verbatim-literal:_
_identifier:
      regular-identifier
      quoted-identifier
+      generalized-identifier
regular-identifier:
      available-identifier
      available-identifier dot-character regular-identifier
From 5d5514caf55544861f33477cb9fb7a086f65ff28 Mon Sep 17 00:00:00 2001 From: Ben Gribaudo Date: Thu, 28 Mar 2024 10:47:00 -0400 Subject: [PATCH 2/4] Idea --- .../m/m-spec-consolidated-grammar.md | 48 ++++++++++++++----- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/query-languages/m/m-spec-consolidated-grammar.md b/query-languages/m/m-spec-consolidated-grammar.md index 5940b1ee..a51f4841 100644 --- a/query-languages/m/m-spec-consolidated-grammar.md +++ b/query-languages/m/m-spec-consolidated-grammar.md @@ -2,7 +2,7 @@ title: M Language Consolidated Grammar description: Describes all of the grammar associated with the Power Query M formula language ms.topic: conceptual -ms.date: 02/02/2024 +ms.date: 03/28/2024 ms.custom: "nonautomated-date" --- @@ -146,7 +146,6 @@ _verbatim-literal:_
_identifier:
      regular-identifier
      quoted-identifier
-      generalized-identifier
regular-identifier:
      available-identifier
      available-identifier dot-character regular-identifier
@@ -169,15 +168,6 @@ identifier-part-character:
      connecting-character
      combining-character
      formatting-character
-generalized-identifier:
-      generalized-identifier-part
-      generalized-identifier_ separated only by blanks (`U+0020`) _generalized-identifier-part
-generalized-identifier-part:
-      generalized-identifier-segment
-      decimal-digit-character generalized-identifier-segment
-generalized-identifier-segment:
-      keyword-or-identifier
-      keyword-or-identifier dot-character keyword-or-identifier
dot-character:_
      `.` (`U+002E`)
_underscore-character:_
@@ -341,7 +331,10 @@ identifier-reference:
exclusive-identifier-reference:
      identifier
inclusive-identifier-reference:_
-      `@` _identifier_ +      `@` _identifier
+generalized-identifier:_
+      the range of text spanned by a sequence of one or more tokens, other than `=` or `]`
+      but only if that text complies with the generalized identifier grammar #### Section-access expression @@ -607,3 +600,34 @@ any-literal:
      number-literal
      text-literal
      null-literal_ + + +## Generalized identifier grammar + +Compliance with this grammar can be validated using the following regular expression: +```` +(?x)^ +# generalized-identifier-always-valid-character +[\p{L}\p{Nl}\p{Nd}\p{Mn}\p{Mc}\p{Pc}\p{Cf}] + +# (generalized-identifier-inner-valid-segment* period? generalized-identifier-always-valid-character)? +(?: + # generalized-identifier-inner-valid-segment + (?:\.?[\p{L}\p{Nl}\p{Nd}\p{Mn}\p{Mc}\p{Pc}\p{Cf}\x20])* + + # period? generalized-identifier-always-valid-character + \.?[\p{L}\p{Nl}\p{Nd}\p{Mn}\p{Mc}\p{Pc}\p{Cf}] +)? +$ +```` + +_space:_
+      Space character (`U+0020`)
+_period:_
+      Period character (`U+002E`)
+_generalized-identifier-always-valid-character_:
+      Any character in the following Unicode classes: Lu (Uppercase Letter), Ll (Lowercase Letter), Lt (Titlecase Letter), Lm (Modifier Letter), Lo (Other Letter), Nl (Letter Number), Nd (Decimal Number), Mn (Nonspacing Mark), Mc (Spacing Mark), Pc (Connector Punctuation), Cf (Format)
+_generalized-identifier-inner-valid-segment:
+      period? (generalized-identifier-always-valid-character | space)
+generalized-identifier-syntax:
+      generalized-identifier-always-valid-character (generalized-identifier-inner-valid-segment* period? generalized-identifier-always-valid-character)?_ From 7a0dca59f6e4dc477fdda2efcbae2f8194301ec8 Mon Sep 17 00:00:00 2001 From: Ben Gribaudo Date: Thu, 25 Apr 2024 14:18:53 -0400 Subject: [PATCH 3/4] Update m-spec-consolidated-grammar.md --- query-languages/m/m-spec-consolidated-grammar.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/query-languages/m/m-spec-consolidated-grammar.md b/query-languages/m/m-spec-consolidated-grammar.md index a51f4841..2c2fab2c 100644 --- a/query-languages/m/m-spec-consolidated-grammar.md +++ b/query-languages/m/m-spec-consolidated-grammar.md @@ -331,10 +331,7 @@ identifier-reference:
exclusive-identifier-reference:
      identifier
inclusive-identifier-reference:_
-      `@` _identifier
-generalized-identifier:_
-      the range of text spanned by a sequence of one or more tokens, other than `=` or `]`
-      but only if that text complies with the generalized identifier grammar +      `@` _identifier_ #### Section-access expression @@ -380,8 +377,10 @@ _field-list:
field:
      field-name_ `=` _expression
field-name:
-      generalized-identifier
+      identifier
      quoted-identifier_
+      the range of text spanned by a sequence of one or more tokens, other than `=`, `,` or `]`
+             but only if that text complies with the generalized identifier grammar #### Item access expression From f8762b34b70156d77a28262aed41a6c2cb25edec Mon Sep 17 00:00:00 2001 From: Ben Gribaudo Date: Tue, 30 Apr 2024 09:19:06 -0400 Subject: [PATCH 4/4] Update m-spec-consolidated-grammar.md --- query-languages/m/m-spec-consolidated-grammar.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/query-languages/m/m-spec-consolidated-grammar.md b/query-languages/m/m-spec-consolidated-grammar.md index 2c2fab2c..593fcce0 100644 --- a/query-languages/m/m-spec-consolidated-grammar.md +++ b/query-languages/m/m-spec-consolidated-grammar.md @@ -378,9 +378,12 @@ field:
      field-name_ `=` _expression
field-name:
      identifier
-      quoted-identifier_
-      the range of text spanned by a sequence of one or more tokens, other than `=`, `,` or `]`
-             but only if that text complies with the generalized identifier grammar +      quoted-identifier
+      generalized-identifier
+generalized-identifier:_
+      The range of text spanned by a sequence of one or more tokens, other than `=`, `,` or `]`,
+      but only if that text complies with the generalized identifier grammar.
+      (This grammar token is contextual; it is relevant only where a `field-name` is expected.) #### Item access expression