-
Notifications
You must be signed in to change notification settings - Fork 1
Extend participle parser with lenient parameter syntax #165
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c31234e
ea10d5c
a455314
956528c
61bd580
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,21 +70,127 @@ func (s *SlashCommand) Params() map[string]string { | |
| return params | ||
| } | ||
|
|
||
| // stripQuotes removes surrounding double quotes from a string if present. | ||
| // Single quotes are not supported as the grammar only allows double-quoted strings. | ||
| // stripQuotes removes surrounding quotes from a string if present and processes escape sequences. | ||
| // Supports both single (') and double (") quotes. | ||
| // Processes escape sequences: \n, \t, \r, \\, \", \', \uXXXX (Unicode), \xHH (hex), \OOO (octal) | ||
| // Unknown escape sequences (e.g., \z) preserve only the character after the backslash. | ||
| // Incomplete escape sequences (e.g., \u00a, \x4) are preserved literally including the backslash. | ||
| func stripQuotes(s string) string { | ||
| if len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"' { | ||
| // Remove quotes and handle escaped quotes inside | ||
| unquoted := s[1 : len(s)-1] | ||
| return strings.ReplaceAll(unquoted, `\"`, `"`) | ||
| // Check if the string is quoted | ||
| if len(s) < 2 { | ||
| return s | ||
| } | ||
| return s | ||
|
|
||
| quoteChar := byte(0) | ||
| if (s[0] == '"' && s[len(s)-1] == '"') || (s[0] == '\'' && s[len(s)-1] == '\'') { | ||
| quoteChar = s[0] | ||
| s = s[1 : len(s)-1] // Remove surrounding quotes | ||
| } | ||
|
|
||
| // If not quoted, return as-is (no escape processing for unquoted values in slash commands) | ||
| if quoteChar == 0 { | ||
| return s | ||
| } | ||
|
|
||
| // Process escape sequences | ||
| return processEscapeSequences(s) | ||
| } | ||
|
|
||
// processEscapeSequences decodes backslash escape sequences in s.
//
// Supported escapes: \n, \t, \r, \\, \", \', \uXXXX (exactly four hex digits,
// written as a UTF-8 encoded rune), \xHH (exactly two hex digits, written as
// one raw byte) and \O, \OO, \OOO (one to three octal digits, written as one
// raw byte).
//
// An unknown escape such as \z yields only the character after the
// backslash. An incomplete or invalid escape (for example \u00a, \x4, \xZZ,
// or an octal value above \377) is preserved literally, including the
// backslash. A lone trailing backslash is kept as-is.
func processEscapeSequences(s string) string {
	if !strings.Contains(s, "\\") {
		return s // Fast path: nothing to decode.
	}

	var result strings.Builder
	result.Grow(len(s))

	for i := 0; i < len(s); i++ {
		if s[i] != '\\' || i == len(s)-1 {
			result.WriteByte(s[i])
			continue
		}

		i++ // Step onto the character that follows the backslash.
		switch c := s[i]; c {
		case 'n':
			result.WriteByte('\n')
		case 't':
			result.WriteByte('\t')
		case 'r':
			result.WriteByte('\r')
		case '\\', '"', '\'':
			result.WriteByte(c)
		case 'u':
			// \uXXXX: ParseUint (unlike ParseInt) rejects a leading sign, so
			// only four genuine hex digits are accepted.
			if i+5 <= len(s) {
				if val, err := strconv.ParseUint(s[i+1:i+5], 16, 16); err == nil {
					result.WriteRune(rune(val))
					i += 4
					continue
				}
			}
			// Incomplete or invalid: keep the marker; the following
			// characters are copied verbatim by later iterations.
			result.WriteString(`\u`)
		case 'x':
			// \xHH: parsed as an unsigned 8-bit value so that the whole
			// 0x00-0xFF range is accepted (a signed parse fails above 0x7F).
			if i+3 <= len(s) {
				if val, err := strconv.ParseUint(s[i+1:i+3], 16, 8); err == nil {
					result.WriteByte(byte(val))
					i += 2
					continue
				}
			}
			result.WriteString(`\x`)
		case '0', '1', '2', '3', '4', '5', '6', '7':
			// \OOO: consume up to three octal digits in total.
			end := i + 1
			for end-i < 3 && end < len(s) && s[end] >= '0' && s[end] <= '7' {
				end++
			}
			if val, err := strconv.ParseUint(s[i:end], 8, 8); err == nil {
				result.WriteByte(byte(val))
				i = end - 1
				continue
			}
			// Value above \377 does not fit in a byte: preserve it literally
			// rather than silently wrapping around.
			result.WriteByte('\\')
			result.WriteByte(c)
		default:
			// Unknown escape: drop the backslash, keep the character.
			result.WriteByte(c)
		}
	}

	return result.String()
}
|
|
||
// Argument represents either a named (key=value) or positional argument.
// Key is captured only when a Term is followed by an Assign token; Value is
// a double-quoted string, a single-quoted string, or a bare Term.
type Argument struct {
	Key   string `parser:"(@Term Assign)?"`
	Value string `parser:"(@DQString | @SQString | @Term)"`
}
|
|
||
| // Text represents a block of text | ||
|
|
@@ -97,8 +203,8 @@ type Text struct { | |
// TextLine is a single line of text content (not starting with a slash).
// It matches tokens until the end of the line; the trailing newline, when
// present, is captured in NewlineOpt.
type TextLine struct {
	NonSlashStart []string `parser:"(@Term | @DQString | @SQString | @Assign | @Whitespace)"`           // First token can't be Slash
	RestOfLine    []string `parser:"(@Term | @DQString | @SQString | @Slash | @Assign | @Whitespace)*"` // Rest can include Slash
	NewlineOpt    string   `parser:"@Newline?"`
}
|
|
||
|
|
@@ -119,12 +225,13 @@ func (t *Text) Content() string { | |
|
|
||
| // Define the lexer using participle's lexer.MustSimple | ||
| var taskLexer = lexer.MustSimple([]lexer.SimpleRule{ | ||
| {Name: "Slash", Pattern: `/`}, // Any "/" | ||
| {Name: "Assign", Pattern: `=`}, // "=" | ||
| {Name: "String", Pattern: `"(?:\\.|[^"])*"`}, // Quoted strings with escapes | ||
| {Name: "Whitespace", Pattern: `[ \t]+`}, // Spaces and tabs (horizontal only) | ||
| {Name: "Newline", Pattern: `[\n\r]+`}, // Newlines | ||
| {Name: "Term", Pattern: `[^ \t\n\r/"=]+`}, // Any char except space, newline, /, ", = | ||
| {Name: "Slash", Pattern: `/`}, // Any "/" | ||
| {Name: "Assign", Pattern: `=`}, // "=" | ||
| {Name: "DQString", Pattern: `"(?:\\.|[^"])*"`}, // Double-quoted strings with escapes | ||
| {Name: "SQString", Pattern: `'(?:\\.|[^'])*'`}, // Single-quoted strings with escapes | ||
| {Name: "Whitespace", Pattern: `[ \t]+`}, // Spaces and tabs (horizontal only) | ||
| {Name: "Newline", Pattern: `[\n\r]+`}, // Newlines | ||
| {Name: "Term", Pattern: `[^ \t\n\r/"=']+`}, // Any char except space, newline, /, ", ', = | ||
| }) | ||
|
|
||
| var parser = participle.MustBuild[Input]( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The documentation should clarify the behavior for unknown or invalid escape sequences. According to the implementation, unknown escapes like \z preserve the character after the backslash (resulting in 'z'), and incomplete escapes like \u00a are preserved as-is. Consider adding this to the comment to make the behavior explicit for users of this function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Enhanced documentation in commit 61bd580 to explicitly describe behavior for unknown escape sequences (preserves character after backslash) and incomplete escape sequences (preserves literally including backslash).