diff --git a/go.mod b/go.mod index 8ddfced..e17ada1 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( require ( github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.0 // indirect github.com/hashicorp/go-multierror v1.1.0 // indirect @@ -18,5 +19,8 @@ require ( github.com/klauspost/compress v1.11.2 // indirect github.com/mitchellh/go-homedir v1.0.0 // indirect github.com/mitchellh/go-testing-interface v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/stretchr/testify v1.11.1 // indirect github.com/ulikunitz/xz v0.5.8 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index ec179d7..1a6297b 100644 --- a/go.sum +++ b/go.sum @@ -6,6 +6,8 @@ github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d h1:xDfNPAt8lFiC1UJrqV3uuy861HCTo708pDMbjHHdCas= github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d/go.mod h1:6QX/PXZ00z/TKoufEY6K/a0k6AhaJrQKdFe6OfVXsa4= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= @@ -30,5 +32,12 @@ github.com/mitchellh/go-homedir v1.0.0 h1:vKb8ShqSby24Yrqr/yDYkuFz8d0WUjys40rvnG github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-testing-interface v1.0.0 h1:fzU/JVNcaqHQEcVFAKeR41fkiLdIPrefOvVG1VZ96U0= 
github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/ulikunitz/xz v0.5.8 h1:ERv8V6GKqVi23rgu5cj9pVfVzJbOqAY2Ntl88O6c2nQ= github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/codingcontext/task_parser.go b/pkg/codingcontext/task_parser.go index 232b8e5..dc9eb04 100644 --- a/pkg/codingcontext/task_parser.go +++ b/pkg/codingcontext/task_parser.go @@ -70,21 +70,127 @@ func (s *SlashCommand) Params() map[string]string { return params } -// stripQuotes removes surrounding double quotes from a string if present. -// Single quotes are not supported as the grammar only allows double-quoted strings. +// stripQuotes removes surrounding quotes from a string if present and processes escape sequences. +// Supports both single (') and double (") quotes. +// Processes escape sequences: \n, \t, \r, \\, \", \', \uXXXX (Unicode), \xHH (hex), \OOO (octal) +// Unknown escape sequences (e.g., \z) preserve only the character after the backslash. +// Incomplete escape sequences (e.g., \u00a, \x4) are preserved literally including the backslash. 
func stripQuotes(s string) string {
	// Anything shorter than two bytes cannot hold both an opening and a
	// closing quote.
	if len(s) < 2 {
		return s
	}

	// Only a matching pair of identical quote characters counts as quoting.
	first, last := s[0], s[len(s)-1]
	if first != last || (first != '"' && first != '\'') {
		// Unquoted values are returned verbatim: escape sequences are only
		// interpreted inside quoted strings.
		return s
	}

	return processEscapeSequences(s[1 : len(s)-1])
}

// processEscapeSequences decodes backslash escape sequences in s.
//
// Supported escapes: \n, \t, \r, \\, \", \', \uXXXX (exactly four hex digits,
// written as the UTF-8 encoding of that code point), \xHH (exactly two hex
// digits, written as one raw byte) and \OOO (one to three octal digits,
// written as one raw byte).
//
// Unknown escapes (e.g. \z) drop the backslash and keep the following
// character. Malformed or incomplete numeric escapes (e.g. \u00a, \x4, \x+4)
// and octal escapes whose value does not fit in a byte (above \377) keep the
// backslash and are otherwise copied through unchanged. A lone backslash at
// the end of s is kept as-is.
func processEscapeSequences(s string) string {
	if !strings.Contains(s, `\`) {
		return s // Fast path: nothing to decode.
	}

	var b strings.Builder
	b.Grow(len(s))

	for i := 0; i < len(s); i++ {
		c := s[i]
		if c != '\\' || i == len(s)-1 {
			b.WriteByte(c)
			continue
		}

		i++ // s[i] is now the character following the backslash.
		switch e := s[i]; e {
		case 'n':
			b.WriteByte('\n')
		case 't':
			b.WriteByte('\t')
		case 'r':
			b.WriteByte('\r')
		case '\\', '"', '\'':
			b.WriteByte(e)
		case 'u':
			// \uXXXX: ParseUint (not ParseInt) so a sign such as "+041" is
			// rejected instead of being decoded as a code point.
			if i+5 <= len(s) {
				if v, err := strconv.ParseUint(s[i+1:i+5], 16, 16); err == nil {
					b.WriteRune(rune(v))
					i += 4
					continue
				}
			}
			// Malformed or incomplete: emit the prefix literally; the
			// characters after it are processed by the following iterations.
			b.WriteString(`\u`)
		case 'x':
			// \xHH: unsigned 8-bit parse so the full 0x00-0xff range decodes;
			// a signed 8-bit parse would reject 0x80-0xff.
			if i+3 <= len(s) {
				if v, err := strconv.ParseUint(s[i+1:i+3], 16, 8); err == nil {
					b.WriteByte(byte(v))
					i += 2
					continue
				}
			}
			b.WriteString(`\x`)
		default:
			if e < '0' || e > '7' {
				// Unknown escape: keep only the character after the backslash.
				b.WriteByte(e)
				continue
			}
			// \OOO: take up to three consecutive octal digits.
			end := i + 1
			for end < len(s) && end-i < 3 && s[end] >= '0' && s[end] <= '7' {
				end++
			}
			v, err := strconv.ParseUint(s[i:end], 8, 8)
			if err != nil {
				// Value exceeds one byte (\400-\777): keep it literally
				// rather than silently truncating to the low 8 bits.
				b.WriteByte('\\')
				b.WriteByte(e)
				continue
			}
			b.WriteByte(byte(v))
			i = end - 1
		}
	}

	return b.String()
}

// Argument represents
either a named (key=value) or positional argument type Argument struct { Key string `parser:"(@Term Assign)?"` - Value string `parser:"(@String | @Term)"` + Value string `parser:"(@DQString | @SQString | @Term)"` } // Text represents a block of text @@ -97,8 +203,8 @@ type Text struct { // TextLine is a single line of text content (not starting with a slash) // It matches tokens until the end of the line type TextLine struct { - NonSlashStart []string `parser:"(@Term | @String | @Assign | @Whitespace)"` // First token can't be Slash - RestOfLine []string `parser:"(@Term | @String | @Slash | @Assign | @Whitespace)*"` // Rest can include Slash + NonSlashStart []string `parser:"(@Term | @DQString | @SQString | @Assign | @Whitespace)"` // First token can't be Slash + RestOfLine []string `parser:"(@Term | @DQString | @SQString | @Slash | @Assign | @Whitespace)*"` // Rest can include Slash NewlineOpt string `parser:"@Newline?"` } @@ -119,12 +225,13 @@ func (t *Text) Content() string { // Define the lexer using participle's lexer.MustSimple var taskLexer = lexer.MustSimple([]lexer.SimpleRule{ - {Name: "Slash", Pattern: `/`}, // Any "/" - {Name: "Assign", Pattern: `=`}, // "=" - {Name: "String", Pattern: `"(?:\\.|[^"])*"`}, // Quoted strings with escapes - {Name: "Whitespace", Pattern: `[ \t]+`}, // Spaces and tabs (horizontal only) - {Name: "Newline", Pattern: `[\n\r]+`}, // Newlines - {Name: "Term", Pattern: `[^ \t\n\r/"=]+`}, // Any char except space, newline, /, ", = + {Name: "Slash", Pattern: `/`}, // Any "/" + {Name: "Assign", Pattern: `=`}, // "=" + {Name: "DQString", Pattern: `"(?:\\.|[^"])*"`}, // Double-quoted strings with escapes + {Name: "SQString", Pattern: `'(?:\\.|[^'])*'`}, // Single-quoted strings with escapes + {Name: "Whitespace", Pattern: `[ \t]+`}, // Spaces and tabs (horizontal only) + {Name: "Newline", Pattern: `[\n\r]+`}, // Newlines + {Name: "Term", Pattern: `[^ \t\n\r/"=']+`}, // Any char except space, newline, /, ", ', = }) var parser = 
participle.MustBuild[Input]( diff --git a/pkg/codingcontext/task_parser_lenient_test.go b/pkg/codingcontext/task_parser_lenient_test.go new file mode 100644 index 0000000..c9f8831 --- /dev/null +++ b/pkg/codingcontext/task_parser_lenient_test.go @@ -0,0 +1,450 @@ +package codingcontext + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestParseTask_LenientParsing tests the lenient parameter parsing features +// added to support flexible quote types and escape sequences +func TestParseTask_LenientParsing(t *testing.T) { + tests := []struct { + name string + input string + wantErr bool + check func(t *testing.T, task Task) + }{ + { + name: "single-quoted argument value", + input: "/command 'single quoted value'\n", + wantErr: false, + check: func(t *testing.T, task Task) { + require.Len(t, task, 1) + cmd := task[0].SlashCommand + require.NotNil(t, cmd) + require.Len(t, cmd.Arguments, 1) + // The parser includes quotes in the token + assert.Equal(t, `'single quoted value'`, cmd.Arguments[0].Value) + // After stripQuotes, it should be unquoted + params := cmd.Params() + assert.Equal(t, "single quoted value", params["1"]) + }, + }, + { + name: "double-quoted argument value", + input: `/command "double quoted value"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + require.Len(t, task, 1) + cmd := task[0].SlashCommand + require.NotNil(t, cmd) + params := cmd.Params() + assert.Equal(t, "double quoted value", params["1"]) + }, + }, + { + name: "single-quoted named parameter", + input: "/command key='value'\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, "value", params["key"]) + }, + }, + { + name: "double-quoted named parameter", + input: `/command key="value"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, 
"value", params["key"]) + }, + }, + { + name: "escape sequence: newline", + input: `/command "line1\nline2"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, "line1\nline2", params["1"]) + }, + }, + { + name: "escape sequence: tab", + input: `/command "col1\tcol2"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, "col1\tcol2", params["1"]) + }, + }, + { + name: "escape sequence: carriage return", + input: `/command "line1\rline2"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, "line1\rline2", params["1"]) + }, + }, + { + name: "escape sequence: backslash", + input: `/command "path\\to\\file"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, `path\to\file`, params["1"]) + }, + }, + { + name: "escape sequence: escaped double quote", + input: `/command "say \"hello\""` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, `say "hello"`, params["1"]) + }, + }, + { + name: "escape sequence: escaped single quote", + input: "/command 'say \\'hello\\''\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, `say 'hello'`, params["1"]) + }, + }, + { + name: "escape sequence: Unicode \\uXXXX", + input: `/command "hello\u0020world"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, "hello world", params["1"]) // \u0020 is space + }, + }, + { + name: "escape sequence: hex \\xHH", + input: `/command "A\x42C"` + "\n", + wantErr: false, + check: func(t *testing.T, task 
Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, "ABC", params["1"]) // \x42 is 'B' + }, + }, + { + name: "escape sequence: octal \\OOO", + input: `/command "\101\102\103"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, "ABC", params["1"]) // \101=A, \102=B, \103=C in octal + }, + }, + { + name: "mixed escape sequences", + input: `/command "line1\nline2\ttabbed\rreturned\\backslash"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, "line1\nline2\ttabbed\rreturned\\backslash", params["1"]) + }, + }, + { + name: "invalid escape sequence treated as literal", + input: `/command "\z\q"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + // Unknown escapes keep the character after backslash + assert.Equal(t, "zq", params["1"]) + }, + }, + { + name: "multiple arguments with different quote types", + input: "/command \"double\" 'single' unquoted\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + require.Len(t, cmd.Arguments, 3) + params := cmd.Params() + assert.Equal(t, "double", params["1"]) + assert.Equal(t, "single", params["2"]) + assert.Equal(t, "unquoted", params["3"]) + }, + }, + { + name: "named parameters with different quote types", + input: "/command k1=\"v1\" k2='v2' k3=v3\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, "v1", params["k1"]) + assert.Equal(t, "v2", params["k2"]) + assert.Equal(t, "v3", params["k3"]) + }, + }, + { + name: "UTF-8 characters in values", + input: "/command \"こんにちは\" emoji=\"🚀\"\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + assert.Equal(t, "こんにちは", 
params["1"]) + assert.Equal(t, "🚀", params["emoji"]) + }, + }, + { + name: "incomplete Unicode escape handled gracefully", + input: `/command "\u00a"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + // Incomplete escape should be kept as-is + assert.Equal(t, "\\u00a", params["1"]) + }, + }, + { + name: "incomplete hex escape handled gracefully", + input: `/command "\x4"` + "\n", + wantErr: false, + check: func(t *testing.T, task Task) { + cmd := task[0].SlashCommand + params := cmd.Params() + // Incomplete escape should be kept as-is + assert.Equal(t, "\\x4", params["1"]) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + task, err := ParseTask(tt.input) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + if tt.check != nil { + tt.check(t, task) + } + }) + } +} + +// TestStripQuotes tests the stripQuotes function with various input types +func TestStripQuotes(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "double-quoted string", + input: `"hello"`, + expected: "hello", + }, + { + name: "single-quoted string", + input: `'hello'`, + expected: "hello", + }, + { + name: "unquoted string", + input: "hello", + expected: "hello", + }, + { + name: "double-quoted with escaped quote", + input: `"say \"hello\""`, + expected: `say "hello"`, + }, + { + name: "single-quoted with escaped quote", + input: `'say \'hello\''`, + expected: `say 'hello'`, + }, + { + name: "double-quoted with newline escape", + input: `"line1\nline2"`, + expected: "line1\nline2", + }, + { + name: "double-quoted with tab escape", + input: `"col1\tcol2"`, + expected: "col1\tcol2", + }, + { + name: "double-quoted with Unicode escape", + input: `"hello\u0020world"`, + expected: "hello world", + }, + { + name: "double-quoted with hex escape", + input: `"A\x42C"`, + expected: "ABC", + }, + { + name: "double-quoted 
with octal escape", + input: `"\101\102\103"`, + expected: "ABC", + }, + { + name: "empty double-quoted string", + input: `""`, + expected: "", + }, + { + name: "empty single-quoted string", + input: `''`, + expected: "", + }, + { + name: "single character", + input: "a", + expected: "a", + }, + { + name: "no escape processing for unquoted", + input: `hello\nworld`, + expected: `hello\nworld`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := stripQuotes(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +// TestProcessEscapeSequences tests the processEscapeSequences function +func TestProcessEscapeSequences(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "no escapes", + input: "hello world", + expected: "hello world", + }, + { + name: "newline", + input: `line1\nline2`, + expected: "line1\nline2", + }, + { + name: "tab", + input: `col1\tcol2`, + expected: "col1\tcol2", + }, + { + name: "carriage return", + input: `line1\rline2`, + expected: "line1\rline2", + }, + { + name: "backslash", + input: `path\\to\\file`, + expected: `path\to\file`, + }, + { + name: "double quote", + input: `say \"hello\"`, + expected: `say "hello"`, + }, + { + name: "single quote", + input: `say \'hello\'`, + expected: `say 'hello'`, + }, + { + name: "Unicode escape", + input: `\u0048\u0065\u006c\u006c\u006f`, // "Hello" + expected: "Hello", + }, + { + name: "hex escape", + input: `\x48\x65\x6c\x6c\x6f`, // "Hello" + expected: "Hello", + }, + { + name: "octal escape", + input: `\110\145\154\154\157`, // "Hello" + expected: "Hello", + }, + { + name: "mixed escapes", + input: `\n\t\r\\\"\'\u0020\x20\40`, + expected: "\n\t\r\\\"' \x20 ", + }, + { + name: "unknown escape", + input: `\z\q`, + expected: "zq", + }, + { + name: "incomplete Unicode escape", + input: `\u00a`, + expected: `\u00a`, + }, + { + name: "incomplete hex escape", + input: `\x4`, + expected: `\x4`, + }, + { + name: 
"backslash at end", + input: `hello\`, + expected: `hello\`, + }, + { + name: "octal with non-octal digits", + input: `\7\8\9`, + expected: "\a89", // \7 is octal 7 (ASCII bell \a), \8 and \9 are treated as unknown escapes and output as '8' and '9' + }, + { + name: "short octal sequences", + input: `\7\77`, + expected: "\a?", // \7 is octal 7 (ASCII bell \a), \77 is octal 77 (ASCII '?') + }, + { + name: "octal values above 127", + input: `\200\377`, + expected: "\x80\xff", // \200 is 128, \377 is 255 + }, + { + name: "octal high byte values", + input: `\240\300\350`, + expected: "\xa0\xc0\xe8", // \240 is 160, \300 is 192, \350 is 232 + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := processEscapeSequences(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +}