From 5f0c199800b6b63d3abbab386f61ecd80fb8ab34 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 09:28:07 -0800 Subject: [PATCH 01/41] Move vfs matching to package --- internal/ls/autoimports.go | 6 +++--- internal/project/ata/discovertypings.go | 3 ++- internal/tsoptions/parsedcommandline.go | 3 ++- internal/tsoptions/tsconfigparsing.go | 15 ++++++++------- internal/tsoptions/wildcarddirectories.go | 6 +++--- .../vfs/{utilities.go => vfsmatch/vfsmatch.go} | 9 +++++---- 6 files changed, 23 insertions(+), 19 deletions(-) rename internal/vfs/{utilities.go => vfsmatch/vfsmatch.go} (98%) diff --git a/internal/ls/autoimports.go b/internal/ls/autoimports.go index c88f426880..d8ee73d620 100644 --- a/internal/ls/autoimports.go +++ b/internal/ls/autoimports.go @@ -24,7 +24,7 @@ import ( "github.com/microsoft/typescript-go/internal/packagejson" "github.com/microsoft/typescript-go/internal/stringutil" "github.com/microsoft/typescript-go/internal/tspath" - "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" ) type SymbolExportInfo struct { @@ -1420,9 +1420,9 @@ func getIsExcludedPatterns(preferences *lsutil.UserPreferences, useCaseSensitive } var patterns []*regexp2.Regexp for _, spec := range preferences.AutoImportFileExcludePatterns { - pattern := vfs.GetSubPatternFromSpec(spec, "", vfs.UsageExclude, vfs.WildcardMatcher{}) + pattern := vfsmatch.GetSubPatternFromSpec(spec, "", vfsmatch.UsageExclude, vfsmatch.WildcardMatcher{}) if pattern != "" { - if re := vfs.GetRegexFromPattern(pattern, useCaseSensitiveFileNames); re != nil { + if re := vfsmatch.GetRegexFromPattern(pattern, useCaseSensitiveFileNames); re != nil { patterns = append(patterns, re) } } diff --git a/internal/project/ata/discovertypings.go b/internal/project/ata/discovertypings.go index 8a87b3ea92..3f8ba209db 100644 --- a/internal/project/ata/discovertypings.go +++ b/internal/project/ata/discovertypings.go @@ -14,6 +14,7 @@ import ( "github.com/microsoft/typescript-go/internal/semver" "github.com/microsoft/typescript-go/internal/tspath" "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" ) func isTypingUpToDate(cachedTyping *CachedTyping, availableTypingVersions map[string]string) bool { @@ -222,7 +223,7 @@ func addTypingNamesAndGetFilesToWatch( } else { // And #2. 
Depth = 3 because scoped packages look like `node_modules/@foo/bar/package.json` depth := 3 - for _, manifestPath := range vfs.ReadDirectory(fs, projectRootPath, packagesFolderPath, []string{tspath.ExtensionJson}, nil, nil, &depth) { + for _, manifestPath := range vfsmatch.ReadDirectory(fs, projectRootPath, packagesFolderPath, []string{tspath.ExtensionJson}, nil, nil, &depth) { if tspath.GetBaseFileName(manifestPath) != manifestName { continue } diff --git a/internal/tsoptions/parsedcommandline.go b/internal/tsoptions/parsedcommandline.go index a00e80854a..34ba336260 100644 --- a/internal/tsoptions/parsedcommandline.go +++ b/internal/tsoptions/parsedcommandline.go @@ -15,6 +15,7 @@ import ( "github.com/microsoft/typescript-go/internal/outputpaths" "github.com/microsoft/typescript-go/internal/tspath" "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" ) const ( @@ -326,7 +327,7 @@ func (p *ParsedCommandLine) PossiblyMatchesFileName(fileName string) bool { } for _, include := range p.ConfigFile.configFileSpecs.validatedIncludeSpecs { - if !strings.ContainsAny(include, "*?") && !vfs.IsImplicitGlob(include) { + if !strings.ContainsAny(include, "*?") && !vfsmatch.IsImplicitGlob(include) { includePath := tspath.ToPath(include, p.GetCurrentDirectory(), p.UseCaseSensitiveFileNames()) if includePath == path { return true diff --git a/internal/tsoptions/tsconfigparsing.go b/internal/tsoptions/tsconfigparsing.go index 9668bb70b4..6887a45b77 100644 --- a/internal/tsoptions/tsconfigparsing.go +++ b/internal/tsoptions/tsconfigparsing.go @@ -20,6 +20,7 @@ import ( "github.com/microsoft/typescript-go/internal/parser" "github.com/microsoft/typescript-go/internal/tspath" "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" ) type extendsResult struct { @@ -106,8 +107,8 @@ func (c *configFileSpecs) matchesExclude(fileName string, comparePathsOptions ts if len(c.validatedExcludeSpecs) == 0 { return false } - excludePattern := vfs.GetRegularExpressionForWildcard(c.validatedExcludeSpecs, comparePathsOptions.CurrentDirectory, "exclude") - excludeRegex := vfs.GetRegexFromPattern(excludePattern, comparePathsOptions.UseCaseSensitiveFileNames) + excludePattern := vfsmatch.GetRegularExpressionForWildcard(c.validatedExcludeSpecs, comparePathsOptions.CurrentDirectory, "exclude") + excludeRegex := vfsmatch.GetRegexFromPattern(excludePattern, comparePathsOptions.UseCaseSensitiveFileNames) if match, err := excludeRegex.MatchString(fileName); err == nil && match { return true } @@ -124,9 +125,9 @@ func (c *configFileSpecs) getMatchedIncludeSpec(fileName string, comparePathsOpt return "" } for index, spec := range c.validatedIncludeSpecs { - includePattern := vfs.GetPatternFromSpec(spec, comparePathsOptions.CurrentDirectory, "files") + includePattern := vfsmatch.GetPatternFromSpec(spec, comparePathsOptions.CurrentDirectory, "files") if includePattern != "" { - includeRegex := vfs.GetRegexFromPattern(includePattern, comparePathsOptions.UseCaseSensitiveFileNames) + includeRegex := vfsmatch.GetRegexFromPattern(includePattern, comparePathsOptions.UseCaseSensitiveFileNames) if match, err := includeRegex.MatchString(fileName); err == nil && match { return c.validatedIncludeSpecsBeforeSubstitution[index] } @@ -1663,15 +1664,15 @@ func getFileNamesFromConfigSpecs( var jsonOnlyIncludeRegexes []*regexp2.Regexp if len(validatedIncludeSpecs) > 0 { - files := vfs.ReadDirectory(host, basePath, basePath, 
core.Flatten(supportedExtensionsWithJsonIfResolveJsonModule), validatedExcludeSpecs, validatedIncludeSpecs, nil) + files := vfsmatch.ReadDirectory(host, basePath, basePath, core.Flatten(supportedExtensionsWithJsonIfResolveJsonModule), validatedExcludeSpecs, validatedIncludeSpecs, nil) for _, file := range files { if tspath.FileExtensionIs(file, tspath.ExtensionJson) { if jsonOnlyIncludeRegexes == nil { includes := core.Filter(validatedIncludeSpecs, func(include string) bool { return strings.HasSuffix(include, tspath.ExtensionJson) }) - includeFilePatterns := core.Map(vfs.GetRegularExpressionsForWildcards(includes, basePath, "files"), func(pattern string) string { return fmt.Sprintf("^%s$", pattern) }) + includeFilePatterns := core.Map(vfsmatch.GetRegularExpressionsForWildcards(includes, basePath, "files"), func(pattern string) string { return fmt.Sprintf("^%s$", pattern) }) if includeFilePatterns != nil { jsonOnlyIncludeRegexes = core.Map(includeFilePatterns, func(pattern string) *regexp2.Regexp { - return vfs.GetRegexFromPattern(pattern, host.UseCaseSensitiveFileNames()) + return vfsmatch.GetRegexFromPattern(pattern, host.UseCaseSensitiveFileNames()) }) } else { jsonOnlyIncludeRegexes = nil diff --git a/internal/tsoptions/wildcarddirectories.go b/internal/tsoptions/wildcarddirectories.go index a782b1123c..b4e223a00b 100644 --- a/internal/tsoptions/wildcarddirectories.go +++ b/internal/tsoptions/wildcarddirectories.go @@ -5,7 +5,7 @@ import ( "github.com/dlclark/regexp2" "github.com/microsoft/typescript-go/internal/tspath" - "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" ) func getWildcardDirectories(include []string, exclude []string, comparePathsOptions tspath.ComparePathsOptions) map[string]bool { @@ -26,7 +26,7 @@ func getWildcardDirectories(include []string, exclude []string, comparePathsOpti return nil } - rawExcludeRegex := vfs.GetRegularExpressionForWildcard(exclude, comparePathsOptions.CurrentDirectory, "exclude") + rawExcludeRegex := vfsmatch.GetRegularExpressionForWildcard(exclude, comparePathsOptions.CurrentDirectory, "exclude") var excludeRegex *regexp2.Regexp if rawExcludeRegex != "" { flags := regexp2.ECMAScript @@ -131,7 +131,7 @@ func getWildcardDirectoryFromSpec(spec string, useCaseSensitiveFileNames bool) * if lastSepIndex := strings.LastIndexByte(spec, tspath.DirectorySeparator); lastSepIndex != -1 { lastSegment := spec[lastSepIndex+1:] - if vfs.IsImplicitGlob(lastSegment) { + if vfsmatch.IsImplicitGlob(lastSegment) { path := tspath.RemoveTrailingDirectorySeparator(spec) return &wildcardDirectoryMatch{ Key: toCanonicalKey(path, useCaseSensitiveFileNames), diff --git a/internal/vfs/utilities.go b/internal/vfs/vfsmatch/vfsmatch.go similarity index 98% rename from internal/vfs/utilities.go rename to internal/vfs/vfsmatch/vfsmatch.go index af4c616a3e..1ecb01bf8f 100644 --- a/internal/vfs/utilities.go +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -1,4 +1,4 @@ -package vfs +package vfsmatch import ( "fmt" @@ -12,6 +12,7 @@ import ( "github.com/microsoft/typescript-go/internal/core" "github.com/microsoft/typescript-go/internal/stringutil" "github.com/microsoft/typescript-go/internal/tspath" + "github.com/microsoft/typescript-go/internal/vfs" ) type FileMatcherPatterns struct { @@ -356,7 +357,7 @@ type visitor struct { includeDirectoryRegex *regexp2.Regexp extensions []string useCaseSensitiveFileNames bool - host FS + host vfs.FS visited collections.Set[string] results [][]string } @@ -412,7 +413,7 @@ func (v 
*visitor) visitDirectory( } // path is the directory of the tsconfig.json -func matchFiles(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host FS) []string { +func matchFiles(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host vfs.FS) []string { path = tspath.NormalizePath(path) currentDirectory = tspath.NormalizePath(currentDirectory) @@ -458,6 +459,6 @@ func matchFiles(path string, extensions []string, excludes []string, includes [] return core.Flatten(results) } -func ReadDirectory(host FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { +func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) } From e169936401f6538d3bc4cde0b9d11419fed60be3 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Tue, 16 Dec 2025 20:59:36 -0800 Subject: [PATCH 02/41] Big test --- internal/vfs/vfsmatch/vfsmatch.go | 5 +- internal/vfs/vfsmatch/vfsmatch_test.go | 1229 ++++++++++++++++++++++++ 2 files changed, 1233 insertions(+), 1 deletion(-) create mode 100644 internal/vfs/vfsmatch/vfsmatch_test.go diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go index 1ecb01bf8f..3fa4a401c7 100644 --- a/internal/vfs/vfsmatch/vfsmatch.go +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -367,7 +367,10 @@ func (v *visitor) visitDirectory( absolutePath string, depth *int, ) { - canonicalPath := tspath.GetCanonicalFileName(absolutePath, v.useCaseSensitiveFileNames) + // Use the real path (with symlinks resolved) for cycle detection. + // This prevents infinite loops when symlinks create cycles. 
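+	// Directories reached through different symlinked paths resolve to the same real path and thus the same canonical key, so each real directory is visited at most once.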
+ realPath := v.host.Realpath(absolutePath) + canonicalPath := tspath.GetCanonicalFileName(realPath, v.useCaseSensitiveFileNames) if v.visited.Has(canonicalPath) { return } diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go new file mode 100644 index 0000000000..af127f9a3d --- /dev/null +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -0,0 +1,1229 @@ +package vfsmatch_test + +import ( + "slices" + "testing" + + "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" + "github.com/microsoft/typescript-go/internal/vfs/vfstest" + "gotest.tools/v3/assert" +) + +// Test cases modeled after TypeScript's matchFiles tests in +// _submodules/TypeScript/src/testRunner/unittests/config/matchFiles.ts + +func ptrTo[T any](v T) *T { + return &v +} + +// caseInsensitiveHost simulates a Windows-like file system +func caseInsensitiveHost() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/a.ts": "", + "/dev/a.d.ts": "", + "/dev/a.js": "", + "/dev/b.ts": "", + "/dev/b.js": "", + "/dev/c.d.ts": "", + "/dev/z/a.ts": "", + "/dev/z/abz.ts": "", + "/dev/z/aba.ts": "", + "/dev/z/b.ts": "", + "/dev/z/bbz.ts": "", + "/dev/z/bba.ts": "", + "/dev/x/a.ts": "", + "/dev/x/aa.ts": "", + "/dev/x/b.ts": "", + "/dev/x/y/a.ts": "", + "/dev/x/y/b.ts": "", + "/dev/js/a.js": "", + "/dev/js/b.js": "", + "/dev/js/d.min.js": "", + "/dev/js/ab.min.js": "", + "/ext/ext.ts": "", + "/ext/b/a..b.ts": "", + }, false) +} + +// caseSensitiveHost simulates a Unix-like case-sensitive file system +func caseSensitiveHost() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/a.ts": "", + "/dev/a.d.ts": "", + "/dev/a.js": "", + "/dev/b.ts": "", + "/dev/b.js": "", + "/dev/A.ts": "", + "/dev/B.ts": "", + "/dev/c.d.ts": "", + "/dev/z/a.ts": "", + "/dev/z/abz.ts": "", + "/dev/z/aba.ts": "", + "/dev/z/b.ts": "", + "/dev/z/bbz.ts": "", + "/dev/z/bba.ts": "", + "/dev/x/a.ts": "", + "/dev/x/b.ts": "", + "/dev/x/y/a.ts": "", + "/dev/x/y/b.ts": "", + "/dev/q/a/c/b/d.ts": "", + "/dev/js/a.js": "", + "/dev/js/b.js": "", + }, true) +} + +// commonFoldersHost includes node_modules, bower_components, jspm_packages +func commonFoldersHost() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/a.ts": "", + "/dev/a.d.ts": "", + "/dev/a.js": "", + "/dev/b.ts": "", + "/dev/x/a.ts": "", + "/dev/node_modules/a.ts": "", + "/dev/bower_components/a.ts": "", + "/dev/jspm_packages/a.ts": "", + }, false) +} + +// dottedFoldersHost includes files and folders starting with a dot +func dottedFoldersHost() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/x/d.ts": "", + "/dev/x/y/d.ts": "", + "/dev/x/y/.e.ts": "", + "/dev/x/.y/a.ts": "", + "/dev/.z/.b.ts": "", + "/dev/.z/c.ts": "", + "/dev/w/.u/e.ts": "", + "/dev/g.min.js/.g/g.ts": "", + }, false) +} + +// mixedExtensionHost has various file extensions +func mixedExtensionHost() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/a.ts": "", + "/dev/a.d.ts": "", + "/dev/a.js": "", + "/dev/b.tsx": "", + "/dev/b.d.ts": "", + "/dev/b.jsx": "", + "/dev/c.tsx": "", + "/dev/c.js": "", + "/dev/d.js": "", + "/dev/e.jsx": "", + "/dev/f.other": "", + }, false) +} + +// sameNamedDeclarationsHost has files with same names but different extensions +func sameNamedDeclarationsHost() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/a.tsx": "", + "/dev/a.d.ts": "", + "/dev/b.tsx": "", + "/dev/b.ts": "", + "/dev/c.tsx": "", + "/dev/m.ts": "", + "/dev/m.d.ts": "", + "/dev/n.tsx": "", + 
"/dev/n.ts": "", + "/dev/n.d.ts": "", + "/dev/o.ts": "", + "/dev/x.d.ts": "", + }, false) +} + +type readDirTestCase struct { + name string + host func() vfs.FS + currentDir string + path string + extensions []string + excludes []string + includes []string + depth *int + expect func(t *testing.T, got []string) +} + +func runReadDirectoryCase(t *testing.T, tc readDirTestCase) { + currentDir := tc.currentDir + if currentDir == "" { + currentDir = "/" + } + path := tc.path + if path == "" { + path = "/dev" + } + got := vfsmatch.ReadDirectory(tc.host(), currentDir, path, tc.extensions, tc.excludes, tc.includes, tc.depth) + tc.expect(t, got) +} + +func TestReadDirectory(t *testing.T) { + t.Parallel() + + cases := []readDirTestCase{ + { + name: "defaults include common package folders", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/b.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/node_modules/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/bower_components/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/jspm_packages/a.ts")) + }, + }, + { + name: "literal includes without exclusions", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"a.ts", "b.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/a.ts", "/dev/b.ts"}) + }, + }, + { + name: "literal includes with non ts extensions excluded", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"a.js", "b.js"}, + expect: func(t *testing.T, got []string) { + assert.Equal(t, len(got), 0) + }, + }, + { + name: "literal includes missing files excluded", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"z.ts", "x.ts"}, + expect: func(t *testing.T, got []string) { + assert.Equal(t, len(got), 0) + }, + }, + { + name: "literal includes with literal excludes", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"b.ts"}, + includes: []string{"a.ts", "b.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/a.ts"}) + }, + }, + { + name: "literal includes with wildcard excludes", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"*.ts", "z/??z.ts", "*/b.ts"}, + includes: []string{"a.ts", "b.ts", "z/a.ts", "z/abz.ts", "z/aba.ts", "x/b.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/z/a.ts", "/dev/z/aba.ts"}) + }, + }, + { + name: "literal includes with recursive excludes", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**/b.ts"}, + includes: []string{"a.ts", "b.ts", "x/a.ts", "x/b.ts", "x/y/a.ts", "x/y/b.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/a.ts", "/dev/x/a.ts", "/dev/x/y/a.ts"}) + }, + }, + { + name: "case sensitive exclude is respected", + host: caseSensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**/b.ts"}, + includes: []string{"B.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/B.ts"}) + }, + }, + { + name: "explicit includes keep common package folders", + host: 
commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"a.ts", "b.ts", "node_modules/a.ts", "bower_components/a.ts", "jspm_packages/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/b.ts")) + assert.Assert(t, slices.Contains(got, "/dev/node_modules/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/bower_components/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/jspm_packages/a.ts")) + }, + }, + { + name: "wildcard include sorted order", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"z/*.ts", "x/*.ts"}, + expect: func(t *testing.T, got []string) { + expected := []string{ + "/dev/z/a.ts", "/dev/z/aba.ts", "/dev/z/abz.ts", "/dev/z/b.ts", "/dev/z/bba.ts", "/dev/z/bbz.ts", + "/dev/x/a.ts", "/dev/x/aa.ts", "/dev/x/b.ts", + } + assert.DeepEqual(t, got, expected) + }, + }, + { + name: "wildcard include same named declarations excluded", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/b.ts")) + assert.Assert(t, slices.Contains(got, "/dev/a.d.ts")) + assert.Assert(t, slices.Contains(got, "/dev/c.d.ts")) + }, + }, + { + name: "wildcard star matches only ts files", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, contains(f, ".ts") || contains(f, ".tsx") || contains(f, ".d.ts"), "unexpected file: %s", f) + } + assert.Assert(t, !slices.Contains(got, "/dev/a.js")) + assert.Assert(t, !slices.Contains(got, "/dev/b.js")) + }, + }, + { + name: "wildcard question mark single character", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/?.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/x/a.ts", "/dev/x/b.ts"}) + }, + }, + { + name: "wildcard recursive directory", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/z/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/y/a.ts")) + }, + }, + { + name: "wildcard multiple recursive directories", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/y/**/a.ts", "x/**/a.ts", "z/**/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, len(got) > 0) + }, + }, + { + name: "wildcard case sensitive matching", + host: caseSensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/A.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/A.ts"}) + }, + }, + { + name: "wildcard missing files excluded", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*/z.ts"}, + expect: func(t *testing.T, got []string) { assert.Equal(t, len(got), 0) }, + }, + { + name: "exclude folders with wildcards", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"z", "x"}, + includes: 
[]string{"**/*"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, !contains(f, "/z/") && !contains(f, "/x/"), "should not contain z or x: %s", f) + } + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/b.ts")) + }, + }, + { + name: "include paths outside project absolute", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*", "/ext/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/ext/ext.ts")) + }, + }, + { + name: "include paths outside project relative", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**"}, + includes: []string{"*", "../ext/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/ext/ext.ts")) + }, + }, + { + name: "include files containing double dots", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**"}, + includes: []string{"/ext/b/a..b.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/ext/b/a..b.ts")) + }, + }, + { + name: "exclude files containing double dots", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"/ext/b/a..b.ts"}, + includes: []string{"/ext/**/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/ext/ext.ts")) + assert.Assert(t, !slices.Contains(got, "/ext/b/a..b.ts")) + }, + }, + { + name: "common package folders implicitly excluded", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/node_modules/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/bower_components/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/jspm_packages/a.ts")) + }, + }, + { + name: "common package folders explicit recursive include", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts", "**/node_modules/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/node_modules/a.ts")) + }, + }, + { + name: "common package folders wildcard include", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/node_modules/a.ts")) + }, + }, + { + name: "common package folders explicit wildcard include", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*/a.ts", "node_modules/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/node_modules/a.ts")) + }, + }, + { + name: "dotted folders not implicitly included", + host: dottedFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/**/*", "w/*/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/d.ts")) + assert.Assert(t, 
slices.Contains(got, "/dev/x/y/d.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/.y/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/y/.e.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/w/.u/e.ts")) + }, + }, + { + name: "dotted folders explicitly included", + host: dottedFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/.y/a.ts", "/dev/.z/.b.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/.y/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/.z/.b.ts")) + }, + }, + { + name: "dotted folders recursive wildcard matches directories", + host: dottedFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/.*/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/.y/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/.z/c.ts")) + assert.Assert(t, slices.Contains(got, "/dev/w/.u/e.ts")) + }, + }, + { + name: "trailing recursive include returns empty", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**"}, + expect: func(t *testing.T, got []string) { assert.Equal(t, len(got), 0) }, + }, + { + name: "trailing recursive exclude removes everything", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**"}, + includes: []string{"**/*"}, + expect: func(t *testing.T, got []string) { assert.Equal(t, len(got), 0) }, + }, + { + name: "multiple recursive directory patterns in includes", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/x/**/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/y/a.ts")) + }, + }, + { + name: "multiple recursive directory patterns in excludes", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**/x/**"}, + includes: []string{"**/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/z/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/y/a.ts")) + }, + }, + { + name: "implicit globbification expands directory", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"z"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/z/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/z/aba.ts")) + assert.Assert(t, slices.Contains(got, "/dev/z/b.ts")) + }, + }, + { + name: "exclude patterns starting with starstar", + host: caseSensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**/x"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, !contains(f, "/x/"), "should not contain /x/: %s", f) + } + }, + }, + { + name: "include patterns starting with starstar", + host: caseSensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/x", "**/a/**/b"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/q/a/c/b/d.ts")) + }, + }, + { + name: "depth limit one", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + depth: ptrTo(1), + expect: func(t *testing.T, got 
[]string) { + for _, f := range got { + suffix := f[len("/dev/"):] + assert.Assert(t, !contains(suffix, "/"), "depth 1 should not include nested files: %s", f) + } + }, + }, + { + name: "depth limit two", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + depth: ptrTo(2), + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/z/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/y/a.ts")) + }, + }, + { + name: "mixed extensions only ts", + host: mixedExtensionHost, + extensions: []string{".ts"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, hasSuffix(f, ".ts"), "should only have .ts files: %s", f) + } + }, + }, + { + name: "mixed extensions ts and tsx", + host: mixedExtensionHost, + extensions: []string{".ts", ".tsx"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, hasSuffix(f, ".ts") || hasSuffix(f, ".tsx"), "should only have .ts or .tsx files: %s", f) + } + }, + }, + { + name: "mixed extensions js and jsx", + host: mixedExtensionHost, + extensions: []string{".js", ".jsx"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, hasSuffix(f, ".js") || hasSuffix(f, ".jsx"), "should only have .js or .jsx files: %s", f) + } + }, + }, + { + name: "min js files excluded by wildcard", + host: caseInsensitiveHost, + extensions: []string{".js"}, + includes: []string{"js/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/js/a.js")) + assert.Assert(t, slices.Contains(got, "/dev/js/b.js")) + assert.Assert(t, !slices.Contains(got, "/dev/js/d.min.js")) + assert.Assert(t, !slices.Contains(got, "/dev/js/ab.min.js")) + }, + }, + { + name: "min js files explicitly included", + host: caseInsensitiveHost, + extensions: []string{".js"}, + includes: []string{"js/*.min.js"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/js/d.min.js")) + assert.Assert(t, slices.Contains(got, "/dev/js/ab.min.js")) + }, + }, + { + name: "same named declarations include ts", + host: sameNamedDeclarationsHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, len(got) > 0) }, + }, + { + name: "same named declarations include tsx", + host: sameNamedDeclarationsHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*.tsx"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, hasSuffix(f, ".tsx"), "should only have .tsx files: %s", f) + } + }, + }, + { + name: "empty includes returns all matching files", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, len(got) > 0) + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + }, + }, + { + name: "nil extensions returns all files", + host: caseInsensitiveHost, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/a.js")) + }, + }, + { + name: "empty extensions slice returns all files", + host: caseInsensitiveHost, + extensions: []string{}, + expect: func(t *testing.T, got []string) { assert.Assert(t, len(got) > 0, "expected files to be returned") }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + 
t.Parallel() + runReadDirectoryCase(t, tc) + }) + } +} + +// Helper functions +func contains(s, substr string) bool { + return len(s) >= len(substr) && (s == substr || len(substr) == 0 || + (len(s) > len(substr) && containsAt(s, substr))) +} + +func containsAt(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} + +func hasSuffix(s, suffix string) bool { + return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix +} + +// Additional tests for helper functions + +func TestIsImplicitGlob(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + expected bool + }{ + {name: "simple", input: "foo", expected: true}, + {name: "folder", input: "src", expected: true}, + {name: "with extension", input: "foo.ts", expected: false}, + {name: "trailing dot", input: "foo.", expected: false}, + {name: "star", input: "*", expected: false}, + {name: "question", input: "?", expected: false}, + {name: "star suffix", input: "foo*", expected: false}, + {name: "question suffix", input: "foo?", expected: false}, + {name: "dot name", input: "foo.bar", expected: false}, + {name: "empty", input: "", expected: true}, + } + + for _, tt := range tests { + tc := tt + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + result := vfsmatch.IsImplicitGlob(tc.input) + assert.Equal(t, result, tc.expected) + }) + } +} + +func TestGetRegularExpressionForWildcard(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + specs []string + usage vfsmatch.Usage + expected string + assertFn func(t *testing.T, got string) + }{ + {name: "nil specs", specs: nil, usage: vfsmatch.UsageFiles, expected: "", assertFn: func(t *testing.T, got string) { assert.Equal(t, got, "") }}, + {name: "empty specs", specs: []string{}, usage: vfsmatch.UsageFiles, expected: "", assertFn: func(t *testing.T, got string) { assert.Equal(t, got, "") }}, + {name: "single spec", specs: []string{"*.ts"}, usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + {name: "multiple specs", specs: []string{"*.ts", "*.tsx"}, usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + result := vfsmatch.GetRegularExpressionForWildcard(tc.specs, "/", tc.usage) + if tc.assertFn != nil { + tc.assertFn(t, result) + } else { + assert.Equal(t, result, tc.expected) + } + }) + } +} + +func TestGetRegularExpressionsForWildcards(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + specs []string + usage vfsmatch.Usage + assertFn func(t *testing.T, got []string) + }{ + {name: "nil specs", specs: nil, usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Assert(t, got == nil) }}, + {name: "empty specs", specs: []string{}, usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Assert(t, got == nil) }}, + {name: "two specs", specs: []string{"*.ts", "*.tsx"}, usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Equal(t, len(got), 2) }}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + result := vfsmatch.GetRegularExpressionsForWildcards(tc.specs, "/", tc.usage) + tc.assertFn(t, result) + }) + } +} + +func TestGetPatternFromSpec(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + spec string + usage vfsmatch.Usage + 
assertFn func(t *testing.T, got string) + }{ + {name: "files usage", spec: "*.ts", usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got string) { + assert.Assert(t, got != "") + assert.Assert(t, hasSuffix(got, "$")) + }}, + {name: "directories usage", spec: "src", usage: vfsmatch.UsageDirectories, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + {name: "exclude usage", spec: "node_modules", usage: vfsmatch.UsageExclude, assertFn: func(t *testing.T, got string) { + assert.Assert(t, got != "") + assert.Assert(t, contains(got, "($|/)")) + }}, + {name: "trailing starstar non exclude", spec: "**", usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got string) { assert.Equal(t, got, "") }}, + {name: "trailing starstar exclude allowed", spec: "**", usage: vfsmatch.UsageExclude, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + result := vfsmatch.GetPatternFromSpec(tc.spec, "/", tc.usage) + tc.assertFn(t, result) + }) + } +} + +// Edge case tests for various pattern scenarios +func TestReadDirectoryEdgeCases(t *testing.T) { + t.Parallel() + + cases := []readDirTestCase{ + { + name: "rooted include path", + host: caseInsensitiveHost, + extensions: []string{".ts"}, + includes: []string{"/dev/a.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/dev/a.ts")) }, + }, + { + name: "include with extension in path", + host: caseInsensitiveHost, + extensions: []string{".ts"}, + includes: []string{"a.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/dev/a.ts")) }, + }, + { + name: "special regex characters in path", + host: func() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/file+test.ts": "", + "/dev/file[0].ts": "", + "/dev/file(1).ts": "", + "/dev/file$money.ts": "", + "/dev/file^start.ts": "", + "/dev/file|pipe.ts": "", + "/dev/file#hash.ts": "", + }, false) + }, + extensions: []string{".ts"}, + includes: []string{"file+test.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/dev/file+test.ts")) }, + }, + { + name: "include pattern starting with question mark", + host: caseInsensitiveHost, + extensions: []string{".ts"}, + includes: []string{"?.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/b.ts")) + }, + }, + { + name: "include pattern starting with star", + host: caseInsensitiveHost, + extensions: []string{".ts"}, + includes: []string{"*b.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/dev/b.ts")) }, + }, + { + name: "case insensitive file matching", + host: func() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/File.ts": "", + "/dev/FILE.ts": "", + }, true) + }, + extensions: []string{".ts"}, + includes: []string{"*.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, len(got) == 2) }, + }, + { + name: "nested subdirectory base path", + host: caseSensitiveHost, + extensions: []string{".ts"}, + includes: []string{"q/a/c/b/d.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/dev/q/a/c/b/d.ts")) }, + }, + { + name: "current directory differs from path", + host: caseInsensitiveHost, + extensions: []string{".ts"}, + includes: []string{"z/*.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, 
len(got) > 0) }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc) + }) + } +} + +func TestReadDirectoryEmptyIncludes(t *testing.T) { + t.Parallel() + cases := []readDirTestCase{ + { + name: "empty includes slice behavior", + host: func() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/root/a.ts": "", + }, true) + }, + path: "/root", + currentDir: "/", + extensions: []string{".ts"}, + includes: []string{}, + expect: func(t *testing.T, got []string) { + if len(got) == 0 { + return + } + assert.Assert(t, slices.Contains(got, "/root/a.ts")) + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc) + }) + } +} + +// TestReadDirectorySymlinkCycle tests that cyclic symlinks don't cause infinite loops. +// The cycle is detected by the vfs package using Realpath for cycle detection. +// This means directories with cyclic symlinks will be skipped during traversal. +func TestReadDirectorySymlinkCycle(t *testing.T) { + t.Parallel() + cases := []readDirTestCase{ + { + name: "detects and skips symlink cycles", + host: func() vfs.FS { + return vfstest.FromMap(map[string]any{ + "/root/file.ts": "", + "/root/a/file.ts": "", + "/root/a/b": vfstest.Symlink("/root/a"), + }, true) + }, + path: "/root", + currentDir: "/", + extensions: []string{".ts"}, + includes: []string{"**/*"}, + expect: func(t *testing.T, got []string) { + expected := []string{"/root/file.ts", "/root/a/file.ts"} + assert.DeepEqual(t, got, expected) + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc) + }) + } +} + +// TestReadDirectoryMatchesTypeScriptBaselines contains tests that verify the Go implementation +// matches the TypeScript baseline outputs from _submodules/TypeScript/tests/baselines/reference/config/matchFiles/ +func TestReadDirectoryMatchesTypeScriptBaselines(t *testing.T) { + t.Parallel() + + cases := []readDirTestCase{ + { + name: "sorted in include order then alphabetical", + host: func() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/z/a.ts": "", + "/dev/z/aba.ts": "", + "/dev/z/abz.ts": "", + "/dev/z/b.ts": "", + "/dev/z/bba.ts": "", + "/dev/z/bbz.ts": "", + "/dev/x/a.ts": "", + "/dev/x/aa.ts": "", + "/dev/x/b.ts": "", + }, false) + }, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"z/*.ts", "x/*.ts"}, + expect: func(t *testing.T, got []string) { + expected := []string{ + "/dev/z/a.ts", "/dev/z/aba.ts", "/dev/z/abz.ts", "/dev/z/b.ts", "/dev/z/bba.ts", "/dev/z/bbz.ts", + "/dev/x/a.ts", "/dev/x/aa.ts", "/dev/x/b.ts", + } + assert.DeepEqual(t, got, expected) + }, + }, + { + name: "recursive wildcards match dotted directories", + host: func() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/x/d.ts": "", + "/dev/x/y/d.ts": "", + "/dev/x/y/.e.ts": "", + "/dev/x/.y/a.ts": "", + "/dev/.z/.b.ts": "", + "/dev/.z/c.ts": "", + "/dev/w/.u/e.ts": "", + "/dev/g.min.js/.g/g.ts": "", + }, false) + }, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/.*/*"}, + expect: func(t *testing.T, got []string) { + expected := []string{"/dev/.z/c.ts", "/dev/g.min.js/.g/g.ts", "/dev/w/.u/e.ts", "/dev/x/.y/a.ts"} + assert.Equal(t, len(got), len(expected)) + for _, want := range expected { + assert.Assert(t, slices.Contains(got, want)) + } + }, + }, + { + name: "common package folders implicitly excluded with wildcard", + host: func() vfs.FS { + return 
vfstest.FromMap(map[string]string{ + "/dev/a.ts": "", + "/dev/a.d.ts": "", + "/dev/a.js": "", + "/dev/b.ts": "", + "/dev/x/a.ts": "", + "/dev/node_modules/a.ts": "", + "/dev/bower_components/a.ts": "", + "/dev/jspm_packages/a.ts": "", + }, false) + }, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts"}, + expect: func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/a.ts", "/dev/x/a.ts"}) }, + }, + { + name: "js wildcard excludes min js files", + host: func() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/js/a.js": "", + "/dev/js/b.js": "", + "/dev/js/d.min.js": "", + "/dev/js/ab.min.js": "", + }, false) + }, + extensions: []string{".js"}, + includes: []string{"js/*"}, + expect: func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/js/a.js", "/dev/js/b.js"}) }, + }, + { + name: "explicit min js pattern includes min files", + host: func() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/js/a.js": "", + "/dev/js/b.js": "", + "/dev/js/d.min.js": "", + "/dev/js/ab.min.js": "", + }, false) + }, + extensions: []string{".js"}, + includes: []string{"js/*.min.js"}, + expect: func(t *testing.T, got []string) { + expected := []string{"/dev/js/ab.min.js", "/dev/js/d.min.js"} + assert.Equal(t, len(got), len(expected)) + for _, want := range expected { + assert.Assert(t, slices.Contains(got, want)) + } + }, + }, + { + name: "literal excludes baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"b.ts"}, + includes: []string{"a.ts", "b.ts"}, + expect: func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/a.ts"}) }, + }, + { + name: "wildcard excludes baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"*.ts", "z/??z.ts", "*/b.ts"}, + includes: []string{"a.ts", "b.ts", "z/a.ts", "z/abz.ts", "z/aba.ts", "x/b.ts"}, + expect: func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/z/a.ts", "/dev/z/aba.ts"}) }, + }, + { + name: "recursive excludes baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**/b.ts"}, + includes: []string{"a.ts", "b.ts", "x/a.ts", "x/b.ts", "x/y/a.ts", "x/y/b.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/a.ts", "/dev/x/a.ts", "/dev/x/y/a.ts"}) + }, + }, + { + name: "question mark baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/?.ts"}, + expect: func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/x/a.ts", "/dev/x/b.ts"}) }, + }, + { + name: "recursive directory pattern baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/a.ts", "/dev/x/a.ts", "/dev/x/y/a.ts", "/dev/z/a.ts"}) + }, + }, + { + name: "case sensitive baseline", + host: caseSensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/A.ts"}, + expect: func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/A.ts"}) }, + }, + { + name: "exclude folders baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"z", "x"}, + includes: []string{"**/*"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, 
!contains(f, "/z/") && !contains(f, "/x/"), "should not contain z or x: %s", f) + } + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/b.ts")) + }, + }, + { + name: "implicit glob expansion baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"z"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/z/a.ts", "/dev/z/aba.ts", "/dev/z/abz.ts", "/dev/z/b.ts", "/dev/z/bba.ts", "/dev/z/bbz.ts"}) + }, + }, + { + name: "trailing recursive directory baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**"}, + expect: func(t *testing.T, got []string) { assert.Equal(t, len(got), 0) }, + }, + { + name: "exclude trailing recursive directory baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**"}, + includes: []string{"**/*"}, + expect: func(t *testing.T, got []string) { assert.Equal(t, len(got), 0) }, + }, + { + name: "multiple recursive directory patterns baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/x/**/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/aa.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/b.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/y/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/y/b.ts")) + }, + }, + { + name: "include dirs with starstar prefix baseline", + host: caseSensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/x", "**/a/**/b"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/b.ts")) + assert.Assert(t, slices.Contains(got, "/dev/q/a/c/b/d.ts")) + }, + }, + { + name: "dotted folders not implicitly included baseline", + host: dottedFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/**/*", "w/*/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/d.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/y/d.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/.y/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/y/.e.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/w/.u/e.ts")) + }, + }, + { + name: "include paths outside project baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*", "/ext/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/ext/ext.ts")) + }, + }, + { + name: "include files with double dots baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**"}, + includes: []string{"/ext/b/a..b.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/ext/b/a..b.ts")) }, + }, + { + name: "exclude files with double dots baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"/ext/b/a..b.ts"}, + includes: []string{"/ext/**/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/ext/ext.ts")) + assert.Assert(t, !slices.Contains(got, "/ext/b/a..b.ts")) + }, + }, + } + + for _, tc := 
range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc) + }) + } +} From 89940b4a2d81bbfd4cc64117216d59c16c05c7e5 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 09:23:12 -0800 Subject: [PATCH 03/41] it works --- internal/vfs/vfsmatch/bench_test.go | 233 +++++++++ internal/vfs/vfsmatch/globmatch.go | 630 +++++++++++++++++++++++++ internal/vfs/vfsmatch/vfsmatch.go | 11 + internal/vfs/vfsmatch/vfsmatch_test.go | 64 ++- 4 files changed, 917 insertions(+), 21 deletions(-) create mode 100644 internal/vfs/vfsmatch/bench_test.go create mode 100644 internal/vfs/vfsmatch/globmatch.go diff --git a/internal/vfs/vfsmatch/bench_test.go b/internal/vfs/vfsmatch/bench_test.go new file mode 100644 index 0000000000..78bdd83b42 --- /dev/null +++ b/internal/vfs/vfsmatch/bench_test.go @@ -0,0 +1,233 @@ +package vfsmatch_test + +import ( + "testing" + + "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" + "github.com/microsoft/typescript-go/internal/vfs/vfstest" +) + +// Benchmark test cases using the same hosts as the unit tests + +func BenchmarkReadDirectory(b *testing.B) { + benchCases := []struct { + name string + host func() vfs.FS + path string + extensions []string + excludes []string + includes []string + }{ + { + name: "LiteralIncludes", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"a.ts", "b.ts"}, + }, + { + name: "WildcardIncludes", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"z/*.ts", "x/*.ts"}, + }, + { + name: "RecursiveWildcard", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts"}, + }, + { + name: "RecursiveWithExcludes", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**/b.ts"}, + includes: []string{"**/*.ts"}, + }, + { + name: "ComplexPattern", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"*.ts", "z/??z.ts", "*/b.ts"}, + includes: []string{"a.ts", "b.ts", "z/a.ts", "z/abz.ts", "z/aba.ts", "x/b.ts"}, + }, + { + name: "DottedFolders", + host: dottedFoldersHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/.*/*"}, + }, + { + name: "CommonPackageFolders", + host: commonFoldersHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts"}, + }, + { + name: "NoIncludes", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + }, + { + name: "MultipleRecursive", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/x/**/*"}, + }, + { + name: "LargeFileSystem", + host: largeFileSystemHost, + path: "/project", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"src/**/*.ts"}, + excludes: []string{"**/node_modules/**", "**/*.test.ts"}, + }, + } + + for _, bc := range benchCases { + b.Run("Regex/"+bc.name, func(b *testing.B) { + host := bc.host() + b.ResetTimer() + for range b.N { + vfsmatch.ReadDirectoryRegex(host, "/", bc.path, bc.extensions, bc.excludes, bc.includes, nil) + } + }) + + b.Run("NoRegex/"+bc.name, func(b *testing.B) { + host := bc.host() + b.ResetTimer() + for range b.N { + 
vfsmatch.ReadDirectoryNoRegex(host, "/", bc.path, bc.extensions, bc.excludes, bc.includes, nil) + } + }) + } +} + +// largeFileSystemHost creates a more realistic file system with many files +func largeFileSystemHost() vfs.FS { + files := make(map[string]string) + + // Create a realistic project structure + dirs := []string{ + "/project/src", + "/project/src/components", + "/project/src/utils", + "/project/src/services", + "/project/src/models", + "/project/src/hooks", + "/project/test", + "/project/node_modules/react", + "/project/node_modules/typescript", + "/project/node_modules/@types/node", + } + + // Add files to each directory + for _, dir := range dirs { + for j := range 20 { + files[dir+"/file"+string(rune('a'+j))+".ts"] = "" + files[dir+"/file"+string(rune('a'+j))+".test.ts"] = "" + } + } + + // Add some dotted directories + files["/project/src/.hidden/secret.ts"] = "" + files["/project/.config/settings.ts"] = "" + + return vfstest.FromMap(files, false) +} + +// BenchmarkPatternCompilation benchmarks the pattern compilation step +func BenchmarkPatternCompilation(b *testing.B) { + patterns := []struct { + name string + spec string + }{ + {"Literal", "src/file.ts"}, + {"SingleWildcard", "src/*.ts"}, + {"QuestionMark", "src/?.ts"}, + {"DoubleAsterisk", "**/file.ts"}, + {"Complex", "src/**/components/*.tsx"}, + {"DottedPattern", "**/.*/*"}, + } + + for _, p := range patterns { + b.Run(p.name, func(b *testing.B) { + for range b.N { + vfsmatch.CompileGlobPattern(p.spec, "/project", vfsmatch.UsageFiles, true) + } + }) + } +} + +// BenchmarkPatternMatching benchmarks pattern matching against paths +func BenchmarkPatternMatching(b *testing.B) { + testCases := []struct { + name string + spec string + paths []string + }{ + { + name: "LiteralMatch", + spec: "src/file.ts", + paths: []string{ + "/project/src/file.ts", + "/project/src/other.ts", + "/project/lib/file.ts", + }, + }, + { + name: "WildcardMatch", + spec: "src/*.ts", + paths: []string{ + "/project/src/file.ts", + "/project/src/component.ts", + "/project/src/deep/file.ts", + "/project/lib/file.ts", + }, + }, + { + name: "RecursiveMatch", + spec: "**/file.ts", + paths: []string{ + "/project/file.ts", + "/project/src/file.ts", + "/project/src/deep/nested/file.ts", + "/project/src/other.ts", + }, + }, + { + name: "ComplexMatch", + spec: "src/**/components/*.tsx", + paths: []string{ + "/project/src/components/Button.tsx", + "/project/src/features/auth/components/Login.tsx", + "/project/src/components/Button.ts", + "/project/lib/components/Button.tsx", + }, + }, + } + + for _, tc := range testCases { + pattern := vfsmatch.CompileGlobPattern(tc.spec, "/project", vfsmatch.UsageFiles, true) + if pattern == nil { + continue + } + + b.Run(tc.name, func(b *testing.B) { + for range b.N { + for _, path := range tc.paths { + pattern.Matches(path) + } + } + }) + } +} diff --git a/internal/vfs/vfsmatch/globmatch.go b/internal/vfs/vfsmatch/globmatch.go new file mode 100644 index 0000000000..daa75fccca --- /dev/null +++ b/internal/vfs/vfsmatch/globmatch.go @@ -0,0 +1,630 @@ +package vfsmatch + +import ( + "strings" + + "github.com/microsoft/typescript-go/internal/collections" + "github.com/microsoft/typescript-go/internal/core" + "github.com/microsoft/typescript-go/internal/tspath" + "github.com/microsoft/typescript-go/internal/vfs" +) + +// GlobPattern represents a compiled glob pattern for matching file paths. +// It stores the pattern components for efficient matching without using regex. 
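+// A nil *GlobPattern, as returned by CompileGlobPattern for a non-exclude spec whose last component is "**", matches nothing and is skipped by callers before Matches is used.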
+type GlobPattern struct { + // The original pattern specification + spec string + // The base path from which the pattern was derived + basePath string + // The normalized path components to match + components []patternComponent + // Whether this is an exclude pattern (affects matching behavior) + isExclude bool + // Whether pattern matching should be case-sensitive + caseSensitive bool + // For files patterns, exclude .min.js by default + excludeMinJs bool +} + +// patternComponent represents a single segment of a glob pattern +type patternComponent struct { + // Whether this component is a ** wildcard + isDoubleAsterisk bool + // The literal text if not a wildcard pattern + literal string + // Whether this component contains wildcards + hasWildcards bool + // Parsed wildcard segments for matching + segments []patternSegment + // For include patterns (not exclude), implicitly exclude common package folders + implicitlyExcludePackages bool +} + +// patternSegment represents a parsed segment within a component +type patternSegment struct { + kind segmentKind + literal string +} + +type segmentKind int + +const ( + segmentLiteral segmentKind = iota + segmentStar // * - matches any chars except / + segmentQuestion // ? - matches single char except / +) + +// CompileGlobPattern compiles a glob spec into a GlobPattern for matching. +func CompileGlobPattern(spec string, basePath string, usage Usage, caseSensitive bool) *GlobPattern { + components := tspath.GetNormalizedPathComponents(spec, basePath) + lastComponent := core.LastOrNil(components) + + // If the last component is ** and this is not an exclude pattern, return nil + // (such patterns match nothing) + if usage != UsageExclude && lastComponent == "**" { + return nil + } + + // Remove trailing separator from root component + components[0] = tspath.RemoveTrailingDirectorySeparator(components[0]) + + // Handle implicit glob (directories become dir/**/*) + if IsImplicitGlob(lastComponent) { + components = append(components, "**", "*") + } + + pattern := &GlobPattern{ + spec: spec, + basePath: basePath, + isExclude: usage == UsageExclude, + caseSensitive: caseSensitive, + excludeMinJs: usage == UsageFiles, + } + + for _, comp := range components { + pc := patternComponent{} + + if comp == "**" { + pc.isDoubleAsterisk = true + } else { + pc.hasWildcards = strings.ContainsAny(comp, "*?") + + if pc.hasWildcards { + pc.segments = parsePatternSegments(comp) + // For non-exclude patterns with wildcards, implicitly exclude common package folders + if usage != UsageExclude { + pc.implicitlyExcludePackages = true + } + } else { + pc.literal = comp + } + } + + pattern.components = append(pattern.components, pc) + } + + return pattern +} + +// parsePatternSegments breaks a component with wildcards into segments +func parsePatternSegments(comp string) []patternSegment { + var segments []patternSegment + var current strings.Builder + + for i := range len(comp) { + switch comp[i] { + case '*': + if current.Len() > 0 { + segments = append(segments, patternSegment{kind: segmentLiteral, literal: current.String()}) + current.Reset() + } + segments = append(segments, patternSegment{kind: segmentStar}) + case '?': + if current.Len() > 0 { + segments = append(segments, patternSegment{kind: segmentLiteral, literal: current.String()}) + current.Reset() + } + segments = append(segments, patternSegment{kind: segmentQuestion}) + default: + current.WriteByte(comp[i]) + } + } + + if current.Len() > 0 { + segments = append(segments, patternSegment{kind: segmentLiteral, 
literal: current.String()}) + } + + return segments +} + +// Matches checks if the given path matches this glob pattern. +func (p *GlobPattern) Matches(path string) bool { + if p == nil { + return false + } + + // Split the path into components + pathComponents := splitPath(path) + + matched := p.matchComponents(pathComponents, 0, 0, false) + + return matched +} + +// MatchesPrefix checks if the given directory path could potentially match files under it. +// This is used for directory filtering during traversal. +func (p *GlobPattern) MatchesPrefix(path string) bool { + if p == nil { + return false + } + + pathComponents := splitPath(path) + + return p.matchComponentsPrefix(pathComponents, 0, 0) +} + +// splitPath splits a path into its components +func splitPath(path string) []string { + // Handle the case of an absolute path + if len(path) > 0 && path[0] == '/' { + rest := strings.Split(strings.TrimPrefix(path, "/"), "/") + // Prepend empty string to represent root + result := make([]string, 0, len(rest)+1) + result = append(result, "") + for _, s := range rest { + if s != "" { + result = append(result, s) + } + } + return result + } + + parts := strings.Split(path, "/") + result := make([]string, 0, len(parts)) + for _, s := range parts { + if s != "" { + result = append(result, s) + } + } + return result +} + +// matchComponents recursively matches path components against pattern components +func (p *GlobPattern) matchComponents(pathComps []string, pathIdx int, patternIdx int, inDoubleAsterisk bool) bool { + // If we've consumed all pattern components, check if path is also fully consumed + if patternIdx >= len(p.components) { + if p.isExclude { + // For exclude patterns, we can match a prefix + return true + } + return pathIdx >= len(pathComps) + } + + // If we've consumed all path components but still have pattern components + if pathIdx >= len(pathComps) { + // Check if remaining pattern components are all optional (** only) + for i := patternIdx; i < len(p.components); i++ { + if !p.components[i].isDoubleAsterisk { + return false + } + } + return true + } + + pc := p.components[patternIdx] + pathComp := pathComps[pathIdx] + + if pc.isDoubleAsterisk { + // ** can match zero or more directory levels + // First, try matching zero directories (skip the **) + if p.matchComponents(pathComps, pathIdx, patternIdx+1, true) { + return true + } + + // For include patterns, ** should not match directories starting with . or common package folders + // But we still try to skip those directories and continue matching + if !p.isExclude { + if len(pathComp) > 0 && pathComp[0] == '.' 
{ + // Don't match hidden directories in ** for includes - return false + // The next pattern component (if any) might explicitly match it + return false + } + if isCommonPackageFolder(pathComp) { + // Don't match common package folders in ** for includes + return false + } + } + + // Match current component with ** and continue + return p.matchComponents(pathComps, pathIdx+1, patternIdx, true) + } + + // Check implicit package folder exclusion + if pc.implicitlyExcludePackages && !p.isExclude && isCommonPackageFolder(pathComp) { + return false + } + + // Match current component + if !p.matchComponent(pc, pathComp, inDoubleAsterisk) { + return false + } + + // Continue to next components + return p.matchComponents(pathComps, pathIdx+1, patternIdx+1, false) +} + +// matchComponentsPrefix checks if the path could be a prefix of a matching path +func (p *GlobPattern) matchComponentsPrefix(pathComps []string, pathIdx int, patternIdx int) bool { + // If we've consumed all path components, this prefix could match + if pathIdx >= len(pathComps) { + return true + } + + // If we've consumed all pattern components, no more matches possible + if patternIdx >= len(p.components) { + return false + } + + pc := p.components[patternIdx] + pathComp := pathComps[pathIdx] + + if pc.isDoubleAsterisk { + // ** can match any directory level + // Try matching zero (skip **) or more directories + if p.matchComponentsPrefix(pathComps, pathIdx, patternIdx+1) { + return true + } + + // For include patterns, ** should not match hidden or package directories + if !p.isExclude { + if len(pathComp) > 0 && pathComp[0] == '.' { + return false + } + if isCommonPackageFolder(pathComp) { + return false + } + } + + return p.matchComponentsPrefix(pathComps, pathIdx+1, patternIdx) + } + + // Check implicit package folder exclusion + if pc.implicitlyExcludePackages && !p.isExclude && isCommonPackageFolder(pathComp) { + return false + } + + // Match current component + if !p.matchComponent(pc, pathComp, false) { + return false + } + + return p.matchComponentsPrefix(pathComps, pathIdx+1, patternIdx+1) +} + +// matchComponent matches a single path component against a pattern component +func (p *GlobPattern) matchComponent(pc patternComponent, pathComp string, afterDoubleAsterisk bool) bool { + if pc.isDoubleAsterisk { + // Should not happen here, handled separately + return true + } + + // If the pattern component has no wildcards, do literal comparison + if !pc.hasWildcards { + return p.stringsEqual(pc.literal, pathComp) + } + + // Match with wildcards + // Note: The check for dotted names after ** is handled in matchWildcardComponent + // where we only reject if the pattern itself starts with a wildcard + return p.matchWildcardComponent(pc.segments, pathComp) +} + +// matchWildcardComponent matches a path component against wildcard segments +func (p *GlobPattern) matchWildcardComponent(segments []patternSegment, s string) bool { + // For non-exclude patterns, if the segments start with * or ?, + // the matched string cannot start with '.' + if !p.isExclude && len(segments) > 0 && len(s) > 0 && s[0] == '.' { + firstSeg := segments[0] + if firstSeg.kind == segmentStar || firstSeg.kind == segmentQuestion { + // Pattern starts with wildcard, so it cannot match a string starting with '.' 
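// (Editorial note, not part of this patch: for example, an include spec of
// "*.ts" will not match ".hidden.ts" here, while a spec written as ".*"
// still can, because its first parsed segment is the literal "." rather
// than a wildcard.)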
+ return false + } + } + + return p.matchSegments(segments, 0, s, 0) +} + +func (p *GlobPattern) matchSegments(segments []patternSegment, segIdx int, s string, sIdx int) bool { + // If we've processed all segments + if segIdx >= len(segments) { + return sIdx >= len(s) + } + + seg := segments[segIdx] + + switch seg.kind { + case segmentLiteral: + // Must match the literal exactly + if sIdx+len(seg.literal) > len(s) { + return false + } + substr := s[sIdx : sIdx+len(seg.literal)] + if !p.stringsEqual(seg.literal, substr) { + return false + } + return p.matchSegments(segments, segIdx+1, s, sIdx+len(seg.literal)) + + case segmentQuestion: + // Must match exactly one character (not /) + if sIdx >= len(s) { + return false + } + if s[sIdx] == '/' { + return false + } + return p.matchSegments(segments, segIdx+1, s, sIdx+1) + + case segmentStar: + // Match zero or more characters (not /) + // For files usage, also need to handle .min.js exclusion + + // Try matching zero characters first + if p.matchSegments(segments, segIdx+1, s, sIdx) { + // Before returning true, check min.js exclusion + if p.excludeMinJs && segIdx == 0 && segIdx+1 < len(segments) { + // Check if this could result in matching a .min.js file + if p.wouldMatchMinJs(s) { + return false + } + } + return true + } + + // Try matching more characters + for i := sIdx; i < len(s); i++ { + if s[i] == '/' { + break + } + if p.matchSegments(segments, segIdx+1, s, i+1) { + // Check min.js exclusion + if p.excludeMinJs && strings.HasSuffix(s, ".min.js") { + // Only exclude if pattern doesn't explicitly include .min.js + if !p.patternExplicitlyIncludesMinJs(segments) { + return false + } + } + return true + } + } + return false + } + + return false +} + +// wouldMatchMinJs checks if the filename ends with .min.js +func (p *GlobPattern) wouldMatchMinJs(filename string) bool { + return strings.HasSuffix(strings.ToLower(filename), ".min.js") +} + +// patternExplicitlyIncludesMinJs checks if the pattern explicitly includes .min.js +func (p *GlobPattern) patternExplicitlyIncludesMinJs(segments []patternSegment) bool { + // Look for .min.js in the literal segments + for _, seg := range segments { + if seg.kind == segmentLiteral && strings.Contains(strings.ToLower(seg.literal), ".min.js") { + return true + } + } + return false +} + +// stringsEqual compares two strings with case sensitivity based on pattern settings +func (p *GlobPattern) stringsEqual(a, b string) bool { + if p.caseSensitive { + return a == b + } + return strings.EqualFold(a, b) +} + +// isCommonPackageFolder checks if a directory name is a common package folder +func isCommonPackageFolder(name string) bool { + lower := strings.ToLower(name) + return lower == "node_modules" || lower == "bower_components" || lower == "jspm_packages" +} + +// GlobMatcher holds compiled glob patterns for matching files. +type GlobMatcher struct { + includePatterns []*GlobPattern + excludePatterns []*GlobPattern + caseSensitive bool + // hadIncludes tracks whether any include specs were provided (even if they compiled to nothing) + hadIncludes bool +} + +// NewGlobMatcher creates a new GlobMatcher from include and exclude specs. 
+func NewGlobMatcher(includes []string, excludes []string, basePath string, caseSensitive bool, usage Usage) *GlobMatcher { + m := &GlobMatcher{ + caseSensitive: caseSensitive, + hadIncludes: len(includes) > 0, + } + + for _, spec := range includes { + if pattern := CompileGlobPattern(spec, basePath, usage, caseSensitive); pattern != nil { + m.includePatterns = append(m.includePatterns, pattern) + } + } + + for _, spec := range excludes { + if pattern := CompileGlobPattern(spec, basePath, UsageExclude, caseSensitive); pattern != nil { + m.excludePatterns = append(m.excludePatterns, pattern) + } + } + + return m +} + +// MatchesFile checks if a file path matches the include patterns and doesn't match exclude patterns. +// Returns the index of the matching include pattern, or -1 if no match. +func (m *GlobMatcher) MatchesFile(path string) int { + // First check excludes + for _, exc := range m.excludePatterns { + if exc.Matches(path) { + return -1 + } + } + + // If no valid include patterns but includes were specified, nothing matches + if len(m.includePatterns) == 0 { + if m.hadIncludes { + return -1 + } + return 0 + } + + // Check includes + for i, inc := range m.includePatterns { + if inc.Matches(path) { + return i + } + } + + return -1 +} + +// MatchesDirectory checks if a directory could contain matching files. +func (m *GlobMatcher) MatchesDirectory(path string) bool { + // First check if excluded + for _, exc := range m.excludePatterns { + if exc.Matches(path) { + return false + } + } + + // If no valid include patterns but includes were specified, nothing matches + if len(m.includePatterns) == 0 { + if m.hadIncludes { + return false + } + return true + } + + // Check if any include pattern could match files in this directory + for _, inc := range m.includePatterns { + if inc.MatchesPrefix(path) { + return true + } + } + + return false +} + +// visitorNoRegex is similar to visitor but uses GlobMatcher instead of regex +type visitorNoRegex struct { + fileMatcher *GlobMatcher + directoryMatcher *GlobMatcher + extensions []string + useCaseSensitiveFileNames bool + host vfs.FS + visited collections.Set[string] + results [][]string + numIncludePatterns int +} + +func (v *visitorNoRegex) visitDirectory( + path string, + absolutePath string, + depth *int, +) { + // Use the real path for cycle detection + realPath := v.host.Realpath(absolutePath) + canonicalPath := tspath.GetCanonicalFileName(realPath, v.useCaseSensitiveFileNames) + if v.visited.Has(canonicalPath) { + return + } + v.visited.Add(canonicalPath) + + systemEntries := v.host.GetAccessibleEntries(absolutePath) + + for _, current := range systemEntries.Files { + name := tspath.CombinePaths(path, current) + absoluteName := tspath.CombinePaths(absolutePath, current) + + if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(name, v.extensions) { + continue + } + + matchIdx := v.fileMatcher.MatchesFile(absoluteName) + if matchIdx >= 0 { + if v.numIncludePatterns == 0 { + v.results[0] = append(v.results[0], name) + } else { + v.results[matchIdx] = append(v.results[matchIdx], name) + } + } + } + + if depth != nil { + newDepth := *depth - 1 + if newDepth == 0 { + return + } + depth = &newDepth + } + + for _, current := range systemEntries.Directories { + name := tspath.CombinePaths(path, current) + absoluteName := tspath.CombinePaths(absolutePath, current) + + if v.directoryMatcher.MatchesDirectory(absoluteName) { + v.visitDirectory(name, absoluteName, depth) + } + } +} + +// matchFilesNoRegex is the regex-free version of matchFiles 
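// Editorial illustration (not part of this patch): how the matcher above is
// expected to behave before matchFilesNoRegex wires it into a directory walk.
// The specs and paths here are made up for the example:
//
//	m := NewGlobMatcher([]string{"src/**/*.ts"}, []string{"**/node_modules/**"}, "/project", true, UsageFiles)
//	_ = m.MatchesFile("/project/src/app/main.ts")          // 0: index of the matching include spec
//	_ = m.MatchesFile("/project/node_modules/x/index.ts")  // -1: excluded
//	_ = m.MatchesDirectory("/project/src")                 // true, so the walk below descends into it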
+func matchFilesNoRegex(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host vfs.FS) []string { + path = tspath.NormalizePath(path) + currentDirectory = tspath.NormalizePath(currentDirectory) + absolutePath := tspath.CombinePaths(currentDirectory, path) + + // Build file matcher + fileMatcher := NewGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageFiles) + + // Build directory matcher + directoryMatcher := NewGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageDirectories) + + basePaths := getBasePaths(path, includes, useCaseSensitiveFileNames) + + numIncludePatterns := len(fileMatcher.includePatterns) + + var results [][]string + if numIncludePatterns > 0 { + results = make([][]string, numIncludePatterns) + for i := range results { + results[i] = []string{} + } + } else { + results = [][]string{{}} + } + + v := visitorNoRegex{ + useCaseSensitiveFileNames: useCaseSensitiveFileNames, + host: host, + fileMatcher: fileMatcher, + directoryMatcher: directoryMatcher, + extensions: extensions, + results: results, + numIncludePatterns: numIncludePatterns, + } + + for _, basePath := range basePaths { + v.visitDirectory(basePath, tspath.CombinePaths(currentDirectory, basePath), depth) + } + + return core.Flatten(results) +} + +// ReadDirectoryNoRegex is the regex-free version of ReadDirectory +func ReadDirectoryNoRegex(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { + return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) +} diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go index 3fa4a401c7..7d343db469 100644 --- a/internal/vfs/vfsmatch/vfsmatch.go +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -462,6 +462,17 @@ func matchFiles(path string, extensions []string, excludes []string, includes [] return core.Flatten(results) } +// useGlobMatch controls whether to use the regex-free glob matching implementation. +const useGlobMatch = true + func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { + if useGlobMatch { + return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) + } + return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) +} + +// ReadDirectoryRegex is the regex-based implementation, exported for benchmarking. 
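// Editorial usage sketch (not part of this patch): ReadDirectory and
// ReadDirectoryRegex share this signature, so a caller can drive either the
// same way. The in-memory FS and file layout below are assumptions for the
// example, mirroring the vfstest hosts used in the benchmarks:
//
//	fs := vfstest.FromMap(map[string]string{"/dev/src/a.ts": ""}, false)
//	files := vfsmatch.ReadDirectory(fs, "/", "/dev", []string{".ts"}, nil, []string{"src/**/*.ts"}, nil)
//	// files is expected to contain "/dev/src/a.ts", grouped in include order.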
+func ReadDirectoryRegex(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) } diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go index af127f9a3d..345907084d 100644 --- a/internal/vfs/vfsmatch/vfsmatch_test.go +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -17,6 +17,18 @@ func ptrTo[T any](v T) *T { return &v } +// readDirectoryFunc is a function type for ReadDirectory implementations +type readDirectoryFunc func(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string + +// readDirectoryImplementations contains all implementations to test +var readDirectoryImplementations = []struct { + name string + fn readDirectoryFunc +}{ + {"Regex", vfsmatch.ReadDirectory}, + {"NoRegex", vfsmatch.ReadDirectoryNoRegex}, +} + // caseInsensitiveHost simulates a Windows-like file system func caseInsensitiveHost() vfs.FS { return vfstest.FromMap(map[string]string{ @@ -148,7 +160,7 @@ type readDirTestCase struct { expect func(t *testing.T, got []string) } -func runReadDirectoryCase(t *testing.T, tc readDirTestCase) { +func runReadDirectoryCase(t *testing.T, tc readDirTestCase, readDir readDirectoryFunc) { currentDir := tc.currentDir if currentDir == "" { currentDir = "/" @@ -663,10 +675,12 @@ func TestReadDirectory(t *testing.T) { } for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - runReadDirectoryCase(t, tc) - }) + for _, impl := range readDirectoryImplementations { + t.Run(impl.name+"/"+tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc, impl.fn) + }) + } } } @@ -886,10 +900,12 @@ func TestReadDirectoryEdgeCases(t *testing.T) { } for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - runReadDirectoryCase(t, tc) - }) + for _, impl := range readDirectoryImplementations { + t.Run(impl.name+"/"+tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc, impl.fn) + }) + } } } @@ -917,10 +933,12 @@ func TestReadDirectoryEmptyIncludes(t *testing.T) { } for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - runReadDirectoryCase(t, tc) - }) + for _, impl := range readDirectoryImplementations { + t.Run(impl.name+"/"+tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc, impl.fn) + }) + } } } @@ -951,10 +969,12 @@ func TestReadDirectorySymlinkCycle(t *testing.T) { } for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - runReadDirectoryCase(t, tc) - }) + for _, impl := range readDirectoryImplementations { + t.Run(impl.name+"/"+tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc, impl.fn) + }) + } } } @@ -1221,9 +1241,11 @@ func TestReadDirectoryMatchesTypeScriptBaselines(t *testing.T) { } for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - runReadDirectoryCase(t, tc) - }) + for _, impl := range readDirectoryImplementations { + t.Run(impl.name+"/"+tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc, impl.fn) + }) + } } } From e08673a9e72dbb31575aa425f7df2ba80ea90bd0 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 09:47:30 -0800 Subject: [PATCH 04/41] Start splitting apart --- internal/vfs/vfsmatch/bench_test.go | 8 +- 
internal/vfs/vfsmatch/export_test.go | 12 + .../vfs/vfsmatch/{globmatch.go => new.go} | 5 - internal/vfs/vfsmatch/old.go | 463 +++++++++++++++++ internal/vfs/vfsmatch/vfsmatch.go | 473 +----------------- internal/vfs/vfsmatch/vfsmatch_test.go | 4 +- 6 files changed, 485 insertions(+), 480 deletions(-) create mode 100644 internal/vfs/vfsmatch/export_test.go rename internal/vfs/vfsmatch/{globmatch.go => new.go} (98%) create mode 100644 internal/vfs/vfsmatch/old.go diff --git a/internal/vfs/vfsmatch/bench_test.go b/internal/vfs/vfsmatch/bench_test.go index 78bdd83b42..08aabc2317 100644 --- a/internal/vfs/vfsmatch/bench_test.go +++ b/internal/vfs/vfsmatch/bench_test.go @@ -94,19 +94,19 @@ func BenchmarkReadDirectory(b *testing.B) { } for _, bc := range benchCases { - b.Run("Regex/"+bc.name, func(b *testing.B) { + b.Run("Old/"+bc.name, func(b *testing.B) { host := bc.host() b.ResetTimer() for range b.N { - vfsmatch.ReadDirectoryRegex(host, "/", bc.path, bc.extensions, bc.excludes, bc.includes, nil) + vfsmatch.ReadDirectoryOld(host, "/", bc.path, bc.extensions, bc.excludes, bc.includes, nil) } }) - b.Run("NoRegex/"+bc.name, func(b *testing.B) { + b.Run("New/"+bc.name, func(b *testing.B) { host := bc.host() b.ResetTimer() for range b.N { - vfsmatch.ReadDirectoryNoRegex(host, "/", bc.path, bc.extensions, bc.excludes, bc.includes, nil) + vfsmatch.ReadDirectoryNew(host, "/", bc.path, bc.extensions, bc.excludes, bc.includes, nil) } }) } diff --git a/internal/vfs/vfsmatch/export_test.go b/internal/vfs/vfsmatch/export_test.go new file mode 100644 index 0000000000..654626c266 --- /dev/null +++ b/internal/vfs/vfsmatch/export_test.go @@ -0,0 +1,12 @@ +package vfsmatch + +import "github.com/microsoft/typescript-go/internal/vfs" + + +func ReadDirectoryNew(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { + return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) +} + +func ReadDirectoryOld(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { + return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) +} diff --git a/internal/vfs/vfsmatch/globmatch.go b/internal/vfs/vfsmatch/new.go similarity index 98% rename from internal/vfs/vfsmatch/globmatch.go rename to internal/vfs/vfsmatch/new.go index daa75fccca..a7c729f740 100644 --- a/internal/vfs/vfsmatch/globmatch.go +++ b/internal/vfs/vfsmatch/new.go @@ -623,8 +623,3 @@ func matchFilesNoRegex(path string, extensions []string, excludes []string, incl return core.Flatten(results) } - -// ReadDirectoryNoRegex is the regex-free version of ReadDirectory -func ReadDirectoryNoRegex(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { - return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) -} diff --git a/internal/vfs/vfsmatch/old.go b/internal/vfs/vfsmatch/old.go new file mode 100644 index 0000000000..7d50b38d9b --- /dev/null +++ b/internal/vfs/vfsmatch/old.go @@ -0,0 +1,463 @@ +package vfsmatch + +import ( + "fmt" + "regexp" + "sort" + "strings" + "sync" + + "github.com/dlclark/regexp2" + "github.com/microsoft/typescript-go/internal/collections" + "github.com/microsoft/typescript-go/internal/core" + "github.com/microsoft/typescript-go/internal/stringutil" + 
"github.com/microsoft/typescript-go/internal/tspath" + "github.com/microsoft/typescript-go/internal/vfs" +) + +type FileMatcherPatterns struct { + // One pattern for each "include" spec. + includeFilePatterns []string + // One pattern matching one of any of the "include" specs. + includeFilePattern string + includeDirectoryPattern string + excludePattern string + basePaths []string +} + +type Usage string + +const ( + UsageFiles Usage = "files" + UsageDirectories Usage = "directories" + UsageExclude Usage = "exclude" +) + +func GetRegularExpressionsForWildcards(specs []string, basePath string, usage Usage) []string { + if len(specs) == 0 { + return nil + } + return core.Map(specs, func(spec string) string { + return GetSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) + }) +} + +func GetRegularExpressionForWildcard(specs []string, basePath string, usage Usage) string { + patterns := GetRegularExpressionsForWildcards(specs, basePath, usage) + if len(patterns) == 0 { + return "" + } + + mappedPatterns := make([]string, len(patterns)) + for i, pattern := range patterns { + mappedPatterns[i] = fmt.Sprintf("(%s)", pattern) + } + pattern := strings.Join(mappedPatterns, "|") + + // If excluding, match "foo/bar/baz...", but if including, only allow "foo". + var terminator string + if usage == "exclude" { + terminator = "($|/)" + } else { + terminator = "$" + } + return fmt.Sprintf("^(%s)%s", pattern, terminator) +} + +func replaceWildcardCharacter(match string, singleAsteriskRegexFragment string) string { + if match == "*" { + return singleAsteriskRegexFragment + } else { + if match == "?" { + return "[^/]" + } else { + return "\\" + match + } + } +} + +// An "includes" path "foo" is implicitly a glob "foo/** /*" (without the space) if its last component has no extension, +// and does not contain any glob characters itself. +func IsImplicitGlob(lastPathComponent string) bool { + return !strings.ContainsAny(lastPathComponent, ".*?") +} + +// Reserved characters - only escape actual regex metacharacters. +// Go's regexp doesn't support \x escape sequences for arbitrary characters, +// so we only escape characters that have special meaning in regex. +var ( + reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.\+*?()\[\]{}^$|#]`) + wildcardCharCodes = []rune{'*', '?'} +) + +var ( + commonPackageFolders = []string{"node_modules", "bower_components", "jspm_packages"} + implicitExcludePathRegexPattern = "(?!(" + strings.Join(commonPackageFolders, "|") + ")(/|$))" +) + +type WildcardMatcher struct { + singleAsteriskRegexFragment string + doubleAsteriskRegexFragment string + replaceWildcardCharacter func(match string) string +} + +const ( + // Matches any single directory segment unless it is the last segment and a .min.js file + // Breakdown: + // + // [^./] # matches everything up to the first . character (excluding directory separators) + // (\\.(?!min\\.js$))? # matches . characters but not if they are part of the .min.js file extension + singleAsteriskRegexFragmentFilesMatcher = "([^./]|(\\.(?!min\\.js$))?)*" + singleAsteriskRegexFragment = "[^/]*" +) + +var filesMatcher = WildcardMatcher{ + singleAsteriskRegexFragment: singleAsteriskRegexFragmentFilesMatcher, + // Regex for the ** wildcard. Matches any number of subdirectories. When used for including + // files or directories, does not match subdirectories that start with a . 
character + doubleAsteriskRegexFragment: "(/" + implicitExcludePathRegexPattern + "[^/.][^/]*)*?", + replaceWildcardCharacter: func(match string) string { + return replaceWildcardCharacter(match, singleAsteriskRegexFragmentFilesMatcher) + }, +} + +var directoriesMatcher = WildcardMatcher{ + singleAsteriskRegexFragment: singleAsteriskRegexFragment, + // Regex for the ** wildcard. Matches any number of subdirectories. When used for including + // files or directories, does not match subdirectories that start with a . character + doubleAsteriskRegexFragment: "(/" + implicitExcludePathRegexPattern + "[^/.][^/]*)*?", + replaceWildcardCharacter: func(match string) string { + return replaceWildcardCharacter(match, singleAsteriskRegexFragment) + }, +} + +var excludeMatcher = WildcardMatcher{ + singleAsteriskRegexFragment: singleAsteriskRegexFragment, + doubleAsteriskRegexFragment: "(/.+?)?", + replaceWildcardCharacter: func(match string) string { + return replaceWildcardCharacter(match, singleAsteriskRegexFragment) + }, +} + +var wildcardMatchers = map[Usage]WildcardMatcher{ + UsageFiles: filesMatcher, + UsageDirectories: directoriesMatcher, + UsageExclude: excludeMatcher, +} + +func GetPatternFromSpec( + spec string, + basePath string, + usage Usage, +) string { + pattern := GetSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) + if pattern == "" { + return "" + } + ending := core.IfElse(usage == "exclude", "($|/)", "$") + return fmt.Sprintf("^(%s)%s", pattern, ending) +} + +func GetSubPatternFromSpec( + spec string, + basePath string, + usage Usage, + matcher WildcardMatcher, +) string { + matcher = wildcardMatchers[usage] + + replaceWildcardCharacter := matcher.replaceWildcardCharacter + + var subpattern strings.Builder + hasWrittenComponent := false + components := tspath.GetNormalizedPathComponents(spec, basePath) + lastComponent := core.LastOrNil(components) + if usage != "exclude" && lastComponent == "**" { + return "" + } + + // getNormalizedPathComponents includes the separator for the root component. + // We need to remove to create our regex correctly. + components[0] = tspath.RemoveTrailingDirectorySeparator(components[0]) + + if IsImplicitGlob(lastComponent) { + components = append(components, "**", "*") + } + + optionalCount := 0 + for _, component := range components { + if component == "**" { + subpattern.WriteString(matcher.doubleAsteriskRegexFragment) + } else { + if usage == "directories" { + subpattern.WriteString("(") + optionalCount++ + } + + if hasWrittenComponent { + subpattern.WriteRune(tspath.DirectorySeparator) + } + + if usage != "exclude" { + var componentPattern strings.Builder + if strings.HasPrefix(component, "*") { + componentPattern.WriteString("([^./]" + matcher.singleAsteriskRegexFragment + ")?") + component = component[1:] + } else if strings.HasPrefix(component, "?") { + componentPattern.WriteString("[^./]") + component = component[1:] + } + componentPattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter)) + + // Patterns should not include subfolders like node_modules unless they are + // explicitly included as part of the path. + // + // As an optimization, if the component pattern is the same as the component, + // then there definitely were no wildcard characters and we do not need to + // add the exclusion pattern. 
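// (Editorial note, not part of this patch: in practice this means an include
// such as "**/*.ts" will not descend into node_modules, bower_components, or
// jspm_packages, while a spec that names the folder explicitly, e.g.
// "node_modules/foo/*.d.ts", matches that component literally and still works.)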
+ if componentPattern.String() != component { + subpattern.WriteString(implicitExcludePathRegexPattern) + } + subpattern.WriteString(componentPattern.String()) + } else { + subpattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter)) + } + } + hasWrittenComponent = true + } + + for optionalCount > 0 { + subpattern.WriteString(")?") + optionalCount-- + } + + return subpattern.String() +} + +func getIncludeBasePath(absolute string) string { + wildcardOffset := strings.IndexAny(absolute, string(wildcardCharCodes)) + if wildcardOffset < 0 { + // No "*" or "?" in the path + if !tspath.HasExtension(absolute) { + return absolute + } else { + return tspath.RemoveTrailingDirectorySeparator(tspath.GetDirectoryPath(absolute)) + } + } + return absolute[:max(strings.LastIndex(absolute[:wildcardOffset], string(tspath.DirectorySeparator)), 0)] +} + +// getBasePaths computes the unique non-wildcard base paths amongst the provided include patterns. +func getBasePaths(path string, includes []string, useCaseSensitiveFileNames bool) []string { + // Storage for our results in the form of literal paths (e.g. the paths as written by the user). + basePaths := []string{path} + + if len(includes) > 0 { + // Storage for literal base paths amongst the include patterns. + includeBasePaths := []string{} + for _, include := range includes { + // We also need to check the relative paths by converting them to absolute and normalizing + // in case they escape the base path (e.g "..\somedirectory") + var absolute string + if tspath.IsRootedDiskPath(include) { + absolute = include + } else { + absolute = tspath.NormalizePath(tspath.CombinePaths(path, include)) + } + // Append the literal and canonical candidate base paths. + includeBasePaths = append(includeBasePaths, getIncludeBasePath(absolute)) + } + + // Sort the offsets array using either the literal or canonical path representations. + stringComparer := stringutil.GetStringComparer(!useCaseSensitiveFileNames) + sort.SliceStable(includeBasePaths, func(i, j int) bool { + return stringComparer(includeBasePaths[i], includeBasePaths[j]) < 0 + }) + + // Iterate over each include base path and include unique base paths that are not a + // subpath of an existing base path + for _, includeBasePath := range includeBasePaths { + if core.Every(basePaths, func(basepath string) bool { + return !tspath.ContainsPath(basepath, includeBasePath, tspath.ComparePathsOptions{CurrentDirectory: path, UseCaseSensitiveFileNames: !useCaseSensitiveFileNames}) + }) { + basePaths = append(basePaths, includeBasePath) + } + } + } + + return basePaths +} + +// getFileMatcherPatterns generates file matching patterns based on the provided path, +// includes, excludes, and other parameters. path is the directory of the tsconfig.json file. 
+func getFileMatcherPatterns(path string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string) FileMatcherPatterns { + path = tspath.NormalizePath(path) + currentDirectory = tspath.NormalizePath(currentDirectory) + absolutePath := tspath.CombinePaths(currentDirectory, path) + + return FileMatcherPatterns{ + includeFilePatterns: core.Map(GetRegularExpressionsForWildcards(includes, absolutePath, "files"), func(pattern string) string { return "^" + pattern + "$" }), + includeFilePattern: GetRegularExpressionForWildcard(includes, absolutePath, "files"), + includeDirectoryPattern: GetRegularExpressionForWildcard(includes, absolutePath, "directories"), + excludePattern: GetRegularExpressionForWildcard(excludes, absolutePath, "exclude"), + basePaths: getBasePaths(path, includes, useCaseSensitiveFileNames), + } +} + +type regexp2CacheKey struct { + pattern string + opts regexp2.RegexOptions +} + +var ( + regexp2CacheMu sync.RWMutex + regexp2Cache = make(map[regexp2CacheKey]*regexp2.Regexp) +) + +func GetRegexFromPattern(pattern string, useCaseSensitiveFileNames bool) *regexp2.Regexp { + opts := regexp2.RegexOptions(regexp2.ECMAScript) + if !useCaseSensitiveFileNames { + opts |= regexp2.IgnoreCase + } + + key := regexp2CacheKey{pattern, opts} + + regexp2CacheMu.RLock() + re, ok := regexp2Cache[key] + regexp2CacheMu.RUnlock() + if ok { + return re + } + + regexp2CacheMu.Lock() + defer regexp2CacheMu.Unlock() + + re, ok = regexp2Cache[key] + if ok { + return re + } + + // Avoid infinite growth; may cause thrashing but no worse than not caching at all. + if len(regexp2Cache) > 1000 { + clear(regexp2Cache) + } + + // Avoid holding onto the pattern string, since this may pin a full config file in memory. + pattern = strings.Clone(pattern) + key.pattern = pattern + + re = regexp2.MustCompile(pattern, opts) + regexp2Cache[key] = re + return re +} + +type visitor struct { + includeFileRegexes []*regexp2.Regexp + excludeRegex *regexp2.Regexp + includeDirectoryRegex *regexp2.Regexp + extensions []string + useCaseSensitiveFileNames bool + host vfs.FS + visited collections.Set[string] + results [][]string +} + +func (v *visitor) visitDirectory( + path string, + absolutePath string, + depth *int, +) { + // Use the real path (with symlinks resolved) for cycle detection. + // This prevents infinite loops when symlinks create cycles. 
+ realPath := v.host.Realpath(absolutePath) + canonicalPath := tspath.GetCanonicalFileName(realPath, v.useCaseSensitiveFileNames) + if v.visited.Has(canonicalPath) { + return + } + v.visited.Add(canonicalPath) + systemEntries := v.host.GetAccessibleEntries(absolutePath) + files := systemEntries.Files + directories := systemEntries.Directories + + for _, current := range files { + name := tspath.CombinePaths(path, current) + absoluteName := tspath.CombinePaths(absolutePath, current) + if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(name, v.extensions) { + continue + } + if v.excludeRegex != nil && core.Must(v.excludeRegex.MatchString(absoluteName)) { + continue + } + if v.includeFileRegexes == nil { + (v.results)[0] = append((v.results)[0], name) + } else { + includeIndex := core.FindIndex(v.includeFileRegexes, func(re *regexp2.Regexp) bool { return core.Must(re.MatchString(absoluteName)) }) + if includeIndex != -1 { + (v.results)[includeIndex] = append((v.results)[includeIndex], name) + } + } + } + + if depth != nil { + newDepth := *depth - 1 + if newDepth == 0 { + return + } + depth = &newDepth + } + + for _, current := range directories { + name := tspath.CombinePaths(path, current) + absoluteName := tspath.CombinePaths(absolutePath, current) + if (v.includeDirectoryRegex == nil || core.Must(v.includeDirectoryRegex.MatchString(absoluteName))) && (v.excludeRegex == nil || !core.Must(v.excludeRegex.MatchString(absoluteName))) { + v.visitDirectory(name, absoluteName, depth) + } + } +} + +// path is the directory of the tsconfig.json +func matchFiles(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host vfs.FS) []string { + path = tspath.NormalizePath(path) + currentDirectory = tspath.NormalizePath(currentDirectory) + + patterns := getFileMatcherPatterns(path, excludes, includes, useCaseSensitiveFileNames, currentDirectory) + var includeFileRegexes []*regexp2.Regexp + if patterns.includeFilePatterns != nil { + includeFileRegexes = core.Map(patterns.includeFilePatterns, func(pattern string) *regexp2.Regexp { return GetRegexFromPattern(pattern, useCaseSensitiveFileNames) }) + } + var includeDirectoryRegex *regexp2.Regexp + if patterns.includeDirectoryPattern != "" { + includeDirectoryRegex = GetRegexFromPattern(patterns.includeDirectoryPattern, useCaseSensitiveFileNames) + } + var excludeRegex *regexp2.Regexp + if patterns.excludePattern != "" { + excludeRegex = GetRegexFromPattern(patterns.excludePattern, useCaseSensitiveFileNames) + } + + // Associate an array of results with each include regex. This keeps results in order of the "include" order. + // If there are no "includes", then just put everything in results[0]. 
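// (Editorial note, not part of this patch: because each include regex gets its
// own bucket and the buckets are flattened at the end, all matches for the
// first include spec are returned before any matches for the second; e.g. for
// includes ["z/*.ts", "x/*.ts"], every "z/*.ts" match precedes every "x/*.ts"
// match regardless of directory traversal order.)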
+ var results [][]string + if len(includeFileRegexes) > 0 { + tempResults := make([][]string, len(includeFileRegexes)) + for i := range includeFileRegexes { + tempResults[i] = []string{} + } + results = tempResults + } else { + results = [][]string{{}} + } + v := visitor{ + useCaseSensitiveFileNames: useCaseSensitiveFileNames, + host: host, + includeFileRegexes: includeFileRegexes, + excludeRegex: excludeRegex, + includeDirectoryRegex: includeDirectoryRegex, + extensions: extensions, + results: results, + } + for _, basePath := range patterns.basePaths { + v.visitDirectory(basePath, tspath.CombinePaths(currentDirectory, basePath), depth) + } + + return core.Flatten(results) +} diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go index 7d343db469..069d7a2a95 100644 --- a/internal/vfs/vfsmatch/vfsmatch.go +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -1,478 +1,13 @@ package vfsmatch -import ( - "fmt" - "regexp" - "sort" - "strings" - "sync" +import "github.com/microsoft/typescript-go/internal/vfs" - "github.com/dlclark/regexp2" - "github.com/microsoft/typescript-go/internal/collections" - "github.com/microsoft/typescript-go/internal/core" - "github.com/microsoft/typescript-go/internal/stringutil" - "github.com/microsoft/typescript-go/internal/tspath" - "github.com/microsoft/typescript-go/internal/vfs" -) - -type FileMatcherPatterns struct { - // One pattern for each "include" spec. - includeFilePatterns []string - // One pattern matching one of any of the "include" specs. - includeFilePattern string - includeDirectoryPattern string - excludePattern string - basePaths []string -} - -type Usage string - -const ( - UsageFiles Usage = "files" - UsageDirectories Usage = "directories" - UsageExclude Usage = "exclude" -) - -func GetRegularExpressionsForWildcards(specs []string, basePath string, usage Usage) []string { - if len(specs) == 0 { - return nil - } - return core.Map(specs, func(spec string) string { - return GetSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) - }) -} - -func GetRegularExpressionForWildcard(specs []string, basePath string, usage Usage) string { - patterns := GetRegularExpressionsForWildcards(specs, basePath, usage) - if len(patterns) == 0 { - return "" - } - - mappedPatterns := make([]string, len(patterns)) - for i, pattern := range patterns { - mappedPatterns[i] = fmt.Sprintf("(%s)", pattern) - } - pattern := strings.Join(mappedPatterns, "|") - - // If excluding, match "foo/bar/baz...", but if including, only allow "foo". - var terminator string - if usage == "exclude" { - terminator = "($|/)" - } else { - terminator = "$" - } - return fmt.Sprintf("^(%s)%s", pattern, terminator) -} - -func replaceWildcardCharacter(match string, singleAsteriskRegexFragment string) string { - if match == "*" { - return singleAsteriskRegexFragment - } else { - if match == "?" { - return "[^/]" - } else { - return "\\" + match - } - } -} - -// An "includes" path "foo" is implicitly a glob "foo/** /*" (without the space) if its last component has no extension, -// and does not contain any glob characters itself. -func IsImplicitGlob(lastPathComponent string) bool { - return !strings.ContainsAny(lastPathComponent, ".*?") -} - -// Reserved characters - only escape actual regex metacharacters. -// Go's regexp doesn't support \x escape sequences for arbitrary characters, -// so we only escape characters that have special meaning in regex. 
-var ( - reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.\+*?()\[\]{}^$|#]`) - wildcardCharCodes = []rune{'*', '?'} -) - -var ( - commonPackageFolders = []string{"node_modules", "bower_components", "jspm_packages"} - implicitExcludePathRegexPattern = "(?!(" + strings.Join(commonPackageFolders, "|") + ")(/|$))" -) - -type WildcardMatcher struct { - singleAsteriskRegexFragment string - doubleAsteriskRegexFragment string - replaceWildcardCharacter func(match string) string -} - -const ( - // Matches any single directory segment unless it is the last segment and a .min.js file - // Breakdown: - // - // [^./] # matches everything up to the first . character (excluding directory separators) - // (\\.(?!min\\.js$))? # matches . characters but not if they are part of the .min.js file extension - singleAsteriskRegexFragmentFilesMatcher = "([^./]|(\\.(?!min\\.js$))?)*" - singleAsteriskRegexFragment = "[^/]*" -) - -var filesMatcher = WildcardMatcher{ - singleAsteriskRegexFragment: singleAsteriskRegexFragmentFilesMatcher, - // Regex for the ** wildcard. Matches any number of subdirectories. When used for including - // files or directories, does not match subdirectories that start with a . character - doubleAsteriskRegexFragment: "(/" + implicitExcludePathRegexPattern + "[^/.][^/]*)*?", - replaceWildcardCharacter: func(match string) string { - return replaceWildcardCharacter(match, singleAsteriskRegexFragmentFilesMatcher) - }, -} - -var directoriesMatcher = WildcardMatcher{ - singleAsteriskRegexFragment: singleAsteriskRegexFragment, - // Regex for the ** wildcard. Matches any number of subdirectories. When used for including - // files or directories, does not match subdirectories that start with a . character - doubleAsteriskRegexFragment: "(/" + implicitExcludePathRegexPattern + "[^/.][^/]*)*?", - replaceWildcardCharacter: func(match string) string { - return replaceWildcardCharacter(match, singleAsteriskRegexFragment) - }, -} - -var excludeMatcher = WildcardMatcher{ - singleAsteriskRegexFragment: singleAsteriskRegexFragment, - doubleAsteriskRegexFragment: "(/.+?)?", - replaceWildcardCharacter: func(match string) string { - return replaceWildcardCharacter(match, singleAsteriskRegexFragment) - }, -} - -var wildcardMatchers = map[Usage]WildcardMatcher{ - UsageFiles: filesMatcher, - UsageDirectories: directoriesMatcher, - UsageExclude: excludeMatcher, -} - -func GetPatternFromSpec( - spec string, - basePath string, - usage Usage, -) string { - pattern := GetSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) - if pattern == "" { - return "" - } - ending := core.IfElse(usage == "exclude", "($|/)", "$") - return fmt.Sprintf("^(%s)%s", pattern, ending) -} - -func GetSubPatternFromSpec( - spec string, - basePath string, - usage Usage, - matcher WildcardMatcher, -) string { - matcher = wildcardMatchers[usage] - - replaceWildcardCharacter := matcher.replaceWildcardCharacter - - var subpattern strings.Builder - hasWrittenComponent := false - components := tspath.GetNormalizedPathComponents(spec, basePath) - lastComponent := core.LastOrNil(components) - if usage != "exclude" && lastComponent == "**" { - return "" - } - - // getNormalizedPathComponents includes the separator for the root component. - // We need to remove to create our regex correctly. 
- components[0] = tspath.RemoveTrailingDirectorySeparator(components[0]) - - if IsImplicitGlob(lastComponent) { - components = append(components, "**", "*") - } - - optionalCount := 0 - for _, component := range components { - if component == "**" { - subpattern.WriteString(matcher.doubleAsteriskRegexFragment) - } else { - if usage == "directories" { - subpattern.WriteString("(") - optionalCount++ - } - - if hasWrittenComponent { - subpattern.WriteRune(tspath.DirectorySeparator) - } - - if usage != "exclude" { - var componentPattern strings.Builder - if strings.HasPrefix(component, "*") { - componentPattern.WriteString("([^./]" + matcher.singleAsteriskRegexFragment + ")?") - component = component[1:] - } else if strings.HasPrefix(component, "?") { - componentPattern.WriteString("[^./]") - component = component[1:] - } - componentPattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter)) - - // Patterns should not include subfolders like node_modules unless they are - // explicitly included as part of the path. - // - // As an optimization, if the component pattern is the same as the component, - // then there definitely were no wildcard characters and we do not need to - // add the exclusion pattern. - if componentPattern.String() != component { - subpattern.WriteString(implicitExcludePathRegexPattern) - } - subpattern.WriteString(componentPattern.String()) - } else { - subpattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter)) - } - } - hasWrittenComponent = true - } - - for optionalCount > 0 { - subpattern.WriteString(")?") - optionalCount-- - } - - return subpattern.String() -} - -func getIncludeBasePath(absolute string) string { - wildcardOffset := strings.IndexAny(absolute, string(wildcardCharCodes)) - if wildcardOffset < 0 { - // No "*" or "?" in the path - if !tspath.HasExtension(absolute) { - return absolute - } else { - return tspath.RemoveTrailingDirectorySeparator(tspath.GetDirectoryPath(absolute)) - } - } - return absolute[:max(strings.LastIndex(absolute[:wildcardOffset], string(tspath.DirectorySeparator)), 0)] -} - -// getBasePaths computes the unique non-wildcard base paths amongst the provided include patterns. -func getBasePaths(path string, includes []string, useCaseSensitiveFileNames bool) []string { - // Storage for our results in the form of literal paths (e.g. the paths as written by the user). - basePaths := []string{path} - - if len(includes) > 0 { - // Storage for literal base paths amongst the include patterns. - includeBasePaths := []string{} - for _, include := range includes { - // We also need to check the relative paths by converting them to absolute and normalizing - // in case they escape the base path (e.g "..\somedirectory") - var absolute string - if tspath.IsRootedDiskPath(include) { - absolute = include - } else { - absolute = tspath.NormalizePath(tspath.CombinePaths(path, include)) - } - // Append the literal and canonical candidate base paths. - includeBasePaths = append(includeBasePaths, getIncludeBasePath(absolute)) - } - - // Sort the offsets array using either the literal or canonical path representations. 
- stringComparer := stringutil.GetStringComparer(!useCaseSensitiveFileNames) - sort.SliceStable(includeBasePaths, func(i, j int) bool { - return stringComparer(includeBasePaths[i], includeBasePaths[j]) < 0 - }) - - // Iterate over each include base path and include unique base paths that are not a - // subpath of an existing base path - for _, includeBasePath := range includeBasePaths { - if core.Every(basePaths, func(basepath string) bool { - return !tspath.ContainsPath(basepath, includeBasePath, tspath.ComparePathsOptions{CurrentDirectory: path, UseCaseSensitiveFileNames: !useCaseSensitiveFileNames}) - }) { - basePaths = append(basePaths, includeBasePath) - } - } - } - - return basePaths -} - -// getFileMatcherPatterns generates file matching patterns based on the provided path, -// includes, excludes, and other parameters. path is the directory of the tsconfig.json file. -func getFileMatcherPatterns(path string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string) FileMatcherPatterns { - path = tspath.NormalizePath(path) - currentDirectory = tspath.NormalizePath(currentDirectory) - absolutePath := tspath.CombinePaths(currentDirectory, path) - - return FileMatcherPatterns{ - includeFilePatterns: core.Map(GetRegularExpressionsForWildcards(includes, absolutePath, "files"), func(pattern string) string { return "^" + pattern + "$" }), - includeFilePattern: GetRegularExpressionForWildcard(includes, absolutePath, "files"), - includeDirectoryPattern: GetRegularExpressionForWildcard(includes, absolutePath, "directories"), - excludePattern: GetRegularExpressionForWildcard(excludes, absolutePath, "exclude"), - basePaths: getBasePaths(path, includes, useCaseSensitiveFileNames), - } -} - -type regexp2CacheKey struct { - pattern string - opts regexp2.RegexOptions -} - -var ( - regexp2CacheMu sync.RWMutex - regexp2Cache = make(map[regexp2CacheKey]*regexp2.Regexp) -) - -func GetRegexFromPattern(pattern string, useCaseSensitiveFileNames bool) *regexp2.Regexp { - opts := regexp2.RegexOptions(regexp2.ECMAScript) - if !useCaseSensitiveFileNames { - opts |= regexp2.IgnoreCase - } - - key := regexp2CacheKey{pattern, opts} - - regexp2CacheMu.RLock() - re, ok := regexp2Cache[key] - regexp2CacheMu.RUnlock() - if ok { - return re - } - - regexp2CacheMu.Lock() - defer regexp2CacheMu.Unlock() - - re, ok = regexp2Cache[key] - if ok { - return re - } - - // Avoid infinite growth; may cause thrashing but no worse than not caching at all. - if len(regexp2Cache) > 1000 { - clear(regexp2Cache) - } - - // Avoid holding onto the pattern string, since this may pin a full config file in memory. - pattern = strings.Clone(pattern) - key.pattern = pattern - - re = regexp2.MustCompile(pattern, opts) - regexp2Cache[key] = re - return re -} - -type visitor struct { - includeFileRegexes []*regexp2.Regexp - excludeRegex *regexp2.Regexp - includeDirectoryRegex *regexp2.Regexp - extensions []string - useCaseSensitiveFileNames bool - host vfs.FS - visited collections.Set[string] - results [][]string -} - -func (v *visitor) visitDirectory( - path string, - absolutePath string, - depth *int, -) { - // Use the real path (with symlinks resolved) for cycle detection. - // This prevents infinite loops when symlinks create cycles. 
- realPath := v.host.Realpath(absolutePath) - canonicalPath := tspath.GetCanonicalFileName(realPath, v.useCaseSensitiveFileNames) - if v.visited.Has(canonicalPath) { - return - } - v.visited.Add(canonicalPath) - systemEntries := v.host.GetAccessibleEntries(absolutePath) - files := systemEntries.Files - directories := systemEntries.Directories - - for _, current := range files { - name := tspath.CombinePaths(path, current) - absoluteName := tspath.CombinePaths(absolutePath, current) - if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(name, v.extensions) { - continue - } - if v.excludeRegex != nil && core.Must(v.excludeRegex.MatchString(absoluteName)) { - continue - } - if v.includeFileRegexes == nil { - (v.results)[0] = append((v.results)[0], name) - } else { - includeIndex := core.FindIndex(v.includeFileRegexes, func(re *regexp2.Regexp) bool { return core.Must(re.MatchString(absoluteName)) }) - if includeIndex != -1 { - (v.results)[includeIndex] = append((v.results)[includeIndex], name) - } - } - } - - if depth != nil { - newDepth := *depth - 1 - if newDepth == 0 { - return - } - depth = &newDepth - } - - for _, current := range directories { - name := tspath.CombinePaths(path, current) - absoluteName := tspath.CombinePaths(absolutePath, current) - if (v.includeDirectoryRegex == nil || core.Must(v.includeDirectoryRegex.MatchString(absoluteName))) && (v.excludeRegex == nil || !core.Must(v.excludeRegex.MatchString(absoluteName))) { - v.visitDirectory(name, absoluteName, depth) - } - } -} - -// path is the directory of the tsconfig.json -func matchFiles(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host vfs.FS) []string { - path = tspath.NormalizePath(path) - currentDirectory = tspath.NormalizePath(currentDirectory) - - patterns := getFileMatcherPatterns(path, excludes, includes, useCaseSensitiveFileNames, currentDirectory) - var includeFileRegexes []*regexp2.Regexp - if patterns.includeFilePatterns != nil { - includeFileRegexes = core.Map(patterns.includeFilePatterns, func(pattern string) *regexp2.Regexp { return GetRegexFromPattern(pattern, useCaseSensitiveFileNames) }) - } - var includeDirectoryRegex *regexp2.Regexp - if patterns.includeDirectoryPattern != "" { - includeDirectoryRegex = GetRegexFromPattern(patterns.includeDirectoryPattern, useCaseSensitiveFileNames) - } - var excludeRegex *regexp2.Regexp - if patterns.excludePattern != "" { - excludeRegex = GetRegexFromPattern(patterns.excludePattern, useCaseSensitiveFileNames) - } - - // Associate an array of results with each include regex. This keeps results in order of the "include" order. - // If there are no "includes", then just put everything in results[0]. - var results [][]string - if len(includeFileRegexes) > 0 { - tempResults := make([][]string, len(includeFileRegexes)) - for i := range includeFileRegexes { - tempResults[i] = []string{} - } - results = tempResults - } else { - results = [][]string{{}} - } - v := visitor{ - useCaseSensitiveFileNames: useCaseSensitiveFileNames, - host: host, - includeFileRegexes: includeFileRegexes, - excludeRegex: excludeRegex, - includeDirectoryRegex: includeDirectoryRegex, - extensions: extensions, - results: results, - } - for _, basePath := range patterns.basePaths { - v.visitDirectory(basePath, tspath.CombinePaths(currentDirectory, basePath), depth) - } - - return core.Flatten(results) -} - -// useGlobMatch controls whether to use the regex-free glob matching implementation. 
-const useGlobMatch = true +// newNewMatch controls whether to use the regex-free glob matching implementation. +const newNewMatch = true func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { - if useGlobMatch { + if newNewMatch { return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) } return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) } - -// ReadDirectoryRegex is the regex-based implementation, exported for benchmarking. -func ReadDirectoryRegex(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { - return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) -} diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go index 345907084d..91de7f56da 100644 --- a/internal/vfs/vfsmatch/vfsmatch_test.go +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -25,8 +25,8 @@ var readDirectoryImplementations = []struct { name string fn readDirectoryFunc }{ - {"Regex", vfsmatch.ReadDirectory}, - {"NoRegex", vfsmatch.ReadDirectoryNoRegex}, + {"Old", vfsmatch.ReadDirectoryOld}, + {"New", vfsmatch.ReadDirectoryNew}, } // caseInsensitiveHost simulates a Windows-like file system From 97046feed555827be720e689f36f1c8a98a8f3b3 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 10:06:07 -0800 Subject: [PATCH 05/41] fmt --- internal/vfs/vfsmatch/export_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/vfs/vfsmatch/export_test.go b/internal/vfs/vfsmatch/export_test.go index 654626c266..ba2ee6e485 100644 --- a/internal/vfs/vfsmatch/export_test.go +++ b/internal/vfs/vfsmatch/export_test.go @@ -2,9 +2,8 @@ package vfsmatch import "github.com/microsoft/typescript-go/internal/vfs" - func ReadDirectoryNew(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { - return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) + return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) } func ReadDirectoryOld(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { From c8ead74098383b8a07b3bc5a04666d48c435b167 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 10:09:30 -0800 Subject: [PATCH 06/41] perf optimizations --- internal/vfs/vfsmatch/new.go | 116 +++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 52 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index a7c729f740..b43f90f808 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -143,12 +143,8 @@ func (p *GlobPattern) Matches(path string) bool { return false } - // Split the path into components - pathComponents := splitPath(path) - - matched := p.matchComponents(pathComponents, 0, 0, false) - - return matched + // Use iterator-based matching to avoid slice allocation + return p.matchPath(path, 0, false) } // MatchesPrefix checks if the given directory path could potentially match files under 
it. @@ -158,50 +154,63 @@ func (p *GlobPattern) MatchesPrefix(path string) bool { return false } - pathComponents := splitPath(path) - - return p.matchComponentsPrefix(pathComponents, 0, 0) + return p.matchPathPrefix(path, 0) } -// splitPath splits a path into its components -func splitPath(path string) []string { - // Handle the case of an absolute path - if len(path) > 0 && path[0] == '/' { - rest := strings.Split(strings.TrimPrefix(path, "/"), "/") - // Prepend empty string to represent root - result := make([]string, 0, len(rest)+1) - result = append(result, "") - for _, s := range rest { - if s != "" { - result = append(result, s) - } - } - return result +// nextPathComponent extracts the next path component from path starting at offset. +// Returns the component, the offset after this component (pointing to char after '/' or len(path)), and whether a component was found. +func nextPathComponent(path string, offset int) (component string, nextOffset int, found bool) { + if offset >= len(path) { + return "", offset, false } - parts := strings.Split(path, "/") - result := make([]string, 0, len(parts)) - for _, s := range parts { - if s != "" { - result = append(result, s) - } + // Handle leading slash for absolute paths - return empty string for root + if offset == 0 && path[0] == '/' { + return "", 1, true + } + + // Skip any leading slashes (for cases like after root) + for offset < len(path) && path[offset] == '/' { + offset++ + } + + if offset >= len(path) { + return "", offset, false } - return result + + // Find the end of this component + start := offset + for offset < len(path) && path[offset] != '/' { + offset++ + } + + return path[start:offset], offset, true +} + +// matchPath matches the path against pattern components starting at patternIdx. +// pathOffset is the current position in the path string. +func (p *GlobPattern) matchPath(path string, patternIdx int, inDoubleAsterisk bool) bool { + // Bootstrap: handle the path from the beginning + return p.matchPathAt(path, 0, patternIdx, inDoubleAsterisk) } -// matchComponents recursively matches path components against pattern components -func (p *GlobPattern) matchComponents(pathComps []string, pathIdx int, patternIdx int, inDoubleAsterisk bool) bool { - // If we've consumed all pattern components, check if path is also fully consumed +// matchPathAt matches path[pathOffset:] against pattern components starting at patternIdx. 
+func (p *GlobPattern) matchPathAt(path string, pathOffset int, patternIdx int, inDoubleAsterisk bool) bool { + // Get the next path component + pathComp, nextPathOffset, hasMore := nextPathComponent(path, pathOffset) + + // If we've consumed all pattern components if patternIdx >= len(p.components) { if p.isExclude { // For exclude patterns, we can match a prefix return true } - return pathIdx >= len(pathComps) + // Path must also be fully consumed + return !hasMore } // If we've consumed all path components but still have pattern components - if pathIdx >= len(pathComps) { + if !hasMore { // Check if remaining pattern components are all optional (** only) for i := patternIdx; i < len(p.components); i++ { if !p.components[i].isDoubleAsterisk { @@ -212,31 +221,26 @@ func (p *GlobPattern) matchComponents(pathComps []string, pathIdx int, patternId } pc := p.components[patternIdx] - pathComp := pathComps[pathIdx] if pc.isDoubleAsterisk { // ** can match zero or more directory levels // First, try matching zero directories (skip the **) - if p.matchComponents(pathComps, pathIdx, patternIdx+1, true) { + if p.matchPathAt(path, pathOffset, patternIdx+1, true) { return true } // For include patterns, ** should not match directories starting with . or common package folders - // But we still try to skip those directories and continue matching if !p.isExclude { if len(pathComp) > 0 && pathComp[0] == '.' { - // Don't match hidden directories in ** for includes - return false - // The next pattern component (if any) might explicitly match it return false } if isCommonPackageFolder(pathComp) { - // Don't match common package folders in ** for includes return false } } // Match current component with ** and continue - return p.matchComponents(pathComps, pathIdx+1, patternIdx, true) + return p.matchPathAt(path, nextPathOffset, patternIdx, true) } // Check implicit package folder exclusion @@ -250,13 +254,21 @@ func (p *GlobPattern) matchComponents(pathComps []string, pathIdx int, patternId } // Continue to next components - return p.matchComponents(pathComps, pathIdx+1, patternIdx+1, false) + return p.matchPathAt(path, nextPathOffset, patternIdx+1, false) +} + +// matchPathPrefix checks if the path could be a prefix of a matching path. +func (p *GlobPattern) matchPathPrefix(path string, patternIdx int) bool { + return p.matchPathPrefixAt(path, 0, patternIdx) } -// matchComponentsPrefix checks if the path could be a prefix of a matching path -func (p *GlobPattern) matchComponentsPrefix(pathComps []string, pathIdx int, patternIdx int) bool { +// matchPathPrefixAt checks if path[pathOffset:] could be a prefix of a matching path. 
+func (p *GlobPattern) matchPathPrefixAt(path string, pathOffset int, patternIdx int) bool { + // Get the next path component + pathComp, nextPathOffset, hasMore := nextPathComponent(path, pathOffset) + // If we've consumed all path components, this prefix could match - if pathIdx >= len(pathComps) { + if !hasMore { return true } @@ -266,12 +278,11 @@ func (p *GlobPattern) matchComponentsPrefix(pathComps []string, pathIdx int, pat } pc := p.components[patternIdx] - pathComp := pathComps[pathIdx] if pc.isDoubleAsterisk { // ** can match any directory level // Try matching zero (skip **) or more directories - if p.matchComponentsPrefix(pathComps, pathIdx, patternIdx+1) { + if p.matchPathPrefixAt(path, pathOffset, patternIdx+1) { return true } @@ -285,7 +296,7 @@ func (p *GlobPattern) matchComponentsPrefix(pathComps []string, pathIdx int, pat } } - return p.matchComponentsPrefix(pathComps, pathIdx+1, patternIdx) + return p.matchPathPrefixAt(path, nextPathOffset, patternIdx) } // Check implicit package folder exclusion @@ -298,7 +309,7 @@ func (p *GlobPattern) matchComponentsPrefix(pathComps []string, pathIdx int, pat return false } - return p.matchComponentsPrefix(pathComps, pathIdx+1, patternIdx+1) + return p.matchPathPrefixAt(path, nextPathOffset, patternIdx+1) } // matchComponent matches a single path component against a pattern component @@ -428,8 +439,9 @@ func (p *GlobPattern) stringsEqual(a, b string) bool { // isCommonPackageFolder checks if a directory name is a common package folder func isCommonPackageFolder(name string) bool { - lower := strings.ToLower(name) - return lower == "node_modules" || lower == "bower_components" || lower == "jspm_packages" + return strings.EqualFold(name, "node_modules") || + strings.EqualFold(name, "bower_components") || + strings.EqualFold(name, "jspm_packages") } // GlobMatcher holds compiled glob patterns for matching files. 
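
The allocation win in the patch above comes from nextPathComponent: rather than splitting the whole path into a []string up front, the matcher walks it with an integer offset. A minimal standalone sketch of that walk; the function body mirrors the hunk above, while the package wrapper and the sample path are purely illustrative:

package main

import "fmt"

// nextComponent returns the path component starting at offset, the offset just
// past it, and whether a component was found. A leading '/' yields an empty
// component for the root, matching the behavior of nextPathComponent above.
func nextComponent(path string, offset int) (string, int, bool) {
	if offset >= len(path) {
		return "", offset, false
	}
	if offset == 0 && path[0] == '/' {
		return "", 1, true
	}
	for offset < len(path) && path[offset] == '/' {
		offset++
	}
	if offset >= len(path) {
		return "", offset, false
	}
	start := offset
	for offset < len(path) && path[offset] != '/' {
		offset++
	}
	return path[start:offset], offset, true
}

func main() {
	// Walks "/project/src/index.ts" without allocating a slice:
	// prints "", "project", "src", "index.ts" in order.
	for offset, ok := 0, true; ok; {
		var comp string
		comp, offset, ok = nextComponent("/project/src/index.ts", offset)
		if ok {
			fmt.Printf("%q\n", comp)
		}
	}
}
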
From c1c09475113e70666cc29b518db581eeeaa6ad3f Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 10:13:15 -0800 Subject: [PATCH 07/41] fix benchmarks --- internal/vfs/vfsmatch/bench_test.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/internal/vfs/vfsmatch/bench_test.go b/internal/vfs/vfsmatch/bench_test.go index 08aabc2317..3a523d4b65 100644 --- a/internal/vfs/vfsmatch/bench_test.go +++ b/internal/vfs/vfsmatch/bench_test.go @@ -96,16 +96,14 @@ func BenchmarkReadDirectory(b *testing.B) { for _, bc := range benchCases { b.Run("Old/"+bc.name, func(b *testing.B) { host := bc.host() - b.ResetTimer() - for range b.N { + for b.Loop() { vfsmatch.ReadDirectoryOld(host, "/", bc.path, bc.extensions, bc.excludes, bc.includes, nil) } }) b.Run("New/"+bc.name, func(b *testing.B) { host := bc.host() - b.ResetTimer() - for range b.N { + for b.Loop() { vfsmatch.ReadDirectoryNew(host, "/", bc.path, bc.extensions, bc.excludes, bc.includes, nil) } }) @@ -161,7 +159,7 @@ func BenchmarkPatternCompilation(b *testing.B) { for _, p := range patterns { b.Run(p.name, func(b *testing.B) { - for range b.N { + for b.Loop() { vfsmatch.CompileGlobPattern(p.spec, "/project", vfsmatch.UsageFiles, true) } }) @@ -223,7 +221,7 @@ func BenchmarkPatternMatching(b *testing.B) { } b.Run(tc.name, func(b *testing.B) { - for range b.N { + for b.Loop() { for _, path := range tc.paths { pattern.Matches(path) } From e16c972c1c34028959403f76a77e36c1d4393ae0 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 11:30:39 -0800 Subject: [PATCH 08/41] perf optimizations --- internal/vfs/vfsmatch/new.go | 233 ++++++++++++++++++++++------------- 1 file changed, 145 insertions(+), 88 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index b43f90f808..2f7f7e6ff9 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -196,65 +196,72 @@ func (p *GlobPattern) matchPath(path string, patternIdx int, inDoubleAsterisk bo // matchPathAt matches path[pathOffset:] against pattern components starting at patternIdx. 
func (p *GlobPattern) matchPathAt(path string, pathOffset int, patternIdx int, inDoubleAsterisk bool) bool { - // Get the next path component - pathComp, nextPathOffset, hasMore := nextPathComponent(path, pathOffset) - - // If we've consumed all pattern components - if patternIdx >= len(p.components) { - if p.isExclude { - // For exclude patterns, we can match a prefix - return true - } - // Path must also be fully consumed - return !hasMore - } - - // If we've consumed all path components but still have pattern components - if !hasMore { - // Check if remaining pattern components are all optional (** only) - for i := patternIdx; i < len(p.components); i++ { - if !p.components[i].isDoubleAsterisk { - return false + for { + // Get the next path component + pathComp, nextPathOffset, hasMore := nextPathComponent(path, pathOffset) + + // If we've consumed all pattern components + if patternIdx >= len(p.components) { + if p.isExclude { + // For exclude patterns, we can match a prefix + return true } + // Path must also be fully consumed + return !hasMore } - return true - } - pc := p.components[patternIdx] - - if pc.isDoubleAsterisk { - // ** can match zero or more directory levels - // First, try matching zero directories (skip the **) - if p.matchPathAt(path, pathOffset, patternIdx+1, true) { + // If we've consumed all path components but still have pattern components + if !hasMore { + // Check if remaining pattern components are all optional (** only) + for i := patternIdx; i < len(p.components); i++ { + if !p.components[i].isDoubleAsterisk { + return false + } + } return true } - // For include patterns, ** should not match directories starting with . or common package folders - if !p.isExclude { - if len(pathComp) > 0 && pathComp[0] == '.' { - return false + pc := p.components[patternIdx] + + if pc.isDoubleAsterisk { + // ** can match zero or more directory levels + // First, try matching zero directories (skip the **) - this requires recursion + if p.matchPathAt(path, pathOffset, patternIdx+1, true) { + return true } - if isCommonPackageFolder(pathComp) { - return false + + // For include patterns, ** should not match directories starting with . or common package folders + if !p.isExclude { + if len(pathComp) > 0 && pathComp[0] == '.' { + return false + } + if isCommonPackageFolder(pathComp) { + return false + } } + + // Match current component with ** and continue (iterate instead of recurse) + pathOffset = nextPathOffset + // patternIdx stays the same, inDoubleAsterisk stays true + inDoubleAsterisk = true + continue } - // Match current component with ** and continue - return p.matchPathAt(path, nextPathOffset, patternIdx, true) - } + // Check implicit package folder exclusion + if pc.implicitlyExcludePackages && !p.isExclude && isCommonPackageFolder(pathComp) { + return false + } - // Check implicit package folder exclusion - if pc.implicitlyExcludePackages && !p.isExclude && isCommonPackageFolder(pathComp) { - return false - } + // Match current component + if !p.matchComponent(pc, pathComp, inDoubleAsterisk) { + return false + } - // Match current component - if !p.matchComponent(pc, pathComp, inDoubleAsterisk) { - return false + // Continue to next components (iterate instead of recurse) + pathOffset = nextPathOffset + patternIdx++ + inDoubleAsterisk = false } - - // Continue to next components - return p.matchPathAt(path, nextPathOffset, patternIdx+1, false) } // matchPathPrefix checks if the path could be a prefix of a matching path. 
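
The rewrite above keeps recursion only where matching genuinely branches, the zero-directory case of '**', and turns every other step into a loop iteration, so a deep path no longer costs one stack frame per component. The same shape on a much smaller problem, a single-component matcher where '*' matches any run of characters; this is not the package's matcher, only an illustration of the control flow:

package sketch

// matchStar reports whether s matches pat, where '*' matches any run of
// characters. Literal characters are consumed iteratively; recursion is kept
// only for the branching case, the same way matchPathAt handles '**'.
func matchStar(pat, s string) bool {
	for {
		if len(pat) == 0 {
			return len(s) == 0
		}
		if pat[0] == '*' {
			// Branch: '*' matches nothing here (recurse)...
			if matchStar(pat[1:], s) {
				return true
			}
			// ...or it consumes one character and we keep looping.
			if len(s) == 0 {
				return false
			}
			s = s[1:]
			continue
		}
		if len(s) == 0 || pat[0] != s[0] {
			return false
		}
		pat, s = pat[1:], s[1:]
	}
}

For example, matchStar("a*c", "abbc") and matchStar("*", "") are true, while matchStar("a*", "") is false.
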
@@ -264,52 +271,58 @@ func (p *GlobPattern) matchPathPrefix(path string, patternIdx int) bool { // matchPathPrefixAt checks if path[pathOffset:] could be a prefix of a matching path. func (p *GlobPattern) matchPathPrefixAt(path string, pathOffset int, patternIdx int) bool { - // Get the next path component - pathComp, nextPathOffset, hasMore := nextPathComponent(path, pathOffset) - - // If we've consumed all path components, this prefix could match - if !hasMore { - return true - } + for { + // Get the next path component + pathComp, nextPathOffset, hasMore := nextPathComponent(path, pathOffset) - // If we've consumed all pattern components, no more matches possible - if patternIdx >= len(p.components) { - return false - } - - pc := p.components[patternIdx] - - if pc.isDoubleAsterisk { - // ** can match any directory level - // Try matching zero (skip **) or more directories - if p.matchPathPrefixAt(path, pathOffset, patternIdx+1) { + // If we've consumed all path components, this prefix could match + if !hasMore { return true } - // For include patterns, ** should not match hidden or package directories - if !p.isExclude { - if len(pathComp) > 0 && pathComp[0] == '.' { - return false + // If we've consumed all pattern components, no more matches possible + if patternIdx >= len(p.components) { + return false + } + + pc := p.components[patternIdx] + + if pc.isDoubleAsterisk { + // ** can match any directory level + // Try matching zero (skip **) or more directories - needs recursion for branching + if p.matchPathPrefixAt(path, pathOffset, patternIdx+1) { + return true } - if isCommonPackageFolder(pathComp) { - return false + + // For include patterns, ** should not match hidden or package directories + if !p.isExclude { + if len(pathComp) > 0 && pathComp[0] == '.' 
{ + return false + } + if isCommonPackageFolder(pathComp) { + return false + } } + + // Iterate: consume path component, keep same pattern index + pathOffset = nextPathOffset + continue } - return p.matchPathPrefixAt(path, nextPathOffset, patternIdx) - } + // Check implicit package folder exclusion + if pc.implicitlyExcludePackages && !p.isExclude && isCommonPackageFolder(pathComp) { + return false + } - // Check implicit package folder exclusion - if pc.implicitlyExcludePackages && !p.isExclude && isCommonPackageFolder(pathComp) { - return false - } + // Match current component + if !p.matchComponent(pc, pathComp, false) { + return false + } - // Match current component - if !p.matchComponent(pc, pathComp, false) { - return false + // Iterate: advance both path and pattern + pathOffset = nextPathOffset + patternIdx++ } - - return p.matchPathPrefixAt(path, nextPathOffset, patternIdx+1) } // matchComponent matches a single path component against a pattern component @@ -342,6 +355,31 @@ func (p *GlobPattern) matchWildcardComponent(segments []patternSegment, s string } } + // Fast path for common pattern: * followed by literal suffix (e.g., "*.ts") + if len(segments) == 2 && segments[0].kind == segmentStar && segments[1].kind == segmentLiteral { + suffix := segments[1].literal + if len(s) < len(suffix) { + return false + } + // Check that there are no slashes in what * would match + prefixLen := len(s) - len(suffix) + for i := range prefixLen { + if s[i] == '/' { + return false + } + } + // Check suffix match + sSuffix := s[prefixLen:] + if !p.stringsEqual(suffix, sSuffix) { + return false + } + // Check min.js exclusion + if p.excludeMinJs && p.wouldMatchMinJs(s) && !p.patternExplicitlyIncludesMinJs(segments) { + return false + } + return true + } + return p.matchSegments(segments, 0, s, 0) } @@ -439,9 +477,17 @@ func (p *GlobPattern) stringsEqual(a, b string) bool { // isCommonPackageFolder checks if a directory name is a common package folder func isCommonPackageFolder(name string) bool { - return strings.EqualFold(name, "node_modules") || - strings.EqualFold(name, "bower_components") || - strings.EqualFold(name, "jspm_packages") + // Quick length check to avoid EqualFold for most cases + switch len(name) { + case 12: // node_modules + return strings.EqualFold(name, "node_modules") + case 16: // bower_components + return strings.EqualFold(name, "bower_components") + case 13: // jspm_packages + return strings.EqualFold(name, "jspm_packages") + default: + return false + } } // GlobMatcher holds compiled glob patterns for matching files. 
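
The fast path above covers the overwhelmingly common component shape, a single '*' followed by a literal suffix such as "*.ts", without walking segments at all. A reduced sketch of that check, leaving out the .min.js exclusion the real code also applies; the name and signature here are illustrative only:

package sketch

import "strings"

// matchStarSuffix reports whether name matches "*<suffix>" as a single path
// component: the suffix must match, and the span covered by '*' must not
// cross a path separator.
func matchStarSuffix(name, suffix string, caseSensitive bool) bool {
	if len(name) < len(suffix) {
		return false
	}
	head, tail := name[:len(name)-len(suffix)], name[len(name)-len(suffix):]
	if strings.ContainsRune(head, '/') {
		return false
	}
	if caseSensitive {
		return tail == suffix
	}
	return strings.EqualFold(tail, suffix)
}

So matchStarSuffix("index.ts", ".ts", true) is true, while matchStarSuffix("src/index.ts", ".ts", true) is false because the '*' would have to span a separator.
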
@@ -557,9 +603,20 @@ func (v *visitorNoRegex) visitDirectory( systemEntries := v.host.GetAccessibleEntries(absolutePath) + // Pre-compute path suffixes to reduce allocations + // We'll build paths by appending "/" + entry name + pathPrefix := path + absPathPrefix := absolutePath + if len(path) > 0 && path[len(path)-1] != '/' { + pathPrefix = path + "/" + } + if len(absolutePath) > 0 && absolutePath[len(absolutePath)-1] != '/' { + absPathPrefix = absolutePath + "/" + } + for _, current := range systemEntries.Files { - name := tspath.CombinePaths(path, current) - absoluteName := tspath.CombinePaths(absolutePath, current) + name := pathPrefix + current + absoluteName := absPathPrefix + current if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(name, v.extensions) { continue @@ -584,8 +641,8 @@ func (v *visitorNoRegex) visitDirectory( } for _, current := range systemEntries.Directories { - name := tspath.CombinePaths(path, current) - absoluteName := tspath.CombinePaths(absolutePath, current) + name := pathPrefix + current + absoluteName := absPathPrefix + current if v.directoryMatcher.MatchesDirectory(absoluteName) { v.visitDirectory(name, absoluteName, depth) From e1a8f97dd9cbce84e6ee80e587df6d1f170255cc Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 12:28:43 -0800 Subject: [PATCH 09/41] Even fewer regex --- internal/ls/autoimports.go | 36 ++-- internal/tsoptions/tsconfigparsing.go | 39 ++--- internal/tsoptions/wildcarddirectories.go | 16 +- internal/vfs/vfsmatch/export_test.go | 45 ++++- internal/vfs/vfsmatch/new.go | 194 +++++++++++++++++----- internal/vfs/vfsmatch/old.go | 125 ++++++++++---- internal/vfs/vfsmatch/vfsmatch.go | 82 ++++++++- internal/vfs/vfsmatch/vfsmatch_test.go | 192 +++++++++++++++++++++ 8 files changed, 592 insertions(+), 137 deletions(-) diff --git a/internal/ls/autoimports.go b/internal/ls/autoimports.go index d8ee73d620..eaaef3d857 100644 --- a/internal/ls/autoimports.go +++ b/internal/ls/autoimports.go @@ -5,7 +5,6 @@ import ( "fmt" "strings" - "github.com/dlclark/regexp2" "github.com/microsoft/typescript-go/internal/ast" "github.com/microsoft/typescript-go/internal/astnav" "github.com/microsoft/typescript-go/internal/binder" @@ -1384,15 +1383,15 @@ func forEachExternalModuleToImportFrom( // useAutoImportProvider bool, cb func(module *ast.Symbol, moduleFile *ast.SourceFile, checker *checker.Checker, isFromPackageJson bool), ) { - var excludePatterns []*regexp2.Regexp + var excludeMatcher vfsmatch.SpecMatcher if preferences.AutoImportFileExcludePatterns != nil { - excludePatterns = getIsExcludedPatterns(preferences, program.UseCaseSensitiveFileNames()) + excludeMatcher = getIsExcludedMatcher(preferences, program.UseCaseSensitiveFileNames()) } forEachExternalModule( ch, program.GetSourceFiles(), - excludePatterns, + excludeMatcher, func(module *ast.Symbol, file *ast.SourceFile) { cb(module, file, ch, false) }, @@ -1414,35 +1413,26 @@ func forEachExternalModuleToImportFrom( // } } -func getIsExcludedPatterns(preferences *lsutil.UserPreferences, useCaseSensitiveFileNames bool) []*regexp2.Regexp { +func getIsExcludedMatcher(preferences *lsutil.UserPreferences, useCaseSensitiveFileNames bool) vfsmatch.SpecMatcher { if preferences.AutoImportFileExcludePatterns == nil { return nil } - var patterns []*regexp2.Regexp - for _, spec := range preferences.AutoImportFileExcludePatterns { - pattern := vfsmatch.GetSubPatternFromSpec(spec, "", vfsmatch.UsageExclude, vfsmatch.WildcardMatcher{}) - if pattern != "" { 
- if re := vfsmatch.GetRegexFromPattern(pattern, useCaseSensitiveFileNames); re != nil { - patterns = append(patterns, re) - } - } - } - return patterns + return vfsmatch.NewSpecMatcher(preferences.AutoImportFileExcludePatterns, "", vfsmatch.UsageExclude, useCaseSensitiveFileNames) } func forEachExternalModule( ch *checker.Checker, allSourceFiles []*ast.SourceFile, - excludePatterns []*regexp2.Regexp, + excludeMatcher vfsmatch.SpecMatcher, cb func(moduleSymbol *ast.Symbol, sourceFile *ast.SourceFile), ) { var isExcluded func(*ast.SourceFile) bool = func(_ *ast.SourceFile) bool { return false } - if excludePatterns != nil { - isExcluded = getIsExcluded(excludePatterns) + if excludeMatcher != nil { + isExcluded = getIsExcluded(excludeMatcher) } for _, ambient := range ch.GetAmbientModules() { - if !strings.Contains(ambient.Name, "*") && !(excludePatterns != nil && core.Every(ambient.Declarations, func(d *ast.Node) bool { + if !strings.Contains(ambient.Name, "*") && !(excludeMatcher != nil && core.Every(ambient.Declarations, func(d *ast.Node) bool { return isExcluded(ast.GetSourceFileOfNode(d)) })) { cb(ambient, nil /*sourceFile*/) @@ -1455,15 +1445,13 @@ func forEachExternalModule( } } -func getIsExcluded(excludePatterns []*regexp2.Regexp) func(sourceFile *ast.SourceFile) bool { +func getIsExcluded(excludeMatcher vfsmatch.SpecMatcher) func(sourceFile *ast.SourceFile) bool { // !!! SymlinkCache // const realpathsWithSymlinks = host.getSymlinkCache?.().getSymlinkedDirectoriesByRealpath(); return func(sourceFile *ast.SourceFile) bool { fileName := sourceFile.FileName() - for _, p := range excludePatterns { - if matched, _ := p.MatchString(fileName); matched { - return true - } + if excludeMatcher.MatchString(fileName) { + return true } // !! SymlinkCache // if (realpathsWithSymlinks?.size && pathContainsNodeModules(fileName)) { diff --git a/internal/tsoptions/tsconfigparsing.go b/internal/tsoptions/tsconfigparsing.go index 6887a45b77..9c3d7e34fc 100644 --- a/internal/tsoptions/tsconfigparsing.go +++ b/internal/tsoptions/tsconfigparsing.go @@ -2,13 +2,11 @@ package tsoptions import ( "cmp" - "fmt" "reflect" "regexp" "slices" "strings" - "github.com/dlclark/regexp2" "github.com/microsoft/typescript-go/internal/ast" "github.com/microsoft/typescript-go/internal/collections" "github.com/microsoft/typescript-go/internal/core" @@ -107,13 +105,15 @@ func (c *configFileSpecs) matchesExclude(fileName string, comparePathsOptions ts if len(c.validatedExcludeSpecs) == 0 { return false } - excludePattern := vfsmatch.GetRegularExpressionForWildcard(c.validatedExcludeSpecs, comparePathsOptions.CurrentDirectory, "exclude") - excludeRegex := vfsmatch.GetRegexFromPattern(excludePattern, comparePathsOptions.UseCaseSensitiveFileNames) - if match, err := excludeRegex.MatchString(fileName); err == nil && match { + excludeMatcher := vfsmatch.NewSpecMatcher(c.validatedExcludeSpecs, comparePathsOptions.CurrentDirectory, "exclude", comparePathsOptions.UseCaseSensitiveFileNames) + if excludeMatcher == nil { + return false + } + if excludeMatcher.MatchString(fileName) { return true } if !tspath.HasExtension(fileName) { - if match, err := excludeRegex.MatchString(tspath.EnsureTrailingDirectorySeparator(fileName)); err == nil && match { + if excludeMatcher.MatchString(tspath.EnsureTrailingDirectorySeparator(fileName)) { return true } } @@ -125,12 +125,9 @@ func (c *configFileSpecs) getMatchedIncludeSpec(fileName string, comparePathsOpt return "" } for index, spec := range c.validatedIncludeSpecs { - includePattern := 
vfsmatch.GetPatternFromSpec(spec, comparePathsOptions.CurrentDirectory, "files") - if includePattern != "" { - includeRegex := vfsmatch.GetRegexFromPattern(includePattern, comparePathsOptions.UseCaseSensitiveFileNames) - if match, err := includeRegex.MatchString(fileName); err == nil && match { - return c.validatedIncludeSpecsBeforeSubstitution[index] - } + includeMatcher := vfsmatch.NewSingleSpecMatcher(spec, comparePathsOptions.CurrentDirectory, "files", comparePathsOptions.UseCaseSensitiveFileNames) + if includeMatcher != nil && includeMatcher.MatchString(fileName) { + return c.validatedIncludeSpecsBeforeSubstitution[index] } } return "" @@ -1662,23 +1659,19 @@ func getFileNamesFromConfigSpecs( literalFileMap.Set(keyMappper(fileName), file) } - var jsonOnlyIncludeRegexes []*regexp2.Regexp + var jsonOnlyIncludeMatchers vfsmatch.SpecMatchers if len(validatedIncludeSpecs) > 0 { files := vfsmatch.ReadDirectory(host, basePath, basePath, core.Flatten(supportedExtensionsWithJsonIfResolveJsonModule), validatedExcludeSpecs, validatedIncludeSpecs, nil) for _, file := range files { if tspath.FileExtensionIs(file, tspath.ExtensionJson) { - if jsonOnlyIncludeRegexes == nil { + if jsonOnlyIncludeMatchers == nil { includes := core.Filter(validatedIncludeSpecs, func(include string) bool { return strings.HasSuffix(include, tspath.ExtensionJson) }) - includeFilePatterns := core.Map(vfsmatch.GetRegularExpressionsForWildcards(includes, basePath, "files"), func(pattern string) string { return fmt.Sprintf("^%s$", pattern) }) - if includeFilePatterns != nil { - jsonOnlyIncludeRegexes = core.Map(includeFilePatterns, func(pattern string) *regexp2.Regexp { - return vfsmatch.GetRegexFromPattern(pattern, host.UseCaseSensitiveFileNames()) - }) - } else { - jsonOnlyIncludeRegexes = nil - } + jsonOnlyIncludeMatchers = vfsmatch.NewSpecMatchers(includes, basePath, "files", host.UseCaseSensitiveFileNames()) + } + var includeIndex int = -1 + if jsonOnlyIncludeMatchers != nil { + includeIndex = jsonOnlyIncludeMatchers.MatchIndex(file) } - includeIndex := core.FindIndex(jsonOnlyIncludeRegexes, func(re *regexp2.Regexp) bool { return core.Must(re.MatchString(file)) }) if includeIndex != -1 { key := keyMappper(file) if !literalFileMap.Has(key) && !wildCardJsonFileMap.Has(key) { diff --git a/internal/tsoptions/wildcarddirectories.go b/internal/tsoptions/wildcarddirectories.go index b4e223a00b..a650b3de79 100644 --- a/internal/tsoptions/wildcarddirectories.go +++ b/internal/tsoptions/wildcarddirectories.go @@ -26,15 +26,7 @@ func getWildcardDirectories(include []string, exclude []string, comparePathsOpti return nil } - rawExcludeRegex := vfsmatch.GetRegularExpressionForWildcard(exclude, comparePathsOptions.CurrentDirectory, "exclude") - var excludeRegex *regexp2.Regexp - if rawExcludeRegex != "" { - flags := regexp2.ECMAScript - if !comparePathsOptions.UseCaseSensitiveFileNames { - flags |= regexp2.IgnoreCase - } - excludeRegex = regexp2.MustCompile(rawExcludeRegex, regexp2.RegexOptions(flags)) - } + excludeMatcher := vfsmatch.NewSpecMatcher(exclude, comparePathsOptions.CurrentDirectory, "exclude", comparePathsOptions.UseCaseSensitiveFileNames) wildcardDirectories := make(map[string]bool) wildCardKeyToPath := make(map[string]string) @@ -43,10 +35,8 @@ func getWildcardDirectories(include []string, exclude []string, comparePathsOpti for _, file := range include { spec := tspath.NormalizeSlashes(tspath.CombinePaths(comparePathsOptions.CurrentDirectory, file)) - if excludeRegex != nil { - if matched, _ := 
excludeRegex.MatchString(spec); matched { - continue - } + if excludeMatcher != nil && excludeMatcher.MatchString(spec) { + continue } match := getWildcardDirectoryFromSpec(spec, comparePathsOptions.UseCaseSensitiveFileNames) diff --git a/internal/vfs/vfsmatch/export_test.go b/internal/vfs/vfsmatch/export_test.go index ba2ee6e485..6fd3e62666 100644 --- a/internal/vfs/vfsmatch/export_test.go +++ b/internal/vfs/vfsmatch/export_test.go @@ -2,10 +2,51 @@ package vfsmatch import "github.com/microsoft/typescript-go/internal/vfs" +// Test-only exports for functions and types that are not part of the public API. + +// ReadDirectoryOld is a test-only export for the regex-based implementation. +func ReadDirectoryOld(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { + return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) +} + +// ReadDirectoryNew is a test-only export for the regex-free implementation. func ReadDirectoryNew(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) } -func ReadDirectoryOld(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { - return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) +// GlobPatternWrapper is a test-only wrapper for the unexported globPattern type. +type GlobPatternWrapper struct { + pattern *globPattern +} + +// Matches calls the unexported matches method on the wrapped globPattern. +func (w *GlobPatternWrapper) Matches(path string) bool { + if w == nil || w.pattern == nil { + return false + } + return w.pattern.matches(path) +} + +// CompileGlobPattern is a test-only export for compiling glob patterns. +func CompileGlobPattern(spec string, basePath string, usage Usage, caseSensitive bool) *GlobPatternWrapper { + p := compileGlobPattern(spec, basePath, usage, caseSensitive) + if p == nil { + return nil + } + return &GlobPatternWrapper{pattern: p} +} + +// GetRegularExpressionForWildcard is a test-only export for getting the regex for wildcard specs. +func GetRegularExpressionForWildcard(specs []string, basePath string, usage Usage) string { + return getRegularExpressionForWildcard(specs, basePath, usage) +} + +// GetRegularExpressionsForWildcards is a test-only export for getting regexes for wildcard specs. +func GetRegularExpressionsForWildcards(specs []string, basePath string, usage Usage) []string { + return getRegularExpressionsForWildcards(specs, basePath, usage) +} + +// GetPatternFromSpec is a test-only export for getting a pattern from a spec. +func GetPatternFromSpec(spec string, basePath string, usage Usage) string { + return getPatternFromSpec(spec, basePath, usage) } diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index 2f7f7e6ff9..4710771110 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -9,9 +9,9 @@ import ( "github.com/microsoft/typescript-go/internal/vfs" ) -// GlobPattern represents a compiled glob pattern for matching file paths. +// globPattern represents a compiled glob pattern for matching file paths. // It stores the pattern components for efficient matching without using regex. 
-type GlobPattern struct { +type globPattern struct { // The original pattern specification spec string // The base path from which the pattern was derived @@ -54,8 +54,8 @@ const ( segmentQuestion // ? - matches single char except / ) -// CompileGlobPattern compiles a glob spec into a GlobPattern for matching. -func CompileGlobPattern(spec string, basePath string, usage Usage, caseSensitive bool) *GlobPattern { +// compileGlobPattern compiles a glob spec into a globPattern for matching. +func compileGlobPattern(spec string, basePath string, usage Usage, caseSensitive bool) *globPattern { components := tspath.GetNormalizedPathComponents(spec, basePath) lastComponent := core.LastOrNil(components) @@ -69,11 +69,11 @@ func CompileGlobPattern(spec string, basePath string, usage Usage, caseSensitive components[0] = tspath.RemoveTrailingDirectorySeparator(components[0]) // Handle implicit glob (directories become dir/**/*) - if IsImplicitGlob(lastComponent) { + if isImplicitGlob(lastComponent) { components = append(components, "**", "*") } - pattern := &GlobPattern{ + pattern := &globPattern{ spec: spec, basePath: basePath, isExclude: usage == UsageExclude, @@ -137,8 +137,8 @@ func parsePatternSegments(comp string) []patternSegment { return segments } -// Matches checks if the given path matches this glob pattern. -func (p *GlobPattern) Matches(path string) bool { +// matches checks if the given path matches this glob pattern. +func (p *globPattern) matches(path string) bool { if p == nil { return false } @@ -147,9 +147,9 @@ func (p *GlobPattern) Matches(path string) bool { return p.matchPath(path, 0, false) } -// MatchesPrefix checks if the given directory path could potentially match files under it. +// matchesPrefix checks if the given directory path could potentially match files under it. // This is used for directory filtering during traversal. -func (p *GlobPattern) MatchesPrefix(path string) bool { +func (p *globPattern) matchesPrefix(path string) bool { if p == nil { return false } @@ -189,13 +189,13 @@ func nextPathComponent(path string, offset int) (component string, nextOffset in // matchPath matches the path against pattern components starting at patternIdx. // pathOffset is the current position in the path string. -func (p *GlobPattern) matchPath(path string, patternIdx int, inDoubleAsterisk bool) bool { +func (p *globPattern) matchPath(path string, patternIdx int, inDoubleAsterisk bool) bool { // Bootstrap: handle the path from the beginning return p.matchPathAt(path, 0, patternIdx, inDoubleAsterisk) } // matchPathAt matches path[pathOffset:] against pattern components starting at patternIdx. 
-func (p *GlobPattern) matchPathAt(path string, pathOffset int, patternIdx int, inDoubleAsterisk bool) bool { +func (p *globPattern) matchPathAt(path string, pathOffset int, patternIdx int, inDoubleAsterisk bool) bool { for { // Get the next path component pathComp, nextPathOffset, hasMore := nextPathComponent(path, pathOffset) @@ -212,6 +212,10 @@ func (p *GlobPattern) matchPathAt(path string, pathOffset int, patternIdx int, i // If we've consumed all path components but still have pattern components if !hasMore { + // For exclude patterns, if remaining is just the implicit glob suffix (** and *), match + if p.isExclude { + return p.isImplicitGlobSuffix(patternIdx) + } // Check if remaining pattern components are all optional (** only) for i := patternIdx; i < len(p.components); i++ { if !p.components[i].isDoubleAsterisk { @@ -265,12 +269,12 @@ func (p *GlobPattern) matchPathAt(path string, pathOffset int, patternIdx int, i } // matchPathPrefix checks if the path could be a prefix of a matching path. -func (p *GlobPattern) matchPathPrefix(path string, patternIdx int) bool { +func (p *globPattern) matchPathPrefix(path string, patternIdx int) bool { return p.matchPathPrefixAt(path, 0, patternIdx) } // matchPathPrefixAt checks if path[pathOffset:] could be a prefix of a matching path. -func (p *GlobPattern) matchPathPrefixAt(path string, pathOffset int, patternIdx int) bool { +func (p *globPattern) matchPathPrefixAt(path string, pathOffset int, patternIdx int) bool { for { // Get the next path component pathComp, nextPathOffset, hasMore := nextPathComponent(path, pathOffset) @@ -326,7 +330,7 @@ func (p *GlobPattern) matchPathPrefixAt(path string, pathOffset int, patternIdx } // matchComponent matches a single path component against a pattern component -func (p *GlobPattern) matchComponent(pc patternComponent, pathComp string, afterDoubleAsterisk bool) bool { +func (p *globPattern) matchComponent(pc patternComponent, pathComp string, afterDoubleAsterisk bool) bool { if pc.isDoubleAsterisk { // Should not happen here, handled separately return true @@ -344,7 +348,7 @@ func (p *GlobPattern) matchComponent(pc patternComponent, pathComp string, after } // matchWildcardComponent matches a path component against wildcard segments -func (p *GlobPattern) matchWildcardComponent(segments []patternSegment, s string) bool { +func (p *globPattern) matchWildcardComponent(segments []patternSegment, s string) bool { // For non-exclude patterns, if the segments start with * or ?, // the matched string cannot start with '.' if !p.isExclude && len(segments) > 0 && len(s) > 0 && s[0] == '.' 
{ @@ -383,7 +387,7 @@ func (p *GlobPattern) matchWildcardComponent(segments []patternSegment, s string return p.matchSegments(segments, 0, s, 0) } -func (p *GlobPattern) matchSegments(segments []patternSegment, segIdx int, s string, sIdx int) bool { +func (p *globPattern) matchSegments(segments []patternSegment, segIdx int, s string, sIdx int) bool { // If we've processed all segments if segIdx >= len(segments) { return sIdx >= len(s) @@ -452,12 +456,12 @@ func (p *GlobPattern) matchSegments(segments []patternSegment, segIdx int, s str } // wouldMatchMinJs checks if the filename ends with .min.js -func (p *GlobPattern) wouldMatchMinJs(filename string) bool { +func (p *globPattern) wouldMatchMinJs(filename string) bool { return strings.HasSuffix(strings.ToLower(filename), ".min.js") } // patternExplicitlyIncludesMinJs checks if the pattern explicitly includes .min.js -func (p *GlobPattern) patternExplicitlyIncludesMinJs(segments []patternSegment) bool { +func (p *globPattern) patternExplicitlyIncludesMinJs(segments []patternSegment) bool { // Look for .min.js in the literal segments for _, seg := range segments { if seg.kind == segmentLiteral && strings.Contains(strings.ToLower(seg.literal), ".min.js") { @@ -467,8 +471,40 @@ func (p *GlobPattern) patternExplicitlyIncludesMinJs(segments []patternSegment) return false } +// isImplicitGlobSuffix checks if the remaining pattern components from patternIdx +// are the implicit glob suffix (** followed by *) or all ** +func (p *globPattern) isImplicitGlobSuffix(patternIdx int) bool { + remaining := len(p.components) - patternIdx + if remaining == 0 { + return true + } + // All remaining must be ** (can match zero components) + // OR it's exactly **/* (the implicit glob pattern added for directories) + allDoubleAsterisk := true + for i := patternIdx; i < len(p.components); i++ { + if !p.components[i].isDoubleAsterisk { + allDoubleAsterisk = false + break + } + } + if allDoubleAsterisk { + return true + } + // Check for exactly **/* pattern (implicit glob suffix) + if remaining == 2 { + if p.components[patternIdx].isDoubleAsterisk { + last := p.components[patternIdx+1] + // The last component must be a pure * wildcard (matching any filename) + if last.hasWildcards && len(last.segments) == 1 && last.segments[0].kind == segmentStar { + return true + } + } + } + return false +} + // stringsEqual compares two strings with case sensitivity based on pattern settings -func (p *GlobPattern) stringsEqual(a, b string) bool { +func (p *globPattern) stringsEqual(a, b string) bool { if p.caseSensitive { return a == b } @@ -490,30 +526,30 @@ func isCommonPackageFolder(name string) bool { } } -// GlobMatcher holds compiled glob patterns for matching files. -type GlobMatcher struct { - includePatterns []*GlobPattern - excludePatterns []*GlobPattern +// globMatcher holds compiled glob patterns for matching files. +type globMatcher struct { + includePatterns []*globPattern + excludePatterns []*globPattern caseSensitive bool // hadIncludes tracks whether any include specs were provided (even if they compiled to nothing) hadIncludes bool } -// NewGlobMatcher creates a new GlobMatcher from include and exclude specs. -func NewGlobMatcher(includes []string, excludes []string, basePath string, caseSensitive bool, usage Usage) *GlobMatcher { - m := &GlobMatcher{ +// newGlobMatcher creates a new globMatcher from include and exclude specs. 
+func newGlobMatcher(includes []string, excludes []string, basePath string, caseSensitive bool, usage Usage) *globMatcher { + m := &globMatcher{ caseSensitive: caseSensitive, hadIncludes: len(includes) > 0, } for _, spec := range includes { - if pattern := CompileGlobPattern(spec, basePath, usage, caseSensitive); pattern != nil { + if pattern := compileGlobPattern(spec, basePath, usage, caseSensitive); pattern != nil { m.includePatterns = append(m.includePatterns, pattern) } } for _, spec := range excludes { - if pattern := CompileGlobPattern(spec, basePath, UsageExclude, caseSensitive); pattern != nil { + if pattern := compileGlobPattern(spec, basePath, UsageExclude, caseSensitive); pattern != nil { m.excludePatterns = append(m.excludePatterns, pattern) } } @@ -523,10 +559,10 @@ func NewGlobMatcher(includes []string, excludes []string, basePath string, caseS // MatchesFile checks if a file path matches the include patterns and doesn't match exclude patterns. // Returns the index of the matching include pattern, or -1 if no match. -func (m *GlobMatcher) MatchesFile(path string) int { +func (m *globMatcher) MatchesFile(path string) int { // First check excludes for _, exc := range m.excludePatterns { - if exc.Matches(path) { + if exc.matches(path) { return -1 } } @@ -541,7 +577,7 @@ func (m *GlobMatcher) MatchesFile(path string) int { // Check includes for i, inc := range m.includePatterns { - if inc.Matches(path) { + if inc.matches(path) { return i } } @@ -550,10 +586,10 @@ func (m *GlobMatcher) MatchesFile(path string) int { } // MatchesDirectory checks if a directory could contain matching files. -func (m *GlobMatcher) MatchesDirectory(path string) bool { +func (m *globMatcher) MatchesDirectory(path string) bool { // First check if excluded for _, exc := range m.excludePatterns { - if exc.Matches(path) { + if exc.matches(path) { return false } } @@ -568,7 +604,7 @@ func (m *GlobMatcher) MatchesDirectory(path string) bool { // Check if any include pattern could match files in this directory for _, inc := range m.includePatterns { - if inc.MatchesPrefix(path) { + if inc.matchesPrefix(path) { return true } } @@ -576,10 +612,10 @@ func (m *GlobMatcher) MatchesDirectory(path string) bool { return false } -// visitorNoRegex is similar to visitor but uses GlobMatcher instead of regex +// visitorNoRegex is similar to visitor but uses globMatcher instead of regex type visitorNoRegex struct { - fileMatcher *GlobMatcher - directoryMatcher *GlobMatcher + fileMatcher *globMatcher + directoryMatcher *globMatcher extensions []string useCaseSensitiveFileNames bool host vfs.FS @@ -657,10 +693,10 @@ func matchFilesNoRegex(path string, extensions []string, excludes []string, incl absolutePath := tspath.CombinePaths(currentDirectory, path) // Build file matcher - fileMatcher := NewGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageFiles) + fileMatcher := newGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageFiles) // Build directory matcher - directoryMatcher := NewGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageDirectories) + directoryMatcher := newGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageDirectories) basePaths := getBasePaths(path, includes, useCaseSensitiveFileNames) @@ -692,3 +728,81 @@ func matchFilesNoRegex(path string, extensions []string, excludes []string, incl return core.Flatten(results) } + +// globSpecMatcher wraps a globMatcher for SpecMatcher interface. 
+type globSpecMatcher struct { + patterns []*globPattern +} + +func (m *globSpecMatcher) MatchString(path string) bool { + if m == nil { + return false + } + for _, p := range m.patterns { + if p.matches(path) { + return true + } + } + return false +} + +// newGlobSpecMatcher creates a glob-based matcher for multiple specs. +func newGlobSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatcher { + if len(specs) == 0 { + return nil + } + m := &globSpecMatcher{} + for _, spec := range specs { + if pattern := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames); pattern != nil { + m.patterns = append(m.patterns, pattern) + } + } + if len(m.patterns) == 0 { + return nil + } + return m +} + +// newGlobSingleSpecMatcher creates a glob-based matcher for a single spec. +func newGlobSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatcher { + pattern := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames) + if pattern == nil { + return nil + } + return &globSpecMatcher{patterns: []*globPattern{pattern}} +} + +// globSpecMatchers holds a list of individual glob matchers for index lookup. +type globSpecMatchers struct { + patterns []*globPattern +} + +func (m *globSpecMatchers) MatchIndex(path string) int { + for i, p := range m.patterns { + if p.matches(path) { + return i + } + } + return -1 +} + +func (m *globSpecMatchers) Len() int { + return len(m.patterns) +} + +// newGlobSpecMatchers creates individual glob matchers for each spec. +func newGlobSpecMatchers(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatchers { + if len(specs) == 0 { + return nil + } + var patterns []*globPattern + for _, spec := range specs { + if pattern := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames); pattern != nil { + patterns = append(patterns, pattern) + } + } + if len(patterns) == 0 { + return nil + } + return &globSpecMatchers{patterns: patterns} +} diff --git a/internal/vfs/vfsmatch/old.go b/internal/vfs/vfsmatch/old.go index 7d50b38d9b..d113133000 100644 --- a/internal/vfs/vfsmatch/old.go +++ b/internal/vfs/vfsmatch/old.go @@ -15,7 +15,7 @@ import ( "github.com/microsoft/typescript-go/internal/vfs" ) -type FileMatcherPatterns struct { +type fileMatcherPatterns struct { // One pattern for each "include" spec. includeFilePatterns []string // One pattern matching one of any of the "include" specs. 
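
The exported constructors that wrap these matchers (NewSpecMatcher, NewSingleSpecMatcher, NewSpecMatchers) are added to vfsmatch.go later in this patch, and the call sites above in autoimports.go, tsconfigparsing.go, and wildcarddirectories.go already use them. A hedged usage sketch, written as if from inside the module; the specs, paths, and expected results are illustrative:

package main

import (
	"fmt"

	"github.com/microsoft/typescript-go/internal/vfs/vfsmatch"
)

func main() {
	// One boolean matcher over several exclude specs, as in getIsExcludedMatcher.
	exclude := vfsmatch.NewSpecMatcher([]string{"**/node_modules"}, "/project",
		vfsmatch.UsageExclude, true /*useCaseSensitiveFileNames*/)
	if exclude != nil {
		fmt.Println(exclude.MatchString("/project/node_modules/foo/index.js")) // expected true
	}

	// Per-spec matchers when the caller needs to know which include matched,
	// as in the JSON-only include lookup in tsconfigparsing.go.
	includes := vfsmatch.NewSpecMatchers([]string{"src/**/*.ts", "test/**/*.ts"},
		"/project", vfsmatch.UsageFiles, true)
	if includes != nil {
		fmt.Println(includes.MatchIndex("/project/test/app.test.ts")) // expected 1
	}
}
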
@@ -25,25 +25,17 @@ type FileMatcherPatterns struct { basePaths []string } -type Usage string - -const ( - UsageFiles Usage = "files" - UsageDirectories Usage = "directories" - UsageExclude Usage = "exclude" -) - -func GetRegularExpressionsForWildcards(specs []string, basePath string, usage Usage) []string { +func getRegularExpressionsForWildcards(specs []string, basePath string, usage Usage) []string { if len(specs) == 0 { return nil } return core.Map(specs, func(spec string) string { - return GetSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) + return getSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) }) } -func GetRegularExpressionForWildcard(specs []string, basePath string, usage Usage) string { - patterns := GetRegularExpressionsForWildcards(specs, basePath, usage) +func getRegularExpressionForWildcard(specs []string, basePath string, usage Usage) string { + patterns := getRegularExpressionsForWildcards(specs, basePath, usage) if len(patterns) == 0 { return "" } @@ -76,9 +68,10 @@ func replaceWildcardCharacter(match string, singleAsteriskRegexFragment string) } } +// isImplicitGlob checks if a path is implicitly a glob. // An "includes" path "foo" is implicitly a glob "foo/** /*" (without the space) if its last component has no extension, // and does not contain any glob characters itself. -func IsImplicitGlob(lastPathComponent string) bool { +func isImplicitGlob(lastPathComponent string) bool { return !strings.ContainsAny(lastPathComponent, ".*?") } @@ -95,7 +88,7 @@ var ( implicitExcludePathRegexPattern = "(?!(" + strings.Join(commonPackageFolders, "|") + ")(/|$))" ) -type WildcardMatcher struct { +type wildcardMatcher struct { singleAsteriskRegexFragment string doubleAsteriskRegexFragment string replaceWildcardCharacter func(match string) string @@ -111,7 +104,7 @@ const ( singleAsteriskRegexFragment = "[^/]*" ) -var filesMatcher = WildcardMatcher{ +var filesMatcher = wildcardMatcher{ singleAsteriskRegexFragment: singleAsteriskRegexFragmentFilesMatcher, // Regex for the ** wildcard. Matches any number of subdirectories. When used for including // files or directories, does not match subdirectories that start with a . character @@ -121,7 +114,7 @@ var filesMatcher = WildcardMatcher{ }, } -var directoriesMatcher = WildcardMatcher{ +var directoriesMatcher = wildcardMatcher{ singleAsteriskRegexFragment: singleAsteriskRegexFragment, // Regex for the ** wildcard. Matches any number of subdirectories. When used for including // files or directories, does not match subdirectories that start with a . 
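
isImplicitGlob is the rule that lets a bare directory spec stand in for <dir>/**/*: a last component with no extension and no glob characters is treated as a directory. A few illustrative checks of that one-line predicate; the sample components are made up:

package sketch

import "strings"

// implicitGlob mirrors the predicate above: a last path component is
// implicitly a glob (a directory to expand to "<dir>/**/*") when it has no
// extension and contains no glob characters.
func implicitGlob(lastPathComponent string) bool {
	return !strings.ContainsAny(lastPathComponent, ".*?")
}

var _ = []bool{
	implicitGlob("src"),     // true:  "src" behaves like "src/**/*"
	implicitGlob("main.ts"), // false: kept as a concrete file path
	implicitGlob("**"),      // false: already a glob, left alone
}
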
character @@ -131,7 +124,7 @@ var directoriesMatcher = WildcardMatcher{ }, } -var excludeMatcher = WildcardMatcher{ +var excludeMatcher = wildcardMatcher{ singleAsteriskRegexFragment: singleAsteriskRegexFragment, doubleAsteriskRegexFragment: "(/.+?)?", replaceWildcardCharacter: func(match string) string { @@ -139,18 +132,18 @@ var excludeMatcher = WildcardMatcher{ }, } -var wildcardMatchers = map[Usage]WildcardMatcher{ +var wildcardMatchers = map[Usage]wildcardMatcher{ UsageFiles: filesMatcher, UsageDirectories: directoriesMatcher, UsageExclude: excludeMatcher, } -func GetPatternFromSpec( +func getPatternFromSpec( spec string, basePath string, usage Usage, ) string { - pattern := GetSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) + pattern := getSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) if pattern == "" { return "" } @@ -158,11 +151,11 @@ func GetPatternFromSpec( return fmt.Sprintf("^(%s)%s", pattern, ending) } -func GetSubPatternFromSpec( +func getSubPatternFromSpec( spec string, basePath string, usage Usage, - matcher WildcardMatcher, + matcher wildcardMatcher, ) string { matcher = wildcardMatchers[usage] @@ -180,7 +173,7 @@ func GetSubPatternFromSpec( // We need to remove to create our regex correctly. components[0] = tspath.RemoveTrailingDirectorySeparator(components[0]) - if IsImplicitGlob(lastComponent) { + if isImplicitGlob(lastComponent) { components = append(components, "**", "*") } @@ -290,16 +283,16 @@ func getBasePaths(path string, includes []string, useCaseSensitiveFileNames bool // getFileMatcherPatterns generates file matching patterns based on the provided path, // includes, excludes, and other parameters. path is the directory of the tsconfig.json file. -func getFileMatcherPatterns(path string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string) FileMatcherPatterns { +func getFileMatcherPatterns(path string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string) fileMatcherPatterns { path = tspath.NormalizePath(path) currentDirectory = tspath.NormalizePath(currentDirectory) absolutePath := tspath.CombinePaths(currentDirectory, path) - return FileMatcherPatterns{ - includeFilePatterns: core.Map(GetRegularExpressionsForWildcards(includes, absolutePath, "files"), func(pattern string) string { return "^" + pattern + "$" }), - includeFilePattern: GetRegularExpressionForWildcard(includes, absolutePath, "files"), - includeDirectoryPattern: GetRegularExpressionForWildcard(includes, absolutePath, "directories"), - excludePattern: GetRegularExpressionForWildcard(excludes, absolutePath, "exclude"), + return fileMatcherPatterns{ + includeFilePatterns: core.Map(getRegularExpressionsForWildcards(includes, absolutePath, "files"), func(pattern string) string { return "^" + pattern + "$" }), + includeFilePattern: getRegularExpressionForWildcard(includes, absolutePath, "files"), + includeDirectoryPattern: getRegularExpressionForWildcard(includes, absolutePath, "directories"), + excludePattern: getRegularExpressionForWildcard(excludes, absolutePath, "exclude"), basePaths: getBasePaths(path, includes, useCaseSensitiveFileNames), } } @@ -314,7 +307,7 @@ var ( regexp2Cache = make(map[regexp2CacheKey]*regexp2.Regexp) ) -func GetRegexFromPattern(pattern string, useCaseSensitiveFileNames bool) *regexp2.Regexp { +func getRegexFromPattern(pattern string, useCaseSensitiveFileNames bool) *regexp2.Regexp { opts := regexp2.RegexOptions(regexp2.ECMAScript) if 
!useCaseSensitiveFileNames { opts |= regexp2.IgnoreCase @@ -423,15 +416,15 @@ func matchFiles(path string, extensions []string, excludes []string, includes [] patterns := getFileMatcherPatterns(path, excludes, includes, useCaseSensitiveFileNames, currentDirectory) var includeFileRegexes []*regexp2.Regexp if patterns.includeFilePatterns != nil { - includeFileRegexes = core.Map(patterns.includeFilePatterns, func(pattern string) *regexp2.Regexp { return GetRegexFromPattern(pattern, useCaseSensitiveFileNames) }) + includeFileRegexes = core.Map(patterns.includeFilePatterns, func(pattern string) *regexp2.Regexp { return getRegexFromPattern(pattern, useCaseSensitiveFileNames) }) } var includeDirectoryRegex *regexp2.Regexp if patterns.includeDirectoryPattern != "" { - includeDirectoryRegex = GetRegexFromPattern(patterns.includeDirectoryPattern, useCaseSensitiveFileNames) + includeDirectoryRegex = getRegexFromPattern(patterns.includeDirectoryPattern, useCaseSensitiveFileNames) } var excludeRegex *regexp2.Regexp if patterns.excludePattern != "" { - excludeRegex = GetRegexFromPattern(patterns.excludePattern, useCaseSensitiveFileNames) + excludeRegex = getRegexFromPattern(patterns.excludePattern, useCaseSensitiveFileNames) } // Associate an array of results with each include regex. This keeps results in order of the "include" order. @@ -461,3 +454,67 @@ func matchFiles(path string, extensions []string, excludes []string, includes [] return core.Flatten(results) } + +// regexSpecMatcher wraps a regexp2.Regexp for SpecMatcher interface. +type regexSpecMatcher struct { + re *regexp2.Regexp +} + +func (m *regexSpecMatcher) MatchString(path string) bool { + if m == nil || m.re == nil { + return false + } + matched, err := m.re.MatchString(path) + return err == nil && matched +} + +// newRegexSpecMatcher creates a regex-based matcher for multiple specs. +func newRegexSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *regexSpecMatcher { + pattern := getRegularExpressionForWildcard(specs, basePath, usage) + if pattern == "" { + return nil + } + return ®exSpecMatcher{re: getRegexFromPattern(pattern, useCaseSensitiveFileNames)} +} + +// newRegexSingleSpecMatcher creates a regex-based matcher for a single spec. +func newRegexSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *regexSpecMatcher { + pattern := getPatternFromSpec(spec, basePath, usage) + if pattern == "" { + return nil + } + return ®exSpecMatcher{re: getRegexFromPattern(pattern, useCaseSensitiveFileNames)} +} + +// regexSpecMatchers holds a list of individual regex matchers for index lookup. +type regexSpecMatchers struct { + matchers []*regexp2.Regexp +} + +func (m *regexSpecMatchers) MatchIndex(path string) int { + for i, re := range m.matchers { + if matched, err := re.MatchString(path); err == nil && matched { + return i + } + } + return -1 +} + +func (m *regexSpecMatchers) Len() int { + return len(m.matchers) +} + +// newRegexSpecMatchers creates individual regex matchers for each spec. 
+// newRegexSpecMatchers creates individual regex matchers for each spec.
+func newRegexSpecMatchers(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *regexSpecMatchers {
+	patterns := getRegularExpressionsForWildcards(specs, basePath, usage)
+	if len(patterns) == 0 {
+		return nil
+	}
+	matchers := make([]*regexp2.Regexp, len(patterns))
+	for i, pattern := range patterns {
+		// Wrap pattern with ^ and $ for full match
+		fullPattern := "^" + pattern + "$"
+		matchers[i] = getRegexFromPattern(fullPattern, useCaseSensitiveFileNames)
+	}
+	return &regexSpecMatchers{matchers: matchers}
+}
diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go
index 069d7a2a95..df62cd4665 100644
--- a/internal/vfs/vfsmatch/vfsmatch.go
+++ b/internal/vfs/vfsmatch/vfsmatch.go
@@ -1,6 +1,10 @@
 package vfsmatch

-import "github.com/microsoft/typescript-go/internal/vfs"
+import (
+	"strings"
+
+	"github.com/microsoft/typescript-go/internal/vfs"
+)

 // newNewMatch controls whether to use the regex-free glob matching implementation.
 const newNewMatch = true
@@ -11,3 +15,79 @@ func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []str
 	}
 	return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host)
 }
+
+// IsImplicitGlob checks if a path component is implicitly a glob.
+// An "includes" path "foo" is implicitly a glob "foo/** /*" (without the space) if its last component has no extension,
+// and does not contain any glob characters itself.
+func IsImplicitGlob(lastPathComponent string) bool {
+	return !strings.ContainsAny(lastPathComponent, ".*?")
+}
+
+type Usage string
+
+const (
+	UsageFiles       Usage = "files"
+	UsageDirectories Usage = "directories"
+	UsageExclude     Usage = "exclude"
+)
+
+// SpecMatcher is an interface for matching file paths against compiled glob patterns.
+// It abstracts over both regex-based and regex-free implementations.
+type SpecMatcher interface {
+	// MatchString returns true if the given path matches the pattern.
+	MatchString(path string) bool
+}
+
+// SpecMatchers is an interface for matching file paths against multiple compiled glob patterns.
+// It can return the index of the matching pattern.
+type SpecMatchers interface {
+	// MatchIndex returns the index of the first matching pattern, or -1 if none match.
+	MatchIndex(path string) int
+	// Len returns the number of patterns.
+	Len() int
+}
+
+// NewSpecMatcher creates a matcher for one or more glob specs.
+// It returns a matcher that can test if paths match any of the patterns.
+func NewSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher {
+	if newNewMatch {
+		if m := newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil {
+			return m
+		}
+		return nil
+	}
+	if m := newRegexSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil {
+		return m
+	}
+	return nil
+}
+
+// NewSingleSpecMatcher creates a matcher for a single glob spec.
+// Returns nil if the spec compiles to an empty pattern (e.g., trailing ** for non-exclude).
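+// For example, "*.ts" yields a usable matcher, while a bare "**" returns nil under UsageFiles but is allowed under UsageExclude.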
+func NewSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { + if newNewMatch { + if m := newGlobSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil + } + if m := newRegexSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil +} + +// NewSpecMatchers creates individual matchers for each spec, allowing lookup of which spec matched. +// Returns nil if no valid patterns could be compiled from the specs. +func NewSpecMatchers(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers { + if newNewMatch { + if m := newGlobSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil + } + if m := newRegexSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil +} diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go index 91de7f56da..a2a1f376a6 100644 --- a/internal/vfs/vfsmatch/vfsmatch_test.go +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -1249,3 +1249,195 @@ func TestReadDirectoryMatchesTypeScriptBaselines(t *testing.T) { } } } + +// TestSpecMatcher tests the SpecMatcher API +func TestSpecMatcher(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + specs []string + basePath string + usage vfsmatch.Usage + useCaseSensitiveFileNames bool + matchingPaths []string + nonMatchingPaths []string + }{ + { + name: "simple wildcard", + specs: []string{"*.ts"}, + basePath: "/project", + usage: vfsmatch.UsageFiles, + useCaseSensitiveFileNames: true, + matchingPaths: []string{"/project/a.ts", "/project/b.ts", "/project/foo.ts"}, + nonMatchingPaths: []string{"/project/a.js", "/project/sub/a.ts"}, + }, + { + name: "recursive wildcard", + specs: []string{"**/*.ts"}, + basePath: "/project", + usage: vfsmatch.UsageFiles, + useCaseSensitiveFileNames: true, + matchingPaths: []string{"/project/a.ts", "/project/sub/a.ts", "/project/sub/deep/a.ts"}, + nonMatchingPaths: []string{"/project/a.js"}, + }, + { + name: "exclude pattern", + specs: []string{"node_modules"}, + basePath: "/project", + usage: vfsmatch.UsageExclude, + useCaseSensitiveFileNames: true, + matchingPaths: []string{"/project/node_modules", "/project/node_modules/foo"}, + nonMatchingPaths: []string{"/project/src"}, + }, + { + name: "case insensitive", + specs: []string{"*.ts"}, + basePath: "/project", + usage: vfsmatch.UsageFiles, + useCaseSensitiveFileNames: false, + matchingPaths: []string{"/project/A.TS", "/project/B.Ts"}, + nonMatchingPaths: []string{"/project/a.js"}, + }, + { + name: "multiple specs", + specs: []string{"*.ts", "*.tsx"}, + basePath: "/project", + usage: vfsmatch.UsageFiles, + useCaseSensitiveFileNames: true, + matchingPaths: []string{"/project/a.ts", "/project/b.tsx"}, + nonMatchingPaths: []string{"/project/a.js"}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + matcher := vfsmatch.NewSpecMatcher(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + if matcher == nil { + t.Fatal("matcher should not be nil") + } + for _, path := range tc.matchingPaths { + assert.Assert(t, matcher.MatchString(path), "should match: %s", path) + } + for _, path := range tc.nonMatchingPaths { + assert.Assert(t, !matcher.MatchString(path), "should not match: %s", path) + } + }) + } +} + +func TestSingleSpecMatcher(t *testing.T) { + t.Parallel() + + cases := 
[]struct { + name string + spec string + basePath string + usage vfsmatch.Usage + useCaseSensitiveFileNames bool + expectNil bool + matchingPaths []string + nonMatchingPaths []string + }{ + { + name: "simple spec", + spec: "*.ts", + basePath: "/project", + usage: vfsmatch.UsageFiles, + useCaseSensitiveFileNames: true, + matchingPaths: []string{"/project/a.ts"}, + nonMatchingPaths: []string{"/project/a.js"}, + }, + { + name: "trailing ** non-exclude returns nil", + spec: "**", + basePath: "/project", + usage: vfsmatch.UsageFiles, + useCaseSensitiveFileNames: true, + expectNil: true, + }, + { + name: "trailing ** exclude works", + spec: "**", + basePath: "/project", + usage: vfsmatch.UsageExclude, + useCaseSensitiveFileNames: true, + matchingPaths: []string{"/project/anything", "/project/deep/path"}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + matcher := vfsmatch.NewSingleSpecMatcher(tc.spec, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + if tc.expectNil { + assert.Assert(t, matcher == nil, "should be nil") + return + } + if matcher == nil { + t.Fatal("matcher should not be nil") + } + for _, path := range tc.matchingPaths { + assert.Assert(t, matcher.MatchString(path), "should match: %s", path) + } + for _, path := range tc.nonMatchingPaths { + assert.Assert(t, !matcher.MatchString(path), "should not match: %s", path) + } + }) + } +} + +func TestSpecMatchers(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + specs []string + basePath string + usage vfsmatch.Usage + useCaseSensitiveFileNames bool + expectNil bool + pathToIndex map[string]int + }{ + { + name: "multiple specs return correct index", + specs: []string{"*.ts", "*.tsx", "*.js"}, + basePath: "/project", + usage: vfsmatch.UsageFiles, + useCaseSensitiveFileNames: true, + pathToIndex: map[string]int{ + "/project/a.ts": 0, + "/project/b.tsx": 1, + "/project/c.js": 2, + "/project/d.css": -1, // no match + }, + }, + { + name: "empty specs returns nil", + specs: []string{}, + basePath: "/project", + usage: vfsmatch.UsageFiles, + useCaseSensitiveFileNames: true, + expectNil: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + matchers := vfsmatch.NewSpecMatchers(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + if tc.expectNil { + assert.Assert(t, matchers == nil, "should be nil") + return + } + if matchers == nil { + t.Fatal("matchers should not be nil") + } + for path, expectedIndex := range tc.pathToIndex { + gotIndex := matchers.MatchIndex(path) + assert.Equal(t, gotIndex, expectedIndex, "path: %s", path) + } + }) + } +} From 8d9ecfd3d01fdb2a32ca3b666d3624c13fc25629 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 12:34:17 -0800 Subject: [PATCH 10/41] move to old school testing --- internal/vfs/vfsmatch/bench_test.go | 13 ++-- internal/vfs/vfsmatch/export_test.go | 52 --------------- internal/vfs/vfsmatch/vfsmatch_test.go | 89 ++++++++++++++------------ 3 files changed, 55 insertions(+), 99 deletions(-) delete mode 100644 internal/vfs/vfsmatch/export_test.go diff --git a/internal/vfs/vfsmatch/bench_test.go b/internal/vfs/vfsmatch/bench_test.go index 3a523d4b65..05718cf0f6 100644 --- a/internal/vfs/vfsmatch/bench_test.go +++ b/internal/vfs/vfsmatch/bench_test.go @@ -1,10 +1,9 @@ -package vfsmatch_test +package vfsmatch import ( "testing" "github.com/microsoft/typescript-go/internal/vfs" - 
"github.com/microsoft/typescript-go/internal/vfs/vfsmatch" "github.com/microsoft/typescript-go/internal/vfs/vfstest" ) @@ -97,14 +96,14 @@ func BenchmarkReadDirectory(b *testing.B) { b.Run("Old/"+bc.name, func(b *testing.B) { host := bc.host() for b.Loop() { - vfsmatch.ReadDirectoryOld(host, "/", bc.path, bc.extensions, bc.excludes, bc.includes, nil) + matchFiles(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", nil, host) } }) b.Run("New/"+bc.name, func(b *testing.B) { host := bc.host() for b.Loop() { - vfsmatch.ReadDirectoryNew(host, "/", bc.path, bc.extensions, bc.excludes, bc.includes, nil) + matchFilesNoRegex(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", nil, host) } }) } @@ -160,7 +159,7 @@ func BenchmarkPatternCompilation(b *testing.B) { for _, p := range patterns { b.Run(p.name, func(b *testing.B) { for b.Loop() { - vfsmatch.CompileGlobPattern(p.spec, "/project", vfsmatch.UsageFiles, true) + compileGlobPattern(p.spec, "/project", UsageFiles, true) } }) } @@ -215,7 +214,7 @@ func BenchmarkPatternMatching(b *testing.B) { } for _, tc := range testCases { - pattern := vfsmatch.CompileGlobPattern(tc.spec, "/project", vfsmatch.UsageFiles, true) + pattern := compileGlobPattern(tc.spec, "/project", UsageFiles, true) if pattern == nil { continue } @@ -223,7 +222,7 @@ func BenchmarkPatternMatching(b *testing.B) { b.Run(tc.name, func(b *testing.B) { for b.Loop() { for _, path := range tc.paths { - pattern.Matches(path) + pattern.matches(path) } } }) diff --git a/internal/vfs/vfsmatch/export_test.go b/internal/vfs/vfsmatch/export_test.go deleted file mode 100644 index 6fd3e62666..0000000000 --- a/internal/vfs/vfsmatch/export_test.go +++ /dev/null @@ -1,52 +0,0 @@ -package vfsmatch - -import "github.com/microsoft/typescript-go/internal/vfs" - -// Test-only exports for functions and types that are not part of the public API. - -// ReadDirectoryOld is a test-only export for the regex-based implementation. -func ReadDirectoryOld(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { - return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) -} - -// ReadDirectoryNew is a test-only export for the regex-free implementation. -func ReadDirectoryNew(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { - return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) -} - -// GlobPatternWrapper is a test-only wrapper for the unexported globPattern type. -type GlobPatternWrapper struct { - pattern *globPattern -} - -// Matches calls the unexported matches method on the wrapped globPattern. -func (w *GlobPatternWrapper) Matches(path string) bool { - if w == nil || w.pattern == nil { - return false - } - return w.pattern.matches(path) -} - -// CompileGlobPattern is a test-only export for compiling glob patterns. -func CompileGlobPattern(spec string, basePath string, usage Usage, caseSensitive bool) *GlobPatternWrapper { - p := compileGlobPattern(spec, basePath, usage, caseSensitive) - if p == nil { - return nil - } - return &GlobPatternWrapper{pattern: p} -} - -// GetRegularExpressionForWildcard is a test-only export for getting the regex for wildcard specs. 
-func GetRegularExpressionForWildcard(specs []string, basePath string, usage Usage) string { - return getRegularExpressionForWildcard(specs, basePath, usage) -} - -// GetRegularExpressionsForWildcards is a test-only export for getting regexes for wildcard specs. -func GetRegularExpressionsForWildcards(specs []string, basePath string, usage Usage) []string { - return getRegularExpressionsForWildcards(specs, basePath, usage) -} - -// GetPatternFromSpec is a test-only export for getting a pattern from a spec. -func GetPatternFromSpec(spec string, basePath string, usage Usage) string { - return getPatternFromSpec(spec, basePath, usage) -} diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go index a2a1f376a6..bbb4aeed6f 100644 --- a/internal/vfs/vfsmatch/vfsmatch_test.go +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -1,11 +1,10 @@ -package vfsmatch_test +package vfsmatch import ( "slices" "testing" "github.com/microsoft/typescript-go/internal/vfs" - "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" "github.com/microsoft/typescript-go/internal/vfs/vfstest" "gotest.tools/v3/assert" ) @@ -20,13 +19,23 @@ func ptrTo[T any](v T) *T { // readDirectoryFunc is a function type for ReadDirectory implementations type readDirectoryFunc func(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string +// readDirectoryOld wraps matchFiles with the expected test signature +func readDirectoryOld(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { + return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) +} + +// readDirectoryNew wraps matchFilesNoRegex with the expected test signature +func readDirectoryNew(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { + return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) +} + // readDirectoryImplementations contains all implementations to test var readDirectoryImplementations = []struct { name string fn readDirectoryFunc }{ - {"Old", vfsmatch.ReadDirectoryOld}, - {"New", vfsmatch.ReadDirectoryNew}, + {"Old", readDirectoryOld}, + {"New", readDirectoryNew}, } // caseInsensitiveHost simulates a Windows-like file system @@ -169,7 +178,7 @@ func runReadDirectoryCase(t *testing.T, tc readDirTestCase, readDir readDirector if path == "" { path = "/dev" } - got := vfsmatch.ReadDirectory(tc.host(), currentDir, path, tc.extensions, tc.excludes, tc.includes, tc.depth) + got := ReadDirectory(tc.host(), currentDir, path, tc.extensions, tc.excludes, tc.includes, tc.depth) tc.expect(t, got) } @@ -729,7 +738,7 @@ func TestIsImplicitGlob(t *testing.T) { tc := tt t.Run(tc.name, func(t *testing.T) { t.Parallel() - result := vfsmatch.IsImplicitGlob(tc.input) + result := IsImplicitGlob(tc.input) assert.Equal(t, result, tc.expected) }) } @@ -741,20 +750,20 @@ func TestGetRegularExpressionForWildcard(t *testing.T) { cases := []struct { name string specs []string - usage vfsmatch.Usage + usage Usage expected string assertFn func(t *testing.T, got string) }{ - {name: "nil specs", specs: nil, usage: vfsmatch.UsageFiles, expected: "", assertFn: func(t *testing.T, got string) { assert.Equal(t, got, "") }}, - {name: "empty specs", specs: []string{}, usage: vfsmatch.UsageFiles, expected: "", assertFn: func(t 
*testing.T, got string) { assert.Equal(t, got, "") }}, - {name: "single spec", specs: []string{"*.ts"}, usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, - {name: "multiple specs", specs: []string{"*.ts", "*.tsx"}, usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + {name: "nil specs", specs: nil, usage: UsageFiles, expected: "", assertFn: func(t *testing.T, got string) { assert.Equal(t, got, "") }}, + {name: "empty specs", specs: []string{}, usage: UsageFiles, expected: "", assertFn: func(t *testing.T, got string) { assert.Equal(t, got, "") }}, + {name: "single spec", specs: []string{"*.ts"}, usage: UsageFiles, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + {name: "multiple specs", specs: []string{"*.ts", "*.tsx"}, usage: UsageFiles, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { t.Parallel() - result := vfsmatch.GetRegularExpressionForWildcard(tc.specs, "/", tc.usage) + result := getRegularExpressionForWildcard(tc.specs, "/", tc.usage) if tc.assertFn != nil { tc.assertFn(t, result) } else { @@ -770,18 +779,18 @@ func TestGetRegularExpressionsForWildcards(t *testing.T) { cases := []struct { name string specs []string - usage vfsmatch.Usage + usage Usage assertFn func(t *testing.T, got []string) }{ - {name: "nil specs", specs: nil, usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Assert(t, got == nil) }}, - {name: "empty specs", specs: []string{}, usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Assert(t, got == nil) }}, - {name: "two specs", specs: []string{"*.ts", "*.tsx"}, usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Equal(t, len(got), 2) }}, + {name: "nil specs", specs: nil, usage: UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Assert(t, got == nil) }}, + {name: "empty specs", specs: []string{}, usage: UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Assert(t, got == nil) }}, + {name: "two specs", specs: []string{"*.ts", "*.tsx"}, usage: UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Equal(t, len(got), 2) }}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { t.Parallel() - result := vfsmatch.GetRegularExpressionsForWildcards(tc.specs, "/", tc.usage) + result := getRegularExpressionsForWildcards(tc.specs, "/", tc.usage) tc.assertFn(t, result) }) } @@ -793,26 +802,26 @@ func TestGetPatternFromSpec(t *testing.T) { cases := []struct { name string spec string - usage vfsmatch.Usage + usage Usage assertFn func(t *testing.T, got string) }{ - {name: "files usage", spec: "*.ts", usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got string) { + {name: "files usage", spec: "*.ts", usage: UsageFiles, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") assert.Assert(t, hasSuffix(got, "$")) }}, - {name: "directories usage", spec: "src", usage: vfsmatch.UsageDirectories, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, - {name: "exclude usage", spec: "node_modules", usage: vfsmatch.UsageExclude, assertFn: func(t *testing.T, got string) { + {name: "directories usage", spec: "src", usage: UsageDirectories, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + {name: "exclude usage", spec: "node_modules", usage: UsageExclude, assertFn: func(t 
*testing.T, got string) { assert.Assert(t, got != "") assert.Assert(t, contains(got, "($|/)")) }}, - {name: "trailing starstar non exclude", spec: "**", usage: vfsmatch.UsageFiles, assertFn: func(t *testing.T, got string) { assert.Equal(t, got, "") }}, - {name: "trailing starstar exclude allowed", spec: "**", usage: vfsmatch.UsageExclude, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + {name: "trailing starstar non exclude", spec: "**", usage: UsageFiles, assertFn: func(t *testing.T, got string) { assert.Equal(t, got, "") }}, + {name: "trailing starstar exclude allowed", spec: "**", usage: UsageExclude, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { t.Parallel() - result := vfsmatch.GetPatternFromSpec(tc.spec, "/", tc.usage) + result := getPatternFromSpec(tc.spec, "/", tc.usage) tc.assertFn(t, result) }) } @@ -1258,7 +1267,7 @@ func TestSpecMatcher(t *testing.T) { name string specs []string basePath string - usage vfsmatch.Usage + usage Usage useCaseSensitiveFileNames bool matchingPaths []string nonMatchingPaths []string @@ -1267,7 +1276,7 @@ func TestSpecMatcher(t *testing.T) { name: "simple wildcard", specs: []string{"*.ts"}, basePath: "/project", - usage: vfsmatch.UsageFiles, + usage: UsageFiles, useCaseSensitiveFileNames: true, matchingPaths: []string{"/project/a.ts", "/project/b.ts", "/project/foo.ts"}, nonMatchingPaths: []string{"/project/a.js", "/project/sub/a.ts"}, @@ -1276,7 +1285,7 @@ func TestSpecMatcher(t *testing.T) { name: "recursive wildcard", specs: []string{"**/*.ts"}, basePath: "/project", - usage: vfsmatch.UsageFiles, + usage: UsageFiles, useCaseSensitiveFileNames: true, matchingPaths: []string{"/project/a.ts", "/project/sub/a.ts", "/project/sub/deep/a.ts"}, nonMatchingPaths: []string{"/project/a.js"}, @@ -1285,7 +1294,7 @@ func TestSpecMatcher(t *testing.T) { name: "exclude pattern", specs: []string{"node_modules"}, basePath: "/project", - usage: vfsmatch.UsageExclude, + usage: UsageExclude, useCaseSensitiveFileNames: true, matchingPaths: []string{"/project/node_modules", "/project/node_modules/foo"}, nonMatchingPaths: []string{"/project/src"}, @@ -1294,7 +1303,7 @@ func TestSpecMatcher(t *testing.T) { name: "case insensitive", specs: []string{"*.ts"}, basePath: "/project", - usage: vfsmatch.UsageFiles, + usage: UsageFiles, useCaseSensitiveFileNames: false, matchingPaths: []string{"/project/A.TS", "/project/B.Ts"}, nonMatchingPaths: []string{"/project/a.js"}, @@ -1303,7 +1312,7 @@ func TestSpecMatcher(t *testing.T) { name: "multiple specs", specs: []string{"*.ts", "*.tsx"}, basePath: "/project", - usage: vfsmatch.UsageFiles, + usage: UsageFiles, useCaseSensitiveFileNames: true, matchingPaths: []string{"/project/a.ts", "/project/b.tsx"}, nonMatchingPaths: []string{"/project/a.js"}, @@ -1313,7 +1322,7 @@ func TestSpecMatcher(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { t.Parallel() - matcher := vfsmatch.NewSpecMatcher(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + matcher := NewSpecMatcher(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) if matcher == nil { t.Fatal("matcher should not be nil") } @@ -1334,7 +1343,7 @@ func TestSingleSpecMatcher(t *testing.T) { name string spec string basePath string - usage vfsmatch.Usage + usage Usage useCaseSensitiveFileNames bool expectNil bool matchingPaths []string @@ -1344,7 +1353,7 @@ func TestSingleSpecMatcher(t *testing.T) { name: "simple spec", 
spec: "*.ts", basePath: "/project", - usage: vfsmatch.UsageFiles, + usage: UsageFiles, useCaseSensitiveFileNames: true, matchingPaths: []string{"/project/a.ts"}, nonMatchingPaths: []string{"/project/a.js"}, @@ -1353,7 +1362,7 @@ func TestSingleSpecMatcher(t *testing.T) { name: "trailing ** non-exclude returns nil", spec: "**", basePath: "/project", - usage: vfsmatch.UsageFiles, + usage: UsageFiles, useCaseSensitiveFileNames: true, expectNil: true, }, @@ -1361,7 +1370,7 @@ func TestSingleSpecMatcher(t *testing.T) { name: "trailing ** exclude works", spec: "**", basePath: "/project", - usage: vfsmatch.UsageExclude, + usage: UsageExclude, useCaseSensitiveFileNames: true, matchingPaths: []string{"/project/anything", "/project/deep/path"}, }, @@ -1370,7 +1379,7 @@ func TestSingleSpecMatcher(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { t.Parallel() - matcher := vfsmatch.NewSingleSpecMatcher(tc.spec, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + matcher := NewSingleSpecMatcher(tc.spec, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) if tc.expectNil { assert.Assert(t, matcher == nil, "should be nil") return @@ -1395,7 +1404,7 @@ func TestSpecMatchers(t *testing.T) { name string specs []string basePath string - usage vfsmatch.Usage + usage Usage useCaseSensitiveFileNames bool expectNil bool pathToIndex map[string]int @@ -1404,7 +1413,7 @@ func TestSpecMatchers(t *testing.T) { name: "multiple specs return correct index", specs: []string{"*.ts", "*.tsx", "*.js"}, basePath: "/project", - usage: vfsmatch.UsageFiles, + usage: UsageFiles, useCaseSensitiveFileNames: true, pathToIndex: map[string]int{ "/project/a.ts": 0, @@ -1417,7 +1426,7 @@ func TestSpecMatchers(t *testing.T) { name: "empty specs returns nil", specs: []string{}, basePath: "/project", - usage: vfsmatch.UsageFiles, + usage: UsageFiles, useCaseSensitiveFileNames: true, expectNil: true, }, @@ -1426,7 +1435,7 @@ func TestSpecMatchers(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { t.Parallel() - matchers := vfsmatch.NewSpecMatchers(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + matchers := NewSpecMatchers(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) if tc.expectNil { assert.Assert(t, matchers == nil, "should be nil") return From f3e63f26a6e440a807681338c8e9ec97d932140e Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 12:43:17 -0800 Subject: [PATCH 11/41] Make seperable --- internal/vfs/vfsmatch/new.go | 2 +- internal/vfs/vfsmatch/old.go | 66 +----------------------- internal/vfs/vfsmatch/vfsmatch.go | 86 ++++++++++++++++++++++++++----- 3 files changed, 75 insertions(+), 79 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index 4710771110..76b7da8dbc 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -69,7 +69,7 @@ func compileGlobPattern(spec string, basePath string, usage Usage, caseSensitive components[0] = tspath.RemoveTrailingDirectorySeparator(components[0]) // Handle implicit glob (directories become dir/**/*) - if isImplicitGlob(lastComponent) { + if IsImplicitGlob(lastComponent) { components = append(components, "**", "*") } diff --git a/internal/vfs/vfsmatch/old.go b/internal/vfs/vfsmatch/old.go index d113133000..f88b2b5ddd 100644 --- a/internal/vfs/vfsmatch/old.go +++ b/internal/vfs/vfsmatch/old.go @@ -3,14 +3,12 @@ package vfsmatch import ( "fmt" "regexp" - "sort" "strings" "sync" 
"github.com/dlclark/regexp2" "github.com/microsoft/typescript-go/internal/collections" "github.com/microsoft/typescript-go/internal/core" - "github.com/microsoft/typescript-go/internal/stringutil" "github.com/microsoft/typescript-go/internal/tspath" "github.com/microsoft/typescript-go/internal/vfs" ) @@ -68,19 +66,11 @@ func replaceWildcardCharacter(match string, singleAsteriskRegexFragment string) } } -// isImplicitGlob checks if a path is implicitly a glob. -// An "includes" path "foo" is implicitly a glob "foo/** /*" (without the space) if its last component has no extension, -// and does not contain any glob characters itself. -func isImplicitGlob(lastPathComponent string) bool { - return !strings.ContainsAny(lastPathComponent, ".*?") -} - // Reserved characters - only escape actual regex metacharacters. // Go's regexp doesn't support \x escape sequences for arbitrary characters, // so we only escape characters that have special meaning in regex. var ( reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.\+*?()\[\]{}^$|#]`) - wildcardCharCodes = []rune{'*', '?'} ) var ( @@ -173,7 +163,7 @@ func getSubPatternFromSpec( // We need to remove to create our regex correctly. components[0] = tspath.RemoveTrailingDirectorySeparator(components[0]) - if isImplicitGlob(lastComponent) { + if IsImplicitGlob(lastComponent) { components = append(components, "**", "*") } @@ -227,60 +217,6 @@ func getSubPatternFromSpec( return subpattern.String() } -func getIncludeBasePath(absolute string) string { - wildcardOffset := strings.IndexAny(absolute, string(wildcardCharCodes)) - if wildcardOffset < 0 { - // No "*" or "?" in the path - if !tspath.HasExtension(absolute) { - return absolute - } else { - return tspath.RemoveTrailingDirectorySeparator(tspath.GetDirectoryPath(absolute)) - } - } - return absolute[:max(strings.LastIndex(absolute[:wildcardOffset], string(tspath.DirectorySeparator)), 0)] -} - -// getBasePaths computes the unique non-wildcard base paths amongst the provided include patterns. -func getBasePaths(path string, includes []string, useCaseSensitiveFileNames bool) []string { - // Storage for our results in the form of literal paths (e.g. the paths as written by the user). - basePaths := []string{path} - - if len(includes) > 0 { - // Storage for literal base paths amongst the include patterns. - includeBasePaths := []string{} - for _, include := range includes { - // We also need to check the relative paths by converting them to absolute and normalizing - // in case they escape the base path (e.g "..\somedirectory") - var absolute string - if tspath.IsRootedDiskPath(include) { - absolute = include - } else { - absolute = tspath.NormalizePath(tspath.CombinePaths(path, include)) - } - // Append the literal and canonical candidate base paths. - includeBasePaths = append(includeBasePaths, getIncludeBasePath(absolute)) - } - - // Sort the offsets array using either the literal or canonical path representations. 
- stringComparer := stringutil.GetStringComparer(!useCaseSensitiveFileNames) - sort.SliceStable(includeBasePaths, func(i, j int) bool { - return stringComparer(includeBasePaths[i], includeBasePaths[j]) < 0 - }) - - // Iterate over each include base path and include unique base paths that are not a - // subpath of an existing base path - for _, includeBasePath := range includeBasePaths { - if core.Every(basePaths, func(basepath string) bool { - return !tspath.ContainsPath(basepath, includeBasePath, tspath.ComparePathsOptions{CurrentDirectory: path, UseCaseSensitiveFileNames: !useCaseSensitiveFileNames}) - }) { - basePaths = append(basePaths, includeBasePath) - } - } - } - - return basePaths -} - // getFileMatcherPatterns generates file matching patterns based on the provided path, // includes, excludes, and other parameters. path is the directory of the tsconfig.json file. func getFileMatcherPatterns(path string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string) fileMatcherPatterns { diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go index df62cd4665..2aef192f0b 100644 --- a/internal/vfs/vfsmatch/vfsmatch.go +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -1,8 +1,12 @@ package vfsmatch import ( + "sort" "strings" + "github.com/microsoft/typescript-go/internal/core" + "github.com/microsoft/typescript-go/internal/stringutil" + "github.com/microsoft/typescript-go/internal/tspath" "github.com/microsoft/typescript-go/internal/vfs" ) @@ -11,7 +15,7 @@ const newNewMatch = true func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { if newNewMatch { - return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) + return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) } return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) } @@ -51,10 +55,10 @@ type SpecMatchers interface { // It returns a matcher that can test if paths match any of the patterns. func NewSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { if newNewMatch { - if m := newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { - return m - } - return nil + if m := newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil } if m := newRegexSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { return m @@ -66,10 +70,10 @@ func NewSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensiti // Returns nil if the spec compiles to an empty pattern (e.g., trailing ** for non-exclude). func NewSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { if newNewMatch { - if m := newGlobSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames); m != nil { - return m - } - return nil + if m := newGlobSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil } if m := newRegexSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames); m != nil { return m @@ -81,13 +85,69 @@ func NewSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSens // Returns nil if no valid patterns could be compiled from the specs. 
func NewSpecMatchers(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers { if newNewMatch { - if m := newGlobSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { - return m - } - return nil + if m := newGlobSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil } if m := newRegexSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { return m } return nil } + +var wildcardCharCodes = []rune{'*', '?'} + +func getIncludeBasePath(absolute string) string { + wildcardOffset := strings.IndexAny(absolute, string(wildcardCharCodes)) + if wildcardOffset < 0 { + // No "*" or "?" in the path + if !tspath.HasExtension(absolute) { + return absolute + } else { + return tspath.RemoveTrailingDirectorySeparator(tspath.GetDirectoryPath(absolute)) + } + } + return absolute[:max(strings.LastIndex(absolute[:wildcardOffset], string(tspath.DirectorySeparator)), 0)] +} + +// getBasePaths computes the unique non-wildcard base paths amongst the provided include patterns. +func getBasePaths(path string, includes []string, useCaseSensitiveFileNames bool) []string { + // Storage for our results in the form of literal paths (e.g. the paths as written by the user). + basePaths := []string{path} + + if len(includes) > 0 { + // Storage for literal base paths amongst the include patterns. + includeBasePaths := []string{} + for _, include := range includes { + // We also need to check the relative paths by converting them to absolute and normalizing + // in case they escape the base path (e.g "..\somedirectory") + var absolute string + if tspath.IsRootedDiskPath(include) { + absolute = include + } else { + absolute = tspath.NormalizePath(tspath.CombinePaths(path, include)) + } + // Append the literal and canonical candidate base paths. + includeBasePaths = append(includeBasePaths, getIncludeBasePath(absolute)) + } + + // Sort the offsets array using either the literal or canonical path representations. 
+ stringComparer := stringutil.GetStringComparer(!useCaseSensitiveFileNames) + sort.SliceStable(includeBasePaths, func(i, j int) bool { + return stringComparer(includeBasePaths[i], includeBasePaths[j]) < 0 + }) + + // Iterate over each include base path and include unique base paths that are not a + // subpath of an existing base path + for _, includeBasePath := range includeBasePaths { + if core.Every(basePaths, func(basepath string) bool { + return !tspath.ContainsPath(basepath, includeBasePath, tspath.ComparePathsOptions{CurrentDirectory: path, UseCaseSensitiveFileNames: !useCaseSensitiveFileNames}) + }) { + basePaths = append(basePaths, includeBasePath) + } + } + } + + return basePaths +} From 297c82bb4d439c23fe8b902ec8b87f6b1fe7875a Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 12:54:19 -0800 Subject: [PATCH 12/41] fmt --- internal/vfs/vfsmatch/vfsmatch.go | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go index 2aef192f0b..2435353212 100644 --- a/internal/vfs/vfsmatch/vfsmatch.go +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -15,7 +15,7 @@ const newNewMatch = true func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { if newNewMatch { - return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) + return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) } return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) } @@ -55,10 +55,10 @@ type SpecMatchers interface { // It returns a matcher that can test if paths match any of the patterns. func NewSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { if newNewMatch { - if m := newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { - return m - } - return nil + if m := newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil } if m := newRegexSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { return m @@ -70,10 +70,10 @@ func NewSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensiti // Returns nil if the spec compiles to an empty pattern (e.g., trailing ** for non-exclude). func NewSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { if newNewMatch { - if m := newGlobSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames); m != nil { - return m - } - return nil + if m := newGlobSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil } if m := newRegexSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames); m != nil { return m @@ -85,10 +85,10 @@ func NewSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSens // Returns nil if no valid patterns could be compiled from the specs. 
func NewSpecMatchers(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers { if newNewMatch { - if m := newGlobSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { - return m - } - return nil + if m := newGlobSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil } if m := newRegexSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { return m From 57c83ff9127e2ce0ed19921fe365f24dec471b15 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 12:54:38 -0800 Subject: [PATCH 13/41] Remove from getWildcardDirectoryFromSpec --- internal/tsoptions/wildcarddirectories.go | 37 +++++++++++------------ 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/internal/tsoptions/wildcarddirectories.go b/internal/tsoptions/wildcarddirectories.go index a650b3de79..20133edd5c 100644 --- a/internal/tsoptions/wildcarddirectories.go +++ b/internal/tsoptions/wildcarddirectories.go @@ -3,7 +3,6 @@ package tsoptions import ( "strings" - "github.com/dlclark/regexp2" "github.com/microsoft/typescript-go/internal/tspath" "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" ) @@ -90,9 +89,6 @@ func toCanonicalKey(path string, useCaseSensitiveFileNames bool) string { return strings.ToLower(path) } -// wildcardDirectoryPattern matches paths with wildcard characters -var wildcardDirectoryPattern = regexp2.MustCompile(`^[^*?]*(?=\/[^/]*[*?])`, 0) - // wildcardDirectoryMatch represents the result of a wildcard directory match type wildcardDirectoryMatch struct { Key string @@ -101,21 +97,24 @@ type wildcardDirectoryMatch struct { } func getWildcardDirectoryFromSpec(spec string, useCaseSensitiveFileNames bool) *wildcardDirectoryMatch { - match, _ := wildcardDirectoryPattern.FindStringMatch(spec) - if match != nil { - // We check this with a few `Index` calls because it's more efficient than complex regex - questionWildcardIndex := strings.Index(spec, "?") - starWildcardIndex := strings.Index(spec, "*") - lastDirectorySeparatorIndex := strings.LastIndexByte(spec, tspath.DirectorySeparator) - - // Determine if this should be watched recursively - recursive := (questionWildcardIndex != -1 && questionWildcardIndex < lastDirectorySeparatorIndex) || - (starWildcardIndex != -1 && starWildcardIndex < lastDirectorySeparatorIndex) - - return &wildcardDirectoryMatch{ - Key: toCanonicalKey(match.String(), useCaseSensitiveFileNames), - Path: match.String(), - Recursive: recursive, + // Find the first occurrence of a wildcard character + firstWildcard := strings.IndexAny(spec, "*?") + if firstWildcard != -1 { + // Find the last directory separator before the wildcard + lastSepBeforeWildcard := strings.LastIndexByte(spec[:firstWildcard], tspath.DirectorySeparator) + if lastSepBeforeWildcard != -1 { + path := spec[:lastSepBeforeWildcard] + lastDirectorySeparatorIndex := strings.LastIndexByte(spec, tspath.DirectorySeparator) + + // Determine if this should be watched recursively: + // recursive if the wildcard appears in a directory segment (not just the final file segment) + recursive := firstWildcard < lastDirectorySeparatorIndex + + return &wildcardDirectoryMatch{ + Key: toCanonicalKey(path, useCaseSensitiveFileNames), + Path: path, + Recursive: recursive, + } } } From 9ddea2ee529b4017070892492e768e90a3031f34 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 13:06:07 -0800 
Subject: [PATCH 14/41] bench --- internal/vfs/vfsmatch/bench_test.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/internal/vfs/vfsmatch/bench_test.go b/internal/vfs/vfsmatch/bench_test.go index 05718cf0f6..01a580192a 100644 --- a/internal/vfs/vfsmatch/bench_test.go +++ b/internal/vfs/vfsmatch/bench_test.go @@ -4,6 +4,7 @@ import ( "testing" "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/cachedvfs" "github.com/microsoft/typescript-go/internal/vfs/vfstest" ) @@ -90,18 +91,28 @@ func BenchmarkReadDirectory(b *testing.B) { includes: []string{"src/**/*.ts"}, excludes: []string{"**/node_modules/**", "**/*.test.ts"}, }, + { + name: "LargeAllFiles", + host: largeFileSystemHost, + path: "/project", + extensions: []string{".ts", ".tsx", ".js"}, + excludes: []string{"**/node_modules/**"}, + includes: []string{"**/*"}, + }, } for _, bc := range benchCases { b.Run("Old/"+bc.name, func(b *testing.B) { - host := bc.host() + host := cachedvfs.From(bc.host()) + b.ReportAllocs() for b.Loop() { matchFiles(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", nil, host) } }) b.Run("New/"+bc.name, func(b *testing.B) { - host := bc.host() + host := cachedvfs.From(bc.host()) + b.ReportAllocs() for b.Loop() { matchFilesNoRegex(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", nil, host) } From bab63db589e401919397cad7e7fbeecd3759e164 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 13:29:37 -0800 Subject: [PATCH 15/41] Eliminate another --- internal/tsoptions/tsconfigparsing.go | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/internal/tsoptions/tsconfigparsing.go b/internal/tsoptions/tsconfigparsing.go index 9c3d7e34fc..a04dd4fa29 100644 --- a/internal/tsoptions/tsconfigparsing.go +++ b/internal/tsoptions/tsconfigparsing.go @@ -3,7 +3,6 @@ package tsoptions import ( "cmp" "reflect" - "regexp" "slices" "strings" @@ -1384,7 +1383,7 @@ func validateSpecs(specs any, disallowTrailingRecursion bool, jsonSourceFile *as func specToDiagnostic(spec string, disallowTrailingRecursion bool) *diagnostics.Message { if disallowTrailingRecursion { - if ok, _ := regexp.MatchString(invalidTrailingRecursionPattern, spec); ok { + if invalidTrailingRecursion(spec) { return diagnostics.File_specification_cannot_end_in_a_recursive_directory_wildcard_Asterisk_Asterisk_Colon_0 } } else if invalidDotDotAfterRecursiveWildcard(spec) { @@ -1393,6 +1392,13 @@ func specToDiagnostic(spec string, disallowTrailingRecursion bool) *diagnostics. return nil } +func invalidTrailingRecursion(spec string) bool { + // Matches **, /**, **/, and /**/, but not a**b. + // Strip optional trailing slash, then check if it ends with /** or is just ** + s := strings.TrimSuffix(spec, "/") + return s == "**" || strings.HasSuffix(s, "/**") +} + func invalidDotDotAfterRecursiveWildcard(s string) bool { // We used to use the regex /(^|\/)\*\*\/(.*\/)?\.\.($|\/)/ to check for this case, but // in v8, that has polynomial performance because the recursive wildcard match - **/ - @@ -1417,18 +1423,6 @@ func invalidDotDotAfterRecursiveWildcard(s string) bool { return lastDotIndex > wildcardIndex } -// Tests for a path that ends in a recursive directory wildcard. -// -// Matches **, \**, **\, and \**\, but not a**b. -// NOTE: used \ in place of / above to avoid issues with multiline comments. 
-// -// Breakdown: -// -// (^|\/) # matches either the beginning of the string or a directory separator. -// \*\* # matches the recursive directory wildcard "**". -// \/?$ # matches an optional trailing directory separator at the end of the string. -const invalidTrailingRecursionPattern = `(?:^|\/)\*\*\/?$` - func GetTsConfigPropArrayElementValue(tsConfigSourceFile *ast.SourceFile, propKey string, elementValue string) *ast.StringLiteral { callback := GetCallbackForFindingPropertyAssignmentByValue(elementValue) return ForEachTsConfigPropArray(tsConfigSourceFile, propKey, func(property *ast.PropertyAssignment) *ast.StringLiteral { From 83931d1dec9b0d8d6ba12044b15b0c1d388f8eca Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 13:39:09 -0800 Subject: [PATCH 16/41] Drop bad comments --- internal/vfs/vfsmatch/vfsmatch.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go index 2435353212..08024652c6 100644 --- a/internal/vfs/vfsmatch/vfsmatch.go +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -10,7 +10,6 @@ import ( "github.com/microsoft/typescript-go/internal/vfs" ) -// newNewMatch controls whether to use the regex-free glob matching implementation. const newNewMatch = true func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { @@ -36,7 +35,6 @@ const ( ) // SpecMatcher is an interface for matching file paths against compiled glob patterns. -// It abstracts over both regex-based and regex-free implementations. type SpecMatcher interface { // MatchString returns true if the given path matches the pattern. MatchString(path string) bool From ccd944cb2e6449585f5285f63109e5afff420058 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 13:55:15 -0800 Subject: [PATCH 17/41] Make my life easier --- internal/vfs/vfsmatch/bench_test.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/internal/vfs/vfsmatch/bench_test.go b/internal/vfs/vfsmatch/bench_test.go index 01a580192a..ce98822ab0 100644 --- a/internal/vfs/vfsmatch/bench_test.go +++ b/internal/vfs/vfsmatch/bench_test.go @@ -101,7 +101,23 @@ func BenchmarkReadDirectory(b *testing.B) { }, } + var benchOnly func(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host vfs.FS) []string + // For benchmark comparison + // benchOnly = matchFiles + // benchOnly = matchFilesNoRegex + for _, bc := range benchCases { + if benchOnly != nil { + b.Run(bc.name, func(b *testing.B) { + host := cachedvfs.From(bc.host()) + b.ReportAllocs() + for b.Loop() { + benchOnly(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", nil, host) + } + }) + continue + } + b.Run("Old/"+bc.name, func(b *testing.B) { host := cachedvfs.From(bc.host()) b.ReportAllocs() From c21638abb1709759177ad334d76dd3a28611a70a Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 14:13:41 -0800 Subject: [PATCH 18/41] more perf --- internal/vfs/vfsmatch/new.go | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index 76b7da8dbc..07569d702e 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ 
-178,13 +178,14 @@ func nextPathComponent(path string, offset int) (component string, nextOffset in return "", offset, false } - // Find the end of this component - start := offset - for offset < len(path) && path[offset] != '/' { - offset++ + // Find the end of this component using optimized byte search + remaining := path[offset:] + idx := strings.IndexByte(remaining, '/') + if idx < 0 { + // No more slashes, rest of path is the component + return remaining, len(path), true } - - return path[start:offset], offset, true + return remaining[:idx], offset + idx, true } // matchPath matches the path against pattern components starting at patternIdx. @@ -455,9 +456,16 @@ func (p *globPattern) matchSegments(segments []patternSegment, segIdx int, s str return false } -// wouldMatchMinJs checks if the filename ends with .min.js +// wouldMatchMinJs checks if the filename ends with .min.js (case-insensitive) func (p *globPattern) wouldMatchMinJs(filename string) bool { - return strings.HasSuffix(strings.ToLower(filename), ".min.js") + // Check length first to avoid string operations + const suffix = ".min.js" + if len(filename) < len(suffix) { + return false + } + // Get the last 7 characters and compare case-insensitively + end := filename[len(filename)-len(suffix):] + return strings.EqualFold(end, suffix) } // patternExplicitlyIncludesMinJs checks if the pattern explicitly includes .min.js @@ -651,15 +659,18 @@ func (v *visitorNoRegex) visitDirectory( } for _, current := range systemEntries.Files { - name := pathPrefix + current - absoluteName := absPathPrefix + current - - if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(name, v.extensions) { + // Check extension first using just the filename (avoids path concatenation) + if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(current, v.extensions) { continue } + // Build absolute name for pattern matching + absoluteName := absPathPrefix + current + matchIdx := v.fileMatcher.MatchesFile(absoluteName) if matchIdx >= 0 { + // Only build the relative name if we have a match + name := pathPrefix + current if v.numIncludePatterns == 0 { v.results[0] = append(v.results[0], name) } else { From f37c9bf0383b9525d067e525dfa4d291af924588 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 15:26:38 -0800 Subject: [PATCH 19/41] Simplify --- internal/vfs/vfsmatch/new.go | 207 +++++++++--------------------- internal/vfs/vfsmatch/vfsmatch.go | 2 +- 2 files changed, 61 insertions(+), 148 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index 07569d702e..b2fe893bc8 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -142,9 +142,7 @@ func (p *globPattern) matches(path string) bool { if p == nil { return false } - - // Use iterator-based matching to avoid slice allocation - return p.matchPath(path, 0, false) + return p.matchPathWorker(path, 0, 0, false, false) } // matchesPrefix checks if the given directory path could potentially match files under it. @@ -153,8 +151,7 @@ func (p *globPattern) matchesPrefix(path string) bool { if p == nil { return false } - - return p.matchPathPrefix(path, 0) + return p.matchPathWorker(path, 0, 0, false, true) } // nextPathComponent extracts the next path component from path starting at offset. 
@@ -188,32 +185,21 @@ func nextPathComponent(path string, offset int) (component string, nextOffset in return remaining[:idx], offset + idx, true } -// matchPath matches the path against pattern components starting at patternIdx. -// pathOffset is the current position in the path string. -func (p *globPattern) matchPath(path string, patternIdx int, inDoubleAsterisk bool) bool { - // Bootstrap: handle the path from the beginning - return p.matchPathAt(path, 0, patternIdx, inDoubleAsterisk) -} - -// matchPathAt matches path[pathOffset:] against pattern components starting at patternIdx. -func (p *globPattern) matchPathAt(path string, pathOffset int, patternIdx int, inDoubleAsterisk bool) bool { +// matchPathWorker is the unified path matching function. +// When prefixMatch is true, it checks if the path could be a prefix of a matching path. +// When prefixMatch is false, it checks if the path fully matches the pattern. +func (p *globPattern) matchPathWorker(path string, pathOffset int, patternIdx int, inDoubleAsterisk bool, prefixMatch bool) bool { for { // Get the next path component pathComp, nextPathOffset, hasMore := nextPathComponent(path, pathOffset) - // If we've consumed all pattern components - if patternIdx >= len(p.components) { - if p.isExclude { - // For exclude patterns, we can match a prefix + // If we've consumed all path components + if !hasMore { + if prefixMatch { + // For prefix matching, any prefix could match return true } - // Path must also be fully consumed - return !hasMore - } - - // If we've consumed all path components but still have pattern components - if !hasMore { - // For exclude patterns, if remaining is just the implicit glob suffix (** and *), match + // For full matching, check remaining pattern components if p.isExclude { return p.isImplicitGlobSuffix(patternIdx) } @@ -226,12 +212,22 @@ func (p *globPattern) matchPathAt(path string, pathOffset int, patternIdx int, i return true } + // If we've consumed all pattern components + if patternIdx >= len(p.components) { + if prefixMatch { + // For prefix matching, no more matches possible + return false + } + // For full matching with exclude patterns, we can match a prefix + return p.isExclude + } + pc := p.components[patternIdx] if pc.isDoubleAsterisk { // ** can match zero or more directory levels // First, try matching zero directories (skip the **) - this requires recursion - if p.matchPathAt(path, pathOffset, patternIdx+1, true) { + if p.matchPathWorker(path, pathOffset, patternIdx+1, true, prefixMatch) { return true } @@ -247,7 +243,6 @@ func (p *globPattern) matchPathAt(path string, pathOffset int, patternIdx int, i // Match current component with ** and continue (iterate instead of recurse) pathOffset = nextPathOffset - // patternIdx stays the same, inDoubleAsterisk stays true inDoubleAsterisk = true continue } @@ -269,67 +264,6 @@ func (p *globPattern) matchPathAt(path string, pathOffset int, patternIdx int, i } } -// matchPathPrefix checks if the path could be a prefix of a matching path. -func (p *globPattern) matchPathPrefix(path string, patternIdx int) bool { - return p.matchPathPrefixAt(path, 0, patternIdx) -} - -// matchPathPrefixAt checks if path[pathOffset:] could be a prefix of a matching path. 
-func (p *globPattern) matchPathPrefixAt(path string, pathOffset int, patternIdx int) bool { - for { - // Get the next path component - pathComp, nextPathOffset, hasMore := nextPathComponent(path, pathOffset) - - // If we've consumed all path components, this prefix could match - if !hasMore { - return true - } - - // If we've consumed all pattern components, no more matches possible - if patternIdx >= len(p.components) { - return false - } - - pc := p.components[patternIdx] - - if pc.isDoubleAsterisk { - // ** can match any directory level - // Try matching zero (skip **) or more directories - needs recursion for branching - if p.matchPathPrefixAt(path, pathOffset, patternIdx+1) { - return true - } - - // For include patterns, ** should not match hidden or package directories - if !p.isExclude { - if len(pathComp) > 0 && pathComp[0] == '.' { - return false - } - if isCommonPackageFolder(pathComp) { - return false - } - } - - // Iterate: consume path component, keep same pattern index - pathOffset = nextPathOffset - continue - } - - // Check implicit package folder exclusion - if pc.implicitlyExcludePackages && !p.isExclude && isCommonPackageFolder(pathComp) { - return false - } - - // Match current component - if !p.matchComponent(pc, pathComp, false) { - return false - } - - // Iterate: advance both path and pattern - pathOffset = nextPathOffset - patternIdx++ - } -} - // matchComponent matches a single path component against a pattern component func (p *globPattern) matchComponent(pc patternComponent, pathComp string, afterDoubleAsterisk bool) bool { if pc.isDoubleAsterisk { @@ -378,14 +312,26 @@ func (p *globPattern) matchWildcardComponent(segments []patternSegment, s string if !p.stringsEqual(suffix, sSuffix) { return false } - // Check min.js exclusion - if p.excludeMinJs && p.wouldMatchMinJs(s) && !p.patternExplicitlyIncludesMinJs(segments) { - return false - } - return true + return p.checkMinJsExclusion(s, segments) + } + + if !p.matchSegments(segments, 0, s, 0) { + return false } + return p.checkMinJsExclusion(s, segments) +} - return p.matchSegments(segments, 0, s, 0) +// checkMinJsExclusion returns true if the match should be allowed (not excluded). +// Returns false if this is a .min.js file that should be excluded. 
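+// For example, with excludeMinJs set, "app.min.js" is kept only when the pattern explicitly names ".min.js".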
+func (p *globPattern) checkMinJsExclusion(filename string, segments []patternSegment) bool { + if !p.excludeMinJs { + return true + } + if !p.wouldMatchMinJs(filename) { + return true + } + // Exclude .min.js unless pattern explicitly includes it + return p.patternExplicitlyIncludesMinJs(segments) } func (p *globPattern) matchSegments(segments []patternSegment, segIdx int, s string, sIdx int) bool { @@ -420,33 +366,16 @@ func (p *globPattern) matchSegments(segments []patternSegment, segIdx int, s str case segmentStar: // Match zero or more characters (not /) - // For files usage, also need to handle .min.js exclusion - // Try matching zero characters first if p.matchSegments(segments, segIdx+1, s, sIdx) { - // Before returning true, check min.js exclusion - if p.excludeMinJs && segIdx == 0 && segIdx+1 < len(segments) { - // Check if this could result in matching a .min.js file - if p.wouldMatchMinJs(s) { - return false - } - } return true } - // Try matching more characters for i := sIdx; i < len(s); i++ { if s[i] == '/' { break } if p.matchSegments(segments, segIdx+1, s, i+1) { - // Check min.js exclusion - if p.excludeMinJs && strings.HasSuffix(s, ".min.js") { - // Only exclude if pattern doesn't explicitly include .min.js - if !p.patternExplicitlyIncludesMinJs(segments) { - return false - } - } return true } } @@ -740,7 +669,7 @@ func matchFilesNoRegex(path string, extensions []string, excludes []string, incl return core.Flatten(results) } -// globSpecMatcher wraps a globMatcher for SpecMatcher interface. +// globSpecMatcher wraps glob patterns for matching paths. type globSpecMatcher struct { patterns []*globPattern } @@ -757,6 +686,25 @@ func (m *globSpecMatcher) MatchString(path string) bool { return false } +func (m *globSpecMatcher) MatchIndex(path string) int { + if m == nil { + return -1 + } + for i, p := range m.patterns { + if p.matches(path) { + return i + } + } + return -1 +} + +func (m *globSpecMatcher) Len() int { + if m == nil { + return 0 + } + return len(m.patterns) +} + // newGlobSpecMatcher creates a glob-based matcher for multiple specs. func newGlobSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatcher { if len(specs) == 0 { @@ -782,38 +730,3 @@ func newGlobSingleSpecMatcher(spec string, basePath string, usage Usage, useCase } return &globSpecMatcher{patterns: []*globPattern{pattern}} } - -// globSpecMatchers holds a list of individual glob matchers for index lookup. -type globSpecMatchers struct { - patterns []*globPattern -} - -func (m *globSpecMatchers) MatchIndex(path string) int { - for i, p := range m.patterns { - if p.matches(path) { - return i - } - } - return -1 -} - -func (m *globSpecMatchers) Len() int { - return len(m.patterns) -} - -// newGlobSpecMatchers creates individual glob matchers for each spec. 
-func newGlobSpecMatchers(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatchers { - if len(specs) == 0 { - return nil - } - var patterns []*globPattern - for _, spec := range specs { - if pattern := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames); pattern != nil { - patterns = append(patterns, pattern) - } - } - if len(patterns) == 0 { - return nil - } - return &globSpecMatchers{patterns: patterns} -} diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go index 08024652c6..4a41f31381 100644 --- a/internal/vfs/vfsmatch/vfsmatch.go +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -83,7 +83,7 @@ func NewSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSens // Returns nil if no valid patterns could be compiled from the specs. func NewSpecMatchers(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers { if newNewMatch { - if m := newGlobSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { + if m := newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { return m } return nil From 314bc82e8dac98cbe0c6313f24dd980493aac99b Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 15:48:23 -0800 Subject: [PATCH 20/41] Proper enum --- internal/tsoptions/tsconfigparsing.go | 6 ++--- internal/tsoptions/wildcarddirectories.go | 2 +- internal/vfs/vfsmatch/old.go | 18 +++++++------- internal/vfs/vfsmatch/stringer_generated.go | 26 +++++++++++++++++++++ internal/vfs/vfsmatch/vfsmatch.go | 19 ++++++++------- 5 files changed, 50 insertions(+), 21 deletions(-) create mode 100644 internal/vfs/vfsmatch/stringer_generated.go diff --git a/internal/tsoptions/tsconfigparsing.go b/internal/tsoptions/tsconfigparsing.go index a04dd4fa29..d6099214af 100644 --- a/internal/tsoptions/tsconfigparsing.go +++ b/internal/tsoptions/tsconfigparsing.go @@ -104,7 +104,7 @@ func (c *configFileSpecs) matchesExclude(fileName string, comparePathsOptions ts if len(c.validatedExcludeSpecs) == 0 { return false } - excludeMatcher := vfsmatch.NewSpecMatcher(c.validatedExcludeSpecs, comparePathsOptions.CurrentDirectory, "exclude", comparePathsOptions.UseCaseSensitiveFileNames) + excludeMatcher := vfsmatch.NewSpecMatcher(c.validatedExcludeSpecs, comparePathsOptions.CurrentDirectory, vfsmatch.UsageExclude, comparePathsOptions.UseCaseSensitiveFileNames) if excludeMatcher == nil { return false } @@ -124,7 +124,7 @@ func (c *configFileSpecs) getMatchedIncludeSpec(fileName string, comparePathsOpt return "" } for index, spec := range c.validatedIncludeSpecs { - includeMatcher := vfsmatch.NewSingleSpecMatcher(spec, comparePathsOptions.CurrentDirectory, "files", comparePathsOptions.UseCaseSensitiveFileNames) + includeMatcher := vfsmatch.NewSingleSpecMatcher(spec, comparePathsOptions.CurrentDirectory, vfsmatch.UsageFiles, comparePathsOptions.UseCaseSensitiveFileNames) if includeMatcher != nil && includeMatcher.MatchString(fileName) { return c.validatedIncludeSpecsBeforeSubstitution[index] } @@ -1660,7 +1660,7 @@ func getFileNamesFromConfigSpecs( if tspath.FileExtensionIs(file, tspath.ExtensionJson) { if jsonOnlyIncludeMatchers == nil { includes := core.Filter(validatedIncludeSpecs, func(include string) bool { return strings.HasSuffix(include, tspath.ExtensionJson) }) - jsonOnlyIncludeMatchers = vfsmatch.NewSpecMatchers(includes, basePath, "files", host.UseCaseSensitiveFileNames()) + jsonOnlyIncludeMatchers = 
vfsmatch.NewSpecMatchers(includes, basePath, vfsmatch.UsageFiles, host.UseCaseSensitiveFileNames()) } var includeIndex int = -1 if jsonOnlyIncludeMatchers != nil { diff --git a/internal/tsoptions/wildcarddirectories.go b/internal/tsoptions/wildcarddirectories.go index 20133edd5c..56abf49291 100644 --- a/internal/tsoptions/wildcarddirectories.go +++ b/internal/tsoptions/wildcarddirectories.go @@ -25,7 +25,7 @@ func getWildcardDirectories(include []string, exclude []string, comparePathsOpti return nil } - excludeMatcher := vfsmatch.NewSpecMatcher(exclude, comparePathsOptions.CurrentDirectory, "exclude", comparePathsOptions.UseCaseSensitiveFileNames) + excludeMatcher := vfsmatch.NewSpecMatcher(exclude, comparePathsOptions.CurrentDirectory, vfsmatch.UsageExclude, comparePathsOptions.UseCaseSensitiveFileNames) wildcardDirectories := make(map[string]bool) wildCardKeyToPath := make(map[string]string) diff --git a/internal/vfs/vfsmatch/old.go b/internal/vfs/vfsmatch/old.go index f88b2b5ddd..64cdad7b44 100644 --- a/internal/vfs/vfsmatch/old.go +++ b/internal/vfs/vfsmatch/old.go @@ -46,7 +46,7 @@ func getRegularExpressionForWildcard(specs []string, basePath string, usage Usag // If excluding, match "foo/bar/baz...", but if including, only allow "foo". var terminator string - if usage == "exclude" { + if usage == UsageExclude { terminator = "($|/)" } else { terminator = "$" @@ -137,7 +137,7 @@ func getPatternFromSpec( if pattern == "" { return "" } - ending := core.IfElse(usage == "exclude", "($|/)", "$") + ending := core.IfElse(usage == UsageExclude, "($|/)", "$") return fmt.Sprintf("^(%s)%s", pattern, ending) } @@ -155,7 +155,7 @@ func getSubPatternFromSpec( hasWrittenComponent := false components := tspath.GetNormalizedPathComponents(spec, basePath) lastComponent := core.LastOrNil(components) - if usage != "exclude" && lastComponent == "**" { + if usage != UsageExclude && lastComponent == "**" { return "" } @@ -172,7 +172,7 @@ func getSubPatternFromSpec( if component == "**" { subpattern.WriteString(matcher.doubleAsteriskRegexFragment) } else { - if usage == "directories" { + if usage == UsageDirectories { subpattern.WriteString("(") optionalCount++ } @@ -181,7 +181,7 @@ func getSubPatternFromSpec( subpattern.WriteRune(tspath.DirectorySeparator) } - if usage != "exclude" { + if usage != UsageExclude { var componentPattern strings.Builder if strings.HasPrefix(component, "*") { componentPattern.WriteString("([^./]" + matcher.singleAsteriskRegexFragment + ")?") @@ -225,10 +225,10 @@ func getFileMatcherPatterns(path string, excludes []string, includes []string, u absolutePath := tspath.CombinePaths(currentDirectory, path) return fileMatcherPatterns{ - includeFilePatterns: core.Map(getRegularExpressionsForWildcards(includes, absolutePath, "files"), func(pattern string) string { return "^" + pattern + "$" }), - includeFilePattern: getRegularExpressionForWildcard(includes, absolutePath, "files"), - includeDirectoryPattern: getRegularExpressionForWildcard(includes, absolutePath, "directories"), - excludePattern: getRegularExpressionForWildcard(excludes, absolutePath, "exclude"), + includeFilePatterns: core.Map(getRegularExpressionsForWildcards(includes, absolutePath, UsageFiles), func(pattern string) string { return "^" + pattern + "$" }), + includeFilePattern: getRegularExpressionForWildcard(includes, absolutePath, UsageFiles), + includeDirectoryPattern: getRegularExpressionForWildcard(includes, absolutePath, UsageDirectories), + excludePattern: getRegularExpressionForWildcard(excludes, absolutePath, 
UsageExclude), basePaths: getBasePaths(path, includes, useCaseSensitiveFileNames), } } diff --git a/internal/vfs/vfsmatch/stringer_generated.go b/internal/vfs/vfsmatch/stringer_generated.go new file mode 100644 index 0000000000..18d4c40eac --- /dev/null +++ b/internal/vfs/vfsmatch/stringer_generated.go @@ -0,0 +1,26 @@ +// Code generated by "stringer -type=Usage -trimprefix=Usage -output=stringer_generated.go"; DO NOT EDIT. + +package vfsmatch + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[UsageFiles-0] + _ = x[UsageDirectories-1] + _ = x[UsageExclude-2] +} + +const _Usage_name = "FilesDirectoriesExclude" + +var _Usage_index = [...]uint8{0, 5, 16, 23} + +func (i Usage) String() string { + idx := int(i) - 0 + if i < 0 || idx >= len(_Usage_index)-1 { + return "Usage(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _Usage_name[_Usage_index[idx]:_Usage_index[idx+1]] +} diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go index 4a41f31381..d0e7bf836d 100644 --- a/internal/vfs/vfsmatch/vfsmatch.go +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -10,6 +10,17 @@ import ( "github.com/microsoft/typescript-go/internal/vfs" ) +//go:generate go tool golang.org/x/tools/cmd/stringer -type=Usage -trimprefix=Usage -output=stringer_generated.go +//go:generate go tool mvdan.cc/gofumpt -w stringer_generated.go + +type Usage int8 + +const ( + UsageFiles Usage = iota + UsageDirectories + UsageExclude +) + const newNewMatch = true func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { @@ -26,14 +37,6 @@ func IsImplicitGlob(lastPathComponent string) bool { return !strings.ContainsAny(lastPathComponent, ".*?") } -type Usage string - -const ( - UsageFiles Usage = "files" - UsageDirectories Usage = "directories" - UsageExclude Usage = "exclude" -) - // SpecMatcher is an interface for matching file paths against compiled glob patterns. type SpecMatcher interface { // MatchString returns true if the given path matches the pattern. 
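
A note on the Usage change in PATCH 20 above: with stringer's -trimprefix=Usage, the generated String method returns the constant name minus the prefix, so diagnostics keep printing "Files"/"Directories"/"Exclude" even though Usage is now an int8. The following is a minimal standalone sketch of that behavior; the hand-written String here only mirrors what the generated stringer_generated.go in the patch produces and is not part of the package.

package main

import "fmt"

// Usage mirrors the enum introduced in PATCH 20; the constant order must match
// the generated stringer table (Files, Directories, Exclude).
type Usage int8

const (
	UsageFiles Usage = iota
	UsageDirectories
	UsageExclude
)

// String is a hand-written stand-in for the stringer-generated method, shown
// only to illustrate what -trimprefix=Usage produces.
func (u Usage) String() string {
	switch u {
	case UsageFiles:
		return "Files"
	case UsageDirectories:
		return "Directories"
	case UsageExclude:
		return "Exclude"
	default:
		return fmt.Sprintf("Usage(%d)", int8(u))
	}
}

func main() {
	fmt.Println(UsageFiles, UsageDirectories, UsageExclude) // Files Directories Exclude
}
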
From 6919ad37ed248d3283b01a7ed277dc9618a943de Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 15:51:33 -0800 Subject: [PATCH 21/41] No hardcode --- internal/vfs/vfsmatch/new.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index b2fe893bc8..eeccdb3aa9 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -452,11 +452,11 @@ func (p *globPattern) stringsEqual(a, b string) bool { func isCommonPackageFolder(name string) bool { // Quick length check to avoid EqualFold for most cases switch len(name) { - case 12: // node_modules + case len("node_modules"): return strings.EqualFold(name, "node_modules") - case 16: // bower_components + case len("bower_components"): return strings.EqualFold(name, "bower_components") - case 13: // jspm_packages + case len("jspm_packages"): return strings.EqualFold(name, "jspm_packages") default: return false From 834fbd830943f5acac7c7b98f919667489513301 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 16:09:58 -0800 Subject: [PATCH 22/41] Big simplify pass --- internal/vfs/vfsmatch/new.go | 736 ++++++++++++++++------------------- 1 file changed, 335 insertions(+), 401 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index eeccdb3aa9..fce9d6dc9f 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -9,373 +9,354 @@ import ( "github.com/microsoft/typescript-go/internal/vfs" ) -// globPattern represents a compiled glob pattern for matching file paths. -// It stores the pattern components for efficient matching without using regex. +// globPattern is a compiled glob pattern for matching file paths without regex. type globPattern struct { - // The original pattern specification - spec string - // The base path from which the pattern was derived - basePath string - // The normalized path components to match - components []patternComponent - // Whether this is an exclude pattern (affects matching behavior) - isExclude bool - // Whether pattern matching should be case-sensitive + components []component // path segments to match (e.g., ["src", "**", "*.ts"]) + isExclude bool // exclude patterns have different matching rules caseSensitive bool - // For files patterns, exclude .min.js by default - excludeMinJs bool + excludeMinJs bool // for "files" patterns, exclude .min.js by default } -// patternComponent represents a single segment of a glob pattern -type patternComponent struct { - // Whether this component is a ** wildcard - isDoubleAsterisk bool - // The literal text if not a wildcard pattern - literal string - // Whether this component contains wildcards - hasWildcards bool - // Parsed wildcard segments for matching - segments []patternSegment - // For include patterns (not exclude), implicitly exclude common package folders - implicitlyExcludePackages bool +// component is a single path segment in a glob pattern. +// Examples: "src" (literal), "*" (wildcard), "*.ts" (wildcard), "**" (recursive) +type component struct { + kind componentKind + literal string // for kindLiteral: the exact string to match + segments []segment // for kindWildcard: parsed wildcard pattern + // Include patterns with wildcards skip common package folders (node_modules, etc.) 
+ skipPackageFolders bool } -// patternSegment represents a parsed segment within a component -type patternSegment struct { +type componentKind int + +const ( + kindLiteral componentKind = iota // exact match (e.g., "src") + kindWildcard // contains * or ? (e.g., "*.ts") + kindDoubleAsterisk // ** matches zero or more directories +) + +// segment is a piece of a wildcard component. +// Example: "*.ts" becomes [segStar, segLiteral(".ts")] +type segment struct { kind segmentKind - literal string + literal string // only for segLiteral } type segmentKind int const ( - segmentLiteral segmentKind = iota - segmentStar // * - matches any chars except / - segmentQuestion // ? - matches single char except / + segLiteral segmentKind = iota // exact text + segStar // * matches any chars except / + segQuestion // ? matches single char except / ) -// compileGlobPattern compiles a glob spec into a globPattern for matching. +// compileGlobPattern compiles a glob spec (e.g., "src/**/*.ts") into a pattern. +// Returns nil if the pattern would match nothing. func compileGlobPattern(spec string, basePath string, usage Usage, caseSensitive bool) *globPattern { - components := tspath.GetNormalizedPathComponents(spec, basePath) - lastComponent := core.LastOrNil(components) + parts := tspath.GetNormalizedPathComponents(spec, basePath) - // If the last component is ** and this is not an exclude pattern, return nil - // (such patterns match nothing) - if usage != UsageExclude && lastComponent == "**" { + // "src/**" without a filename matches nothing (for include patterns) + if usage != UsageExclude && core.LastOrNil(parts) == "**" { return nil } - // Remove trailing separator from root component - components[0] = tspath.RemoveTrailingDirectorySeparator(components[0]) + // Normalize root: "/home/" -> "/home" + parts[0] = tspath.RemoveTrailingDirectorySeparator(parts[0]) - // Handle implicit glob (directories become dir/**/*) - if IsImplicitGlob(lastComponent) { - components = append(components, "**", "*") + // Directories implicitly match all files: "src" -> "src/**/*" + if IsImplicitGlob(core.LastOrNil(parts)) { + parts = append(parts, "**", "*") } - pattern := &globPattern{ - spec: spec, - basePath: basePath, + p := &globPattern{ isExclude: usage == UsageExclude, caseSensitive: caseSensitive, excludeMinJs: usage == UsageFiles, } - for _, comp := range components { - pc := patternComponent{} - - if comp == "**" { - pc.isDoubleAsterisk = true - } else { - pc.hasWildcards = strings.ContainsAny(comp, "*?") - - if pc.hasWildcards { - pc.segments = parsePatternSegments(comp) - // For non-exclude patterns with wildcards, implicitly exclude common package folders - if usage != UsageExclude { - pc.implicitlyExcludePackages = true - } - } else { - pc.literal = comp - } - } - - pattern.components = append(pattern.components, pc) + for _, part := range parts { + p.components = append(p.components, parseComponent(part, usage != UsageExclude)) } + return p +} - return pattern +// parseComponent converts a path segment string into a component. 
+func parseComponent(s string, isInclude bool) component { + if s == "**" { + return component{kind: kindDoubleAsterisk} + } + if !strings.ContainsAny(s, "*?") { + return component{kind: kindLiteral, literal: s} + } + return component{ + kind: kindWildcard, + segments: parseSegments(s), + skipPackageFolders: isInclude, + } } -// parsePatternSegments breaks a component with wildcards into segments -func parsePatternSegments(comp string) []patternSegment { - var segments []patternSegment - var current strings.Builder +// parseSegments breaks "*.ts" into [segStar, segLiteral(".ts")] +func parseSegments(s string) []segment { + var result []segment + var buf strings.Builder - for i := range len(comp) { - switch comp[i] { + flushLiteral := func() { + if buf.Len() > 0 { + result = append(result, segment{kind: segLiteral, literal: buf.String()}) + buf.Reset() + } + } + + for i := range len(s) { + switch s[i] { case '*': - if current.Len() > 0 { - segments = append(segments, patternSegment{kind: segmentLiteral, literal: current.String()}) - current.Reset() - } - segments = append(segments, patternSegment{kind: segmentStar}) + flushLiteral() + result = append(result, segment{kind: segStar}) case '?': - if current.Len() > 0 { - segments = append(segments, patternSegment{kind: segmentLiteral, literal: current.String()}) - current.Reset() - } - segments = append(segments, patternSegment{kind: segmentQuestion}) + flushLiteral() + result = append(result, segment{kind: segQuestion}) default: - current.WriteByte(comp[i]) + buf.WriteByte(s[i]) } } - - if current.Len() > 0 { - segments = append(segments, patternSegment{kind: segmentLiteral, literal: current.String()}) - } - - return segments + flushLiteral() + return result } -// matches checks if the given path matches this glob pattern. +// matches returns true if path matches this pattern. func (p *globPattern) matches(path string) bool { if p == nil { return false } - return p.matchPathWorker(path, 0, 0, false, false) + return p.matchPath(path, 0, 0, false) } -// matchesPrefix checks if the given directory path could potentially match files under it. -// This is used for directory filtering during traversal. +// matchesPrefix returns true if files under this directory path could match. +// Used to skip directories during traversal. func (p *globPattern) matchesPrefix(path string) bool { if p == nil { return false } - return p.matchPathWorker(path, 0, 0, false, true) + return p.matchPathPrefix(path, 0, 0) } -// nextPathComponent extracts the next path component from path starting at offset. -// Returns the component, the offset after this component (pointing to char after '/' or len(path)), and whether a component was found. -func nextPathComponent(path string, offset int) (component string, nextOffset int, found bool) { - if offset >= len(path) { - return "", offset, false - } - - // Handle leading slash for absolute paths - return empty string for root - if offset == 0 && path[0] == '/' { - return "", 1, true - } - - // Skip any leading slashes (for cases like after root) - for offset < len(path) && path[offset] == '/' { - offset++ - } - - if offset >= len(path) { - return "", offset, false - } +// matchPath checks if path matches the pattern starting from the given offsets. +// afterRecursive is true if we just matched a ** (affects dot-file handling). 
+func (p *globPattern) matchPath(path string, pathOffset, compIdx int, afterRecursive bool) bool { + for { + pathPart, nextOffset, ok := nextPathPart(path, pathOffset) + if !ok { + // No more path parts - check if pattern is satisfied + return p.patternSatisfied(compIdx) + } - // Find the end of this component using optimized byte search - remaining := path[offset:] - idx := strings.IndexByte(remaining, '/') - if idx < 0 { - // No more slashes, rest of path is the component - return remaining, len(path), true - } - return remaining[:idx], offset + idx, true -} + if compIdx >= len(p.components) { + // Path has more parts but pattern is done + // Exclude patterns match prefixes (e.g., "node_modules" excludes "node_modules/foo") + return p.isExclude + } -// matchPathWorker is the unified path matching function. -// When prefixMatch is true, it checks if the path could be a prefix of a matching path. -// When prefixMatch is false, it checks if the path fully matches the pattern. -func (p *globPattern) matchPathWorker(path string, pathOffset int, patternIdx int, inDoubleAsterisk bool, prefixMatch bool) bool { - for { - // Get the next path component - pathComp, nextPathOffset, hasMore := nextPathComponent(path, pathOffset) + comp := p.components[compIdx] - // If we've consumed all path components - if !hasMore { - if prefixMatch { - // For prefix matching, any prefix could match + switch comp.kind { + case kindDoubleAsterisk: + // ** can match zero directories: try skipping it + if p.matchPath(path, pathOffset, compIdx+1, true) { return true } - // For full matching, check remaining pattern components - if p.isExclude { - return p.isImplicitGlobSuffix(patternIdx) - } - // Check if remaining pattern components are all optional (** only) - for i := patternIdx; i < len(p.components); i++ { - if !p.components[i].isDoubleAsterisk { + // ** should not match hidden dirs or package folders (for includes) + if !p.isExclude { + if len(pathPart) > 0 && pathPart[0] == '.' { + return false + } + if isPackageFolder(pathPart) { return false } } - return true - } + // ** matches this directory, try next path part with same ** + pathOffset = nextOffset + afterRecursive = true + continue - // If we've consumed all pattern components - if patternIdx >= len(p.components) { - if prefixMatch { - // For prefix matching, no more matches possible + case kindLiteral: + if comp.skipPackageFolders && isPackageFolder(pathPart) { return false } - // For full matching with exclude patterns, we can match a prefix - return p.isExclude + if !p.stringsEqual(comp.literal, pathPart) { + return false + } + + case kindWildcard: + if comp.skipPackageFolders && isPackageFolder(pathPart) { + return false + } + if !p.matchWildcard(comp.segments, pathPart) { + return false + } + } + + pathOffset = nextOffset + compIdx++ + afterRecursive = false + } +} + +// matchPathPrefix checks if path could be a prefix of a matching path. +// Similar to matchPath but returns true when path is exhausted. 
+func (p *globPattern) matchPathPrefix(path string, pathOffset, compIdx int) bool { + for { + pathPart, nextOffset, ok := nextPathPart(path, pathOffset) + if !ok { + // Path exhausted - any prefix could potentially match + return true } - pc := p.components[patternIdx] + if compIdx >= len(p.components) { + return false + } - if pc.isDoubleAsterisk { - // ** can match zero or more directory levels - // First, try matching zero directories (skip the **) - this requires recursion - if p.matchPathWorker(path, pathOffset, patternIdx+1, true, prefixMatch) { + comp := p.components[compIdx] + + switch comp.kind { + case kindDoubleAsterisk: + if p.matchPathPrefix(path, pathOffset, compIdx+1) { return true } - - // For include patterns, ** should not match directories starting with . or common package folders if !p.isExclude { - if len(pathComp) > 0 && pathComp[0] == '.' { + if len(pathPart) > 0 && pathPart[0] == '.' { return false } - if isCommonPackageFolder(pathComp) { + if isPackageFolder(pathPart) { return false } } - - // Match current component with ** and continue (iterate instead of recurse) - pathOffset = nextPathOffset - inDoubleAsterisk = true + pathOffset = nextOffset continue - } - // Check implicit package folder exclusion - if pc.implicitlyExcludePackages && !p.isExclude && isCommonPackageFolder(pathComp) { - return false + case kindLiteral: + if comp.skipPackageFolders && isPackageFolder(pathPart) { + return false + } + if !p.stringsEqual(comp.literal, pathPart) { + return false + } + + case kindWildcard: + if comp.skipPackageFolders && isPackageFolder(pathPart) { + return false + } + if !p.matchWildcard(comp.segments, pathPart) { + return false + } } - // Match current component - if !p.matchComponent(pc, pathComp, inDoubleAsterisk) { + pathOffset = nextOffset + compIdx++ + } +} + +// patternSatisfied checks if remaining pattern components can match empty input. +func (p *globPattern) patternSatisfied(compIdx int) bool { + if p.isExclude { + // Exclude patterns: check for implicit glob suffix (added for directories) + return p.isImplicitGlobSuffix(compIdx) + } + // Include patterns: all remaining components must be ** (matches zero dirs) + for i := compIdx; i < len(p.components); i++ { + if p.components[i].kind != kindDoubleAsterisk { return false } - - // Continue to next components (iterate instead of recurse) - pathOffset = nextPathOffset - patternIdx++ - inDoubleAsterisk = false } + return true } -// matchComponent matches a single path component against a pattern component -func (p *globPattern) matchComponent(pc patternComponent, pathComp string, afterDoubleAsterisk bool) bool { - if pc.isDoubleAsterisk { - // Should not happen here, handled separately - return true +// nextPathPart extracts the next path component from path starting at offset. 
+func nextPathPart(path string, offset int) (part string, nextOffset int, ok bool) { + if offset >= len(path) { + return "", offset, false } - // If the pattern component has no wildcards, do literal comparison - if !pc.hasWildcards { - return p.stringsEqual(pc.literal, pathComp) + // Handle leading slash (root of absolute path) + if offset == 0 && path[0] == '/' { + return "", 1, true } - // Match with wildcards - // Note: The check for dotted names after ** is handled in matchWildcardComponent - // where we only reject if the pattern itself starts with a wildcard - return p.matchWildcardComponent(pc.segments, pathComp) + // Skip consecutive slashes + for offset < len(path) && path[offset] == '/' { + offset++ + } + if offset >= len(path) { + return "", offset, false + } + + // Find end of this component + rest := path[offset:] + if idx := strings.IndexByte(rest, '/'); idx >= 0 { + return rest[:idx], offset + idx, true + } + return rest, len(path), true } -// matchWildcardComponent matches a path component against wildcard segments -func (p *globPattern) matchWildcardComponent(segments []patternSegment, s string) bool { - // For non-exclude patterns, if the segments start with * or ?, - // the matched string cannot start with '.' - if !p.isExclude && len(segments) > 0 && len(s) > 0 && s[0] == '.' { - firstSeg := segments[0] - if firstSeg.kind == segmentStar || firstSeg.kind == segmentQuestion { - // Pattern starts with wildcard, so it cannot match a string starting with '.' +// matchWildcard matches a path component against wildcard segments. +func (p *globPattern) matchWildcard(segs []segment, s string) bool { + // Include patterns: wildcards at start cannot match hidden files + if !p.isExclude && len(segs) > 0 && len(s) > 0 && s[0] == '.' { + if segs[0].kind == segStar || segs[0].kind == segQuestion { return false } } - // Fast path for common pattern: * followed by literal suffix (e.g., "*.ts") - if len(segments) == 2 && segments[0].kind == segmentStar && segments[1].kind == segmentLiteral { - suffix := segments[1].literal + // Fast path: single * followed by literal suffix (e.g., "*.ts") + if len(segs) == 2 && segs[0].kind == segStar && segs[1].kind == segLiteral { + suffix := segs[1].literal if len(s) < len(suffix) { return false } - // Check that there are no slashes in what * would match - prefixLen := len(s) - len(suffix) - for i := range prefixLen { - if s[i] == '/' { - return false - } - } - // Check suffix match - sSuffix := s[prefixLen:] - if !p.stringsEqual(suffix, sSuffix) { + matched := s[len(s)-len(suffix):] + if !p.stringsEqual(suffix, matched) { return false } - return p.checkMinJsExclusion(s, segments) + return p.checkMinJsExclusion(s, segs) } - if !p.matchSegments(segments, 0, s, 0) { + if !p.matchSegments(segs, 0, s, 0) { return false } - return p.checkMinJsExclusion(s, segments) + return p.checkMinJsExclusion(s, segs) } -// checkMinJsExclusion returns true if the match should be allowed (not excluded). -// Returns false if this is a .min.js file that should be excluded. 
-func (p *globPattern) checkMinJsExclusion(filename string, segments []patternSegment) bool { - if !p.excludeMinJs { - return true - } - if !p.wouldMatchMinJs(filename) { - return true - } - // Exclude .min.js unless pattern explicitly includes it - return p.patternExplicitlyIncludesMinJs(segments) -} - -func (p *globPattern) matchSegments(segments []patternSegment, segIdx int, s string, sIdx int) bool { - // If we've processed all segments - if segIdx >= len(segments) { +// matchSegments recursively matches segments against string s. +func (p *globPattern) matchSegments(segs []segment, segIdx int, s string, sIdx int) bool { + if segIdx >= len(segs) { return sIdx >= len(s) } - seg := segments[segIdx] + seg := segs[segIdx] switch seg.kind { - case segmentLiteral: - // Must match the literal exactly - if sIdx+len(seg.literal) > len(s) { + case segLiteral: + end := sIdx + len(seg.literal) + if end > len(s) { return false } - substr := s[sIdx : sIdx+len(seg.literal)] - if !p.stringsEqual(seg.literal, substr) { + if !p.stringsEqual(seg.literal, s[sIdx:end]) { return false } - return p.matchSegments(segments, segIdx+1, s, sIdx+len(seg.literal)) + return p.matchSegments(segs, segIdx+1, s, end) - case segmentQuestion: - // Must match exactly one character (not /) - if sIdx >= len(s) { - return false - } - if s[sIdx] == '/' { + case segQuestion: + if sIdx >= len(s) || s[sIdx] == '/' { return false } - return p.matchSegments(segments, segIdx+1, s, sIdx+1) + return p.matchSegments(segs, segIdx+1, s, sIdx+1) - case segmentStar: - // Match zero or more characters (not /) - // Try matching zero characters first - if p.matchSegments(segments, segIdx+1, s, sIdx) { + case segStar: + // Try matching 0, 1, 2, ... characters (but not /) + if p.matchSegments(segs, segIdx+1, s, sIdx) { return true } - // Try matching more characters - for i := sIdx; i < len(s); i++ { - if s[i] == '/' { - break - } - if p.matchSegments(segments, segIdx+1, s, i+1) { + for i := sIdx; i < len(s) && s[i] != '/'; i++ { + if p.matchSegments(segs, segIdx+1, s, i+1) { return true } } @@ -385,54 +366,44 @@ func (p *globPattern) matchSegments(segments []patternSegment, segIdx int, s str return false } -// wouldMatchMinJs checks if the filename ends with .min.js (case-insensitive) -func (p *globPattern) wouldMatchMinJs(filename string) bool { - // Check length first to avoid string operations - const suffix = ".min.js" - if len(filename) < len(suffix) { - return false +// checkMinJsExclusion returns false if this is a .min.js file that should be excluded. 
+func (p *globPattern) checkMinJsExclusion(filename string, segs []segment) bool { + if !p.excludeMinJs { + return true } - // Get the last 7 characters and compare case-insensitively - end := filename[len(filename)-len(suffix):] - return strings.EqualFold(end, suffix) -} - -// patternExplicitlyIncludesMinJs checks if the pattern explicitly includes .min.js -func (p *globPattern) patternExplicitlyIncludesMinJs(segments []patternSegment) bool { - // Look for .min.js in the literal segments - for _, seg := range segments { - if seg.kind == segmentLiteral && strings.Contains(strings.ToLower(seg.literal), ".min.js") { + if !strings.HasSuffix(strings.ToLower(filename), ".min.js") { + return true + } + // Allow if pattern explicitly includes .min.js + for _, seg := range segs { + if seg.kind == segLiteral && strings.Contains(strings.ToLower(seg.literal), ".min.js") { return true } } return false } -// isImplicitGlobSuffix checks if the remaining pattern components from patternIdx -// are the implicit glob suffix (** followed by *) or all ** -func (p *globPattern) isImplicitGlobSuffix(patternIdx int) bool { - remaining := len(p.components) - patternIdx - if remaining == 0 { - return true - } - // All remaining must be ** (can match zero components) - // OR it's exactly **/* (the implicit glob pattern added for directories) - allDoubleAsterisk := true - for i := patternIdx; i < len(p.components); i++ { - if !p.components[i].isDoubleAsterisk { - allDoubleAsterisk = false +// isImplicitGlobSuffix checks if remaining components are the implicit "**/*" suffix. +func (p *globPattern) isImplicitGlobSuffix(compIdx int) bool { + remaining := p.components[compIdx:] + + // All ** is fine (matches zero) + allRecursive := true + for _, c := range remaining { + if c.kind != kindDoubleAsterisk { + allRecursive = false break } } - if allDoubleAsterisk { + if allRecursive { return true } - // Check for exactly **/* pattern (implicit glob suffix) - if remaining == 2 { - if p.components[patternIdx].isDoubleAsterisk { - last := p.components[patternIdx+1] - // The last component must be a pure * wildcard (matching any filename) - if last.hasWildcards && len(last.segments) == 1 && last.segments[0].kind == segmentStar { + + // Check for exactly **/* (the implicit glob added for directories) + if len(remaining) == 2 { + if remaining[0].kind == kindDoubleAsterisk && remaining[1].kind == kindWildcard { + segs := remaining[1].segments + if len(segs) == 1 && segs[0].kind == segStar { return true } } @@ -440,7 +411,7 @@ func (p *globPattern) isImplicitGlobSuffix(patternIdx int) bool { return false } -// stringsEqual compares two strings with case sensitivity based on pattern settings +// stringsEqual compares strings with appropriate case sensitivity. func (p *globPattern) stringsEqual(a, b string) bool { if p.caseSensitive { return a == b @@ -448,125 +419,101 @@ func (p *globPattern) stringsEqual(a, b string) bool { return strings.EqualFold(a, b) } -// isCommonPackageFolder checks if a directory name is a common package folder -func isCommonPackageFolder(name string) bool { - // Quick length check to avoid EqualFold for most cases +// isPackageFolder checks if name is a common package folder (node_modules, etc.) 
+func isPackageFolder(name string) bool { switch len(name) { - case len("node_modules"): + case 12: // node_modules return strings.EqualFold(name, "node_modules") - case len("bower_components"): + case 16: // bower_components return strings.EqualFold(name, "bower_components") - case len("jspm_packages"): + case 13: // jspm_packages return strings.EqualFold(name, "jspm_packages") - default: - return false } + return false } -// globMatcher holds compiled glob patterns for matching files. +// globMatcher combines include and exclude patterns for file matching. type globMatcher struct { - includePatterns []*globPattern - excludePatterns []*globPattern - caseSensitive bool - // hadIncludes tracks whether any include specs were provided (even if they compiled to nothing) - hadIncludes bool + includes []*globPattern + excludes []*globPattern + hadIncludes bool // true if include specs were provided (even if none compiled) } -// newGlobMatcher creates a new globMatcher from include and exclude specs. -func newGlobMatcher(includes []string, excludes []string, basePath string, caseSensitive bool, usage Usage) *globMatcher { - m := &globMatcher{ - caseSensitive: caseSensitive, - hadIncludes: len(includes) > 0, - } +func newGlobMatcher(includeSpecs, excludeSpecs []string, basePath string, caseSensitive bool, usage Usage) *globMatcher { + m := &globMatcher{hadIncludes: len(includeSpecs) > 0} - for _, spec := range includes { - if pattern := compileGlobPattern(spec, basePath, usage, caseSensitive); pattern != nil { - m.includePatterns = append(m.includePatterns, pattern) + for _, spec := range includeSpecs { + if p := compileGlobPattern(spec, basePath, usage, caseSensitive); p != nil { + m.includes = append(m.includes, p) } } - - for _, spec := range excludes { - if pattern := compileGlobPattern(spec, basePath, UsageExclude, caseSensitive); pattern != nil { - m.excludePatterns = append(m.excludePatterns, pattern) + for _, spec := range excludeSpecs { + if p := compileGlobPattern(spec, basePath, UsageExclude, caseSensitive); p != nil { + m.excludes = append(m.excludes, p) } } - return m } -// MatchesFile checks if a file path matches the include patterns and doesn't match exclude patterns. -// Returns the index of the matching include pattern, or -1 if no match. +// MatchesFile returns the index of the matching include pattern, or -1 if excluded/no match. func (m *globMatcher) MatchesFile(path string) int { - // First check excludes - for _, exc := range m.excludePatterns { + // Check excludes first + for _, exc := range m.excludes { if exc.matches(path) { return -1 } } - // If no valid include patterns but includes were specified, nothing matches - if len(m.includePatterns) == 0 { + // No includes compiled but specs were provided -> nothing matches + if len(m.includes) == 0 { if m.hadIncludes { return -1 } return 0 } - // Check includes - for i, inc := range m.includePatterns { + for i, inc := range m.includes { if inc.matches(path) { return i } } - return -1 } -// MatchesDirectory checks if a directory could contain matching files. +// MatchesDirectory returns true if this directory could contain matching files. 
func (m *globMatcher) MatchesDirectory(path string) bool { - // First check if excluded - for _, exc := range m.excludePatterns { + for _, exc := range m.excludes { if exc.matches(path) { return false } } - // If no valid include patterns but includes were specified, nothing matches - if len(m.includePatterns) == 0 { - if m.hadIncludes { - return false - } - return true + if len(m.includes) == 0 { + return !m.hadIncludes } - // Check if any include pattern could match files in this directory - for _, inc := range m.includePatterns { + for _, inc := range m.includes { if inc.matchesPrefix(path) { return true } } - return false } -// visitorNoRegex is similar to visitor but uses globMatcher instead of regex -type visitorNoRegex struct { +// globVisitor traverses directories matching files against glob patterns. +type globVisitor struct { + host vfs.FS fileMatcher *globMatcher directoryMatcher *globMatcher extensions []string useCaseSensitiveFileNames bool - host vfs.FS visited collections.Set[string] results [][]string - numIncludePatterns int + numIncludes int } -func (v *visitorNoRegex) visitDirectory( - path string, - absolutePath string, - depth *int, -) { - // Use the real path for cycle detection +func (v *globVisitor) visit(path, absolutePath string, depth *int) { + // Detect symlink cycles realPath := v.host.Realpath(absolutePath) canonicalPath := tspath.GetCanonicalFileName(realPath, v.useCaseSensitiveFileNames) if v.visited.Has(canonicalPath) { @@ -574,40 +521,35 @@ func (v *visitorNoRegex) visitDirectory( } v.visited.Add(canonicalPath) - systemEntries := v.host.GetAccessibleEntries(absolutePath) + entries := v.host.GetAccessibleEntries(absolutePath) - // Pre-compute path suffixes to reduce allocations - // We'll build paths by appending "/" + entry name + // Prepare path prefixes for building child paths pathPrefix := path - absPathPrefix := absolutePath + absPrefix := absolutePath if len(path) > 0 && path[len(path)-1] != '/' { pathPrefix = path + "/" } if len(absolutePath) > 0 && absolutePath[len(absolutePath)-1] != '/' { - absPathPrefix = absolutePath + "/" + absPrefix = absolutePath + "/" } - for _, current := range systemEntries.Files { - // Check extension first using just the filename (avoids path concatenation) - if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(current, v.extensions) { + // Match files + for _, file := range entries.Files { + if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(file, v.extensions) { continue } - - // Build absolute name for pattern matching - absoluteName := absPathPrefix + current - - matchIdx := v.fileMatcher.MatchesFile(absoluteName) - if matchIdx >= 0 { - // Only build the relative name if we have a match - name := pathPrefix + current - if v.numIncludePatterns == 0 { - v.results[0] = append(v.results[0], name) + absFile := absPrefix + file + if idx := v.fileMatcher.MatchesFile(absFile); idx >= 0 { + relFile := pathPrefix + file + if v.numIncludes == 0 { + v.results[0] = append(v.results[0], relFile) } else { - v.results[matchIdx] = append(v.results[matchIdx], name) + v.results[idx] = append(v.results[idx], relFile) } } } + // Recurse into directories if depth != nil { newDepth := *depth - 1 if newDepth == 0 { @@ -616,64 +558,54 @@ func (v *visitorNoRegex) visitDirectory( depth = &newDepth } - for _, current := range systemEntries.Directories { - name := pathPrefix + current - absoluteName := absPathPrefix + current - - if v.directoryMatcher.MatchesDirectory(absoluteName) { - v.visitDirectory(name, absoluteName, depth) + for 
_, dir := range entries.Directories { + absDir := absPrefix + dir + if v.directoryMatcher.MatchesDirectory(absDir) { + v.visit(pathPrefix+dir, absDir, depth) } } } -// matchFilesNoRegex is the regex-free version of matchFiles -func matchFilesNoRegex(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host vfs.FS) []string { +// matchFilesNoRegex matches files using compiled glob patterns (no regex). +func matchFilesNoRegex(path string, extensions, excludes, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host vfs.FS) []string { path = tspath.NormalizePath(path) currentDirectory = tspath.NormalizePath(currentDirectory) absolutePath := tspath.CombinePaths(currentDirectory, path) - // Build file matcher fileMatcher := newGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageFiles) - - // Build directory matcher directoryMatcher := newGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageDirectories) basePaths := getBasePaths(path, includes, useCaseSensitiveFileNames) + numIncludes := len(fileMatcher.includes) - numIncludePatterns := len(fileMatcher.includePatterns) - - var results [][]string - if numIncludePatterns > 0 { - results = make([][]string, numIncludePatterns) - for i := range results { - results[i] = []string{} - } - } else { - results = [][]string{{}} + results := make([][]string, max(numIncludes, 1)) + for i := range results { + results[i] = []string{} } - v := visitorNoRegex{ - useCaseSensitiveFileNames: useCaseSensitiveFileNames, + v := globVisitor{ host: host, fileMatcher: fileMatcher, directoryMatcher: directoryMatcher, extensions: extensions, + useCaseSensitiveFileNames: useCaseSensitiveFileNames, results: results, - numIncludePatterns: numIncludePatterns, + numIncludes: numIncludes, } for _, basePath := range basePaths { - v.visitDirectory(basePath, tspath.CombinePaths(currentDirectory, basePath), depth) + v.visit(basePath, tspath.CombinePaths(currentDirectory, basePath), depth) } return core.Flatten(results) } -// globSpecMatcher wraps glob patterns for matching paths. +// globSpecMatcher wraps multiple glob patterns for matching paths. type globSpecMatcher struct { patterns []*globPattern } +// MatchString returns true if any pattern matches the path. func (m *globSpecMatcher) MatchString(path string) bool { if m == nil { return false @@ -686,6 +618,7 @@ func (m *globSpecMatcher) MatchString(path string) bool { return false } +// MatchIndex returns the index of the first matching pattern, or -1. func (m *globSpecMatcher) MatchIndex(path string) int { if m == nil { return -1 @@ -698,6 +631,7 @@ func (m *globSpecMatcher) MatchIndex(path string) int { return -1 } +// Len returns the number of patterns. func (m *globSpecMatcher) Len() int { if m == nil { return 0 @@ -705,28 +639,28 @@ func (m *globSpecMatcher) Len() int { return len(m.patterns) } -// newGlobSpecMatcher creates a glob-based matcher for multiple specs. +// newGlobSpecMatcher creates a matcher for multiple glob specs. 
func newGlobSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatcher { if len(specs) == 0 { return nil } - m := &globSpecMatcher{} + var patterns []*globPattern for _, spec := range specs { - if pattern := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames); pattern != nil { - m.patterns = append(m.patterns, pattern) + if p := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames); p != nil { + patterns = append(patterns, p) } } - if len(m.patterns) == 0 { + if len(patterns) == 0 { return nil } - return m + return &globSpecMatcher{patterns: patterns} } -// newGlobSingleSpecMatcher creates a glob-based matcher for a single spec. -func newGlobSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatcher { - pattern := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames) - if pattern == nil { +// newGlobSingleSpecMatcher creates a matcher for a single glob spec. +func newGlobSingleSpecMatcher(spec, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatcher { + p := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames) + if p == nil { return nil } - return &globSpecMatcher{patterns: []*globPattern{pattern}} + return &globSpecMatcher{patterns: []*globPattern{p}} } From 604196e02e5bffabe6be96703e62542fdd9376ab Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 16:16:38 -0800 Subject: [PATCH 23/41] More simplficiations --- internal/vfs/vfsmatch/new.go | 82 +++++++++++++----------------------- 1 file changed, 29 insertions(+), 53 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index fce9d6dc9f..10fe04a425 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -128,7 +128,7 @@ func (p *globPattern) matches(path string) bool { if p == nil { return false } - return p.matchPath(path, 0, 0, false) + return p.matchPath(path, 0, 0) } // matchesPrefix returns true if files under this directory path could match. @@ -141,17 +141,14 @@ func (p *globPattern) matchesPrefix(path string) bool { } // matchPath checks if path matches the pattern starting from the given offsets. -// afterRecursive is true if we just matched a ** (affects dot-file handling). -func (p *globPattern) matchPath(path string, pathOffset, compIdx int, afterRecursive bool) bool { +func (p *globPattern) matchPath(path string, pathOffset, compIdx int) bool { for { pathPart, nextOffset, ok := nextPathPart(path, pathOffset) if !ok { - // No more path parts - check if pattern is satisfied return p.patternSatisfied(compIdx) } if compIdx >= len(p.components) { - // Path has more parts but pattern is done // Exclude patterns match prefixes (e.g., "node_modules" excludes "node_modules/foo") return p.isExclude } @@ -161,21 +158,15 @@ func (p *globPattern) matchPath(path string, pathOffset, compIdx int, afterRecur switch comp.kind { case kindDoubleAsterisk: // ** can match zero directories: try skipping it - if p.matchPath(path, pathOffset, compIdx+1, true) { + if p.matchPath(path, pathOffset, compIdx+1) { return true } // ** should not match hidden dirs or package folders (for includes) - if !p.isExclude { - if len(pathPart) > 0 && pathPart[0] == '.' 
{ - return false - } - if isPackageFolder(pathPart) { - return false - } + if !p.isExclude && (isHiddenPath(pathPart) || isPackageFolder(pathPart)) { + return false } // ** matches this directory, try next path part with same ** pathOffset = nextOffset - afterRecursive = true continue case kindLiteral: @@ -197,7 +188,6 @@ func (p *globPattern) matchPath(path string, pathOffset, compIdx int, afterRecur pathOffset = nextOffset compIdx++ - afterRecursive = false } } @@ -207,8 +197,7 @@ func (p *globPattern) matchPathPrefix(path string, pathOffset, compIdx int) bool for { pathPart, nextOffset, ok := nextPathPart(path, pathOffset) if !ok { - // Path exhausted - any prefix could potentially match - return true + return true // Path exhausted - could potentially match } if compIdx >= len(p.components) { @@ -222,13 +211,8 @@ func (p *globPattern) matchPathPrefix(path string, pathOffset, compIdx int) bool if p.matchPathPrefix(path, pathOffset, compIdx+1) { return true } - if !p.isExclude { - if len(pathPart) > 0 && pathPart[0] == '.' { - return false - } - if isPackageFolder(pathPart) { - return false - } + if !p.isExclude && (isHiddenPath(pathPart) || isPackageFolder(pathPart)) { + return false } pathOffset = nextOffset continue @@ -300,7 +284,7 @@ func nextPathPart(path string, offset int) (part string, nextOffset int, ok bool // matchWildcard matches a path component against wildcard segments. func (p *globPattern) matchWildcard(segs []segment, s string) bool { // Include patterns: wildcards at start cannot match hidden files - if !p.isExclude && len(segs) > 0 && len(s) > 0 && s[0] == '.' { + if !p.isExclude && len(segs) > 0 && isHiddenPath(s) { if segs[0].kind == segStar || segs[0].kind == segQuestion { return false } @@ -309,20 +293,13 @@ func (p *globPattern) matchWildcard(segs []segment, s string) bool { // Fast path: single * followed by literal suffix (e.g., "*.ts") if len(segs) == 2 && segs[0].kind == segStar && segs[1].kind == segLiteral { suffix := segs[1].literal - if len(s) < len(suffix) { - return false - } - matched := s[len(s)-len(suffix):] - if !p.stringsEqual(suffix, matched) { + if len(s) < len(suffix) || !p.stringsEqual(suffix, s[len(s)-len(suffix):]) { return false } return p.checkMinJsExclusion(s, segs) } - if !p.matchSegments(segs, 0, s, 0) { - return false - } - return p.checkMinJsExclusion(s, segs) + return p.matchSegments(segs, 0, s, 0) && p.checkMinJsExclusion(s, segs) } // matchSegments recursively matches segments against string s. 
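
For readers following the matchWildcard/matchSegments changes in this hunk: the matcher avoids regex by splitting a component into literal, '*', and '?' segments and backtracking only on '*', which never crosses a '/'. The stripped-down sketch below illustrates that technique in isolation; parse, match, and seg are illustrative names only, and case folding plus the .min.js rule are intentionally omitted, so this is not the package's API.

package main

import (
	"fmt"
	"strings"
)

type segKind int

const (
	segLiteral segKind = iota
	segStar
	segQuestion
)

type seg struct {
	kind    segKind
	literal string
}

// parse splits "*.ts" into [star, literal(".ts")].
func parse(s string) []seg {
	var out []seg
	start := 0
	for i := 0; i < len(s); i++ {
		if s[i] == '*' || s[i] == '?' {
			if i > start {
				out = append(out, seg{segLiteral, s[start:i]})
			}
			if s[i] == '*' {
				out = append(out, seg{kind: segStar})
			} else {
				out = append(out, seg{kind: segQuestion})
			}
			start = i + 1
		}
	}
	if start < len(s) {
		out = append(out, seg{segLiteral, s[start:]})
	}
	return out
}

// match tries segments against s, backtracking on '*' (which never crosses '/').
func match(segs []seg, s string) bool {
	var walk func(si, pi int) bool
	walk = func(si, pi int) bool {
		if si == len(segs) {
			return pi == len(s)
		}
		switch sg := segs[si]; sg.kind {
		case segLiteral:
			return strings.HasPrefix(s[pi:], sg.literal) && walk(si+1, pi+len(sg.literal))
		case segQuestion:
			return pi < len(s) && s[pi] != '/' && walk(si+1, pi+1)
		default: // segStar: try consuming 0..n non-'/' bytes
			for i := pi; ; i++ {
				if walk(si+1, i) {
					return true
				}
				if i >= len(s) || s[i] == '/' {
					return false
				}
			}
		}
	}
	return walk(0, 0)
}

func main() {
	p := parse("a?c*.go")
	fmt.Println(match(p, "abc_main.go")) // true
	fmt.Println(match(p, "ac.go"))       // false ('?' must consume exactly one char)
}
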
@@ -387,28 +364,22 @@ func (p *globPattern) checkMinJsExclusion(filename string, segs []segment) bool func (p *globPattern) isImplicitGlobSuffix(compIdx int) bool { remaining := p.components[compIdx:] - // All ** is fine (matches zero) - allRecursive := true - for _, c := range remaining { - if c.kind != kindDoubleAsterisk { - allRecursive = false - break - } - } - if allRecursive { + // Check for exactly **/* (the implicit glob added for directories) + if len(remaining) == 2 && + remaining[0].kind == kindDoubleAsterisk && + remaining[1].kind == kindWildcard && + len(remaining[1].segments) == 1 && + remaining[1].segments[0].kind == segStar { return true } - // Check for exactly **/* (the implicit glob added for directories) - if len(remaining) == 2 { - if remaining[0].kind == kindDoubleAsterisk && remaining[1].kind == kindWildcard { - segs := remaining[1].segments - if len(segs) == 1 && segs[0].kind == segStar { - return true - } + // All ** is fine (matches zero directories) + for _, c := range remaining { + if c.kind != kindDoubleAsterisk { + return false } } - return false + return true } // stringsEqual compares strings with appropriate case sensitivity. @@ -419,15 +390,20 @@ func (p *globPattern) stringsEqual(a, b string) bool { return strings.EqualFold(a, b) } +// isHiddenPath checks if a path component is hidden (starts with dot). +func isHiddenPath(name string) bool { + return len(name) > 0 && name[0] == '.' +} + // isPackageFolder checks if name is a common package folder (node_modules, etc.) func isPackageFolder(name string) bool { switch len(name) { case 12: // node_modules return strings.EqualFold(name, "node_modules") - case 16: // bower_components - return strings.EqualFold(name, "bower_components") case 13: // jspm_packages return strings.EqualFold(name, "jspm_packages") + case 16: // bower_components + return strings.EqualFold(name, "bower_components") } return false } From ef638bfe0bc12937ec8fb2ad25592137c552ad37 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 16:27:39 -0800 Subject: [PATCH 24/41] More simplficiations --- internal/vfs/vfsmatch/new.go | 72 ++++++++++++++---------------------- 1 file changed, 27 insertions(+), 45 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index 10fe04a425..6cc431297e 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -284,10 +284,8 @@ func nextPathPart(path string, offset int) (part string, nextOffset int, ok bool // matchWildcard matches a path component against wildcard segments. 
func (p *globPattern) matchWildcard(segs []segment, s string) bool { // Include patterns: wildcards at start cannot match hidden files - if !p.isExclude && len(segs) > 0 && isHiddenPath(s) { - if segs[0].kind == segStar || segs[0].kind == segQuestion { - return false - } + if !p.isExclude && len(segs) > 0 && isHiddenPath(s) && (segs[0].kind == segStar || segs[0].kind == segQuestion) { + return false } // Fast path: single * followed by literal suffix (e.g., "*.ts") @@ -364,18 +362,17 @@ func (p *globPattern) checkMinJsExclusion(filename string, segs []segment) bool func (p *globPattern) isImplicitGlobSuffix(compIdx int) bool { remaining := p.components[compIdx:] - // Check for exactly **/* (the implicit glob added for directories) - if len(remaining) == 2 && - remaining[0].kind == kindDoubleAsterisk && - remaining[1].kind == kindWildcard && - len(remaining[1].segments) == 1 && - remaining[1].segments[0].kind == segStar { - return true - } - - // All ** is fine (matches zero directories) - for _, c := range remaining { - if c.kind != kindDoubleAsterisk { + for i, c := range remaining { + switch c.kind { + case kindDoubleAsterisk: + continue + case kindWildcard: + // Allow single * as last component (the implicit glob suffix) + if i == len(remaining)-1 && len(c.segments) == 1 && c.segments[0].kind == segStar { + return true + } + return false + default: return false } } @@ -408,6 +405,13 @@ func isPackageFolder(name string) bool { return false } +func ensureTrailingSlash(s string) string { + if len(s) > 0 && s[len(s)-1] != '/' { + return s + "/" + } + return s +} + // globMatcher combines include and exclude patterns for file matching. type globMatcher struct { includes []*globPattern @@ -485,7 +489,6 @@ type globVisitor struct { useCaseSensitiveFileNames bool visited collections.Set[string] results [][]string - numIncludes int } func (v *globVisitor) visit(path, absolutePath string, depth *int) { @@ -500,28 +503,16 @@ func (v *globVisitor) visit(path, absolutePath string, depth *int) { entries := v.host.GetAccessibleEntries(absolutePath) // Prepare path prefixes for building child paths - pathPrefix := path - absPrefix := absolutePath - if len(path) > 0 && path[len(path)-1] != '/' { - pathPrefix = path + "/" - } - if len(absolutePath) > 0 && absolutePath[len(absolutePath)-1] != '/' { - absPrefix = absolutePath + "/" - } + pathPrefix := ensureTrailingSlash(path) + absPrefix := ensureTrailingSlash(absolutePath) // Match files for _, file := range entries.Files { if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(file, v.extensions) { continue } - absFile := absPrefix + file - if idx := v.fileMatcher.MatchesFile(absFile); idx >= 0 { - relFile := pathPrefix + file - if v.numIncludes == 0 { - v.results[0] = append(v.results[0], relFile) - } else { - v.results[idx] = append(v.results[idx], relFile) - } + if idx := v.fileMatcher.MatchesFile(absPrefix + file); idx >= 0 { + v.results[idx] = append(v.results[idx], pathPrefix+file) } } @@ -551,29 +542,20 @@ func matchFilesNoRegex(path string, extensions, excludes, includes []string, use fileMatcher := newGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageFiles) directoryMatcher := newGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageDirectories) - basePaths := getBasePaths(path, includes, useCaseSensitiveFileNames) - numIncludes := len(fileMatcher.includes) - - results := make([][]string, max(numIncludes, 1)) - for i := range results { - results[i] = []string{} - } - v := globVisitor{ host: 
host, fileMatcher: fileMatcher, directoryMatcher: directoryMatcher, extensions: extensions, useCaseSensitiveFileNames: useCaseSensitiveFileNames, - results: results, - numIncludes: numIncludes, + results: make([][]string, max(len(fileMatcher.includes), 1)), } - for _, basePath := range basePaths { + for _, basePath := range getBasePaths(path, includes, useCaseSensitiveFileNames) { v.visit(basePath, tspath.CombinePaths(currentDirectory, basePath), depth) } - return core.Flatten(results) + return core.Flatten(v.results) } // globSpecMatcher wraps multiple glob patterns for matching paths. From c06760b1028ca9ef0d47683abf4bb2ec8d1f294c Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 16:37:23 -0800 Subject: [PATCH 25/41] More simplficiations --- internal/vfs/vfsmatch/new.go | 102 +++++++++-------------------------- 1 file changed, 25 insertions(+), 77 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index 6cc431297e..927aad4391 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -98,28 +98,24 @@ func parseComponent(s string, isInclude bool) component { // parseSegments breaks "*.ts" into [segStar, segLiteral(".ts")] func parseSegments(s string) []segment { var result []segment - var buf strings.Builder - - flushLiteral := func() { - if buf.Len() > 0 { - result = append(result, segment{kind: segLiteral, literal: buf.String()}) - buf.Reset() - } - } - - for i := range len(s) { + start := 0 + for i := 0; i < len(s); i++ { switch s[i] { - case '*': - flushLiteral() - result = append(result, segment{kind: segStar}) - case '?': - flushLiteral() - result = append(result, segment{kind: segQuestion}) - default: - buf.WriteByte(s[i]) + case '*', '?': + if i > start { + result = append(result, segment{kind: segLiteral, literal: s[start:i]}) + } + if s[i] == '*' { + result = append(result, segment{kind: segStar}) + } else { + result = append(result, segment{kind: segQuestion}) + } + start = i + 1 } } - flushLiteral() + if start < len(s) { + result = append(result, segment{kind: segLiteral, literal: s[start:]}) + } return result } @@ -128,7 +124,7 @@ func (p *globPattern) matches(path string) bool { if p == nil { return false } - return p.matchPath(path, 0, 0) + return p.matchPath(path, 0, 0, false) } // matchesPrefix returns true if files under this directory path could match. @@ -137,20 +133,24 @@ func (p *globPattern) matchesPrefix(path string) bool { if p == nil { return false } - return p.matchPathPrefix(path, 0, 0) + return p.matchPath(path, 0, 0, true) } // matchPath checks if path matches the pattern starting from the given offsets. -func (p *globPattern) matchPath(path string, pathOffset, compIdx int) bool { +// If prefixOnly is true, returns true when path is exhausted (prefix matching for directories). 
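// A hedged in-package sketch of the two modes this flag unifies (the exampleMatchModes
// name, the pattern, and the paths are invented; expected outcomes follow the doc
// comments on matches/matchesPrefix and the wildcard ReadDirectory tests):
//
//	func exampleMatchModes() {
//		p := compileGlobPattern("x/**/a.ts", "/dev", UsageFiles, true)
//		_ = p.matches("/dev/x/y/a.ts") // full match: every pattern component is consumed
//		_ = p.matchesPrefix("/dev/x")  // prefix mode: the path runs out first, so files below could still match
//		_ = p.matchesPrefix("/dev/js") // the literal "x" component can never match "js", so the walker skips it
//	}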
+func (p *globPattern) matchPath(path string, pathOffset, compIdx int, prefixOnly bool) bool { for { pathPart, nextOffset, ok := nextPathPart(path, pathOffset) if !ok { + if prefixOnly { + return true // Path exhausted - could potentially match + } return p.patternSatisfied(compIdx) } if compIdx >= len(p.components) { // Exclude patterns match prefixes (e.g., "node_modules" excludes "node_modules/foo") - return p.isExclude + return p.isExclude && !prefixOnly } comp := p.components[compIdx] @@ -158,7 +158,7 @@ func (p *globPattern) matchPath(path string, pathOffset, compIdx int) bool { switch comp.kind { case kindDoubleAsterisk: // ** can match zero directories: try skipping it - if p.matchPath(path, pathOffset, compIdx+1) { + if p.matchPath(path, pathOffset, compIdx+1, prefixOnly) { return true } // ** should not match hidden dirs or package folders (for includes) @@ -191,54 +191,6 @@ func (p *globPattern) matchPath(path string, pathOffset, compIdx int) bool { } } -// matchPathPrefix checks if path could be a prefix of a matching path. -// Similar to matchPath but returns true when path is exhausted. -func (p *globPattern) matchPathPrefix(path string, pathOffset, compIdx int) bool { - for { - pathPart, nextOffset, ok := nextPathPart(path, pathOffset) - if !ok { - return true // Path exhausted - could potentially match - } - - if compIdx >= len(p.components) { - return false - } - - comp := p.components[compIdx] - - switch comp.kind { - case kindDoubleAsterisk: - if p.matchPathPrefix(path, pathOffset, compIdx+1) { - return true - } - if !p.isExclude && (isHiddenPath(pathPart) || isPackageFolder(pathPart)) { - return false - } - pathOffset = nextOffset - continue - - case kindLiteral: - if comp.skipPackageFolders && isPackageFolder(pathPart) { - return false - } - if !p.stringsEqual(comp.literal, pathPart) { - return false - } - - case kindWildcard: - if comp.skipPackageFolders && isPackageFolder(pathPart) { - return false - } - if !p.matchWildcard(comp.segments, pathPart) { - return false - } - } - - pathOffset = nextOffset - compIdx++ - } -} - // patternSatisfied checks if remaining pattern components can match empty input. func (p *globPattern) patternSatisfied(compIdx int) bool { if p.isExclude { @@ -616,9 +568,5 @@ func newGlobSpecMatcher(specs []string, basePath string, usage Usage, useCaseSen // newGlobSingleSpecMatcher creates a matcher for a single glob spec. 
func newGlobSingleSpecMatcher(spec, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatcher { - p := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames) - if p == nil { - return nil - } - return &globSpecMatcher{patterns: []*globPattern{p}} + return newGlobSpecMatcher([]string{spec}, basePath, usage, useCaseSensitiveFileNames) } From 0801191e76c7987f4e9441efaf9fd758e66a5686 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 16:39:52 -0800 Subject: [PATCH 26/41] Fix range --- internal/vfs/vfsmatch/new.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index 927aad4391..fb83df1a26 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -99,7 +99,7 @@ func parseComponent(s string, isInclude bool) component { func parseSegments(s string) []segment { var result []segment start := 0 - for i := 0; i < len(s); i++ { + for i := range len(s) { switch s[i] { case '*', '?': if i > start { From e511530a8dccb03995e30380874ce497d8b6aaa2 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 17:55:39 -0800 Subject: [PATCH 27/41] rando fixups --- internal/vfs/vfsmatch/new.go | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index fb83df1a26..9d130dcd21 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -194,12 +194,11 @@ func (p *globPattern) matchPath(path string, pathOffset, compIdx int, prefixOnly // patternSatisfied checks if remaining pattern components can match empty input. func (p *globPattern) patternSatisfied(compIdx int) bool { if p.isExclude { - // Exclude patterns: check for implicit glob suffix (added for directories) return p.isImplicitGlobSuffix(compIdx) } // Include patterns: all remaining components must be ** (matches zero dirs) - for i := compIdx; i < len(p.components); i++ { - if p.components[i].kind != kindDoubleAsterisk { + for _, c := range p.components[compIdx:] { + if c.kind != kindDoubleAsterisk { return false } } @@ -295,10 +294,7 @@ func (p *globPattern) matchSegments(segs []segment, segIdx int, s string, sIdx i // checkMinJsExclusion returns false if this is a .min.js file that should be excluded. func (p *globPattern) checkMinJsExclusion(filename string, segs []segment) bool { - if !p.excludeMinJs { - return true - } - if !strings.HasSuffix(strings.ToLower(filename), ".min.js") { + if !p.excludeMinJs || !strings.HasSuffix(strings.ToLower(filename), ".min.js") { return true } // Allow if pattern explicitly includes .min.js @@ -313,7 +309,6 @@ func (p *globPattern) checkMinJsExclusion(filename string, segs []segment) bool // isImplicitGlobSuffix checks if remaining components are the implicit "**/*" suffix. func (p *globPattern) isImplicitGlobSuffix(compIdx int) bool { remaining := p.components[compIdx:] - for i, c := range remaining { switch c.kind { case kindDoubleAsterisk: @@ -347,11 +342,11 @@ func isHiddenPath(name string) bool { // isPackageFolder checks if name is a common package folder (node_modules, etc.) 
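// Note on the shape of the function below: switching on len(name) rejects most entries
// with a single length comparison; len("node_modules") and friends are constant
// expressions, so strings.EqualFold only runs for names that are already the right length.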
func isPackageFolder(name string) bool { switch len(name) { - case 12: // node_modules + case len("node_modules"): return strings.EqualFold(name, "node_modules") - case 13: // jspm_packages + case len("jspm_packages"): return strings.EqualFold(name, "jspm_packages") - case 16: // bower_components + case len("bower_components"): return strings.EqualFold(name, "bower_components") } return false @@ -389,21 +384,17 @@ func newGlobMatcher(includeSpecs, excludeSpecs []string, basePath string, caseSe // MatchesFile returns the index of the matching include pattern, or -1 if excluded/no match. func (m *globMatcher) MatchesFile(path string) int { - // Check excludes first for _, exc := range m.excludes { if exc.matches(path) { return -1 } } - - // No includes compiled but specs were provided -> nothing matches if len(m.includes) == 0 { if m.hadIncludes { return -1 } return 0 } - for i, inc := range m.includes { if inc.matches(path) { return i @@ -454,11 +445,9 @@ func (v *globVisitor) visit(path, absolutePath string, depth *int) { entries := v.host.GetAccessibleEntries(absolutePath) - // Prepare path prefixes for building child paths pathPrefix := ensureTrailingSlash(path) absPrefix := ensureTrailingSlash(absolutePath) - // Match files for _, file := range entries.Files { if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(file, v.extensions) { continue @@ -468,7 +457,6 @@ func (v *globVisitor) visit(path, absolutePath string, depth *int) { } } - // Recurse into directories if depth != nil { newDepth := *depth - 1 if newDepth == 0 { From 8d06bcdb3596751b942246c2ffc1fdfe79d82112 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 18:18:31 -0800 Subject: [PATCH 28/41] Unused method --- internal/vfs/vfsmatch/new.go | 8 -------- internal/vfs/vfsmatch/old.go | 4 ---- internal/vfs/vfsmatch/vfsmatch.go | 2 -- 3 files changed, 14 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index 9d130dcd21..bedc485e00 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -529,14 +529,6 @@ func (m *globSpecMatcher) MatchIndex(path string) int { return -1 } -// Len returns the number of patterns. -func (m *globSpecMatcher) Len() int { - if m == nil { - return 0 - } - return len(m.patterns) -} - // newGlobSpecMatcher creates a matcher for multiple glob specs. func newGlobSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatcher { if len(specs) == 0 { diff --git a/internal/vfs/vfsmatch/old.go b/internal/vfs/vfsmatch/old.go index 64cdad7b44..9aa5fe0afd 100644 --- a/internal/vfs/vfsmatch/old.go +++ b/internal/vfs/vfsmatch/old.go @@ -436,10 +436,6 @@ func (m *regexSpecMatchers) MatchIndex(path string) int { return -1 } -func (m *regexSpecMatchers) Len() int { - return len(m.matchers) -} - // newRegexSpecMatchers creates individual regex matchers for each spec. func newRegexSpecMatchers(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *regexSpecMatchers { patterns := getRegularExpressionsForWildcards(specs, basePath, usage) diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go index d0e7bf836d..a1c2d4457b 100644 --- a/internal/vfs/vfsmatch/vfsmatch.go +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -48,8 +48,6 @@ type SpecMatcher interface { type SpecMatchers interface { // MatchIndex returns the index of the first matching pattern, or -1 if none match. 
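	// For example, with specs {"*.ts", "*.tsx"} rooted at /project, "/project/a.tsx"
	// yields 1 and "/project/a.js" yields -1 (see TestSpecMatchers_MatchIndex).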
MatchIndex(path string) int - // Len returns the number of patterns. - Len() int } // NewSpecMatcher creates a matcher for one or more glob specs. From 816b3b25311e929d19d0de5e2f076256f42707b7 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 18:22:31 -0800 Subject: [PATCH 29/41] Remove leftover nil checks --- internal/vfs/vfsmatch/new.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index bedc485e00..dc6816d2c0 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -121,18 +121,12 @@ func parseSegments(s string) []segment { // matches returns true if path matches this pattern. func (p *globPattern) matches(path string) bool { - if p == nil { - return false - } return p.matchPath(path, 0, 0, false) } // matchesPrefix returns true if files under this directory path could match. // Used to skip directories during traversal. func (p *globPattern) matchesPrefix(path string) bool { - if p == nil { - return false - } return p.matchPath(path, 0, 0, true) } @@ -505,9 +499,6 @@ type globSpecMatcher struct { // MatchString returns true if any pattern matches the path. func (m *globSpecMatcher) MatchString(path string) bool { - if m == nil { - return false - } for _, p := range m.patterns { if p.matches(path) { return true @@ -518,9 +509,6 @@ func (m *globSpecMatcher) MatchString(path string) bool { // MatchIndex returns the index of the first matching pattern, or -1. func (m *globSpecMatcher) MatchIndex(path string) int { - if m == nil { - return -1 - } for i, p := range m.patterns { if p.matches(path) { return i From 11686506f8305fa4e091c559a7f33b2ff5d2fade Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 18:28:52 -0800 Subject: [PATCH 30/41] more testing --- internal/vfs/vfsmatch/new.go | 6 +- internal/vfs/vfsmatch/vfsmatch_test.go | 210 +++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 3 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index dc6816d2c0..6b8aaa62ce 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -165,7 +165,7 @@ func (p *globPattern) matchPath(path string, pathOffset, compIdx int, prefixOnly case kindLiteral: if comp.skipPackageFolders && isPackageFolder(pathPart) { - return false + panic("unreachable: literal components never have skipPackageFolders") } if !p.stringsEqual(comp.literal, pathPart) { return false @@ -281,9 +281,9 @@ func (p *globPattern) matchSegments(segs []segment, segIdx int, s string, sIdx i } } return false + default: + panic("unreachable: unknown segment kind") } - - return false } // checkMinJsExclusion returns false if this is a .min.js file that should be excluded. 
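To make the segment model concrete, here is a hedged sketch in the style of the tests in this series (the test name is invented; the expected decomposition follows the parseSegments code above, and the same segments feed matchWildcard and checkMinJsExclusion):

func TestParseSegmentsSketch(t *testing.T) {
	t.Parallel()
	// "a*b?.ts" should decompose into
	// [segLiteral("a"), segStar, segLiteral("b"), segQuestion, segLiteral(".ts")];
	// matchWildcard walks these left to right, backtracking on segStar.
	segs := parseSegments("a*b?.ts")
	assert.Equal(t, len(segs), 5)
	assert.Equal(t, segs[0].kind, segLiteral)
	assert.Equal(t, segs[1].kind, segStar)
	assert.Equal(t, segs[2].literal, "b")
	assert.Equal(t, segs[3].kind, segQuestion)
	assert.Equal(t, segs[4].literal, ".ts")
}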
diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go index bbb4aeed6f..059ad755ea 100644 --- a/internal/vfs/vfsmatch/vfsmatch_test.go +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -1450,3 +1450,213 @@ func TestSpecMatchers(t *testing.T) { }) } } + +// TestGlobPatternInternals tests internal glob pattern matching logic +// to ensure edge cases are covered that may not be hit by ReadDirectory tests +func TestGlobPatternInternals(t *testing.T) { + t.Parallel() + + t.Run("nextPathPart handles consecutive slashes", func(t *testing.T) { + t.Parallel() + // Test path with consecutive slashes + path := "/dev//foo///bar" + + // First call - returns empty for root + part, offset, ok := nextPathPart(path, 0) + assert.Assert(t, ok) + assert.Equal(t, part, "") + assert.Equal(t, offset, 1) + + // Second call - should skip consecutive slashes after /dev + part, offset, ok = nextPathPart(path, 1) + assert.Assert(t, ok) + assert.Equal(t, part, "dev") + + // Third call - should skip the double slashes before foo + part, offset, ok = nextPathPart(path, offset) + assert.Assert(t, ok) + assert.Equal(t, part, "foo") + + // Fourth call - should skip the triple slashes before bar + part, _, ok = nextPathPart(path, offset) + assert.Assert(t, ok) + assert.Equal(t, part, "bar") + }) + + t.Run("nextPathPart handles path ending with slashes", func(t *testing.T) { + t.Parallel() + path := "/dev/" + + // Skip to after "dev" + _, offset, ok := nextPathPart(path, 0) // root + assert.Assert(t, ok) + _, offset, ok = nextPathPart(path, offset) // dev + assert.Assert(t, ok) + // Now at trailing slash, should return not ok + _, _, ok = nextPathPart(path, offset) + assert.Assert(t, !ok) + }) + + t.Run("question mark segment at end of string", func(t *testing.T) { + t.Parallel() + // Create pattern with question mark that should fail when string is exhausted + p := compileGlobPattern("a?", "/", UsageFiles, true) + assert.Assert(t, p != nil) + + // Should match "ab" + assert.Assert(t, p.matches("/ab")) + + // Should NOT match "a" (question mark requires a character) + assert.Assert(t, !p.matches("/a")) + }) + + t.Run("star segment with complex pattern", func(t *testing.T) { + t.Parallel() + // Pattern like "a*b*c" requires backtracking in star matching + p := compileGlobPattern("a*b*c", "/", UsageFiles, true) + assert.Assert(t, p != nil) + + // Should match "abc" + assert.Assert(t, p.matches("/abc")) + + // Should match "aXbYc" + assert.Assert(t, p.matches("/aXbYc")) + + // Should match "aXXXbYYYc" + assert.Assert(t, p.matches("/aXXXbYYYc")) + + // Should NOT match "aXbY" (no trailing c) + assert.Assert(t, !p.matches("/aXbY")) + }) + + t.Run("ensureTrailingSlash with existing slash", func(t *testing.T) { + t.Parallel() + // Test that ensureTrailingSlash doesn't double-add slashes + result := ensureTrailingSlash("/dev/") + assert.Equal(t, result, "/dev/") + + result = ensureTrailingSlash("/") + assert.Equal(t, result, "/") + }) + + t.Run("ensureTrailingSlash with empty string", func(t *testing.T) { + t.Parallel() + result := ensureTrailingSlash("") + assert.Equal(t, result, "") + }) + + t.Run("literal component with package folder in include", func(t *testing.T) { + t.Parallel() + // When a literal include path goes through a package folder, + // the skipPackageFolders flag on literal components should not block it + // because literal components in includes don't have skipPackageFolders=true + host := vfstest.FromMap(map[string]string{ + "/dev/node_modules/pkg/index.ts": "", + }, false) + 
+ // Explicit literal path should work + got := matchFilesNoRegex("/dev", []string{".ts"}, nil, + []string{"node_modules/pkg/index.ts"}, false, "/", nil, host) + assert.Assert(t, slices.Contains(got, "/dev/node_modules/pkg/index.ts")) + }) +} + +// TestMatchSegmentsEdgeCases tests edge cases in the matchSegments function +func TestMatchSegmentsEdgeCases(t *testing.T) { + t.Parallel() + + t.Run("question mark before slash in string", func(t *testing.T) { + t.Parallel() + // This tests the case where question mark encounters a slash character + // which should fail since ? doesn't match / + p := compileGlobPattern("a?b", "/", UsageFiles, true) + assert.Assert(t, p != nil) + + // "a/b" should not match "a?b" pattern since ? shouldn't match / + // But this is a single component pattern, so / wouldn't be in the component + // We need to test this within the segment matching + + // Create a pattern that will exercise question mark matching edge cases + assert.Assert(t, p.matches("/aXb")) // X matches ? + assert.Assert(t, !p.matches("/ab")) // nothing to match ? + assert.Assert(t, !p.matches("/aXYb")) // XY is too many chars for ? + }) + + t.Run("star with no trailing content", func(t *testing.T) { + t.Parallel() + // Test that star can match to end of string + p := compileGlobPattern("a*", "/", UsageFiles, true) + assert.Assert(t, p != nil) + + assert.Assert(t, p.matches("/a")) + assert.Assert(t, p.matches("/abc")) + assert.Assert(t, p.matches("/aXYZ")) + }) + + t.Run("multiple stars in pattern", func(t *testing.T) { + t.Parallel() + // Test patterns with multiple stars that require backtracking + p := compileGlobPattern("*a*", "/", UsageFiles, true) + assert.Assert(t, p != nil) + + assert.Assert(t, p.matches("/a")) + assert.Assert(t, p.matches("/Xa")) + assert.Assert(t, p.matches("/aX")) + assert.Assert(t, p.matches("/XaY")) + assert.Assert(t, !p.matches("/XYZ")) // no 'a' + }) + + t.Run("literal segment not matching", func(t *testing.T) { + t.Parallel() + // Test literal segment that's longer than remaining string + p := compileGlobPattern("abcdefgh.ts", "/", UsageFiles, true) + assert.Assert(t, p != nil) + + assert.Assert(t, !p.matches("/abc.ts")) // different literal + assert.Assert(t, p.matches("/abcdefgh.ts")) // exact match + }) +} + +// TestReadDirectoryConsecutiveSlashes tests handling of paths with consecutive slashes +func TestReadDirectoryConsecutiveSlashes(t *testing.T) { + t.Parallel() + + host := vfstest.FromMap(map[string]string{ + "/dev/a.ts": "", + "/dev/x/b.ts": "", + }, false) + + // The matchFilesNoRegex function normalizes paths, but we can test internal handling + got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"**/*.ts"}, false, "/", nil, host) + assert.Assert(t, len(got) >= 2, "should find files") + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/b.ts")) +} + +// TestGlobPatternLiteralWithPackageFolders tests literal component behavior with package folders +func TestGlobPatternLiteralWithPackageFolders(t *testing.T) { + t.Parallel() + + t.Run("wildcard skips package folders", func(t *testing.T) { + t.Parallel() + // Wildcard patterns should skip node_modules + host := vfstest.FromMap(map[string]string{ + "/dev/a.ts": "", + "/dev/node_modules/b.ts": "", + }, false) + + got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"*/*.ts"}, false, "/", nil, host) + assert.Assert(t, !slices.Contains(got, "/dev/node_modules/b.ts"), "should skip node_modules with wildcard") + }) + + t.Run("explicit literal 
includes package folder", func(t *testing.T) { + t.Parallel() + // Explicit literal paths should include package folders + host := vfstest.FromMap(map[string]string{ + "/dev/node_modules/b.ts": "", + }, false) + + got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"node_modules/b.ts"}, false, "/", nil, host) + assert.Assert(t, slices.Contains(got, "/dev/node_modules/b.ts"), "should include explicit node_modules path") + }) +} From e51f138f497134ee14fc177dc6cedecce7c25d6f Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 19:33:59 -0800 Subject: [PATCH 31/41] More cleanup, tests --- internal/vfs/vfsmatch/new.go | 26 +-- internal/vfs/vfsmatch/vfsmatch_test.go | 218 ++++++++++++++++++++++++- 2 files changed, 218 insertions(+), 26 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index 6b8aaa62ce..2581d154be 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -187,10 +187,8 @@ func (p *globPattern) matchPath(path string, pathOffset, compIdx int, prefixOnly // patternSatisfied checks if remaining pattern components can match empty input. func (p *globPattern) patternSatisfied(compIdx int) bool { - if p.isExclude { - return p.isImplicitGlobSuffix(compIdx) - } - // Include patterns: all remaining components must be ** (matches zero dirs) + // A pattern is satisfied when remaining components can match empty input. + // For both include and exclude patterns, only trailing "**" components may match nothing. for _, c := range p.components[compIdx:] { if c.kind != kindDoubleAsterisk { return false @@ -300,26 +298,6 @@ func (p *globPattern) checkMinJsExclusion(filename string, segs []segment) bool return false } -// isImplicitGlobSuffix checks if remaining components are the implicit "**/*" suffix. -func (p *globPattern) isImplicitGlobSuffix(compIdx int) bool { - remaining := p.components[compIdx:] - for i, c := range remaining { - switch c.kind { - case kindDoubleAsterisk: - continue - case kindWildcard: - // Allow single * as last component (the implicit glob suffix) - if i == len(remaining)-1 && len(c.segments) == 1 && c.segments[0].kind == segStar { - return true - } - return false - default: - return false - } - } - return true -} - // stringsEqual compares strings with appropriate case sensitivity. 
func (p *globPattern) stringsEqual(a, b string) bool { if p.caseSensitive { diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go index 059ad755ea..9f7052e4dd 100644 --- a/internal/vfs/vfsmatch/vfsmatch_test.go +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -1296,8 +1296,8 @@ func TestSpecMatcher(t *testing.T) { basePath: "/project", usage: UsageExclude, useCaseSensitiveFileNames: true, - matchingPaths: []string{"/project/node_modules", "/project/node_modules/foo"}, - nonMatchingPaths: []string{"/project/src"}, + matchingPaths: []string{"/project/node_modules/foo"}, + nonMatchingPaths: []string{"/project/node_modules", "/project/src"}, }, { name: "case insensitive", @@ -1336,6 +1336,220 @@ func TestSpecMatcher(t *testing.T) { } } +func TestSpecMatcher_MatchString(t *testing.T) { + t.Parallel() + + implementations := []struct { + name string + new func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher + }{ + { + name: "Old", + new: func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { + return newRegexSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames) + }, + }, + { + name: "New", + new: func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { + return newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames) + }, + }, + } + + cases := []struct { + name string + specs []string + basePath string + usage Usage + useCaseSensitiveFileNames bool + paths []string + expected []bool + }{ + { + name: "simple wildcard files", + specs: []string{"*.ts"}, + basePath: "/project", + usage: UsageFiles, + useCaseSensitiveFileNames: true, + paths: []string{"/project/a.ts", "/project/sub/a.ts", "/project/a.js"}, + expected: []bool{true, false, false}, + }, + { + name: "recursive wildcard files", + specs: []string{"**/*.ts"}, + basePath: "/project", + usage: UsageFiles, + useCaseSensitiveFileNames: true, + paths: []string{"/project/a.ts", "/project/sub/a.ts", "/project/a.js"}, + expected: []bool{true, true, false}, + }, + { + name: "exclude pattern matches prefix", + specs: []string{"node_modules"}, + basePath: "/project", + usage: UsageExclude, + useCaseSensitiveFileNames: true, + paths: []string{"/project/node_modules", "/project/node_modules/foo", "/project/src"}, + expected: []bool{false, true, false}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, len(tc.paths), len(tc.expected)) + + for _, impl := range implementations { + t.Run(impl.name, func(t *testing.T) { + t.Parallel() + m := impl.new(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + assert.Assert(t, m != nil) + for i, path := range tc.paths { + assert.Equal(t, m.MatchString(path), tc.expected[i], "path: %s", path) + } + }) + } + }) + } +} + +func TestSingleSpecMatcher_MatchString(t *testing.T) { + t.Parallel() + + implementations := []struct { + name string + new func(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher + }{ + { + name: "Old", + new: func(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { + return newRegexSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames) + }, + }, + { + name: "New", + new: func(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { + return newGlobSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames) + }, + }, + } + + 
cases := []struct { + name string + spec string + basePath string + usage Usage + useCaseSensitiveFileNames bool + paths []string + expected []bool + }{ + { + name: "single spec wildcard", + spec: "*.ts", + basePath: "/project", + usage: UsageFiles, + useCaseSensitiveFileNames: true, + paths: []string{"/project/a.ts", "/project/sub/a.ts", "/project/a.js"}, + expected: []bool{true, false, false}, + }, + { + name: "single spec trailing starstar exclude allowed", + spec: "**", + basePath: "/project", + usage: UsageExclude, + useCaseSensitiveFileNames: true, + paths: []string{"/project/a.ts", "/project/sub/a.ts"}, + expected: []bool{true, true}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, len(tc.paths), len(tc.expected)) + for _, impl := range implementations { + t.Run(impl.name, func(t *testing.T) { + t.Parallel() + m := impl.new(tc.spec, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + assert.Assert(t, m != nil) + for i, path := range tc.paths { + assert.Equal(t, m.MatchString(path), tc.expected[i], "path: %s", path) + } + }) + } + }) + } +} + +func TestSpecMatchers_MatchIndex(t *testing.T) { + t.Parallel() + + implementations := []struct { + name string + new func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers + }{ + { + name: "Old", + new: func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers { + return newRegexSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames) + }, + }, + { + name: "New", + new: func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers { + return newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames) + }, + }, + } + + cases := []struct { + name string + specs []string + basePath string + usage Usage + useCaseSensitiveFileNames bool + paths []string + expected []int + }{ + { + name: "index lookup prefers first match", + specs: []string{"*.ts", "*.tsx"}, + basePath: "/project", + usage: UsageFiles, + useCaseSensitiveFileNames: true, + paths: []string{"/project/a.ts", "/project/a.tsx", "/project/a.js"}, + expected: []int{0, 1, -1}, + }, + { + name: "exclude index lookup", + specs: []string{"node_modules", "bower_components"}, + basePath: "/project", + usage: UsageExclude, + useCaseSensitiveFileNames: true, + paths: []string{"/project/node_modules", "/project/node_modules/foo", "/project/bower_components", "/project/bower_components/bar", "/project/src"}, + expected: []int{-1, 0, -1, 1, -1}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, len(tc.paths), len(tc.expected)) + for _, impl := range implementations { + t.Run(impl.name, func(t *testing.T) { + t.Parallel() + m := impl.new(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + assert.Assert(t, m != nil) + for i, path := range tc.paths { + assert.Equal(t, m.MatchIndex(path), tc.expected[i], "path: %s", path) + } + }) + } + }) + } +} + func TestSingleSpecMatcher(t *testing.T) { t.Parallel() From 68e49cf985a8ddd263c87505aa41f5999a538e16 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 19:55:09 -0800 Subject: [PATCH 32/41] big oops --- internal/vfs/vfsmatch/vfsmatch_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go index 9f7052e4dd..ae0b9ca0c6 100644 --- 
a/internal/vfs/vfsmatch/vfsmatch_test.go +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -178,7 +178,7 @@ func runReadDirectoryCase(t *testing.T, tc readDirTestCase, readDir readDirector if path == "" { path = "/dev" } - got := ReadDirectory(tc.host(), currentDir, path, tc.extensions, tc.excludes, tc.includes, tc.depth) + got := readDir(tc.host(), currentDir, path, tc.extensions, tc.excludes, tc.includes, tc.depth) tc.expect(t, got) } From 1d8a075333cf9e5b62ee3b924328af125d1c84b9 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 20:04:43 -0800 Subject: [PATCH 33/41] Fix confusing min.js behavior --- internal/vfs/vfsmatch/new.go | 18 +++++++++++--- internal/vfs/vfsmatch/vfsmatch_test.go | 33 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index 2581d154be..f2a1c324c0 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -286,15 +286,27 @@ func (p *globPattern) matchSegments(segs []segment, segIdx int, s string, sIdx i // checkMinJsExclusion returns false if this is a .min.js file that should be excluded. func (p *globPattern) checkMinJsExclusion(filename string, segs []segment) bool { - if !p.excludeMinJs || !strings.HasSuffix(strings.ToLower(filename), ".min.js") { + if !p.excludeMinJs { return true } - // Allow if pattern explicitly includes .min.js + + lowerName := strings.ToLower(filename) + if !strings.HasSuffix(lowerName, ".min.js") { + return true + } + + // Match legacy behavior: exclude .min.js by default for "files" patterns, but allow it + // when the user's pattern explicitly references the .min. suffix (e.g. "*.min.*" or "*.min.js"). for _, seg := range segs { - if seg.kind == segLiteral && strings.Contains(strings.ToLower(seg.literal), ".min.js") { + if seg.kind != segLiteral { + continue + } + lowerLit := strings.ToLower(seg.literal) + if strings.Contains(lowerLit, ".min.js") || strings.Contains(lowerLit, ".min.") { return true } } + return false } diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go index ae0b9ca0c6..02c14c6d1b 100644 --- a/internal/vfs/vfsmatch/vfsmatch_test.go +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -338,6 +338,17 @@ func TestReadDirectory(t *testing.T) { assert.Assert(t, slices.Contains(got, "/dev/x/y/a.ts")) }, }, + { + name: "double asterisk matches zero-or-more directories", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/**/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Equal(t, len(got), 2) + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/y/a.ts")) + }, + }, { name: "wildcard multiple recursive directories", host: caseInsensitiveHost, @@ -640,6 +651,28 @@ func TestReadDirectory(t *testing.T) { assert.Assert(t, slices.Contains(got, "/dev/js/ab.min.js")) }, }, + { + name: "min js files included when pattern mentions .min.", + host: caseInsensitiveHost, + extensions: []string{".js"}, + includes: []string{"js/*.min.*"}, + expect: func(t *testing.T, got []string) { + assert.Equal(t, len(got), 2) + assert.Assert(t, slices.Contains(got, "/dev/js/d.min.js")) + assert.Assert(t, slices.Contains(got, "/dev/js/ab.min.js")) + }, + }, + { + name: "exclude literal node_modules folder", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"node_modules"}, + includes: 
[]string{"**/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/node_modules/a.ts")) + }, + }, { name: "same named declarations include ts", host: sameNamedDeclarationsHost, From 316e46d170714e9d2f8209f4f817a5f5307fc968 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Dec 2025 20:34:02 -0800 Subject: [PATCH 34/41] Pesky pesky min.js --- internal/vfs/vfsmatch/new.go | 30 ++++++++++++++++++++------ internal/vfs/vfsmatch/vfsmatch_test.go | 13 +++++++++++ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index f2a1c324c0..1e873e17d9 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -290,23 +290,39 @@ func (p *globPattern) checkMinJsExclusion(filename string, segs []segment) bool return true } - lowerName := strings.ToLower(filename) - if !strings.HasSuffix(lowerName, ".min.js") { + // Preserve legacy behavior: + // - When matching is case-sensitive, only the exact ".min.js" suffix is excluded by default. + // - When matching is case-insensitive, any casing variant is excluded by default. + if !p.hasMinJsSuffix(filename) { return true } + // Allow when the user's pattern explicitly references the .min. suffix. + if p.patternMentionsMinSuffix(segs) { + return true + } + return false +} + +func (p *globPattern) hasMinJsSuffix(filename string) bool { + if p.caseSensitive { + return strings.HasSuffix(filename, ".min.js") + } + return strings.HasSuffix(strings.ToLower(filename), ".min.js") +} - // Match legacy behavior: exclude .min.js by default for "files" patterns, but allow it - // when the user's pattern explicitly references the .min. suffix (e.g. "*.min.*" or "*.min.js"). +func (p *globPattern) patternMentionsMinSuffix(segs []segment) bool { for _, seg := range segs { if seg.kind != segLiteral { continue } - lowerLit := strings.ToLower(seg.literal) - if strings.Contains(lowerLit, ".min.js") || strings.Contains(lowerLit, ".min.") { + lit := seg.literal + if !p.caseSensitive { + lit = strings.ToLower(lit) + } + if strings.Contains(lit, ".min.js") || strings.Contains(lit, ".min.") { return true } } - return false } diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go index 02c14c6d1b..aa8f4b1f58 100644 --- a/internal/vfs/vfsmatch/vfsmatch_test.go +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -91,6 +91,7 @@ func caseSensitiveHost() vfs.FS { "/dev/q/a/c/b/d.ts": "", "/dev/js/a.js": "", "/dev/js/b.js": "", + "/dev/js/d.MIN.js": "", }, true) } @@ -641,6 +642,18 @@ func TestReadDirectory(t *testing.T) { assert.Assert(t, !slices.Contains(got, "/dev/js/ab.min.js")) }, }, + { + name: "min js exclusion is case-sensitive on case-sensitive FS", + host: caseSensitiveHost, + extensions: []string{".js"}, + includes: []string{"js/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/js/a.js")) + assert.Assert(t, slices.Contains(got, "/dev/js/b.js")) + // Legacy behavior: only lowercase ".min.js" is excluded by default when matching is case-sensitive. 
+ assert.Assert(t, slices.Contains(got, "/dev/js/d.MIN.js")) + }, + }, { name: "min js files explicitly included", host: caseInsensitiveHost, From ee3f2e4762bd3493a9310a5cab9b57c3d08633d7 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Thu, 18 Dec 2025 11:39:23 -0800 Subject: [PATCH 35/41] why can't I type --- internal/vfs/vfsmatch/vfsmatch.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go index a1c2d4457b..db61f91b7a 100644 --- a/internal/vfs/vfsmatch/vfsmatch.go +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -21,10 +21,10 @@ const ( UsageExclude ) -const newNewMatch = true +const newMatch = true func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { - if newNewMatch { + if newMatch { return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) } return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) @@ -53,7 +53,7 @@ type SpecMatchers interface { // NewSpecMatcher creates a matcher for one or more glob specs. // It returns a matcher that can test if paths match any of the patterns. func NewSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { - if newNewMatch { + if newMatch { if m := newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { return m } @@ -68,7 +68,7 @@ func NewSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensiti // NewSingleSpecMatcher creates a matcher for a single glob spec. // Returns nil if the spec compiles to an empty pattern (e.g., trailing ** for non-exclude). func NewSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { - if newNewMatch { + if newMatch { if m := newGlobSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames); m != nil { return m } @@ -83,7 +83,7 @@ func NewSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSens // NewSpecMatchers creates individual matchers for each spec, allowing lookup of which spec matched. // Returns nil if no valid patterns could be compiled from the specs. func NewSpecMatchers(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers { - if newNewMatch { + if newMatch { if m := newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { return m } From 5f971f6b643bffe39f399c9c150f557184d8685b Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Thu, 18 Dec 2025 13:02:16 -0800 Subject: [PATCH 36/41] More perf --- internal/vfs/vfsmatch/new.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index 1e873e17d9..ea91b8f840 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -73,6 +73,8 @@ func compileGlobPattern(spec string, basePath string, usage Usage, caseSensitive caseSensitive: caseSensitive, excludeMinJs: usage == UsageFiles, } + // Avoid slice growth during compilation. 
+ p.components = make([]component, 0, len(parts)) for _, part := range parts { p.components = append(p.components, parseComponent(part, usage != UsageExclude)) @@ -97,7 +99,15 @@ func parseComponent(s string, isInclude bool) component { // parseSegments breaks "*.ts" into [segStar, segLiteral(".ts")] func parseSegments(s string) []segment { - var result []segment + // Preallocate based on wildcard count: each wildcard contributes 1 segment, + // and each wildcard can split literals into at most one extra literal segment. + wildcards := 0 + for i := range len(s) { + if s[i] == '*' || s[i] == '?' { + wildcards++ + } + } + result := make([]segment, 0, 2*wildcards+1) start := 0 for i := range len(s) { switch s[i] { From efd6350c1f7fa0636ccc9be632ddd3f97e7360a9 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Thu, 18 Dec 2025 13:03:13 -0800 Subject: [PATCH 37/41] GetNormalizedPathComponents specialize --- internal/tspath/path.go | 45 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/internal/tspath/path.go b/internal/tspath/path.go index 70e2cfe3e7..8ed231e0e6 100644 --- a/internal/tspath/path.go +++ b/internal/tspath/path.go @@ -332,7 +332,50 @@ func ResolveTripleslashReference(moduleName string, containingFile string) strin } func GetNormalizedPathComponents(path string, currentDirectory string) []string { - return reducePathComponents(GetPathComponents(path, currentDirectory)) + combined := CombinePaths(currentDirectory, path) + return getNormalizedPathComponentsFromCombined(combined) +} + +func getNormalizedPathComponentsFromCombined(path string) []string { + rootLength := GetRootLength(path) + // Always include the root component (empty string for relative paths). + components := make([]string, 1, 8) + components[0] = path[:rootLength] + + for i := rootLength; i < len(path); { + // Skip directory separators (handles consecutive separators and trailing '/'). + for i < len(path) && path[i] == '/' { + i++ + } + if i >= len(path) { + break + } + + start := i + for i < len(path) && path[i] != '/' { + i++ + } + component := path[start:i] + + if component == "" || component == "." { + continue + } + if component == ".." { + if len(components) > 1 { + if components[len(components)-1] != ".." { + components = components[:len(components)-1] + continue + } + } else if components[0] != "" { + // If this is an absolute path, we can't go above the root. + continue + } + } + + components = append(components, component) + } + + return components } func GetNormalizedAbsolutePathWithoutRoot(fileName string, currentDirectory string) string { From 323caa45102838fbf56f40739cc8e9ff19bfa899 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Thu, 18 Dec 2025 15:18:45 -0800 Subject: [PATCH 38/41] More perf optimizations --- internal/vfs/vfsmatch/new.go | 135 +++++++++++++++++-------- internal/vfs/vfsmatch/vfsmatch_test.go | 68 +++++++++++-- 2 files changed, 152 insertions(+), 51 deletions(-) diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go index ea91b8f840..2d118b1988 100644 --- a/internal/vfs/vfsmatch/new.go +++ b/internal/vfs/vfsmatch/new.go @@ -131,48 +131,49 @@ func parseSegments(s string) []segment { // matches returns true if path matches this pattern. 
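// A hedged usage sketch, mirroring the "star segment with complex pattern" test in
// vfsmatch_test.go (same pattern and paths as that test):
//
//	p := compileGlobPattern("a*b*c", "/", UsageFiles, true)
//	_ = p.matches("/aXbYc") // true: segStar backtracks until "b" and then "c" are found
//	_ = p.matches("/aXbY")  // false: no trailing "c"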
func (p *globPattern) matches(path string) bool { - return p.matchPath(path, 0, 0, false) + return p.matchPathParts(path, "", 0, 0, false) +} + +// matchesParts returns true if prefix+suffix matches this pattern. +// This avoids allocating a combined string for common call sites where prefix ends with '/'. +func (p *globPattern) matchesParts(prefix, suffix string) bool { + return p.matchPathParts(prefix, suffix, 0, 0, false) } // matchesPrefix returns true if files under this directory path could match. // Used to skip directories during traversal. -func (p *globPattern) matchesPrefix(path string) bool { - return p.matchPath(path, 0, 0, true) +// matchesPrefixParts returns true if files under prefix+suffix could match. +func (p *globPattern) matchesPrefixParts(prefix, suffix string) bool { + return p.matchPathParts(prefix, suffix, 0, 0, true) } -// matchPath checks if path matches the pattern starting from the given offsets. -// If prefixOnly is true, returns true when path is exhausted (prefix matching for directories). -func (p *globPattern) matchPath(path string, pathOffset, compIdx int, prefixOnly bool) bool { +// matchPathParts is like matchPath, but operates on a virtual path formed by prefix+suffix. +// Offsets are in the combined string. +func (p *globPattern) matchPathParts(prefix, suffix string, pathOffset, compIdx int, prefixOnly bool) bool { for { - pathPart, nextOffset, ok := nextPathPart(path, pathOffset) + pathPart, nextOffset, ok := nextPathPartParts(prefix, suffix, pathOffset) if !ok { if prefixOnly { - return true // Path exhausted - could potentially match + return true } return p.patternSatisfied(compIdx) } if compIdx >= len(p.components) { - // Exclude patterns match prefixes (e.g., "node_modules" excludes "node_modules/foo") return p.isExclude && !prefixOnly } comp := p.components[compIdx] - switch comp.kind { case kindDoubleAsterisk: - // ** can match zero directories: try skipping it - if p.matchPath(path, pathOffset, compIdx+1, prefixOnly) { + if p.matchPathParts(prefix, suffix, pathOffset, compIdx+1, prefixOnly) { return true } - // ** should not match hidden dirs or package folders (for includes) if !p.isExclude && (isHiddenPath(pathPart) || isPackageFolder(pathPart)) { return false } - // ** matches this directory, try next path part with same ** pathOffset = nextOffset continue - case kindLiteral: if comp.skipPackageFolders && isPackageFolder(pathPart) { panic("unreachable: literal components never have skipPackageFolders") @@ -180,7 +181,6 @@ func (p *globPattern) matchPath(path string, pathOffset, compIdx int, prefixOnly if !p.stringsEqual(comp.literal, pathPart) { return false } - case kindWildcard: if comp.skipPackageFolders && isPackageFolder(pathPart) { return false @@ -208,30 +208,69 @@ func (p *globPattern) patternSatisfied(compIdx int) bool { } // nextPathPart extracts the next path component from path starting at offset. 
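// Iteration sketch for nextPathPartSingle as renamed below (hedged; the path is invented
// and the behavior mirrors the nextPathPartParts tests):
//
//	part, off, ok := nextPathPartSingle("/dev/a.ts", 0)
//	// part == "", off == 1, ok == true   (empty root component)
//	part, off, ok = nextPathPartSingle("/dev/a.ts", off)
//	// part == "dev", off == 4, ok == true
//	part, off, ok = nextPathPartSingle("/dev/a.ts", off)
//	// part == "a.ts", off == 9, ok == true
//	_, _, ok = nextPathPartSingle("/dev/a.ts", off)
//	// ok == false: the path is exhausted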
-func nextPathPart(path string, offset int) (part string, nextOffset int, ok bool) { - if offset >= len(path) { +func nextPathPartSingle(s string, offset int) (part string, nextOffset int, ok bool) { + if offset >= len(s) { return "", offset, false } - - // Handle leading slash (root of absolute path) - if offset == 0 && path[0] == '/' { + if offset == 0 && len(s) > 0 && s[0] == '/' { return "", 1, true } - - // Skip consecutive slashes - for offset < len(path) && path[offset] == '/' { + for offset < len(s) && s[offset] == '/' { offset++ } - if offset >= len(path) { + if offset >= len(s) { return "", offset, false } - - // Find end of this component - rest := path[offset:] + rest := s[offset:] if idx := strings.IndexByte(rest, '/'); idx >= 0 { return rest[:idx], offset + idx, true } - return rest, len(path), true + return rest, len(s), true +} + +func nextPathPartParts(prefix, suffix string, offset int) (part string, nextOffset int, ok bool) { + // Fast paths: keep the hot single-string scan tight. + if len(suffix) == 0 { + return nextPathPartSingle(prefix, offset) + } + if len(prefix) == 0 { + return nextPathPartSingle(suffix, offset) + } + + // For matchFilesNoRegex call sites, prefix is a directory path ending in '/', + // and suffix is a single entry name (no '/'). That makes this significantly + // simpler than a general-purpose "virtual concatenation" scanner. + + totalLen := len(prefix) + len(suffix) + if offset >= totalLen { + return "", offset, false + } + + // Handle leading slash (root of absolute path) + if offset == 0 && prefix[0] == '/' { + return "", 1, true + } + + // Scan within prefix. + if offset < len(prefix) { + for offset < len(prefix) && prefix[offset] == '/' { + offset++ + } + if offset < len(prefix) { + rest := prefix[offset:] + idx := strings.IndexByte(rest, '/') + // idx is guaranteed >= 0 for the call sites we care about because prefix ends in '/'. + return rest[:idx], offset + idx, true + } + // Fall through into suffix region. + } + + // Scan suffix: it's a single component. + sOff := offset - len(prefix) + if sOff >= len(suffix) { + return "", offset, false + } + return suffix[sOff:], totalLen, true } // matchWildcard matches a path component against wildcard segments. @@ -317,7 +356,12 @@ func (p *globPattern) hasMinJsSuffix(filename string) bool { if p.caseSensitive { return strings.HasSuffix(filename, ".min.js") } - return strings.HasSuffix(strings.ToLower(filename), ".min.js") + const minJs = ".min.js" + if len(filename) < len(minJs) { + return false + } + // Avoid allocating via strings.ToLower; compare suffix case-insensitively. + return strings.EqualFold(filename[len(filename)-len(minJs):], minJs) } func (p *globPattern) patternMentionsMinSuffix(segs []segment) bool { @@ -392,10 +436,10 @@ func newGlobMatcher(includeSpecs, excludeSpecs []string, basePath string, caseSe return m } -// MatchesFile returns the index of the matching include pattern, or -1 if excluded/no match. -func (m *globMatcher) MatchesFile(path string) int { +// MatchesFileParts is like MatchesFile but matches against prefix+suffix without allocating. 
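// A hedged sketch of the prefix/suffix contract (the matcher and specs are invented;
// the outcomes follow the ReadDirectory and SpecMatcher tests for these specs):
//
//	m := newGlobMatcher([]string{"**/*.ts"}, []string{"node_modules"}, "/dev", true, UsageFiles)
//	_ = m.MatchesFileParts("/dev/src/", "a.ts")          // 0: index of the matching include
//	_ = m.MatchesFileParts("/dev/node_modules/", "a.ts") // -1: excluded, same as matching "/dev/node_modules/a.ts"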
+func (m *globMatcher) MatchesFileParts(prefix, suffix string) int { for _, exc := range m.excludes { - if exc.matches(path) { + if exc.matchesParts(prefix, suffix) { return -1 } } @@ -406,27 +450,25 @@ func (m *globMatcher) MatchesFile(path string) int { return 0 } for i, inc := range m.includes { - if inc.matches(path) { + if inc.matchesParts(prefix, suffix) { return i } } return -1 } -// MatchesDirectory returns true if this directory could contain matching files. -func (m *globMatcher) MatchesDirectory(path string) bool { +// MatchesDirectoryParts is like MatchesDirectory but matches against prefix+suffix without allocating. +func (m *globMatcher) MatchesDirectoryParts(prefix, suffix string) bool { for _, exc := range m.excludes { - if exc.matches(path) { + if exc.matchesParts(prefix, suffix) { return false } } - if len(m.includes) == 0 { return !m.hadIncludes } - for _, inc := range m.includes { - if inc.matchesPrefix(path) { + if inc.matchesPrefixParts(prefix, suffix) { return true } } @@ -462,7 +504,7 @@ func (v *globVisitor) visit(path, absolutePath string, depth *int) { if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(file, v.extensions) { continue } - if idx := v.fileMatcher.MatchesFile(absPrefix + file); idx >= 0 { + if idx := v.fileMatcher.MatchesFileParts(absPrefix, file); idx >= 0 { v.results[idx] = append(v.results[idx], pathPrefix+file) } } @@ -476,10 +518,11 @@ func (v *globVisitor) visit(path, absolutePath string, depth *int) { } for _, dir := range entries.Directories { - absDir := absPrefix + dir - if v.directoryMatcher.MatchesDirectory(absDir) { - v.visit(pathPrefix+dir, absDir, depth) + if !v.directoryMatcher.MatchesDirectoryParts(absPrefix, dir) { + continue } + absDir := absPrefix + dir + v.visit(pathPrefix+dir, absDir, depth) } } @@ -505,6 +548,10 @@ func matchFilesNoRegex(path string, extensions, excludes, includes []string, use v.visit(basePath, tspath.CombinePaths(currentDirectory, basePath), depth) } + // Fast path: a single include bucket (or no includes) doesn't need flattening. 
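	// (results is sized with max(len(fileMatcher.includes), 1), so the zero-include case
	// also has exactly one bucket and takes this fast path.)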
+ if len(v.results) == 1 { + return v.results[0] + } return core.Flatten(v.results) } diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go index aa8f4b1f58..44f1974b2e 100644 --- a/internal/vfs/vfsmatch/vfsmatch_test.go +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -1722,23 +1722,23 @@ func TestGlobPatternInternals(t *testing.T) { path := "/dev//foo///bar" // First call - returns empty for root - part, offset, ok := nextPathPart(path, 0) + part, offset, ok := nextPathPartParts(path, "", 0) assert.Assert(t, ok) assert.Equal(t, part, "") assert.Equal(t, offset, 1) // Second call - should skip consecutive slashes after /dev - part, offset, ok = nextPathPart(path, 1) + part, offset, ok = nextPathPartParts(path, "", 1) assert.Assert(t, ok) assert.Equal(t, part, "dev") // Third call - should skip the double slashes before foo - part, offset, ok = nextPathPart(path, offset) + part, offset, ok = nextPathPartParts(path, "", offset) assert.Assert(t, ok) assert.Equal(t, part, "foo") // Fourth call - should skip the triple slashes before bar - part, _, ok = nextPathPart(path, offset) + part, _, ok = nextPathPartParts(path, "", offset) assert.Assert(t, ok) assert.Equal(t, part, "bar") }) @@ -1748,15 +1748,69 @@ func TestGlobPatternInternals(t *testing.T) { path := "/dev/" // Skip to after "dev" - _, offset, ok := nextPathPart(path, 0) // root + _, offset, ok := nextPathPartParts(path, "", 0) // root assert.Assert(t, ok) - _, offset, ok = nextPathPart(path, offset) // dev + _, offset, ok = nextPathPartParts(path, "", offset) // dev assert.Assert(t, ok) // Now at trailing slash, should return not ok - _, _, ok = nextPathPart(path, offset) + _, _, ok = nextPathPartParts(path, "", offset) assert.Assert(t, !ok) }) + t.Run("nextPathPartParts handles empty prefix", func(t *testing.T) { + t.Parallel() + path := "/dev//foo" + + part, offset, ok := nextPathPartParts("", path, 0) + assert.Assert(t, ok) + assert.Equal(t, part, "") + assert.Equal(t, offset, 1) + + part, offset, ok = nextPathPartParts("", path, offset) + assert.Assert(t, ok) + assert.Equal(t, part, "dev") + + part, _, ok = nextPathPartParts("", path, offset) + assert.Assert(t, ok) + assert.Equal(t, part, "foo") + }) + + t.Run("nextPathPartParts returns not ok when only slashes remain", func(t *testing.T) { + t.Parallel() + prefix := "/dev/" + suffix := "foo" + + _, offset, ok := nextPathPartParts(prefix, suffix, 0) // root + assert.Assert(t, ok) + + part, offset, ok := nextPathPartParts(prefix, suffix, offset) // dev + assert.Assert(t, ok) + assert.Equal(t, part, "dev") + + part, offset, ok = nextPathPartParts(prefix, suffix, offset) // foo + assert.Assert(t, ok) + assert.Equal(t, part, "foo") + assert.Equal(t, offset, len(prefix)+len(suffix)) + + _, _, ok = nextPathPartParts(prefix, suffix, offset) + assert.Assert(t, !ok) + }) + + t.Run("nextPathPartParts parses from suffix region", func(t *testing.T) { + t.Parallel() + prefix := "/" + suffix := "a" + + part, offset, ok := nextPathPartParts(prefix, suffix, 0) // root + assert.Assert(t, ok) + assert.Equal(t, part, "") + assert.Equal(t, offset, 1) + + part, _, ok = nextPathPartParts(prefix, suffix, offset) + assert.Assert(t, ok) + assert.Equal(t, part, "a") + }) + t.Run("question mark segment at end of string", func(t *testing.T) { t.Parallel() // Create pattern with question mark that should fail when string is exhausted From 041843f40fb332cbe960c46a2ea51618e0d29560 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> 

From 041843f40fb332cbe960c46a2ea51618e0d29560 Mon Sep 17 00:00:00 2001
From: Jake Bailey <5341706+jakebailey@users.noreply.github.com>
Date: Thu, 18 Dec 2025 15:21:09 -0800
Subject: [PATCH 39/41] Random exported funcs

---
 internal/vfs/vfsmatch/new.go | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go
index 2d118b1988..c4aaf608a0 100644
--- a/internal/vfs/vfsmatch/new.go
+++ b/internal/vfs/vfsmatch/new.go
@@ -436,8 +436,8 @@ func newGlobMatcher(includeSpecs, excludeSpecs []string, basePath string, caseSe
 	return m
 }
 
-// MatchesFileParts is like MatchesFile but matches against prefix+suffix without allocating.
-func (m *globMatcher) MatchesFileParts(prefix, suffix string) int {
+// matchesFileParts is like MatchesFile but matches against prefix+suffix without allocating.
+func (m *globMatcher) matchesFileParts(prefix, suffix string) int {
 	for _, exc := range m.excludes {
 		if exc.matchesParts(prefix, suffix) {
 			return -1
@@ -457,8 +457,8 @@ func (m *globMatcher) MatchesFileParts(prefix, suffix string) int {
 	return -1
 }
 
-// MatchesDirectoryParts is like MatchesDirectory but matches against prefix+suffix without allocating.
-func (m *globMatcher) MatchesDirectoryParts(prefix, suffix string) bool {
+// matchesDirectoryParts is like MatchesDirectory but matches against prefix+suffix without allocating.
+func (m *globMatcher) matchesDirectoryParts(prefix, suffix string) bool {
 	for _, exc := range m.excludes {
 		if exc.matchesParts(prefix, suffix) {
 			return false
@@ -504,7 +504,7 @@ func (v *globVisitor) visit(path, absolutePath string, depth *int) {
 			if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(file, v.extensions) {
 				continue
 			}
-			if idx := v.fileMatcher.MatchesFileParts(absPrefix, file); idx >= 0 {
+			if idx := v.fileMatcher.matchesFileParts(absPrefix, file); idx >= 0 {
 				v.results[idx] = append(v.results[idx], pathPrefix+file)
 			}
 		}
@@ -518,7 +518,7 @@ func (v *globVisitor) visit(path, absolutePath string, depth *int) {
 	}
 
 	for _, dir := range entries.Directories {
-		if !v.directoryMatcher.MatchesDirectoryParts(absPrefix, dir) {
+		if !v.directoryMatcher.matchesDirectoryParts(absPrefix, dir) {
 			continue
 		}
 		absDir := absPrefix + dir

From 9a6cbbb63b872c31b6c863bc59e9ef2f0a316a4e Mon Sep 17 00:00:00 2001
From: Jake Bailey <5341706+jakebailey@users.noreply.github.com>
Date: Thu, 18 Dec 2025 16:49:01 -0800
Subject: [PATCH 40/41] Stop using pointer for depth

---
 internal/project/ata/discovertypings.go |  2 +-
 internal/tsoptions/tsconfigparsing.go   |  2 +-
 internal/vfs/vfsmatch/bench_test.go     |  8 ++++----
 internal/vfs/vfsmatch/new.go            | 11 +++++------
 internal/vfs/vfsmatch/old.go            | 11 +++++------
 internal/vfs/vfsmatch/vfsmatch.go       |  6 +++++-
 internal/vfs/vfsmatch/vfsmatch_test.go  | 26 ++++++++++++++-----------
 7 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/internal/project/ata/discovertypings.go b/internal/project/ata/discovertypings.go
index 3f8ba209db..26632012e8 100644
--- a/internal/project/ata/discovertypings.go
+++ b/internal/project/ata/discovertypings.go
@@ -223,7 +223,7 @@ func addTypingNamesAndGetFilesToWatch(
 	} else {
 		// And #2. Depth = 3 because scoped packages look like `node_modules/@foo/bar/package.json`
 		depth := 3
-		for _, manifestPath := range vfsmatch.ReadDirectory(fs, projectRootPath, packagesFolderPath, []string{tspath.ExtensionJson}, nil, nil, &depth) {
+		for _, manifestPath := range vfsmatch.ReadDirectory(fs, projectRootPath, packagesFolderPath, []string{tspath.ExtensionJson}, nil, nil, depth) {
 			if tspath.GetBaseFileName(manifestPath) != manifestName {
 				continue
 			}
diff --git a/internal/tsoptions/tsconfigparsing.go b/internal/tsoptions/tsconfigparsing.go
index d6099214af..812074e129 100644
--- a/internal/tsoptions/tsconfigparsing.go
+++ b/internal/tsoptions/tsconfigparsing.go
@@ -1655,7 +1655,7 @@ func getFileNamesFromConfigSpecs(
 	var jsonOnlyIncludeMatchers vfsmatch.SpecMatchers
 
 	if len(validatedIncludeSpecs) > 0 {
-		files := vfsmatch.ReadDirectory(host, basePath, basePath, core.Flatten(supportedExtensionsWithJsonIfResolveJsonModule), validatedExcludeSpecs, validatedIncludeSpecs, nil)
+		files := vfsmatch.ReadDirectory(host, basePath, basePath, core.Flatten(supportedExtensionsWithJsonIfResolveJsonModule), validatedExcludeSpecs, validatedIncludeSpecs, vfsmatch.UnlimitedDepth)
 		for _, file := range files {
 			if tspath.FileExtensionIs(file, tspath.ExtensionJson) {
 				if jsonOnlyIncludeMatchers == nil {
diff --git a/internal/vfs/vfsmatch/bench_test.go b/internal/vfs/vfsmatch/bench_test.go
index ce98822ab0..7038297d47 100644
--- a/internal/vfs/vfsmatch/bench_test.go
+++ b/internal/vfs/vfsmatch/bench_test.go
@@ -101,7 +101,7 @@ func BenchmarkReadDirectory(b *testing.B) {
 		},
 	}
 
-	var benchOnly func(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host vfs.FS) []string
+	var benchOnly func(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth int, host vfs.FS) []string
 	// For benchmark comparison
 	// benchOnly = matchFiles
 	// benchOnly = matchFilesNoRegex
@@ -112,7 +112,7 @@ func BenchmarkReadDirectory(b *testing.B) {
 				host := cachedvfs.From(bc.host())
 				b.ReportAllocs()
 				for b.Loop() {
-					benchOnly(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", nil, host)
+					benchOnly(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", UnlimitedDepth, host)
 				}
 			})
 			continue
@@ -122,7 +122,7 @@ func BenchmarkReadDirectory(b *testing.B) {
 			host := cachedvfs.From(bc.host())
 			b.ReportAllocs()
 			for b.Loop() {
-				matchFiles(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", nil, host)
+				matchFiles(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", UnlimitedDepth, host)
 			}
 		})
 
@@ -130,7 +130,7 @@ func BenchmarkReadDirectory(b *testing.B) {
 			host := cachedvfs.From(bc.host())
 			b.ReportAllocs()
 			for b.Loop() {
-				matchFilesNoRegex(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", nil, host)
+				matchFilesNoRegex(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", UnlimitedDepth, host)
 			}
 		})
 	}
diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go
index c4aaf608a0..c6bb8e6d92 100644
--- a/internal/vfs/vfsmatch/new.go
+++ b/internal/vfs/vfsmatch/new.go
@@ -486,7 +486,7 @@ type globVisitor struct {
 	results [][]string
 }
 
-func (v *globVisitor) visit(path, absolutePath string, depth *int) {
+func (v *globVisitor) visit(path, absolutePath string, depth int) {
 	// Detect symlink cycles
 	realPath := v.host.Realpath(absolutePath)
 	canonicalPath := tspath.GetCanonicalFileName(realPath, v.useCaseSensitiveFileNames)
@@ -509,12 +509,11 @@ func (v *globVisitor) visit(path, absolutePath string, depth *int) {
 		}
 	}
 
-	if depth != nil {
-		newDepth := *depth - 1
-		if newDepth == 0 {
+	if depth != UnlimitedDepth {
+		depth--
+		if depth == 0 {
 			return
 		}
-		depth = &newDepth
 	}
 
 	for _, dir := range entries.Directories {
@@ -527,7 +526,7 @@ func (v *globVisitor) visit(path, absolutePath string, depth *int) {
 }
 
 // matchFilesNoRegex matches files using compiled glob patterns (no regex).
-func matchFilesNoRegex(path string, extensions, excludes, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host vfs.FS) []string {
+func matchFilesNoRegex(path string, extensions, excludes, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth int, host vfs.FS) []string {
 	path = tspath.NormalizePath(path)
 	currentDirectory = tspath.NormalizePath(currentDirectory)
 	absolutePath := tspath.CombinePaths(currentDirectory, path)
diff --git a/internal/vfs/vfsmatch/old.go b/internal/vfs/vfsmatch/old.go
index 9aa5fe0afd..be8fc3fd55 100644
--- a/internal/vfs/vfsmatch/old.go
+++ b/internal/vfs/vfsmatch/old.go
@@ -294,7 +294,7 @@ type visitor struct {
 func (v *visitor) visitDirectory(
 	path string,
 	absolutePath string,
-	depth *int,
+	depth int,
 ) {
 	// Use the real path (with symlinks resolved) for cycle detection.
 	// This prevents infinite loops when symlinks create cycles.
@@ -327,12 +327,11 @@ func (v *visitor) visitDirectory(
 		}
 	}
 
-	if depth != nil {
-		newDepth := *depth - 1
-		if newDepth == 0 {
+	if depth != UnlimitedDepth {
+		depth--
+		if depth == 0 {
 			return
 		}
-		depth = &newDepth
 	}
 
 	for _, current := range directories {
@@ -345,7 +344,7 @@ func (v *visitor) visitDirectory(
 }
 
 // path is the directory of the tsconfig.json
-func matchFiles(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host vfs.FS) []string {
+func matchFiles(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth int, host vfs.FS) []string {
 	path = tspath.NormalizePath(path)
 	currentDirectory = tspath.NormalizePath(currentDirectory)
 
diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go
index db61f91b7a..4643e36994 100644
--- a/internal/vfs/vfsmatch/vfsmatch.go
+++ b/internal/vfs/vfsmatch/vfsmatch.go
@@ -1,6 +1,7 @@
 package vfsmatch
 
 import (
+	"math"
 	"sort"
 	"strings"
 
@@ -21,9 +22,12 @@ const (
 	UsageExclude
 )
 
+// UnlimitedDepth can be passed as the depth argument to indicate there is no depth limit.
+const UnlimitedDepth = math.MaxInt
+
 const newMatch = true
 
-func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string {
+func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth int) []string {
 	if newMatch {
 		return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host)
 	}
diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go
index 44f1974b2e..45af8759a8 100644
--- a/internal/vfs/vfsmatch/vfsmatch_test.go
+++ b/internal/vfs/vfsmatch/vfsmatch_test.go
@@ -17,15 +17,15 @@ func ptrTo[T any](v T) *T {
 }
 
 // readDirectoryFunc is a function type for ReadDirectory implementations
-type readDirectoryFunc func(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string
+type readDirectoryFunc func(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth int) []string
 
 // readDirectoryOld wraps matchFiles with the expected test signature
-func readDirectoryOld(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string {
+func readDirectoryOld(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth int) []string {
 	return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host)
 }
 
 // readDirectoryNew wraps matchFilesNoRegex with the expected test signature
-func readDirectoryNew(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string {
+func readDirectoryNew(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth int) []string {
 	return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host)
 }
 
@@ -166,7 +166,7 @@ type readDirTestCase struct {
 	extensions []string
 	excludes   []string
 	includes   []string
-	depth      *int
+	depth      int
 	expect     func(t *testing.T, got []string)
 }
 
@@ -179,7 +179,11 @@ func runReadDirectoryCase(t *testing.T, tc readDirTestCase, readDir readDirector
 	if path == "" {
 		path = "/dev"
 	}
-	got := readDir(tc.host(), currentDir, path, tc.extensions, tc.excludes, tc.includes, tc.depth)
+	depth := tc.depth
+	if depth == 0 {
+		depth = UnlimitedDepth
+	}
+	got := readDir(tc.host(), currentDir, path, tc.extensions, tc.excludes, tc.includes, depth)
 	tc.expect(t, got)
 }
 
@@ -581,7 +585,7 @@ func TestReadDirectory(t *testing.T) {
 			name:       "depth limit one",
 			host:       caseInsensitiveHost,
 			extensions: []string{".ts", ".tsx", ".d.ts"},
-			depth:      ptrTo(1),
+			depth:      1,
 			expect: func(t *testing.T, got []string) {
 				for _, f := range got {
 					suffix := f[len("/dev/"):]
@@ -593,7 +597,7 @@ func TestReadDirectory(t *testing.T) {
 			name:       "depth limit two",
 			host:       caseInsensitiveHost,
 			extensions: []string{".ts", ".tsx", ".d.ts"},
-			depth:      ptrTo(2),
+			depth:      2,
 			expect: func(t *testing.T, got []string) {
 				assert.Assert(t, slices.Contains(got, "/dev/a.ts"))
 				assert.Assert(t, slices.Contains(got, "/dev/z/a.ts"))
@@ -1870,7 +1874,7 @@ func TestGlobPatternInternals(t *testing.T) {
 
 		// Explicit literal path should work
 		got := matchFilesNoRegex("/dev", []string{".ts"}, nil,
-			[]string{"node_modules/pkg/index.ts"}, false, "/", nil, host)
+			[]string{"node_modules/pkg/index.ts"}, false, "/", UnlimitedDepth, host)
 		assert.Assert(t, slices.Contains(got, "/dev/node_modules/pkg/index.ts"))
 	})
 }
@@ -1941,7 +1945,7 @@ func TestReadDirectoryConsecutiveSlashes(t *testing.T) {
 		}, false)
 
 		// The matchFilesNoRegex function normalizes paths, but we can test internal handling
-		got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"**/*.ts"}, false, "/", nil, host)
+		got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"**/*.ts"}, false, "/", UnlimitedDepth, host)
 		assert.Assert(t, len(got) >= 2, "should find files")
 		assert.Assert(t, slices.Contains(got, "/dev/a.ts"))
 		assert.Assert(t, slices.Contains(got, "/dev/x/b.ts"))
@@ -1959,7 +1963,7 @@ func TestGlobPatternLiteralWithPackageFolders(t *testing.T) {
 			"/dev/node_modules/b.ts": "",
 		}, false)
 
-		got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"*/*.ts"}, false, "/", nil, host)
+		got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"*/*.ts"}, false, "/", UnlimitedDepth, host)
 		assert.Assert(t, !slices.Contains(got, "/dev/node_modules/b.ts"), "should skip node_modules with wildcard")
 	})
 
@@ -1970,7 +1974,7 @@ func TestGlobPatternLiteralWithPackageFolders(t *testing.T) {
 			"/dev/node_modules/b.ts": "",
 		}, false)
 
-		got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"node_modules/b.ts"}, false, "/", nil, host)
+		got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"node_modules/b.ts"}, false, "/", UnlimitedDepth, host)
 		assert.Assert(t, slices.Contains(got, "/dev/node_modules/b.ts"), "should include explicit node_modules path")
 	})
 }
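With this change, ReadDirectory takes a plain int depth instead of a *int, and vfsmatch.UnlimitedDepth (math.MaxInt) replaces the old nil sentinel. Below is a small usage sketch of the two call shapes under the new signature, based only on the hunks above; the helper names listScopedManifests and listAllTS are made up for illustration, and the vfsmatch package is internal to the module, so this is not code from the patch.

package example

import (
	"github.com/microsoft/typescript-go/internal/tspath"
	"github.com/microsoft/typescript-go/internal/vfs"
	"github.com/microsoft/typescript-go/internal/vfs/vfsmatch"
)

// listScopedManifests mirrors the discovertypings.go hunk above: scoped packages
// live at node_modules/@scope/pkg/package.json, so a depth of 3 is enough.
// (Hypothetical helper, not part of the patch.)
func listScopedManifests(fs vfs.FS, projectRoot, packagesFolder string) []string {
	return vfsmatch.ReadDirectory(fs, projectRoot, packagesFolder, []string{tspath.ExtensionJson}, nil, nil, 3)
}

// listAllTS mirrors the tsconfigparsing.go hunk: where the old code passed a nil
// *int for "no limit", the new code passes vfsmatch.UnlimitedDepth.
// (Hypothetical helper, not part of the patch.)
func listAllTS(fs vfs.FS, root string) []string {
	return vfsmatch.ReadDirectory(fs, root, root, []string{".ts"}, nil, []string{"**/*.ts"}, vfsmatch.UnlimitedDepth)
}

Note that the int zero value also doubles as "unset" in the test harness: runReadDirectoryCase above maps a zero tc.depth to UnlimitedDepth so existing cases without a depth keep walking the whole tree.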

From e20b040e2cbd491f48812c9884ed76a5374e3770 Mon Sep 17 00:00:00 2001
From: Jake Bailey <5341706+jakebailey@users.noreply.github.com>
Date: Thu, 18 Dec 2025 20:01:27 -0800
Subject: [PATCH 41/41] globPattern as a value type

---
 internal/vfs/vfsmatch/bench_test.go    |  6 +--
 internal/vfs/vfsmatch/new.go           | 58 ++++++++++++++------------
 internal/vfs/vfsmatch/vfsmatch_test.go | 24 +++++------
 3 files changed, 46 insertions(+), 42 deletions(-)

diff --git a/internal/vfs/vfsmatch/bench_test.go b/internal/vfs/vfsmatch/bench_test.go
index 7038297d47..3ad6822be2 100644
--- a/internal/vfs/vfsmatch/bench_test.go
+++ b/internal/vfs/vfsmatch/bench_test.go
@@ -186,7 +186,7 @@ func BenchmarkPatternCompilation(b *testing.B) {
 	for _, p := range patterns {
 		b.Run(p.name, func(b *testing.B) {
 			for b.Loop() {
-				compileGlobPattern(p.spec, "/project", UsageFiles, true)
+				_, _ = compileGlobPattern(p.spec, "/project", UsageFiles, true)
 			}
 		})
 	}
@@ -241,8 +241,8 @@ func BenchmarkPatternMatching(b *testing.B) {
 	}
 
 	for _, tc := range testCases {
-		pattern := compileGlobPattern(tc.spec, "/project", UsageFiles, true)
-		if pattern == nil {
+		pattern, ok := compileGlobPattern(tc.spec, "/project", UsageFiles, true)
+		if !ok {
 			continue
 		}
 
diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go
index c6bb8e6d92..ecf906ec08 100644
--- a/internal/vfs/vfsmatch/new.go
+++ b/internal/vfs/vfsmatch/new.go
@@ -51,13 +51,13 @@ const (
 )
 
 // compileGlobPattern compiles a glob spec (e.g., "src/**/*.ts") into a pattern.
-// Returns nil if the pattern would match nothing.
-func compileGlobPattern(spec string, basePath string, usage Usage, caseSensitive bool) *globPattern {
+// Returns (pattern, false) if the pattern would match nothing.
+func compileGlobPattern(spec string, basePath string, usage Usage, caseSensitive bool) (globPattern, bool) {
 	parts := tspath.GetNormalizedPathComponents(spec, basePath)
 
 	// "src/**" without a filename matches nothing (for include patterns)
 	if usage != UsageExclude && core.LastOrNil(parts) == "**" {
-		return nil
+		return globPattern{}, false
 	}
 
 	// Normalize root: "/home/" -> "/home"
@@ -68,18 +68,18 @@ func compileGlobPattern(spec string, basePath string, usage Usage, caseSensitive
 		parts = append(parts, "**", "*")
 	}
 
-	p := &globPattern{
+	p := globPattern{
 		isExclude:     usage == UsageExclude,
 		caseSensitive: caseSensitive,
 		excludeMinJs:  usage == UsageFiles,
+		// Avoid slice growth during compilation.
+		components: make([]component, 0, len(parts)),
 	}
-	// Avoid slice growth during compilation.
-	p.components = make([]component, 0, len(parts))
 	for _, part := range parts {
 		p.components = append(p.components, parseComponent(part, usage != UsageExclude))
 	}
-	return p
+	return p, true
 }
 
 // parseComponent converts a path segment string into a component.
@@ -415,21 +415,25 @@ func ensureTrailingSlash(s string) string {
 
 // globMatcher combines include and exclude patterns for file matching.
 type globMatcher struct {
-	includes    []*globPattern
-	excludes    []*globPattern
+	includes    []globPattern
+	excludes    []globPattern
 	hadIncludes bool // true if include specs were provided (even if none compiled)
 }
 
 func newGlobMatcher(includeSpecs, excludeSpecs []string, basePath string, caseSensitive bool, usage Usage) *globMatcher {
-	m := &globMatcher{hadIncludes: len(includeSpecs) > 0}
+	m := &globMatcher{
+		hadIncludes: len(includeSpecs) > 0,
+		includes:    make([]globPattern, 0, len(includeSpecs)),
+		excludes:    make([]globPattern, 0, len(excludeSpecs)),
+	}
 	for _, spec := range includeSpecs {
-		if p := compileGlobPattern(spec, basePath, usage, caseSensitive); p != nil {
+		if p, ok := compileGlobPattern(spec, basePath, usage, caseSensitive); ok {
 			m.includes = append(m.includes, p)
 		}
 	}
 	for _, spec := range excludeSpecs {
-		if p := compileGlobPattern(spec, basePath, UsageExclude, caseSensitive); p != nil {
+		if p, ok := compileGlobPattern(spec, basePath, UsageExclude, caseSensitive); ok {
 			m.excludes = append(m.excludes, p)
 		}
 	}
@@ -438,8 +442,8 @@ func newGlobMatcher(includeSpecs, excludeSpecs []string, basePath string, caseSe
 
 // matchesFileParts is like MatchesFile but matches against prefix+suffix without allocating.
 func (m *globMatcher) matchesFileParts(prefix, suffix string) int {
-	for _, exc := range m.excludes {
-		if exc.matchesParts(prefix, suffix) {
+	for i := range m.excludes {
+		if m.excludes[i].matchesParts(prefix, suffix) {
 			return -1
 		}
 	}
@@ -449,8 +453,8 @@ func (m *globMatcher) matchesFileParts(prefix, suffix string) int {
 		}
 		return 0
 	}
-	for i, inc := range m.includes {
-		if inc.matchesParts(prefix, suffix) {
+	for i := range m.includes {
+		if m.includes[i].matchesParts(prefix, suffix) {
 			return i
 		}
 	}
@@ -459,16 +463,16 @@ func (m *globMatcher) matchesFileParts(prefix, suffix string) int {
 
 // matchesDirectoryParts is like MatchesDirectory but matches against prefix+suffix without allocating.
 func (m *globMatcher) matchesDirectoryParts(prefix, suffix string) bool {
-	for _, exc := range m.excludes {
-		if exc.matchesParts(prefix, suffix) {
+	for i := range m.excludes {
+		if m.excludes[i].matchesParts(prefix, suffix) {
 			return false
 		}
 	}
 	if len(m.includes) == 0 {
 		return !m.hadIncludes
 	}
-	for _, inc := range m.includes {
-		if inc.matchesPrefixParts(prefix, suffix) {
+	for i := range m.includes {
+		if m.includes[i].matchesPrefixParts(prefix, suffix) {
 			return true
 		}
 	}
@@ -556,13 +560,13 @@ func matchFilesNoRegex(path string, extensions, excludes, includes []string, use
 
 // globSpecMatcher wraps multiple glob patterns for matching paths.
 type globSpecMatcher struct {
-	patterns []*globPattern
+	patterns []globPattern
 }
 
 // MatchString returns true if any pattern matches the path.
 func (m *globSpecMatcher) MatchString(path string) bool {
-	for _, p := range m.patterns {
-		if p.matches(path) {
+	for i := range m.patterns {
+		if m.patterns[i].matches(path) {
 			return true
 		}
 	}
@@ -571,8 +575,8 @@ func (m *globSpecMatcher) MatchString(path string) bool {
 
 // MatchIndex returns the index of the first matching pattern, or -1.
 func (m *globSpecMatcher) MatchIndex(path string) int {
-	for i, p := range m.patterns {
-		if p.matches(path) {
+	for i := range m.patterns {
+		if m.patterns[i].matches(path) {
 			return i
 		}
 	}
@@ -584,9 +588,9 @@ func newGlobSpecMatcher(specs []string, basePath string, usage Usage, useCaseSen
 	if len(specs) == 0 {
 		return nil
 	}
-	var patterns []*globPattern
+	patterns := make([]globPattern, 0, len(specs))
 	for _, spec := range specs {
-		if p := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames); p != nil {
+		if p, ok := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames); ok {
 			patterns = append(patterns, p)
 		}
 	}
diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go
index 45af8759a8..8b7b23449f 100644
--- a/internal/vfs/vfsmatch/vfsmatch_test.go
+++ b/internal/vfs/vfsmatch/vfsmatch_test.go
@@ -1818,8 +1818,8 @@ func TestGlobPatternInternals(t *testing.T) {
 	t.Run("question mark segment at end of string", func(t *testing.T) {
 		t.Parallel()
 		// Create pattern with question mark that should fail when string is exhausted
-		p := compileGlobPattern("a?", "/", UsageFiles, true)
-		assert.Assert(t, p != nil)
+		p, ok := compileGlobPattern("a?", "/", UsageFiles, true)
+		assert.Assert(t, ok)
 
 		// Should match "ab"
 		assert.Assert(t, p.matches("/ab"))
@@ -1831,8 +1831,8 @@ func TestGlobPatternInternals(t *testing.T) {
 	t.Run("star segment with complex pattern", func(t *testing.T) {
 		t.Parallel()
 		// Pattern like "a*b*c" requires backtracking in star matching
-		p := compileGlobPattern("a*b*c", "/", UsageFiles, true)
-		assert.Assert(t, p != nil)
+		p, ok := compileGlobPattern("a*b*c", "/", UsageFiles, true)
+		assert.Assert(t, ok)
 
 		// Should match "abc"
 		assert.Assert(t, p.matches("/abc"))
@@ -1887,8 +1887,8 @@ func TestMatchSegmentsEdgeCases(t *testing.T) {
 		t.Parallel()
 		// This tests the case where question mark encounters a slash character
 		// which should fail since ? doesn't match /
-		p := compileGlobPattern("a?b", "/", UsageFiles, true)
-		assert.Assert(t, p != nil)
+		p, ok := compileGlobPattern("a?b", "/", UsageFiles, true)
+		assert.Assert(t, ok)
 
 		// "a/b" should not match "a?b" pattern since ? shouldn't match /
 		// But this is a single component pattern, so / wouldn't be in the component
@@ -1903,8 +1903,8 @@ func TestMatchSegmentsEdgeCases(t *testing.T) {
 	t.Run("star with no trailing content", func(t *testing.T) {
 		t.Parallel()
 		// Test that star can match to end of string
-		p := compileGlobPattern("a*", "/", UsageFiles, true)
-		assert.Assert(t, p != nil)
+		p, ok := compileGlobPattern("a*", "/", UsageFiles, true)
+		assert.Assert(t, ok)
 
 		assert.Assert(t, p.matches("/a"))
 		assert.Assert(t, p.matches("/abc"))
@@ -1914,8 +1914,8 @@ func TestMatchSegmentsEdgeCases(t *testing.T) {
 	t.Run("multiple stars in pattern", func(t *testing.T) {
 		t.Parallel()
 		// Test patterns with multiple stars that require backtracking
-		p := compileGlobPattern("*a*", "/", UsageFiles, true)
-		assert.Assert(t, p != nil)
+		p, ok := compileGlobPattern("*a*", "/", UsageFiles, true)
+		assert.Assert(t, ok)
 
 		assert.Assert(t, p.matches("/a"))
 		assert.Assert(t, p.matches("/Xa"))
@@ -1927,8 +1927,8 @@ func TestMatchSegmentsEdgeCases(t *testing.T) {
 	t.Run("literal segment not matching", func(t *testing.T) {
 		t.Parallel()
 		// Test literal segment that's longer than remaining string
-		p := compileGlobPattern("abcdefgh.ts", "/", UsageFiles, true)
-		assert.Assert(t, p != nil)
+		p, ok := compileGlobPattern("abcdefgh.ts", "/", UsageFiles, true)
+		assert.Assert(t, ok)
 
 		assert.Assert(t, !p.matches("/abc.ts"))     // different literal
 		assert.Assert(t, p.matches("/abcdefgh.ts")) // exact match