diff --git a/internal/ls/autoimports.go b/internal/ls/autoimports.go index c88f426880..eaaef3d857 100644 --- a/internal/ls/autoimports.go +++ b/internal/ls/autoimports.go @@ -5,7 +5,6 @@ import ( "fmt" "strings" - "github.com/dlclark/regexp2" "github.com/microsoft/typescript-go/internal/ast" "github.com/microsoft/typescript-go/internal/astnav" "github.com/microsoft/typescript-go/internal/binder" @@ -24,7 +23,7 @@ import ( "github.com/microsoft/typescript-go/internal/packagejson" "github.com/microsoft/typescript-go/internal/stringutil" "github.com/microsoft/typescript-go/internal/tspath" - "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" ) type SymbolExportInfo struct { @@ -1384,15 +1383,15 @@ func forEachExternalModuleToImportFrom( // useAutoImportProvider bool, cb func(module *ast.Symbol, moduleFile *ast.SourceFile, checker *checker.Checker, isFromPackageJson bool), ) { - var excludePatterns []*regexp2.Regexp + var excludeMatcher vfsmatch.SpecMatcher if preferences.AutoImportFileExcludePatterns != nil { - excludePatterns = getIsExcludedPatterns(preferences, program.UseCaseSensitiveFileNames()) + excludeMatcher = getIsExcludedMatcher(preferences, program.UseCaseSensitiveFileNames()) } forEachExternalModule( ch, program.GetSourceFiles(), - excludePatterns, + excludeMatcher, func(module *ast.Symbol, file *ast.SourceFile) { cb(module, file, ch, false) }, @@ -1414,35 +1413,26 @@ func forEachExternalModuleToImportFrom( // } } -func getIsExcludedPatterns(preferences *lsutil.UserPreferences, useCaseSensitiveFileNames bool) []*regexp2.Regexp { +func getIsExcludedMatcher(preferences *lsutil.UserPreferences, useCaseSensitiveFileNames bool) vfsmatch.SpecMatcher { if preferences.AutoImportFileExcludePatterns == nil { return nil } - var patterns []*regexp2.Regexp - for _, spec := range preferences.AutoImportFileExcludePatterns { - pattern := vfs.GetSubPatternFromSpec(spec, "", vfs.UsageExclude, vfs.WildcardMatcher{}) 
- if pattern != "" { - if re := vfs.GetRegexFromPattern(pattern, useCaseSensitiveFileNames); re != nil { - patterns = append(patterns, re) - } - } - } - return patterns + return vfsmatch.NewSpecMatcher(preferences.AutoImportFileExcludePatterns, "", vfsmatch.UsageExclude, useCaseSensitiveFileNames) } func forEachExternalModule( ch *checker.Checker, allSourceFiles []*ast.SourceFile, - excludePatterns []*regexp2.Regexp, + excludeMatcher vfsmatch.SpecMatcher, cb func(moduleSymbol *ast.Symbol, sourceFile *ast.SourceFile), ) { var isExcluded func(*ast.SourceFile) bool = func(_ *ast.SourceFile) bool { return false } - if excludePatterns != nil { - isExcluded = getIsExcluded(excludePatterns) + if excludeMatcher != nil { + isExcluded = getIsExcluded(excludeMatcher) } for _, ambient := range ch.GetAmbientModules() { - if !strings.Contains(ambient.Name, "*") && !(excludePatterns != nil && core.Every(ambient.Declarations, func(d *ast.Node) bool { + if !strings.Contains(ambient.Name, "*") && !(excludeMatcher != nil && core.Every(ambient.Declarations, func(d *ast.Node) bool { return isExcluded(ast.GetSourceFileOfNode(d)) })) { cb(ambient, nil /*sourceFile*/) @@ -1455,15 +1445,13 @@ func forEachExternalModule( } } -func getIsExcluded(excludePatterns []*regexp2.Regexp) func(sourceFile *ast.SourceFile) bool { +func getIsExcluded(excludeMatcher vfsmatch.SpecMatcher) func(sourceFile *ast.SourceFile) bool { // !!! SymlinkCache // const realpathsWithSymlinks = host.getSymlinkCache?.().getSymlinkedDirectoriesByRealpath(); return func(sourceFile *ast.SourceFile) bool { fileName := sourceFile.FileName() - for _, p := range excludePatterns { - if matched, _ := p.MatchString(fileName); matched { - return true - } + if excludeMatcher.MatchString(fileName) { + return true } // !! 
SymlinkCache // if (realpathsWithSymlinks?.size && pathContainsNodeModules(fileName)) { diff --git a/internal/project/ata/discovertypings.go b/internal/project/ata/discovertypings.go index 8a87b3ea92..26632012e8 100644 --- a/internal/project/ata/discovertypings.go +++ b/internal/project/ata/discovertypings.go @@ -14,6 +14,7 @@ import ( "github.com/microsoft/typescript-go/internal/semver" "github.com/microsoft/typescript-go/internal/tspath" "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" ) func isTypingUpToDate(cachedTyping *CachedTyping, availableTypingVersions map[string]string) bool { @@ -222,7 +223,7 @@ func addTypingNamesAndGetFilesToWatch( } else { // And #2. Depth = 3 because scoped packages look like `node_modules/@foo/bar/package.json` depth := 3 - for _, manifestPath := range vfs.ReadDirectory(fs, projectRootPath, packagesFolderPath, []string{tspath.ExtensionJson}, nil, nil, &depth) { + for _, manifestPath := range vfsmatch.ReadDirectory(fs, projectRootPath, packagesFolderPath, []string{tspath.ExtensionJson}, nil, nil, depth) { if tspath.GetBaseFileName(manifestPath) != manifestName { continue } diff --git a/internal/tsoptions/parsedcommandline.go b/internal/tsoptions/parsedcommandline.go index a00e80854a..34ba336260 100644 --- a/internal/tsoptions/parsedcommandline.go +++ b/internal/tsoptions/parsedcommandline.go @@ -15,6 +15,7 @@ import ( "github.com/microsoft/typescript-go/internal/outputpaths" "github.com/microsoft/typescript-go/internal/tspath" "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" ) const ( @@ -326,7 +327,7 @@ func (p *ParsedCommandLine) PossiblyMatchesFileName(fileName string) bool { } for _, include := range p.ConfigFile.configFileSpecs.validatedIncludeSpecs { - if !strings.ContainsAny(include, "*?") && !vfs.IsImplicitGlob(include) { + if !strings.ContainsAny(include, "*?") && !vfsmatch.IsImplicitGlob(include) { 
includePath := tspath.ToPath(include, p.GetCurrentDirectory(), p.UseCaseSensitiveFileNames()) if includePath == path { return true diff --git a/internal/tsoptions/tsconfigparsing.go b/internal/tsoptions/tsconfigparsing.go index 9668bb70b4..812074e129 100644 --- a/internal/tsoptions/tsconfigparsing.go +++ b/internal/tsoptions/tsconfigparsing.go @@ -2,13 +2,10 @@ package tsoptions import ( "cmp" - "fmt" "reflect" - "regexp" "slices" "strings" - "github.com/dlclark/regexp2" "github.com/microsoft/typescript-go/internal/ast" "github.com/microsoft/typescript-go/internal/collections" "github.com/microsoft/typescript-go/internal/core" @@ -20,6 +17,7 @@ import ( "github.com/microsoft/typescript-go/internal/parser" "github.com/microsoft/typescript-go/internal/tspath" "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" ) type extendsResult struct { @@ -106,13 +104,15 @@ func (c *configFileSpecs) matchesExclude(fileName string, comparePathsOptions ts if len(c.validatedExcludeSpecs) == 0 { return false } - excludePattern := vfs.GetRegularExpressionForWildcard(c.validatedExcludeSpecs, comparePathsOptions.CurrentDirectory, "exclude") - excludeRegex := vfs.GetRegexFromPattern(excludePattern, comparePathsOptions.UseCaseSensitiveFileNames) - if match, err := excludeRegex.MatchString(fileName); err == nil && match { + excludeMatcher := vfsmatch.NewSpecMatcher(c.validatedExcludeSpecs, comparePathsOptions.CurrentDirectory, vfsmatch.UsageExclude, comparePathsOptions.UseCaseSensitiveFileNames) + if excludeMatcher == nil { + return false + } + if excludeMatcher.MatchString(fileName) { return true } if !tspath.HasExtension(fileName) { - if match, err := excludeRegex.MatchString(tspath.EnsureTrailingDirectorySeparator(fileName)); err == nil && match { + if excludeMatcher.MatchString(tspath.EnsureTrailingDirectorySeparator(fileName)) { return true } } @@ -124,12 +124,9 @@ func (c *configFileSpecs) getMatchedIncludeSpec(fileName 
string, comparePathsOpt return "" } for index, spec := range c.validatedIncludeSpecs { - includePattern := vfs.GetPatternFromSpec(spec, comparePathsOptions.CurrentDirectory, "files") - if includePattern != "" { - includeRegex := vfs.GetRegexFromPattern(includePattern, comparePathsOptions.UseCaseSensitiveFileNames) - if match, err := includeRegex.MatchString(fileName); err == nil && match { - return c.validatedIncludeSpecsBeforeSubstitution[index] - } + includeMatcher := vfsmatch.NewSingleSpecMatcher(spec, comparePathsOptions.CurrentDirectory, vfsmatch.UsageFiles, comparePathsOptions.UseCaseSensitiveFileNames) + if includeMatcher != nil && includeMatcher.MatchString(fileName) { + return c.validatedIncludeSpecsBeforeSubstitution[index] } } return "" @@ -1386,7 +1383,7 @@ func validateSpecs(specs any, disallowTrailingRecursion bool, jsonSourceFile *as func specToDiagnostic(spec string, disallowTrailingRecursion bool) *diagnostics.Message { if disallowTrailingRecursion { - if ok, _ := regexp.MatchString(invalidTrailingRecursionPattern, spec); ok { + if invalidTrailingRecursion(spec) { return diagnostics.File_specification_cannot_end_in_a_recursive_directory_wildcard_Asterisk_Asterisk_Colon_0 } } else if invalidDotDotAfterRecursiveWildcard(spec) { @@ -1395,6 +1392,13 @@ func specToDiagnostic(spec string, disallowTrailingRecursion bool) *diagnostics. return nil } +func invalidTrailingRecursion(spec string) bool { + // Matches **, /**, **/, and /**/, but not a**b. 
+ // Strip optional trailing slash, then check if it ends with /** or is just ** + s := strings.TrimSuffix(spec, "/") + return s == "**" || strings.HasSuffix(s, "/**") +} + func invalidDotDotAfterRecursiveWildcard(s string) bool { // We used to use the regex /(^|\/)\*\*\/(.*\/)?\.\.($|\/)/ to check for this case, but // in v8, that has polynomial performance because the recursive wildcard match - **/ - @@ -1419,18 +1423,6 @@ func invalidDotDotAfterRecursiveWildcard(s string) bool { return lastDotIndex > wildcardIndex } -// Tests for a path that ends in a recursive directory wildcard. -// -// Matches **, \**, **\, and \**\, but not a**b. -// NOTE: used \ in place of / above to avoid issues with multiline comments. -// -// Breakdown: -// -// (^|\/) # matches either the beginning of the string or a directory separator. -// \*\* # matches the recursive directory wildcard "**". -// \/?$ # matches an optional trailing directory separator at the end of the string. -const invalidTrailingRecursionPattern = `(?:^|\/)\*\*\/?$` - func GetTsConfigPropArrayElementValue(tsConfigSourceFile *ast.SourceFile, propKey string, elementValue string) *ast.StringLiteral { callback := GetCallbackForFindingPropertyAssignmentByValue(elementValue) return ForEachTsConfigPropArray(tsConfigSourceFile, propKey, func(property *ast.PropertyAssignment) *ast.StringLiteral { @@ -1661,23 +1653,19 @@ func getFileNamesFromConfigSpecs( literalFileMap.Set(keyMappper(fileName), file) } - var jsonOnlyIncludeRegexes []*regexp2.Regexp + var jsonOnlyIncludeMatchers vfsmatch.SpecMatchers if len(validatedIncludeSpecs) > 0 { - files := vfs.ReadDirectory(host, basePath, basePath, core.Flatten(supportedExtensionsWithJsonIfResolveJsonModule), validatedExcludeSpecs, validatedIncludeSpecs, nil) + files := vfsmatch.ReadDirectory(host, basePath, basePath, core.Flatten(supportedExtensionsWithJsonIfResolveJsonModule), validatedExcludeSpecs, validatedIncludeSpecs, vfsmatch.UnlimitedDepth) for _, file := range files { if 
tspath.FileExtensionIs(file, tspath.ExtensionJson) { - if jsonOnlyIncludeRegexes == nil { + if jsonOnlyIncludeMatchers == nil { includes := core.Filter(validatedIncludeSpecs, func(include string) bool { return strings.HasSuffix(include, tspath.ExtensionJson) }) - includeFilePatterns := core.Map(vfs.GetRegularExpressionsForWildcards(includes, basePath, "files"), func(pattern string) string { return fmt.Sprintf("^%s$", pattern) }) - if includeFilePatterns != nil { - jsonOnlyIncludeRegexes = core.Map(includeFilePatterns, func(pattern string) *regexp2.Regexp { - return vfs.GetRegexFromPattern(pattern, host.UseCaseSensitiveFileNames()) - }) - } else { - jsonOnlyIncludeRegexes = nil - } + jsonOnlyIncludeMatchers = vfsmatch.NewSpecMatchers(includes, basePath, vfsmatch.UsageFiles, host.UseCaseSensitiveFileNames()) + } + var includeIndex int = -1 + if jsonOnlyIncludeMatchers != nil { + includeIndex = jsonOnlyIncludeMatchers.MatchIndex(file) } - includeIndex := core.FindIndex(jsonOnlyIncludeRegexes, func(re *regexp2.Regexp) bool { return core.Must(re.MatchString(file)) }) if includeIndex != -1 { key := keyMappper(file) if !literalFileMap.Has(key) && !wildCardJsonFileMap.Has(key) { diff --git a/internal/tsoptions/wildcarddirectories.go b/internal/tsoptions/wildcarddirectories.go index a782b1123c..56abf49291 100644 --- a/internal/tsoptions/wildcarddirectories.go +++ b/internal/tsoptions/wildcarddirectories.go @@ -3,9 +3,8 @@ package tsoptions import ( "strings" - "github.com/dlclark/regexp2" "github.com/microsoft/typescript-go/internal/tspath" - "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfsmatch" ) func getWildcardDirectories(include []string, exclude []string, comparePathsOptions tspath.ComparePathsOptions) map[string]bool { @@ -26,15 +25,7 @@ func getWildcardDirectories(include []string, exclude []string, comparePathsOpti return nil } - rawExcludeRegex := vfs.GetRegularExpressionForWildcard(exclude, 
comparePathsOptions.CurrentDirectory, "exclude") - var excludeRegex *regexp2.Regexp - if rawExcludeRegex != "" { - flags := regexp2.ECMAScript - if !comparePathsOptions.UseCaseSensitiveFileNames { - flags |= regexp2.IgnoreCase - } - excludeRegex = regexp2.MustCompile(rawExcludeRegex, regexp2.RegexOptions(flags)) - } + excludeMatcher := vfsmatch.NewSpecMatcher(exclude, comparePathsOptions.CurrentDirectory, vfsmatch.UsageExclude, comparePathsOptions.UseCaseSensitiveFileNames) wildcardDirectories := make(map[string]bool) wildCardKeyToPath := make(map[string]string) @@ -43,10 +34,8 @@ func getWildcardDirectories(include []string, exclude []string, comparePathsOpti for _, file := range include { spec := tspath.NormalizeSlashes(tspath.CombinePaths(comparePathsOptions.CurrentDirectory, file)) - if excludeRegex != nil { - if matched, _ := excludeRegex.MatchString(spec); matched { - continue - } + if excludeMatcher != nil && excludeMatcher.MatchString(spec) { + continue } match := getWildcardDirectoryFromSpec(spec, comparePathsOptions.UseCaseSensitiveFileNames) @@ -100,9 +89,6 @@ func toCanonicalKey(path string, useCaseSensitiveFileNames bool) string { return strings.ToLower(path) } -// wildcardDirectoryPattern matches paths with wildcard characters -var wildcardDirectoryPattern = regexp2.MustCompile(`^[^*?]*(?=\/[^/]*[*?])`, 0) - // wildcardDirectoryMatch represents the result of a wildcard directory match type wildcardDirectoryMatch struct { Key string @@ -111,27 +97,30 @@ type wildcardDirectoryMatch struct { } func getWildcardDirectoryFromSpec(spec string, useCaseSensitiveFileNames bool) *wildcardDirectoryMatch { - match, _ := wildcardDirectoryPattern.FindStringMatch(spec) - if match != nil { - // We check this with a few `Index` calls because it's more efficient than complex regex - questionWildcardIndex := strings.Index(spec, "?") - starWildcardIndex := strings.Index(spec, "*") - lastDirectorySeparatorIndex := strings.LastIndexByte(spec, tspath.DirectorySeparator) - - 
// Determine if this should be watched recursively - recursive := (questionWildcardIndex != -1 && questionWildcardIndex < lastDirectorySeparatorIndex) || - (starWildcardIndex != -1 && starWildcardIndex < lastDirectorySeparatorIndex) - - return &wildcardDirectoryMatch{ - Key: toCanonicalKey(match.String(), useCaseSensitiveFileNames), - Path: match.String(), - Recursive: recursive, + // Find the first occurrence of a wildcard character + firstWildcard := strings.IndexAny(spec, "*?") + if firstWildcard != -1 { + // Find the last directory separator before the wildcard + lastSepBeforeWildcard := strings.LastIndexByte(spec[:firstWildcard], tspath.DirectorySeparator) + if lastSepBeforeWildcard != -1 { + path := spec[:lastSepBeforeWildcard] + lastDirectorySeparatorIndex := strings.LastIndexByte(spec, tspath.DirectorySeparator) + + // Determine if this should be watched recursively: + // recursive if the wildcard appears in a directory segment (not just the final file segment) + recursive := firstWildcard < lastDirectorySeparatorIndex + + return &wildcardDirectoryMatch{ + Key: toCanonicalKey(path, useCaseSensitiveFileNames), + Path: path, + Recursive: recursive, + } } } if lastSepIndex := strings.LastIndexByte(spec, tspath.DirectorySeparator); lastSepIndex != -1 { lastSegment := spec[lastSepIndex+1:] - if vfs.IsImplicitGlob(lastSegment) { + if vfsmatch.IsImplicitGlob(lastSegment) { path := tspath.RemoveTrailingDirectorySeparator(spec) return &wildcardDirectoryMatch{ Key: toCanonicalKey(path, useCaseSensitiveFileNames), diff --git a/internal/tspath/path.go b/internal/tspath/path.go index 70e2cfe3e7..8ed231e0e6 100644 --- a/internal/tspath/path.go +++ b/internal/tspath/path.go @@ -332,7 +332,50 @@ func ResolveTripleslashReference(moduleName string, containingFile string) strin } func GetNormalizedPathComponents(path string, currentDirectory string) []string { - return reducePathComponents(GetPathComponents(path, currentDirectory)) + combined := CombinePaths(currentDirectory, 
path) + return getNormalizedPathComponentsFromCombined(combined) +} + +func getNormalizedPathComponentsFromCombined(path string) []string { + rootLength := GetRootLength(path) + // Always include the root component (empty string for relative paths). + components := make([]string, 1, 8) + components[0] = path[:rootLength] + + for i := rootLength; i < len(path); { + // Skip directory separators (handles consecutive separators and trailing '/'). + for i < len(path) && path[i] == '/' { + i++ + } + if i >= len(path) { + break + } + + start := i + for i < len(path) && path[i] != '/' { + i++ + } + component := path[start:i] + + if component == "" || component == "." { + continue + } + if component == ".." { + if len(components) > 1 { + if components[len(components)-1] != ".." { + components = components[:len(components)-1] + continue + } + } else if components[0] != "" { + // If this is an absolute path, we can't go above the root. + continue + } + } + + components = append(components, component) + } + + return components } func GetNormalizedAbsolutePathWithoutRoot(fileName string, currentDirectory string) string { diff --git a/internal/vfs/vfsmatch/bench_test.go b/internal/vfs/vfsmatch/bench_test.go new file mode 100644 index 0000000000..3ad6822be2 --- /dev/null +++ b/internal/vfs/vfsmatch/bench_test.go @@ -0,0 +1,257 @@ +package vfsmatch + +import ( + "testing" + + "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/cachedvfs" + "github.com/microsoft/typescript-go/internal/vfs/vfstest" +) + +// Benchmark test cases using the same hosts as the unit tests + +func BenchmarkReadDirectory(b *testing.B) { + benchCases := []struct { + name string + host func() vfs.FS + path string + extensions []string + excludes []string + includes []string + }{ + { + name: "LiteralIncludes", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"a.ts", "b.ts"}, + }, + { + name: 
"WildcardIncludes", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"z/*.ts", "x/*.ts"}, + }, + { + name: "RecursiveWildcard", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts"}, + }, + { + name: "RecursiveWithExcludes", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**/b.ts"}, + includes: []string{"**/*.ts"}, + }, + { + name: "ComplexPattern", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"*.ts", "z/??z.ts", "*/b.ts"}, + includes: []string{"a.ts", "b.ts", "z/a.ts", "z/abz.ts", "z/aba.ts", "x/b.ts"}, + }, + { + name: "DottedFolders", + host: dottedFoldersHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/.*/*"}, + }, + { + name: "CommonPackageFolders", + host: commonFoldersHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts"}, + }, + { + name: "NoIncludes", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + }, + { + name: "MultipleRecursive", + host: caseInsensitiveHost, + path: "/dev", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/x/**/*"}, + }, + { + name: "LargeFileSystem", + host: largeFileSystemHost, + path: "/project", + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"src/**/*.ts"}, + excludes: []string{"**/node_modules/**", "**/*.test.ts"}, + }, + { + name: "LargeAllFiles", + host: largeFileSystemHost, + path: "/project", + extensions: []string{".ts", ".tsx", ".js"}, + excludes: []string{"**/node_modules/**"}, + includes: []string{"**/*"}, + }, + } + + var benchOnly func(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth int, 
host vfs.FS) []string + // For benchmark comparison + // benchOnly = matchFiles + // benchOnly = matchFilesNoRegex + + for _, bc := range benchCases { + if benchOnly != nil { + b.Run(bc.name, func(b *testing.B) { + host := cachedvfs.From(bc.host()) + b.ReportAllocs() + for b.Loop() { + benchOnly(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", UnlimitedDepth, host) + } + }) + continue + } + + b.Run("Old/"+bc.name, func(b *testing.B) { + host := cachedvfs.From(bc.host()) + b.ReportAllocs() + for b.Loop() { + matchFiles(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", UnlimitedDepth, host) + } + }) + + b.Run("New/"+bc.name, func(b *testing.B) { + host := cachedvfs.From(bc.host()) + b.ReportAllocs() + for b.Loop() { + matchFilesNoRegex(bc.path, bc.extensions, bc.excludes, bc.includes, host.UseCaseSensitiveFileNames(), "/", UnlimitedDepth, host) + } + }) + } +} + +// largeFileSystemHost creates a more realistic file system with many files +func largeFileSystemHost() vfs.FS { + files := make(map[string]string) + + // Create a realistic project structure + dirs := []string{ + "/project/src", + "/project/src/components", + "/project/src/utils", + "/project/src/services", + "/project/src/models", + "/project/src/hooks", + "/project/test", + "/project/node_modules/react", + "/project/node_modules/typescript", + "/project/node_modules/@types/node", + } + + // Add files to each directory + for _, dir := range dirs { + for j := range 20 { + files[dir+"/file"+string(rune('a'+j))+".ts"] = "" + files[dir+"/file"+string(rune('a'+j))+".test.ts"] = "" + } + } + + // Add some dotted directories + files["/project/src/.hidden/secret.ts"] = "" + files["/project/.config/settings.ts"] = "" + + return vfstest.FromMap(files, false) +} + +// BenchmarkPatternCompilation benchmarks the pattern compilation step +func BenchmarkPatternCompilation(b *testing.B) { + patterns := []struct { + name string + spec string + 
}{ + {"Literal", "src/file.ts"}, + {"SingleWildcard", "src/*.ts"}, + {"QuestionMark", "src/?.ts"}, + {"DoubleAsterisk", "**/file.ts"}, + {"Complex", "src/**/components/*.tsx"}, + {"DottedPattern", "**/.*/*"}, + } + + for _, p := range patterns { + b.Run(p.name, func(b *testing.B) { + for b.Loop() { + _, _ = compileGlobPattern(p.spec, "/project", UsageFiles, true) + } + }) + } +} + +// BenchmarkPatternMatching benchmarks pattern matching against paths +func BenchmarkPatternMatching(b *testing.B) { + testCases := []struct { + name string + spec string + paths []string + }{ + { + name: "LiteralMatch", + spec: "src/file.ts", + paths: []string{ + "/project/src/file.ts", + "/project/src/other.ts", + "/project/lib/file.ts", + }, + }, + { + name: "WildcardMatch", + spec: "src/*.ts", + paths: []string{ + "/project/src/file.ts", + "/project/src/component.ts", + "/project/src/deep/file.ts", + "/project/lib/file.ts", + }, + }, + { + name: "RecursiveMatch", + spec: "**/file.ts", + paths: []string{ + "/project/file.ts", + "/project/src/file.ts", + "/project/src/deep/nested/file.ts", + "/project/src/other.ts", + }, + }, + { + name: "ComplexMatch", + spec: "src/**/components/*.tsx", + paths: []string{ + "/project/src/components/Button.tsx", + "/project/src/features/auth/components/Login.tsx", + "/project/src/components/Button.ts", + "/project/lib/components/Button.tsx", + }, + }, + } + + for _, tc := range testCases { + pattern, ok := compileGlobPattern(tc.spec, "/project", UsageFiles, true) + if !ok { + continue + } + + b.Run(tc.name, func(b *testing.B) { + for b.Loop() { + for _, path := range tc.paths { + pattern.matches(path) + } + } + }) + } +} diff --git a/internal/vfs/vfsmatch/new.go b/internal/vfs/vfsmatch/new.go new file mode 100644 index 0000000000..ecf906ec08 --- /dev/null +++ b/internal/vfs/vfsmatch/new.go @@ -0,0 +1,606 @@ +package vfsmatch + +import ( + "strings" + + "github.com/microsoft/typescript-go/internal/collections" + 
"github.com/microsoft/typescript-go/internal/core" + "github.com/microsoft/typescript-go/internal/tspath" + "github.com/microsoft/typescript-go/internal/vfs" +) + +// globPattern is a compiled glob pattern for matching file paths without regex. +type globPattern struct { + components []component // path segments to match (e.g., ["src", "**", "*.ts"]) + isExclude bool // exclude patterns have different matching rules + caseSensitive bool + excludeMinJs bool // for "files" patterns, exclude .min.js by default +} + +// component is a single path segment in a glob pattern. +// Examples: "src" (literal), "*" (wildcard), "*.ts" (wildcard), "**" (recursive) +type component struct { + kind componentKind + literal string // for kindLiteral: the exact string to match + segments []segment // for kindWildcard: parsed wildcard pattern + // Include patterns with wildcards skip common package folders (node_modules, etc.) + skipPackageFolders bool +} + +type componentKind int + +const ( + kindLiteral componentKind = iota // exact match (e.g., "src") + kindWildcard // contains * or ? (e.g., "*.ts") + kindDoubleAsterisk // ** matches zero or more directories +) + +// segment is a piece of a wildcard component. +// Example: "*.ts" becomes [segStar, segLiteral(".ts")] +type segment struct { + kind segmentKind + literal string // only for segLiteral +} + +type segmentKind int + +const ( + segLiteral segmentKind = iota // exact text + segStar // * matches any chars except / + segQuestion // ? matches single char except / +) + +// compileGlobPattern compiles a glob spec (e.g., "src/**/*.ts") into a pattern. +// Returns (pattern, false) if the pattern would match nothing. 
+func compileGlobPattern(spec string, basePath string, usage Usage, caseSensitive bool) (globPattern, bool) { + parts := tspath.GetNormalizedPathComponents(spec, basePath) + + // "src/**" without a filename matches nothing (for include patterns) + if usage != UsageExclude && core.LastOrNil(parts) == "**" { + return globPattern{}, false + } + + // Normalize root: "/home/" -> "/home" + parts[0] = tspath.RemoveTrailingDirectorySeparator(parts[0]) + + // Directories implicitly match all files: "src" -> "src/**/*" + if IsImplicitGlob(core.LastOrNil(parts)) { + parts = append(parts, "**", "*") + } + + p := globPattern{ + isExclude: usage == UsageExclude, + caseSensitive: caseSensitive, + excludeMinJs: usage == UsageFiles, + // Avoid slice growth during compilation. + components: make([]component, 0, len(parts)), + } + + for _, part := range parts { + p.components = append(p.components, parseComponent(part, usage != UsageExclude)) + } + return p, true +} + +// parseComponent converts a path segment string into a component. +func parseComponent(s string, isInclude bool) component { + if s == "**" { + return component{kind: kindDoubleAsterisk} + } + if !strings.ContainsAny(s, "*?") { + return component{kind: kindLiteral, literal: s} + } + return component{ + kind: kindWildcard, + segments: parseSegments(s), + skipPackageFolders: isInclude, + } +} + +// parseSegments breaks "*.ts" into [segStar, segLiteral(".ts")] +func parseSegments(s string) []segment { + // Preallocate based on wildcard count: each wildcard contributes 1 segment, + // and each wildcard can split literals into at most one extra literal segment. + wildcards := 0 + for i := range len(s) { + if s[i] == '*' || s[i] == '?' 
{ + wildcards++ + } + } + result := make([]segment, 0, 2*wildcards+1) + start := 0 + for i := range len(s) { + switch s[i] { + case '*', '?': + if i > start { + result = append(result, segment{kind: segLiteral, literal: s[start:i]}) + } + if s[i] == '*' { + result = append(result, segment{kind: segStar}) + } else { + result = append(result, segment{kind: segQuestion}) + } + start = i + 1 + } + } + if start < len(s) { + result = append(result, segment{kind: segLiteral, literal: s[start:]}) + } + return result +} + +// matches returns true if path matches this pattern. +func (p *globPattern) matches(path string) bool { + return p.matchPathParts(path, "", 0, 0, false) +} + +// matchesParts returns true if prefix+suffix matches this pattern. +// This avoids allocating a combined string for common call sites where prefix ends with '/'. +func (p *globPattern) matchesParts(prefix, suffix string) bool { + return p.matchPathParts(prefix, suffix, 0, 0, false) +} + +// matchesPrefix returns true if files under this directory path could match. +// Used to skip directories during traversal. +// matchesPrefixParts returns true if files under prefix+suffix could match. +func (p *globPattern) matchesPrefixParts(prefix, suffix string) bool { + return p.matchPathParts(prefix, suffix, 0, 0, true) +} + +// matchPathParts is like matchPath, but operates on a virtual path formed by prefix+suffix. +// Offsets are in the combined string. 
+func (p *globPattern) matchPathParts(prefix, suffix string, pathOffset, compIdx int, prefixOnly bool) bool { + for { + pathPart, nextOffset, ok := nextPathPartParts(prefix, suffix, pathOffset) + if !ok { + if prefixOnly { + return true + } + return p.patternSatisfied(compIdx) + } + + if compIdx >= len(p.components) { + return p.isExclude && !prefixOnly + } + + comp := p.components[compIdx] + switch comp.kind { + case kindDoubleAsterisk: + if p.matchPathParts(prefix, suffix, pathOffset, compIdx+1, prefixOnly) { + return true + } + if !p.isExclude && (isHiddenPath(pathPart) || isPackageFolder(pathPart)) { + return false + } + pathOffset = nextOffset + continue + case kindLiteral: + if comp.skipPackageFolders && isPackageFolder(pathPart) { + panic("unreachable: literal components never have skipPackageFolders") + } + if !p.stringsEqual(comp.literal, pathPart) { + return false + } + case kindWildcard: + if comp.skipPackageFolders && isPackageFolder(pathPart) { + return false + } + if !p.matchWildcard(comp.segments, pathPart) { + return false + } + } + + pathOffset = nextOffset + compIdx++ + } +} + +// patternSatisfied checks if remaining pattern components can match empty input. +func (p *globPattern) patternSatisfied(compIdx int) bool { + // A pattern is satisfied when remaining components can match empty input. + // For both include and exclude patterns, only trailing "**" components may match nothing. + for _, c := range p.components[compIdx:] { + if c.kind != kindDoubleAsterisk { + return false + } + } + return true +} + +// nextPathPart extracts the next path component from path starting at offset. 
+func nextPathPartSingle(s string, offset int) (part string, nextOffset int, ok bool) { + if offset >= len(s) { + return "", offset, false + } + if offset == 0 && len(s) > 0 && s[0] == '/' { + return "", 1, true + } + for offset < len(s) && s[offset] == '/' { + offset++ + } + if offset >= len(s) { + return "", offset, false + } + rest := s[offset:] + if idx := strings.IndexByte(rest, '/'); idx >= 0 { + return rest[:idx], offset + idx, true + } + return rest, len(s), true +} + +func nextPathPartParts(prefix, suffix string, offset int) (part string, nextOffset int, ok bool) { + // Fast paths: keep the hot single-string scan tight. + if len(suffix) == 0 { + return nextPathPartSingle(prefix, offset) + } + if len(prefix) == 0 { + return nextPathPartSingle(suffix, offset) + } + + // For matchFilesNoRegex call sites, prefix is a directory path ending in '/', + // and suffix is a single entry name (no '/'). That makes this significantly + // simpler than a general-purpose "virtual concatenation" scanner. + + totalLen := len(prefix) + len(suffix) + if offset >= totalLen { + return "", offset, false + } + + // Handle leading slash (root of absolute path) + if offset == 0 && prefix[0] == '/' { + return "", 1, true + } + + // Scan within prefix. + if offset < len(prefix) { + for offset < len(prefix) && prefix[offset] == '/' { + offset++ + } + if offset < len(prefix) { + rest := prefix[offset:] + idx := strings.IndexByte(rest, '/') + // idx is guaranteed >= 0 for the call sites we care about because prefix ends in '/'. + return rest[:idx], offset + idx, true + } + // Fall through into suffix region. + } + + // Scan suffix: it's a single component. + sOff := offset - len(prefix) + if sOff >= len(suffix) { + return "", offset, false + } + return suffix[sOff:], totalLen, true +} + +// matchWildcard matches a path component against wildcard segments. 
+func (p *globPattern) matchWildcard(segs []segment, s string) bool { + // Include patterns: wildcards at start cannot match hidden files + if !p.isExclude && len(segs) > 0 && isHiddenPath(s) && (segs[0].kind == segStar || segs[0].kind == segQuestion) { + return false + } + + // Fast path: single * followed by literal suffix (e.g., "*.ts") + if len(segs) == 2 && segs[0].kind == segStar && segs[1].kind == segLiteral { + suffix := segs[1].literal + if len(s) < len(suffix) || !p.stringsEqual(suffix, s[len(s)-len(suffix):]) { + return false + } + return p.checkMinJsExclusion(s, segs) + } + + return p.matchSegments(segs, 0, s, 0) && p.checkMinJsExclusion(s, segs) +} + +// matchSegments recursively matches segments against string s. +func (p *globPattern) matchSegments(segs []segment, segIdx int, s string, sIdx int) bool { + if segIdx >= len(segs) { + return sIdx >= len(s) + } + + seg := segs[segIdx] + + switch seg.kind { + case segLiteral: + end := sIdx + len(seg.literal) + if end > len(s) { + return false + } + if !p.stringsEqual(seg.literal, s[sIdx:end]) { + return false + } + return p.matchSegments(segs, segIdx+1, s, end) + + case segQuestion: + if sIdx >= len(s) || s[sIdx] == '/' { + return false + } + return p.matchSegments(segs, segIdx+1, s, sIdx+1) + + case segStar: + // Try matching 0, 1, 2, ... characters (but not /) + if p.matchSegments(segs, segIdx+1, s, sIdx) { + return true + } + for i := sIdx; i < len(s) && s[i] != '/'; i++ { + if p.matchSegments(segs, segIdx+1, s, i+1) { + return true + } + } + return false + default: + panic("unreachable: unknown segment kind") + } +} + +// checkMinJsExclusion returns false if this is a .min.js file that should be excluded. +func (p *globPattern) checkMinJsExclusion(filename string, segs []segment) bool { + if !p.excludeMinJs { + return true + } + + // Preserve legacy behavior: + // - When matching is case-sensitive, only the exact ".min.js" suffix is excluded by default. 
+ // - When matching is case-insensitive, any casing variant is excluded by default. + if !p.hasMinJsSuffix(filename) { + return true + } + // Allow when the user's pattern explicitly references the .min. suffix. + if p.patternMentionsMinSuffix(segs) { + return true + } + return false +} + +func (p *globPattern) hasMinJsSuffix(filename string) bool { + if p.caseSensitive { + return strings.HasSuffix(filename, ".min.js") + } + const minJs = ".min.js" + if len(filename) < len(minJs) { + return false + } + // Avoid allocating via strings.ToLower; compare suffix case-insensitively. + return strings.EqualFold(filename[len(filename)-len(minJs):], minJs) +} + +func (p *globPattern) patternMentionsMinSuffix(segs []segment) bool { + for _, seg := range segs { + if seg.kind != segLiteral { + continue + } + lit := seg.literal + if !p.caseSensitive { + lit = strings.ToLower(lit) + } + if strings.Contains(lit, ".min.js") || strings.Contains(lit, ".min.") { + return true + } + } + return false +} + +// stringsEqual compares strings with appropriate case sensitivity. +func (p *globPattern) stringsEqual(a, b string) bool { + if p.caseSensitive { + return a == b + } + return strings.EqualFold(a, b) +} + +// isHiddenPath checks if a path component is hidden (starts with dot). +func isHiddenPath(name string) bool { + return len(name) > 0 && name[0] == '.' +} + +// isPackageFolder checks if name is a common package folder (node_modules, etc.) +func isPackageFolder(name string) bool { + switch len(name) { + case len("node_modules"): + return strings.EqualFold(name, "node_modules") + case len("jspm_packages"): + return strings.EqualFold(name, "jspm_packages") + case len("bower_components"): + return strings.EqualFold(name, "bower_components") + } + return false +} + +func ensureTrailingSlash(s string) string { + if len(s) > 0 && s[len(s)-1] != '/' { + return s + "/" + } + return s +} + +// globMatcher combines include and exclude patterns for file matching. 
+type globMatcher struct { + includes []globPattern + excludes []globPattern + hadIncludes bool // true if include specs were provided (even if none compiled) +} + +func newGlobMatcher(includeSpecs, excludeSpecs []string, basePath string, caseSensitive bool, usage Usage) *globMatcher { + m := &globMatcher{ + hadIncludes: len(includeSpecs) > 0, + includes: make([]globPattern, 0, len(includeSpecs)), + excludes: make([]globPattern, 0, len(excludeSpecs)), + } + + for _, spec := range includeSpecs { + if p, ok := compileGlobPattern(spec, basePath, usage, caseSensitive); ok { + m.includes = append(m.includes, p) + } + } + for _, spec := range excludeSpecs { + if p, ok := compileGlobPattern(spec, basePath, UsageExclude, caseSensitive); ok { + m.excludes = append(m.excludes, p) + } + } + return m +} + +// matchesFileParts is like MatchesFile but matches against prefix+suffix without allocating. +func (m *globMatcher) matchesFileParts(prefix, suffix string) int { + for i := range m.excludes { + if m.excludes[i].matchesParts(prefix, suffix) { + return -1 + } + } + if len(m.includes) == 0 { + if m.hadIncludes { + return -1 + } + return 0 + } + for i := range m.includes { + if m.includes[i].matchesParts(prefix, suffix) { + return i + } + } + return -1 +} + +// matchesDirectoryParts is like MatchesDirectory but matches against prefix+suffix without allocating. +func (m *globMatcher) matchesDirectoryParts(prefix, suffix string) bool { + for i := range m.excludes { + if m.excludes[i].matchesParts(prefix, suffix) { + return false + } + } + if len(m.includes) == 0 { + return !m.hadIncludes + } + for i := range m.includes { + if m.includes[i].matchesPrefixParts(prefix, suffix) { + return true + } + } + return false +} + +// globVisitor traverses directories matching files against glob patterns. 
// globVisitor carries the state of one recursive directory traversal: the
// file system, the compiled matchers, and one result bucket per include
// pattern (results[i] collects files matched by include i).
type globVisitor struct {
	host                      vfs.FS
	fileMatcher               *globMatcher
	directoryMatcher          *globMatcher
	extensions                []string
	useCaseSensitiveFileNames bool
	visited                   collections.Set[string]
	results                   [][]string
}

// visit collects matching files under path (absolutePath is its absolute
// form), recursing into subdirectories until depth is exhausted. depth ==
// UnlimitedDepth disables the limit.
func (v *globVisitor) visit(path, absolutePath string, depth int) {
	// Detect symlink cycles: canonicalize the *real* path so two links to
	// the same directory are only walked once.
	realPath := v.host.Realpath(absolutePath)
	canonicalPath := tspath.GetCanonicalFileName(realPath, v.useCaseSensitiveFileNames)
	if v.visited.Has(canonicalPath) {
		return
	}
	v.visited.Add(canonicalPath)

	entries := v.host.GetAccessibleEntries(absolutePath)

	// Matching uses the absolute prefix; reported paths use the as-written
	// (possibly relative) prefix.
	pathPrefix := ensureTrailingSlash(path)
	absPrefix := ensureTrailingSlash(absolutePath)

	for _, file := range entries.Files {
		// Cheap extension filter before the pattern match.
		if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(file, v.extensions) {
			continue
		}
		// Bucket by which include pattern matched, preserving include order.
		if idx := v.fileMatcher.matchesFileParts(absPrefix, file); idx >= 0 {
			v.results[idx] = append(v.results[idx], pathPrefix+file)
		}
	}

	if depth != UnlimitedDepth {
		depth--
		if depth == 0 {
			return
		}
	}

	for _, dir := range entries.Directories {
		if !v.directoryMatcher.matchesDirectoryParts(absPrefix, dir) {
			continue
		}
		absDir := absPrefix + dir
		v.visit(pathPrefix+dir, absDir, depth)
	}
}

// matchFilesNoRegex matches files using compiled glob patterns (no regex).
+func matchFilesNoRegex(path string, extensions, excludes, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth int, host vfs.FS) []string { + path = tspath.NormalizePath(path) + currentDirectory = tspath.NormalizePath(currentDirectory) + absolutePath := tspath.CombinePaths(currentDirectory, path) + + fileMatcher := newGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageFiles) + directoryMatcher := newGlobMatcher(includes, excludes, absolutePath, useCaseSensitiveFileNames, UsageDirectories) + + v := globVisitor{ + host: host, + fileMatcher: fileMatcher, + directoryMatcher: directoryMatcher, + extensions: extensions, + useCaseSensitiveFileNames: useCaseSensitiveFileNames, + results: make([][]string, max(len(fileMatcher.includes), 1)), + } + + for _, basePath := range getBasePaths(path, includes, useCaseSensitiveFileNames) { + v.visit(basePath, tspath.CombinePaths(currentDirectory, basePath), depth) + } + + // Fast path: a single include bucket (or no includes) doesn't need flattening. + if len(v.results) == 1 { + return v.results[0] + } + return core.Flatten(v.results) +} + +// globSpecMatcher wraps multiple glob patterns for matching paths. +type globSpecMatcher struct { + patterns []globPattern +} + +// MatchString returns true if any pattern matches the path. +func (m *globSpecMatcher) MatchString(path string) bool { + for i := range m.patterns { + if m.patterns[i].matches(path) { + return true + } + } + return false +} + +// MatchIndex returns the index of the first matching pattern, or -1. +func (m *globSpecMatcher) MatchIndex(path string) int { + for i := range m.patterns { + if m.patterns[i].matches(path) { + return i + } + } + return -1 +} + +// newGlobSpecMatcher creates a matcher for multiple glob specs. 
+func newGlobSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatcher { + if len(specs) == 0 { + return nil + } + patterns := make([]globPattern, 0, len(specs)) + for _, spec := range specs { + if p, ok := compileGlobPattern(spec, basePath, usage, useCaseSensitiveFileNames); ok { + patterns = append(patterns, p) + } + } + if len(patterns) == 0 { + return nil + } + return &globSpecMatcher{patterns: patterns} +} + +// newGlobSingleSpecMatcher creates a matcher for a single glob spec. +func newGlobSingleSpecMatcher(spec, basePath string, usage Usage, useCaseSensitiveFileNames bool) *globSpecMatcher { + return newGlobSpecMatcher([]string{spec}, basePath, usage, useCaseSensitiveFileNames) +} diff --git a/internal/vfs/utilities.go b/internal/vfs/vfsmatch/old.go similarity index 69% rename from internal/vfs/utilities.go rename to internal/vfs/vfsmatch/old.go index af4c616a3e..be8fc3fd55 100644 --- a/internal/vfs/utilities.go +++ b/internal/vfs/vfsmatch/old.go @@ -1,20 +1,19 @@ -package vfs +package vfsmatch import ( "fmt" "regexp" - "sort" "strings" "sync" "github.com/dlclark/regexp2" "github.com/microsoft/typescript-go/internal/collections" "github.com/microsoft/typescript-go/internal/core" - "github.com/microsoft/typescript-go/internal/stringutil" "github.com/microsoft/typescript-go/internal/tspath" + "github.com/microsoft/typescript-go/internal/vfs" ) -type FileMatcherPatterns struct { +type fileMatcherPatterns struct { // One pattern for each "include" spec. includeFilePatterns []string // One pattern matching one of any of the "include" specs. 
@@ -24,25 +23,17 @@ type FileMatcherPatterns struct { basePaths []string } -type Usage string - -const ( - UsageFiles Usage = "files" - UsageDirectories Usage = "directories" - UsageExclude Usage = "exclude" -) - -func GetRegularExpressionsForWildcards(specs []string, basePath string, usage Usage) []string { +func getRegularExpressionsForWildcards(specs []string, basePath string, usage Usage) []string { if len(specs) == 0 { return nil } return core.Map(specs, func(spec string) string { - return GetSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) + return getSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) }) } -func GetRegularExpressionForWildcard(specs []string, basePath string, usage Usage) string { - patterns := GetRegularExpressionsForWildcards(specs, basePath, usage) +func getRegularExpressionForWildcard(specs []string, basePath string, usage Usage) string { + patterns := getRegularExpressionsForWildcards(specs, basePath, usage) if len(patterns) == 0 { return "" } @@ -55,7 +46,7 @@ func GetRegularExpressionForWildcard(specs []string, basePath string, usage Usag // If excluding, match "foo/bar/baz...", but if including, only allow "foo". var terminator string - if usage == "exclude" { + if usage == UsageExclude { terminator = "($|/)" } else { terminator = "$" @@ -75,18 +66,11 @@ func replaceWildcardCharacter(match string, singleAsteriskRegexFragment string) } } -// An "includes" path "foo" is implicitly a glob "foo/** /*" (without the space) if its last component has no extension, -// and does not contain any glob characters itself. -func IsImplicitGlob(lastPathComponent string) bool { - return !strings.ContainsAny(lastPathComponent, ".*?") -} - // Reserved characters - only escape actual regex metacharacters. // Go's regexp doesn't support \x escape sequences for arbitrary characters, // so we only escape characters that have special meaning in regex. 
var ( reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.\+*?()\[\]{}^$|#]`) - wildcardCharCodes = []rune{'*', '?'} ) var ( @@ -94,7 +78,7 @@ var ( implicitExcludePathRegexPattern = "(?!(" + strings.Join(commonPackageFolders, "|") + ")(/|$))" ) -type WildcardMatcher struct { +type wildcardMatcher struct { singleAsteriskRegexFragment string doubleAsteriskRegexFragment string replaceWildcardCharacter func(match string) string @@ -110,7 +94,7 @@ const ( singleAsteriskRegexFragment = "[^/]*" ) -var filesMatcher = WildcardMatcher{ +var filesMatcher = wildcardMatcher{ singleAsteriskRegexFragment: singleAsteriskRegexFragmentFilesMatcher, // Regex for the ** wildcard. Matches any number of subdirectories. When used for including // files or directories, does not match subdirectories that start with a . character @@ -120,7 +104,7 @@ var filesMatcher = WildcardMatcher{ }, } -var directoriesMatcher = WildcardMatcher{ +var directoriesMatcher = wildcardMatcher{ singleAsteriskRegexFragment: singleAsteriskRegexFragment, // Regex for the ** wildcard. Matches any number of subdirectories. When used for including // files or directories, does not match subdirectories that start with a . 
character @@ -130,7 +114,7 @@ var directoriesMatcher = WildcardMatcher{ }, } -var excludeMatcher = WildcardMatcher{ +var excludeMatcher = wildcardMatcher{ singleAsteriskRegexFragment: singleAsteriskRegexFragment, doubleAsteriskRegexFragment: "(/.+?)?", replaceWildcardCharacter: func(match string) string { @@ -138,30 +122,30 @@ var excludeMatcher = WildcardMatcher{ }, } -var wildcardMatchers = map[Usage]WildcardMatcher{ +var wildcardMatchers = map[Usage]wildcardMatcher{ UsageFiles: filesMatcher, UsageDirectories: directoriesMatcher, UsageExclude: excludeMatcher, } -func GetPatternFromSpec( +func getPatternFromSpec( spec string, basePath string, usage Usage, ) string { - pattern := GetSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) + pattern := getSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) if pattern == "" { return "" } - ending := core.IfElse(usage == "exclude", "($|/)", "$") + ending := core.IfElse(usage == UsageExclude, "($|/)", "$") return fmt.Sprintf("^(%s)%s", pattern, ending) } -func GetSubPatternFromSpec( +func getSubPatternFromSpec( spec string, basePath string, usage Usage, - matcher WildcardMatcher, + matcher wildcardMatcher, ) string { matcher = wildcardMatchers[usage] @@ -171,7 +155,7 @@ func GetSubPatternFromSpec( hasWrittenComponent := false components := tspath.GetNormalizedPathComponents(spec, basePath) lastComponent := core.LastOrNil(components) - if usage != "exclude" && lastComponent == "**" { + if usage != UsageExclude && lastComponent == "**" { return "" } @@ -188,7 +172,7 @@ func GetSubPatternFromSpec( if component == "**" { subpattern.WriteString(matcher.doubleAsteriskRegexFragment) } else { - if usage == "directories" { + if usage == UsageDirectories { subpattern.WriteString("(") optionalCount++ } @@ -197,7 +181,7 @@ func GetSubPatternFromSpec( subpattern.WriteRune(tspath.DirectorySeparator) } - if usage != "exclude" { + if usage != UsageExclude { var componentPattern strings.Builder if 
strings.HasPrefix(component, "*") { componentPattern.WriteString("([^./]" + matcher.singleAsteriskRegexFragment + ")?") @@ -233,72 +217,18 @@ func GetSubPatternFromSpec( return subpattern.String() } -func getIncludeBasePath(absolute string) string { - wildcardOffset := strings.IndexAny(absolute, string(wildcardCharCodes)) - if wildcardOffset < 0 { - // No "*" or "?" in the path - if !tspath.HasExtension(absolute) { - return absolute - } else { - return tspath.RemoveTrailingDirectorySeparator(tspath.GetDirectoryPath(absolute)) - } - } - return absolute[:max(strings.LastIndex(absolute[:wildcardOffset], string(tspath.DirectorySeparator)), 0)] -} - -// getBasePaths computes the unique non-wildcard base paths amongst the provided include patterns. -func getBasePaths(path string, includes []string, useCaseSensitiveFileNames bool) []string { - // Storage for our results in the form of literal paths (e.g. the paths as written by the user). - basePaths := []string{path} - - if len(includes) > 0 { - // Storage for literal base paths amongst the include patterns. - includeBasePaths := []string{} - for _, include := range includes { - // We also need to check the relative paths by converting them to absolute and normalizing - // in case they escape the base path (e.g "..\somedirectory") - var absolute string - if tspath.IsRootedDiskPath(include) { - absolute = include - } else { - absolute = tspath.NormalizePath(tspath.CombinePaths(path, include)) - } - // Append the literal and canonical candidate base paths. - includeBasePaths = append(includeBasePaths, getIncludeBasePath(absolute)) - } - - // Sort the offsets array using either the literal or canonical path representations. 
- stringComparer := stringutil.GetStringComparer(!useCaseSensitiveFileNames) - sort.SliceStable(includeBasePaths, func(i, j int) bool { - return stringComparer(includeBasePaths[i], includeBasePaths[j]) < 0 - }) - - // Iterate over each include base path and include unique base paths that are not a - // subpath of an existing base path - for _, includeBasePath := range includeBasePaths { - if core.Every(basePaths, func(basepath string) bool { - return !tspath.ContainsPath(basepath, includeBasePath, tspath.ComparePathsOptions{CurrentDirectory: path, UseCaseSensitiveFileNames: !useCaseSensitiveFileNames}) - }) { - basePaths = append(basePaths, includeBasePath) - } - } - } - - return basePaths -} - // getFileMatcherPatterns generates file matching patterns based on the provided path, // includes, excludes, and other parameters. path is the directory of the tsconfig.json file. -func getFileMatcherPatterns(path string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string) FileMatcherPatterns { +func getFileMatcherPatterns(path string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string) fileMatcherPatterns { path = tspath.NormalizePath(path) currentDirectory = tspath.NormalizePath(currentDirectory) absolutePath := tspath.CombinePaths(currentDirectory, path) - return FileMatcherPatterns{ - includeFilePatterns: core.Map(GetRegularExpressionsForWildcards(includes, absolutePath, "files"), func(pattern string) string { return "^" + pattern + "$" }), - includeFilePattern: GetRegularExpressionForWildcard(includes, absolutePath, "files"), - includeDirectoryPattern: GetRegularExpressionForWildcard(includes, absolutePath, "directories"), - excludePattern: GetRegularExpressionForWildcard(excludes, absolutePath, "exclude"), + return fileMatcherPatterns{ + includeFilePatterns: core.Map(getRegularExpressionsForWildcards(includes, absolutePath, UsageFiles), func(pattern string) string { return "^" + 
pattern + "$" }), + includeFilePattern: getRegularExpressionForWildcard(includes, absolutePath, UsageFiles), + includeDirectoryPattern: getRegularExpressionForWildcard(includes, absolutePath, UsageDirectories), + excludePattern: getRegularExpressionForWildcard(excludes, absolutePath, UsageExclude), basePaths: getBasePaths(path, includes, useCaseSensitiveFileNames), } } @@ -313,7 +243,7 @@ var ( regexp2Cache = make(map[regexp2CacheKey]*regexp2.Regexp) ) -func GetRegexFromPattern(pattern string, useCaseSensitiveFileNames bool) *regexp2.Regexp { +func getRegexFromPattern(pattern string, useCaseSensitiveFileNames bool) *regexp2.Regexp { opts := regexp2.RegexOptions(regexp2.ECMAScript) if !useCaseSensitiveFileNames { opts |= regexp2.IgnoreCase @@ -356,7 +286,7 @@ type visitor struct { includeDirectoryRegex *regexp2.Regexp extensions []string useCaseSensitiveFileNames bool - host FS + host vfs.FS visited collections.Set[string] results [][]string } @@ -364,9 +294,12 @@ type visitor struct { func (v *visitor) visitDirectory( path string, absolutePath string, - depth *int, + depth int, ) { - canonicalPath := tspath.GetCanonicalFileName(absolutePath, v.useCaseSensitiveFileNames) + // Use the real path (with symlinks resolved) for cycle detection. + // This prevents infinite loops when symlinks create cycles. 
+ realPath := v.host.Realpath(absolutePath) + canonicalPath := tspath.GetCanonicalFileName(realPath, v.useCaseSensitiveFileNames) if v.visited.Has(canonicalPath) { return } @@ -394,12 +327,11 @@ func (v *visitor) visitDirectory( } } - if depth != nil { - newDepth := *depth - 1 - if newDepth == 0 { + if depth != UnlimitedDepth { + depth-- + if depth == 0 { return } - depth = &newDepth } for _, current := range directories { @@ -412,22 +344,22 @@ func (v *visitor) visitDirectory( } // path is the directory of the tsconfig.json -func matchFiles(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host FS) []string { +func matchFiles(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth int, host vfs.FS) []string { path = tspath.NormalizePath(path) currentDirectory = tspath.NormalizePath(currentDirectory) patterns := getFileMatcherPatterns(path, excludes, includes, useCaseSensitiveFileNames, currentDirectory) var includeFileRegexes []*regexp2.Regexp if patterns.includeFilePatterns != nil { - includeFileRegexes = core.Map(patterns.includeFilePatterns, func(pattern string) *regexp2.Regexp { return GetRegexFromPattern(pattern, useCaseSensitiveFileNames) }) + includeFileRegexes = core.Map(patterns.includeFilePatterns, func(pattern string) *regexp2.Regexp { return getRegexFromPattern(pattern, useCaseSensitiveFileNames) }) } var includeDirectoryRegex *regexp2.Regexp if patterns.includeDirectoryPattern != "" { - includeDirectoryRegex = GetRegexFromPattern(patterns.includeDirectoryPattern, useCaseSensitiveFileNames) + includeDirectoryRegex = getRegexFromPattern(patterns.includeDirectoryPattern, useCaseSensitiveFileNames) } var excludeRegex *regexp2.Regexp if patterns.excludePattern != "" { - excludeRegex = GetRegexFromPattern(patterns.excludePattern, useCaseSensitiveFileNames) + excludeRegex = 
getRegexFromPattern(patterns.excludePattern, useCaseSensitiveFileNames) } // Associate an array of results with each include regex. This keeps results in order of the "include" order. @@ -458,6 +390,62 @@ func matchFiles(path string, extensions []string, excludes []string, includes [] return core.Flatten(results) } -func ReadDirectory(host FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { - return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) +// regexSpecMatcher wraps a regexp2.Regexp for SpecMatcher interface. +type regexSpecMatcher struct { + re *regexp2.Regexp +} + +func (m *regexSpecMatcher) MatchString(path string) bool { + if m == nil || m.re == nil { + return false + } + matched, err := m.re.MatchString(path) + return err == nil && matched +} + +// newRegexSpecMatcher creates a regex-based matcher for multiple specs. +func newRegexSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *regexSpecMatcher { + pattern := getRegularExpressionForWildcard(specs, basePath, usage) + if pattern == "" { + return nil + } + return ®exSpecMatcher{re: getRegexFromPattern(pattern, useCaseSensitiveFileNames)} +} + +// newRegexSingleSpecMatcher creates a regex-based matcher for a single spec. +func newRegexSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *regexSpecMatcher { + pattern := getPatternFromSpec(spec, basePath, usage) + if pattern == "" { + return nil + } + return ®exSpecMatcher{re: getRegexFromPattern(pattern, useCaseSensitiveFileNames)} +} + +// regexSpecMatchers holds a list of individual regex matchers for index lookup. 
+type regexSpecMatchers struct { + matchers []*regexp2.Regexp +} + +func (m *regexSpecMatchers) MatchIndex(path string) int { + for i, re := range m.matchers { + if matched, err := re.MatchString(path); err == nil && matched { + return i + } + } + return -1 +} + +// newRegexSpecMatchers creates individual regex matchers for each spec. +func newRegexSpecMatchers(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) *regexSpecMatchers { + patterns := getRegularExpressionsForWildcards(specs, basePath, usage) + if len(patterns) == 0 { + return nil + } + matchers := make([]*regexp2.Regexp, len(patterns)) + for i, pattern := range patterns { + // Wrap pattern with ^ and $ for full match + fullPattern := "^" + pattern + "$" + matchers[i] = getRegexFromPattern(fullPattern, useCaseSensitiveFileNames) + } + return ®exSpecMatchers{matchers: matchers} } diff --git a/internal/vfs/vfsmatch/stringer_generated.go b/internal/vfs/vfsmatch/stringer_generated.go new file mode 100644 index 0000000000..18d4c40eac --- /dev/null +++ b/internal/vfs/vfsmatch/stringer_generated.go @@ -0,0 +1,26 @@ +// Code generated by "stringer -type=Usage -trimprefix=Usage -output=stringer_generated.go"; DO NOT EDIT. + +package vfsmatch + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[UsageFiles-0] + _ = x[UsageDirectories-1] + _ = x[UsageExclude-2] +} + +const _Usage_name = "FilesDirectoriesExclude" + +var _Usage_index = [...]uint8{0, 5, 16, 23} + +func (i Usage) String() string { + idx := int(i) - 0 + if i < 0 || idx >= len(_Usage_index)-1 { + return "Usage(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _Usage_name[_Usage_index[idx]:_Usage_index[idx+1]] +} diff --git a/internal/vfs/vfsmatch/vfsmatch.go b/internal/vfs/vfsmatch/vfsmatch.go new file mode 100644 index 0000000000..4643e36994 --- /dev/null +++ b/internal/vfs/vfsmatch/vfsmatch.go @@ -0,0 +1,156 @@ +package vfsmatch + +import ( + "math" + "sort" + "strings" + + "github.com/microsoft/typescript-go/internal/core" + "github.com/microsoft/typescript-go/internal/stringutil" + "github.com/microsoft/typescript-go/internal/tspath" + "github.com/microsoft/typescript-go/internal/vfs" +) + +//go:generate go tool golang.org/x/tools/cmd/stringer -type=Usage -trimprefix=Usage -output=stringer_generated.go +//go:generate go tool mvdan.cc/gofumpt -w stringer_generated.go + +type Usage int8 + +const ( + UsageFiles Usage = iota + UsageDirectories + UsageExclude +) + +// UnlimitedDepth can be passed as the depth argument to indicate there is no depth limit. +const UnlimitedDepth = math.MaxInt + +const newMatch = true + +func ReadDirectory(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth int) []string { + if newMatch { + return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) + } + return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) +} + +// IsImplicitGlob checks if a path component is implicitly a glob. +// An "includes" path "foo" is implicitly a glob "foo/** /*" (without the space) if its last component has no extension, +// and does not contain any glob characters itself. 
+func IsImplicitGlob(lastPathComponent string) bool { + return !strings.ContainsAny(lastPathComponent, ".*?") +} + +// SpecMatcher is an interface for matching file paths against compiled glob patterns. +type SpecMatcher interface { + // MatchString returns true if the given path matches the pattern. + MatchString(path string) bool +} + +// SpecMatchers is an interface for matching file paths against multiple compiled glob patterns. +// It can return the index of the matching pattern. +type SpecMatchers interface { + // MatchIndex returns the index of the first matching pattern, or -1 if none match. + MatchIndex(path string) int +} + +// NewSpecMatcher creates a matcher for one or more glob specs. +// It returns a matcher that can test if paths match any of the patterns. +func NewSpecMatcher(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { + if newMatch { + if m := newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil + } + if m := newRegexSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil +} + +// NewSingleSpecMatcher creates a matcher for a single glob spec. +// Returns nil if the spec compiles to an empty pattern (e.g., trailing ** for non-exclude). +func NewSingleSpecMatcher(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { + if newMatch { + if m := newGlobSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil + } + if m := newRegexSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil +} + +// NewSpecMatchers creates individual matchers for each spec, allowing lookup of which spec matched. +// Returns nil if no valid patterns could be compiled from the specs. 
+func NewSpecMatchers(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers { + if newMatch { + if m := newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil + } + if m := newRegexSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames); m != nil { + return m + } + return nil +} + +var wildcardCharCodes = []rune{'*', '?'} + +func getIncludeBasePath(absolute string) string { + wildcardOffset := strings.IndexAny(absolute, string(wildcardCharCodes)) + if wildcardOffset < 0 { + // No "*" or "?" in the path + if !tspath.HasExtension(absolute) { + return absolute + } else { + return tspath.RemoveTrailingDirectorySeparator(tspath.GetDirectoryPath(absolute)) + } + } + return absolute[:max(strings.LastIndex(absolute[:wildcardOffset], string(tspath.DirectorySeparator)), 0)] +} + +// getBasePaths computes the unique non-wildcard base paths amongst the provided include patterns. +func getBasePaths(path string, includes []string, useCaseSensitiveFileNames bool) []string { + // Storage for our results in the form of literal paths (e.g. the paths as written by the user). + basePaths := []string{path} + + if len(includes) > 0 { + // Storage for literal base paths amongst the include patterns. + includeBasePaths := []string{} + for _, include := range includes { + // We also need to check the relative paths by converting them to absolute and normalizing + // in case they escape the base path (e.g "..\somedirectory") + var absolute string + if tspath.IsRootedDiskPath(include) { + absolute = include + } else { + absolute = tspath.NormalizePath(tspath.CombinePaths(path, include)) + } + // Append the literal and canonical candidate base paths. + includeBasePaths = append(includeBasePaths, getIncludeBasePath(absolute)) + } + + // Sort the offsets array using either the literal or canonical path representations. 
+ stringComparer := stringutil.GetStringComparer(!useCaseSensitiveFileNames) + sort.SliceStable(includeBasePaths, func(i, j int) bool { + return stringComparer(includeBasePaths[i], includeBasePaths[j]) < 0 + }) + + // Iterate over each include base path and include unique base paths that are not a + // subpath of an existing base path + for _, includeBasePath := range includeBasePaths { + if core.Every(basePaths, func(basepath string) bool { + return !tspath.ContainsPath(basepath, includeBasePath, tspath.ComparePathsOptions{CurrentDirectory: path, UseCaseSensitiveFileNames: !useCaseSensitiveFileNames}) + }) { + basePaths = append(basePaths, includeBasePath) + } + } + } + + return basePaths +} diff --git a/internal/vfs/vfsmatch/vfsmatch_test.go b/internal/vfs/vfsmatch/vfsmatch_test.go new file mode 100644 index 0000000000..8b7b23449f --- /dev/null +++ b/internal/vfs/vfsmatch/vfsmatch_test.go @@ -0,0 +1,1980 @@ +package vfsmatch + +import ( + "slices" + "testing" + + "github.com/microsoft/typescript-go/internal/vfs" + "github.com/microsoft/typescript-go/internal/vfs/vfstest" + "gotest.tools/v3/assert" +) + +// Test cases modeled after TypeScript's matchFiles tests in +// _submodules/TypeScript/src/testRunner/unittests/config/matchFiles.ts + +func ptrTo[T any](v T) *T { + return &v +} + +// readDirectoryFunc is a function type for ReadDirectory implementations +type readDirectoryFunc func(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth int) []string + +// readDirectoryOld wraps matchFiles with the expected test signature +func readDirectoryOld(host vfs.FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth int) []string { + return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) +} + +// readDirectoryNew wraps matchFilesNoRegex with the expected test signature +func readDirectoryNew(host vfs.FS, 
currentDir string, path string, extensions []string, excludes []string, includes []string, depth int) []string {
+	return matchFilesNoRegex(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host)
+}
+
+// readDirectoryImplementations contains all implementations to test
+// (the legacy regexp-based matcher and the new glob-based one); every table
+// case below is run against both so their results can be compared.
+var readDirectoryImplementations = []struct {
+	name string
+	fn   readDirectoryFunc
+}{
+	{"Old", readDirectoryOld},
+	{"New", readDirectoryNew},
+}
+
+// caseInsensitiveHost simulates a Windows-like file system
+func caseInsensitiveHost() vfs.FS {
+	return vfstest.FromMap(map[string]string{
+		"/dev/a.ts":         "",
+		"/dev/a.d.ts":       "",
+		"/dev/a.js":         "",
+		"/dev/b.ts":         "",
+		"/dev/b.js":         "",
+		"/dev/c.d.ts":       "",
+		"/dev/z/a.ts":       "",
+		"/dev/z/abz.ts":     "",
+		"/dev/z/aba.ts":     "",
+		"/dev/z/b.ts":       "",
+		"/dev/z/bbz.ts":     "",
+		"/dev/z/bba.ts":     "",
+		"/dev/x/a.ts":       "",
+		"/dev/x/aa.ts":      "",
+		"/dev/x/b.ts":       "",
+		"/dev/x/y/a.ts":     "",
+		"/dev/x/y/b.ts":     "",
+		"/dev/js/a.js":      "",
+		"/dev/js/b.js":      "",
+		"/dev/js/d.min.js":  "",
+		"/dev/js/ab.min.js": "",
+		"/ext/ext.ts":       "",
+		"/ext/b/a..b.ts":    "",
+	}, false)
+}
+
+// caseSensitiveHost simulates a Unix-like case-sensitive file system
+func caseSensitiveHost() vfs.FS {
+	return vfstest.FromMap(map[string]string{
+		"/dev/a.ts":         "",
+		"/dev/a.d.ts":       "",
+		"/dev/a.js":         "",
+		"/dev/b.ts":         "",
+		"/dev/b.js":         "",
+		"/dev/A.ts":         "",
+		"/dev/B.ts":         "",
+		"/dev/c.d.ts":       "",
+		"/dev/z/a.ts":       "",
+		"/dev/z/abz.ts":     "",
+		"/dev/z/aba.ts":     "",
+		"/dev/z/b.ts":       "",
+		"/dev/z/bbz.ts":     "",
+		"/dev/z/bba.ts":     "",
+		"/dev/x/a.ts":       "",
+		"/dev/x/b.ts":       "",
+		"/dev/x/y/a.ts":     "",
+		"/dev/x/y/b.ts":     "",
+		"/dev/q/a/c/b/d.ts": "",
+		"/dev/js/a.js":      "",
+		"/dev/js/b.js":      "",
+		"/dev/js/d.MIN.js":  "",
+	}, true)
+}
+
+// commonFoldersHost includes node_modules, bower_components, jspm_packages
+func commonFoldersHost() vfs.FS {
+	return vfstest.FromMap(map[string]string{
+		"/dev/a.ts":                  "",
+		"/dev/a.d.ts":                "",
+		"/dev/a.js":                  "",
+		"/dev/b.ts":                  "",
+		"/dev/x/a.ts":                "",
+		"/dev/node_modules/a.ts":     "",
+		"/dev/bower_components/a.ts": "",
+		"/dev/jspm_packages/a.ts":    "",
+	}, false)
+}
+
+// dottedFoldersHost includes files and folders starting with a dot
+func dottedFoldersHost() vfs.FS {
+	return vfstest.FromMap(map[string]string{
+		"/dev/x/d.ts":           "",
+		"/dev/x/y/d.ts":         "",
+		"/dev/x/y/.e.ts":        "",
+		"/dev/x/.y/a.ts":        "",
+		"/dev/.z/.b.ts":         "",
+		"/dev/.z/c.ts":          "",
+		"/dev/w/.u/e.ts":        "",
+		"/dev/g.min.js/.g/g.ts": "",
+	}, false)
+}
+
+// mixedExtensionHost has various file extensions
+func mixedExtensionHost() vfs.FS {
+	return vfstest.FromMap(map[string]string{
+		"/dev/a.ts":    "",
+		"/dev/a.d.ts":  "",
+		"/dev/a.js":    "",
+		"/dev/b.tsx":   "",
+		"/dev/b.d.ts":  "",
+		"/dev/b.jsx":   "",
+		"/dev/c.tsx":   "",
+		"/dev/c.js":    "",
+		"/dev/d.js":    "",
+		"/dev/e.jsx":   "",
+		"/dev/f.other": "",
+	}, false)
+}
+
+// sameNamedDeclarationsHost has files with same names but different extensions
+func sameNamedDeclarationsHost() vfs.FS {
+	return vfstest.FromMap(map[string]string{
+		"/dev/a.tsx":  "",
+		"/dev/a.d.ts": "",
+		"/dev/b.tsx":  "",
+		"/dev/b.ts":   "",
+		"/dev/c.tsx":  "",
+		"/dev/m.ts":   "",
+		"/dev/m.d.ts": "",
+		"/dev/n.tsx":  "",
+		"/dev/n.ts":   "",
+		"/dev/n.d.ts": "",
+		"/dev/o.ts":   "",
+		"/dev/x.d.ts": "",
+	}, false)
+}
+
+// readDirTestCase describes one ReadDirectory scenario: a fixture file
+// system, the matcher inputs, and an expectation callback over the result.
+// Zero-valued currentDir/path/depth get defaults in runReadDirectoryCase.
+type readDirTestCase struct {
+	name       string
+	host       func() vfs.FS
+	currentDir string
+	path       string
+	extensions []string
+	excludes   []string
+	includes   []string
+	depth      int
+	expect     func(t *testing.T, got []string)
+}
+
+// runReadDirectoryCase runs one table case against the given implementation,
+// defaulting currentDir to "/", path to "/dev", and depth to UnlimitedDepth.
+func runReadDirectoryCase(t *testing.T, tc readDirTestCase, readDir readDirectoryFunc) {
+	currentDir := tc.currentDir
+	if currentDir == "" {
+		currentDir = "/"
+	}
+	path := tc.path
+	if path == "" {
+		path = "/dev"
+	}
+	depth := tc.depth
+	if depth == 0 {
+		depth = UnlimitedDepth
+	}
+	got := readDir(tc.host(), currentDir, path, tc.extensions, tc.excludes, tc.includes, depth)
+	tc.expect(t, got)
+}
+
+func TestReadDirectory(t *testing.T) {
+	t.Parallel()
+
+	cases :=
[]readDirTestCase{ + { + name: "defaults include common package folders", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/b.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/node_modules/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/bower_components/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/jspm_packages/a.ts")) + }, + }, + { + name: "literal includes without exclusions", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"a.ts", "b.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/a.ts", "/dev/b.ts"}) + }, + }, + { + name: "literal includes with non ts extensions excluded", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"a.js", "b.js"}, + expect: func(t *testing.T, got []string) { + assert.Equal(t, len(got), 0) + }, + }, + { + name: "literal includes missing files excluded", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"z.ts", "x.ts"}, + expect: func(t *testing.T, got []string) { + assert.Equal(t, len(got), 0) + }, + }, + { + name: "literal includes with literal excludes", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"b.ts"}, + includes: []string{"a.ts", "b.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/a.ts"}) + }, + }, + { + name: "literal includes with wildcard excludes", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"*.ts", "z/??z.ts", "*/b.ts"}, + includes: []string{"a.ts", "b.ts", "z/a.ts", "z/abz.ts", "z/aba.ts", "x/b.ts"}, + expect: func(t *testing.T, got []string) { + 
assert.DeepEqual(t, got, []string{"/dev/z/a.ts", "/dev/z/aba.ts"}) + }, + }, + { + name: "literal includes with recursive excludes", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**/b.ts"}, + includes: []string{"a.ts", "b.ts", "x/a.ts", "x/b.ts", "x/y/a.ts", "x/y/b.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/a.ts", "/dev/x/a.ts", "/dev/x/y/a.ts"}) + }, + }, + { + name: "case sensitive exclude is respected", + host: caseSensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**/b.ts"}, + includes: []string{"B.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/B.ts"}) + }, + }, + { + name: "explicit includes keep common package folders", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"a.ts", "b.ts", "node_modules/a.ts", "bower_components/a.ts", "jspm_packages/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/b.ts")) + assert.Assert(t, slices.Contains(got, "/dev/node_modules/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/bower_components/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/jspm_packages/a.ts")) + }, + }, + { + name: "wildcard include sorted order", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"z/*.ts", "x/*.ts"}, + expect: func(t *testing.T, got []string) { + expected := []string{ + "/dev/z/a.ts", "/dev/z/aba.ts", "/dev/z/abz.ts", "/dev/z/b.ts", "/dev/z/bba.ts", "/dev/z/bbz.ts", + "/dev/x/a.ts", "/dev/x/aa.ts", "/dev/x/b.ts", + } + assert.DeepEqual(t, got, expected) + }, + }, + { + name: "wildcard include same named declarations excluded", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*.ts"}, + expect: func(t *testing.T, 
got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/b.ts")) + assert.Assert(t, slices.Contains(got, "/dev/a.d.ts")) + assert.Assert(t, slices.Contains(got, "/dev/c.d.ts")) + }, + }, + { + name: "wildcard star matches only ts files", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, contains(f, ".ts") || contains(f, ".tsx") || contains(f, ".d.ts"), "unexpected file: %s", f) + } + assert.Assert(t, !slices.Contains(got, "/dev/a.js")) + assert.Assert(t, !slices.Contains(got, "/dev/b.js")) + }, + }, + { + name: "wildcard question mark single character", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/?.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/x/a.ts", "/dev/x/b.ts"}) + }, + }, + { + name: "wildcard recursive directory", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/z/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/y/a.ts")) + }, + }, + { + name: "double asterisk matches zero-or-more directories", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/**/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Equal(t, len(got), 2) + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/y/a.ts")) + }, + }, + { + name: "wildcard multiple recursive directories", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/y/**/a.ts", "x/**/a.ts", "z/**/a.ts"}, + 
expect: func(t *testing.T, got []string) { + assert.Assert(t, len(got) > 0) + }, + }, + { + name: "wildcard case sensitive matching", + host: caseSensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/A.ts"}, + expect: func(t *testing.T, got []string) { + assert.DeepEqual(t, got, []string{"/dev/A.ts"}) + }, + }, + { + name: "wildcard missing files excluded", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*/z.ts"}, + expect: func(t *testing.T, got []string) { assert.Equal(t, len(got), 0) }, + }, + { + name: "exclude folders with wildcards", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"z", "x"}, + includes: []string{"**/*"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, !contains(f, "/z/") && !contains(f, "/x/"), "should not contain z or x: %s", f) + } + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/b.ts")) + }, + }, + { + name: "include paths outside project absolute", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*", "/ext/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/ext/ext.ts")) + }, + }, + { + name: "include paths outside project relative", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**"}, + includes: []string{"*", "../ext/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/ext/ext.ts")) + }, + }, + { + name: "include files containing double dots", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**"}, + includes: []string{"/ext/b/a..b.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, 
"/ext/b/a..b.ts")) + }, + }, + { + name: "exclude files containing double dots", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"/ext/b/a..b.ts"}, + includes: []string{"/ext/**/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/ext/ext.ts")) + assert.Assert(t, !slices.Contains(got, "/ext/b/a..b.ts")) + }, + }, + { + name: "common package folders implicitly excluded", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/node_modules/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/bower_components/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/jspm_packages/a.ts")) + }, + }, + { + name: "common package folders explicit recursive include", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/a.ts", "**/node_modules/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/node_modules/a.ts")) + }, + }, + { + name: "common package folders wildcard include", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/node_modules/a.ts")) + }, + }, + { + name: "common package folders explicit wildcard include", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*/a.ts", "node_modules/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, 
"/dev/node_modules/a.ts")) + }, + }, + { + name: "dotted folders not implicitly included", + host: dottedFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/**/*", "w/*/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/d.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/y/d.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/.y/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/y/.e.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/w/.u/e.ts")) + }, + }, + { + name: "dotted folders explicitly included", + host: dottedFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"x/.y/a.ts", "/dev/.z/.b.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/.y/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/.z/.b.ts")) + }, + }, + { + name: "dotted folders recursive wildcard matches directories", + host: dottedFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/.*/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/.y/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/.z/c.ts")) + assert.Assert(t, slices.Contains(got, "/dev/w/.u/e.ts")) + }, + }, + { + name: "trailing recursive include returns empty", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**"}, + expect: func(t *testing.T, got []string) { assert.Equal(t, len(got), 0) }, + }, + { + name: "trailing recursive exclude removes everything", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**"}, + includes: []string{"**/*"}, + expect: func(t *testing.T, got []string) { assert.Equal(t, len(got), 0) }, + }, + { + name: "multiple recursive directory patterns in includes", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: 
[]string{"**/x/**/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/y/a.ts")) + }, + }, + { + name: "multiple recursive directory patterns in excludes", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**/x/**"}, + includes: []string{"**/a.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/z/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/y/a.ts")) + }, + }, + { + name: "implicit globbification expands directory", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"z"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/z/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/z/aba.ts")) + assert.Assert(t, slices.Contains(got, "/dev/z/b.ts")) + }, + }, + { + name: "exclude patterns starting with starstar", + host: caseSensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**/x"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, !contains(f, "/x/"), "should not contain /x/: %s", f) + } + }, + }, + { + name: "include patterns starting with starstar", + host: caseSensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"**/x", "**/a/**/b"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/x/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/q/a/c/b/d.ts")) + }, + }, + { + name: "depth limit one", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + depth: 1, + expect: func(t *testing.T, got []string) { + for _, f := range got { + suffix := f[len("/dev/"):] + assert.Assert(t, !contains(suffix, "/"), 
"depth 1 should not include nested files: %s", f) + } + }, + }, + { + name: "depth limit two", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + depth: 2, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/z/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/x/y/a.ts")) + }, + }, + { + name: "mixed extensions only ts", + host: mixedExtensionHost, + extensions: []string{".ts"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, hasSuffix(f, ".ts"), "should only have .ts files: %s", f) + } + }, + }, + { + name: "mixed extensions ts and tsx", + host: mixedExtensionHost, + extensions: []string{".ts", ".tsx"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, hasSuffix(f, ".ts") || hasSuffix(f, ".tsx"), "should only have .ts or .tsx files: %s", f) + } + }, + }, + { + name: "mixed extensions js and jsx", + host: mixedExtensionHost, + extensions: []string{".js", ".jsx"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, hasSuffix(f, ".js") || hasSuffix(f, ".jsx"), "should only have .js or .jsx files: %s", f) + } + }, + }, + { + name: "min js files excluded by wildcard", + host: caseInsensitiveHost, + extensions: []string{".js"}, + includes: []string{"js/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/js/a.js")) + assert.Assert(t, slices.Contains(got, "/dev/js/b.js")) + assert.Assert(t, !slices.Contains(got, "/dev/js/d.min.js")) + assert.Assert(t, !slices.Contains(got, "/dev/js/ab.min.js")) + }, + }, + { + name: "min js exclusion is case-sensitive on case-sensitive FS", + host: caseSensitiveHost, + extensions: []string{".js"}, + includes: []string{"js/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/js/a.js")) + assert.Assert(t, 
slices.Contains(got, "/dev/js/b.js")) + // Legacy behavior: only lowercase ".min.js" is excluded by default when matching is case-sensitive. + assert.Assert(t, slices.Contains(got, "/dev/js/d.MIN.js")) + }, + }, + { + name: "min js files explicitly included", + host: caseInsensitiveHost, + extensions: []string{".js"}, + includes: []string{"js/*.min.js"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/js/d.min.js")) + assert.Assert(t, slices.Contains(got, "/dev/js/ab.min.js")) + }, + }, + { + name: "min js files included when pattern mentions .min.", + host: caseInsensitiveHost, + extensions: []string{".js"}, + includes: []string{"js/*.min.*"}, + expect: func(t *testing.T, got []string) { + assert.Equal(t, len(got), 2) + assert.Assert(t, slices.Contains(got, "/dev/js/d.min.js")) + assert.Assert(t, slices.Contains(got, "/dev/js/ab.min.js")) + }, + }, + { + name: "exclude literal node_modules folder", + host: commonFoldersHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"node_modules"}, + includes: []string{"**/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, !slices.Contains(got, "/dev/node_modules/a.ts")) + }, + }, + { + name: "same named declarations include ts", + host: sameNamedDeclarationsHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, len(got) > 0) }, + }, + { + name: "same named declarations include tsx", + host: sameNamedDeclarationsHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + includes: []string{"*.tsx"}, + expect: func(t *testing.T, got []string) { + for _, f := range got { + assert.Assert(t, hasSuffix(f, ".tsx"), "should only have .tsx files: %s", f) + } + }, + }, + { + name: "empty includes returns all matching files", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + expect: 
func(t *testing.T, got []string) { + assert.Assert(t, len(got) > 0) + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + }, + }, + { + name: "nil extensions returns all files", + host: caseInsensitiveHost, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/a.js")) + }, + }, + { + name: "empty extensions slice returns all files", + host: caseInsensitiveHost, + extensions: []string{}, + expect: func(t *testing.T, got []string) { assert.Assert(t, len(got) > 0, "expected files to be returned") }, + }, + } + + for _, tc := range cases { + for _, impl := range readDirectoryImplementations { + t.Run(impl.name+"/"+tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc, impl.fn) + }) + } + } +} + +// Helper functions +func contains(s, substr string) bool { + return len(s) >= len(substr) && (s == substr || len(substr) == 0 || + (len(s) > len(substr) && containsAt(s, substr))) +} + +func containsAt(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} + +func hasSuffix(s, suffix string) bool { + return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix +} + +// Additional tests for helper functions + +func TestIsImplicitGlob(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + expected bool + }{ + {name: "simple", input: "foo", expected: true}, + {name: "folder", input: "src", expected: true}, + {name: "with extension", input: "foo.ts", expected: false}, + {name: "trailing dot", input: "foo.", expected: false}, + {name: "star", input: "*", expected: false}, + {name: "question", input: "?", expected: false}, + {name: "star suffix", input: "foo*", expected: false}, + {name: "question suffix", input: "foo?", expected: false}, + {name: "dot name", input: "foo.bar", expected: false}, + {name: "empty", input: "", expected: true}, + } + + 
for _, tt := range tests { + tc := tt + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + result := IsImplicitGlob(tc.input) + assert.Equal(t, result, tc.expected) + }) + } +} + +func TestGetRegularExpressionForWildcard(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + specs []string + usage Usage + expected string + assertFn func(t *testing.T, got string) + }{ + {name: "nil specs", specs: nil, usage: UsageFiles, expected: "", assertFn: func(t *testing.T, got string) { assert.Equal(t, got, "") }}, + {name: "empty specs", specs: []string{}, usage: UsageFiles, expected: "", assertFn: func(t *testing.T, got string) { assert.Equal(t, got, "") }}, + {name: "single spec", specs: []string{"*.ts"}, usage: UsageFiles, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + {name: "multiple specs", specs: []string{"*.ts", "*.tsx"}, usage: UsageFiles, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + result := getRegularExpressionForWildcard(tc.specs, "/", tc.usage) + if tc.assertFn != nil { + tc.assertFn(t, result) + } else { + assert.Equal(t, result, tc.expected) + } + }) + } +} + +func TestGetRegularExpressionsForWildcards(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + specs []string + usage Usage + assertFn func(t *testing.T, got []string) + }{ + {name: "nil specs", specs: nil, usage: UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Assert(t, got == nil) }}, + {name: "empty specs", specs: []string{}, usage: UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Assert(t, got == nil) }}, + {name: "two specs", specs: []string{"*.ts", "*.tsx"}, usage: UsageFiles, assertFn: func(t *testing.T, got []string) { assert.Equal(t, len(got), 2) }}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + result := 
getRegularExpressionsForWildcards(tc.specs, "/", tc.usage) + tc.assertFn(t, result) + }) + } +} + +func TestGetPatternFromSpec(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + spec string + usage Usage + assertFn func(t *testing.T, got string) + }{ + {name: "files usage", spec: "*.ts", usage: UsageFiles, assertFn: func(t *testing.T, got string) { + assert.Assert(t, got != "") + assert.Assert(t, hasSuffix(got, "$")) + }}, + {name: "directories usage", spec: "src", usage: UsageDirectories, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + {name: "exclude usage", spec: "node_modules", usage: UsageExclude, assertFn: func(t *testing.T, got string) { + assert.Assert(t, got != "") + assert.Assert(t, contains(got, "($|/)")) + }}, + {name: "trailing starstar non exclude", spec: "**", usage: UsageFiles, assertFn: func(t *testing.T, got string) { assert.Equal(t, got, "") }}, + {name: "trailing starstar exclude allowed", spec: "**", usage: UsageExclude, assertFn: func(t *testing.T, got string) { assert.Assert(t, got != "") }}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + result := getPatternFromSpec(tc.spec, "/", tc.usage) + tc.assertFn(t, result) + }) + } +} + +// Edge case tests for various pattern scenarios +func TestReadDirectoryEdgeCases(t *testing.T) { + t.Parallel() + + cases := []readDirTestCase{ + { + name: "rooted include path", + host: caseInsensitiveHost, + extensions: []string{".ts"}, + includes: []string{"/dev/a.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/dev/a.ts")) }, + }, + { + name: "include with extension in path", + host: caseInsensitiveHost, + extensions: []string{".ts"}, + includes: []string{"a.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/dev/a.ts")) }, + }, + { + name: "special regex characters in path", + host: func() vfs.FS { + return vfstest.FromMap(map[string]string{ 
+ "/dev/file+test.ts": "", + "/dev/file[0].ts": "", + "/dev/file(1).ts": "", + "/dev/file$money.ts": "", + "/dev/file^start.ts": "", + "/dev/file|pipe.ts": "", + "/dev/file#hash.ts": "", + }, false) + }, + extensions: []string{".ts"}, + includes: []string{"file+test.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/dev/file+test.ts")) }, + }, + { + name: "include pattern starting with question mark", + host: caseInsensitiveHost, + extensions: []string{".ts"}, + includes: []string{"?.ts"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/b.ts")) + }, + }, + { + name: "include pattern starting with star", + host: caseInsensitiveHost, + extensions: []string{".ts"}, + includes: []string{"*b.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/dev/b.ts")) }, + }, + { + name: "case insensitive file matching", + host: func() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/dev/File.ts": "", + "/dev/FILE.ts": "", + }, true) + }, + extensions: []string{".ts"}, + includes: []string{"*.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, len(got) == 2) }, + }, + { + name: "nested subdirectory base path", + host: caseSensitiveHost, + extensions: []string{".ts"}, + includes: []string{"q/a/c/b/d.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/dev/q/a/c/b/d.ts")) }, + }, + { + name: "current directory differs from path", + host: caseInsensitiveHost, + extensions: []string{".ts"}, + includes: []string{"z/*.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, len(got) > 0) }, + }, + } + + for _, tc := range cases { + for _, impl := range readDirectoryImplementations { + t.Run(impl.name+"/"+tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc, impl.fn) + }) + } + } +} + +func TestReadDirectoryEmptyIncludes(t 
*testing.T) { + t.Parallel() + cases := []readDirTestCase{ + { + name: "empty includes slice behavior", + host: func() vfs.FS { + return vfstest.FromMap(map[string]string{ + "/root/a.ts": "", + }, true) + }, + path: "/root", + currentDir: "/", + extensions: []string{".ts"}, + includes: []string{}, + expect: func(t *testing.T, got []string) { + if len(got) == 0 { + return + } + assert.Assert(t, slices.Contains(got, "/root/a.ts")) + }, + }, + } + + for _, tc := range cases { + for _, impl := range readDirectoryImplementations { + t.Run(impl.name+"/"+tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc, impl.fn) + }) + } + } +} + +// TestReadDirectorySymlinkCycle tests that cyclic symlinks don't cause infinite loops. +// The cycle is detected by the vfs package using Realpath for cycle detection. +// This means directories with cyclic symlinks will be skipped during traversal. +func TestReadDirectorySymlinkCycle(t *testing.T) { + t.Parallel() + cases := []readDirTestCase{ + { + name: "detects and skips symlink cycles", + host: func() vfs.FS { + return vfstest.FromMap(map[string]any{ + "/root/file.ts": "", + "/root/a/file.ts": "", + "/root/a/b": vfstest.Symlink("/root/a"), + }, true) + }, + path: "/root", + currentDir: "/", + extensions: []string{".ts"}, + includes: []string{"**/*"}, + expect: func(t *testing.T, got []string) { + expected := []string{"/root/file.ts", "/root/a/file.ts"} + assert.DeepEqual(t, got, expected) + }, + }, + } + + for _, tc := range cases { + for _, impl := range readDirectoryImplementations { + t.Run(impl.name+"/"+tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc, impl.fn) + }) + } + } +} + +// TestReadDirectoryMatchesTypeScriptBaselines contains tests that verify the Go implementation +// matches the TypeScript baseline outputs from _submodules/TypeScript/tests/baselines/reference/config/matchFiles/ +func TestReadDirectoryMatchesTypeScriptBaselines(t *testing.T) { + t.Parallel() + + cases := 
// TestReadDirectoryMatchesTypeScriptBaselines contains tests that verify the Go implementation
// matches the TypeScript baseline outputs from _submodules/TypeScript/tests/baselines/reference/config/matchFiles/
//
// Fixture hosts (caseInsensitiveHost, caseSensitiveHost, dottedFoldersHost) are
// presumably package-level helpers defined elsewhere in this file — each builds
// a vfs.FS rooted at /dev with the files the baselines expect.
func TestReadDirectoryMatchesTypeScriptBaselines(t *testing.T) {
	t.Parallel()

	cases := []readDirTestCase{
		{
			// Results follow include-spec order first, then alphabetical within a spec.
			name: "sorted in include order then alphabetical",
			host: func() vfs.FS {
				return vfstest.FromMap(map[string]string{
					"/dev/z/a.ts":   "",
					"/dev/z/aba.ts": "",
					"/dev/z/abz.ts": "",
					"/dev/z/b.ts":   "",
					"/dev/z/bba.ts": "",
					"/dev/z/bbz.ts": "",
					"/dev/x/a.ts":   "",
					"/dev/x/aa.ts":  "",
					"/dev/x/b.ts":   "",
				}, false)
			},
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"z/*.ts", "x/*.ts"},
			expect: func(t *testing.T, got []string) {
				expected := []string{
					"/dev/z/a.ts", "/dev/z/aba.ts", "/dev/z/abz.ts", "/dev/z/b.ts", "/dev/z/bba.ts", "/dev/z/bbz.ts",
					"/dev/x/a.ts", "/dev/x/aa.ts", "/dev/x/b.ts",
				}
				assert.DeepEqual(t, got, expected)
			},
		},
		{
			// An explicit ".*" component opts dotted directories in, but dotted
			// *files* inside them are still excluded.
			name: "recursive wildcards match dotted directories",
			host: func() vfs.FS {
				return vfstest.FromMap(map[string]string{
					"/dev/x/d.ts":           "",
					"/dev/x/y/d.ts":         "",
					"/dev/x/y/.e.ts":        "",
					"/dev/x/.y/a.ts":        "",
					"/dev/.z/.b.ts":         "",
					"/dev/.z/c.ts":          "",
					"/dev/w/.u/e.ts":        "",
					"/dev/g.min.js/.g/g.ts": "",
				}, false)
			},
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"**/.*/*"},
			expect: func(t *testing.T, got []string) {
				// Order is not part of this baseline; only membership is checked.
				expected := []string{"/dev/.z/c.ts", "/dev/g.min.js/.g/g.ts", "/dev/w/.u/e.ts", "/dev/x/.y/a.ts"}
				assert.Equal(t, len(got), len(expected))
				for _, want := range expected {
					assert.Assert(t, slices.Contains(got, want))
				}
			},
		},
		{
			// node_modules / bower_components / jspm_packages are implicitly
			// excluded when the include uses wildcards.
			name: "common package folders implicitly excluded with wildcard",
			host: func() vfs.FS {
				return vfstest.FromMap(map[string]string{
					"/dev/a.ts":                  "",
					"/dev/a.d.ts":                "",
					"/dev/a.js":                  "",
					"/dev/b.ts":                  "",
					"/dev/x/a.ts":                "",
					"/dev/node_modules/a.ts":     "",
					"/dev/bower_components/a.ts": "",
					"/dev/jspm_packages/a.ts":    "",
				}, false)
			},
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"**/a.ts"},
			expect:     func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/a.ts", "/dev/x/a.ts"}) },
		},
		{
			// A plain "*" include does not pick up .min.js files.
			name: "js wildcard excludes min js files",
			host: func() vfs.FS {
				return vfstest.FromMap(map[string]string{
					"/dev/js/a.js":      "",
					"/dev/js/b.js":      "",
					"/dev/js/d.min.js":  "",
					"/dev/js/ab.min.js": "",
				}, false)
			},
			extensions: []string{".js"},
			includes:   []string{"js/*"},
			expect:     func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/js/a.js", "/dev/js/b.js"}) },
		},
		{
			// Naming ".min.js" explicitly in the include re-enables those files.
			name: "explicit min js pattern includes min files",
			host: func() vfs.FS {
				return vfstest.FromMap(map[string]string{
					"/dev/js/a.js":      "",
					"/dev/js/b.js":      "",
					"/dev/js/d.min.js":  "",
					"/dev/js/ab.min.js": "",
				}, false)
			},
			extensions: []string{".js"},
			includes:   []string{"js/*.min.js"},
			expect: func(t *testing.T, got []string) {
				expected := []string{"/dev/js/ab.min.js", "/dev/js/d.min.js"}
				assert.Equal(t, len(got), len(expected))
				for _, want := range expected {
					assert.Assert(t, slices.Contains(got, want))
				}
			},
		},
		{
			name:       "literal excludes baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			excludes:   []string{"b.ts"},
			includes:   []string{"a.ts", "b.ts"},
			expect:     func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/a.ts"}) },
		},
		{
			name:       "wildcard excludes baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			excludes:   []string{"*.ts", "z/??z.ts", "*/b.ts"},
			includes:   []string{"a.ts", "b.ts", "z/a.ts", "z/abz.ts", "z/aba.ts", "x/b.ts"},
			expect:     func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/z/a.ts", "/dev/z/aba.ts"}) },
		},
		{
			name:       "recursive excludes baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			excludes:   []string{"**/b.ts"},
			includes:   []string{"a.ts", "b.ts", "x/a.ts", "x/b.ts", "x/y/a.ts", "x/y/b.ts"},
			expect: func(t *testing.T, got []string) {
				assert.DeepEqual(t, got, []string{"/dev/a.ts", "/dev/x/a.ts", "/dev/x/y/a.ts"})
			},
		},
		{
			name:       "question mark baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"x/?.ts"},
			expect:     func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/x/a.ts", "/dev/x/b.ts"}) },
		},
		{
			name:       "recursive directory pattern baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"**/a.ts"},
			expect: func(t *testing.T, got []string) {
				assert.DeepEqual(t, got, []string{"/dev/a.ts", "/dev/x/a.ts", "/dev/x/y/a.ts", "/dev/z/a.ts"})
			},
		},
		{
			name:       "case sensitive baseline",
			host:       caseSensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"**/A.ts"},
			expect:     func(t *testing.T, got []string) { assert.DeepEqual(t, got, []string{"/dev/A.ts"}) },
		},
		{
			name:       "exclude folders baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			excludes:   []string{"z", "x"},
			includes:   []string{"**/*"},
			expect: func(t *testing.T, got []string) {
				for _, f := range got {
					assert.Assert(t, !contains(f, "/z/") && !contains(f, "/x/"), "should not contain z or x: %s", f)
				}
				assert.Assert(t, slices.Contains(got, "/dev/a.ts"))
				assert.Assert(t, slices.Contains(got, "/dev/b.ts"))
			},
		},
		{
			// A bare directory name in includes expands to dir/**/*.
			name:       "implicit glob expansion baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"z"},
			expect: func(t *testing.T, got []string) {
				assert.DeepEqual(t, got, []string{"/dev/z/a.ts", "/dev/z/aba.ts", "/dev/z/abz.ts", "/dev/z/b.ts", "/dev/z/bba.ts", "/dev/z/bbz.ts"})
			},
		},
		{
			// A trailing "**" include matches nothing per the baseline.
			name:       "trailing recursive directory baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"**"},
			expect:     func(t *testing.T, got []string) { assert.Equal(t, len(got), 0) },
		},
		{
			// A trailing "**" *exclude* is valid and excludes everything.
			name:       "exclude trailing recursive directory baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			excludes:   []string{"**"},
			includes:   []string{"**/*"},
			expect:     func(t *testing.T, got []string) { assert.Equal(t, len(got), 0) },
		},
		{
			name:       "multiple recursive directory patterns baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"**/x/**/*"},
			expect: func(t *testing.T, got []string) {
				assert.Assert(t, slices.Contains(got, "/dev/x/a.ts"))
				assert.Assert(t, slices.Contains(got, "/dev/x/aa.ts"))
				assert.Assert(t, slices.Contains(got, "/dev/x/b.ts"))
				assert.Assert(t, slices.Contains(got, "/dev/x/y/a.ts"))
				assert.Assert(t, slices.Contains(got, "/dev/x/y/b.ts"))
			},
		},
		{
			name:       "include dirs with starstar prefix baseline",
			host:       caseSensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"**/x", "**/a/**/b"},
			expect: func(t *testing.T, got []string) {
				assert.Assert(t, slices.Contains(got, "/dev/x/a.ts"))
				assert.Assert(t, slices.Contains(got, "/dev/x/b.ts"))
				assert.Assert(t, slices.Contains(got, "/dev/q/a/c/b/d.ts"))
			},
		},
		{
			name:       "dotted folders not implicitly included baseline",
			host:       dottedFoldersHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"x/**/*", "w/*/*"},
			expect: func(t *testing.T, got []string) {
				assert.Assert(t, slices.Contains(got, "/dev/x/d.ts"))
				assert.Assert(t, slices.Contains(got, "/dev/x/y/d.ts"))
				assert.Assert(t, !slices.Contains(got, "/dev/x/.y/a.ts"))
				assert.Assert(t, !slices.Contains(got, "/dev/x/y/.e.ts"))
				assert.Assert(t, !slices.Contains(got, "/dev/w/.u/e.ts"))
			},
		},
		{
			// Rooted includes may reach outside the project directory.
			name:       "include paths outside project baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			includes:   []string{"*", "/ext/*"},
			expect: func(t *testing.T, got []string) {
				assert.Assert(t, slices.Contains(got, "/dev/a.ts"))
				assert.Assert(t, slices.Contains(got, "/ext/ext.ts"))
			},
		},
		{
			name:       "include files with double dots baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			excludes:   []string{"**"},
			includes:   []string{"/ext/b/a..b.ts"},
			expect:     func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/ext/b/a..b.ts")) },
		},
		{
			name:       "exclude files with double dots baseline",
			host:       caseInsensitiveHost,
			extensions: []string{".ts", ".tsx", ".d.ts"},
			excludes:   []string{"/ext/b/a..b.ts"},
			includes:   []string{"/ext/**/*"},
			expect: func(t *testing.T, got []string) {
				assert.Assert(t, slices.Contains(got, "/ext/ext.ts"))
				assert.Assert(t, !slices.Contains(got, "/ext/b/a..b.ts"))
			},
		},
	}

	// Every case runs against every ReadDirectory implementation.
	for _, tc := range cases {
		for _, impl := range readDirectoryImplementations {
			t.Run(impl.name+"/"+tc.name, func(t *testing.T) {
				t.Parallel()
				runReadDirectoryCase(t, tc, impl.fn)
			})
		}
	}
}
caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"**"}, + includes: []string{"/ext/b/a..b.ts"}, + expect: func(t *testing.T, got []string) { assert.Assert(t, slices.Contains(got, "/ext/b/a..b.ts")) }, + }, + { + name: "exclude files with double dots baseline", + host: caseInsensitiveHost, + extensions: []string{".ts", ".tsx", ".d.ts"}, + excludes: []string{"/ext/b/a..b.ts"}, + includes: []string{"/ext/**/*"}, + expect: func(t *testing.T, got []string) { + assert.Assert(t, slices.Contains(got, "/ext/ext.ts")) + assert.Assert(t, !slices.Contains(got, "/ext/b/a..b.ts")) + }, + }, + } + + for _, tc := range cases { + for _, impl := range readDirectoryImplementations { + t.Run(impl.name+"/"+tc.name, func(t *testing.T) { + t.Parallel() + runReadDirectoryCase(t, tc, impl.fn) + }) + } + } +} + +// TestSpecMatcher tests the SpecMatcher API +func TestSpecMatcher(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + specs []string + basePath string + usage Usage + useCaseSensitiveFileNames bool + matchingPaths []string + nonMatchingPaths []string + }{ + { + name: "simple wildcard", + specs: []string{"*.ts"}, + basePath: "/project", + usage: UsageFiles, + useCaseSensitiveFileNames: true, + matchingPaths: []string{"/project/a.ts", "/project/b.ts", "/project/foo.ts"}, + nonMatchingPaths: []string{"/project/a.js", "/project/sub/a.ts"}, + }, + { + name: "recursive wildcard", + specs: []string{"**/*.ts"}, + basePath: "/project", + usage: UsageFiles, + useCaseSensitiveFileNames: true, + matchingPaths: []string{"/project/a.ts", "/project/sub/a.ts", "/project/sub/deep/a.ts"}, + nonMatchingPaths: []string{"/project/a.js"}, + }, + { + name: "exclude pattern", + specs: []string{"node_modules"}, + basePath: "/project", + usage: UsageExclude, + useCaseSensitiveFileNames: true, + matchingPaths: []string{"/project/node_modules/foo"}, + nonMatchingPaths: []string{"/project/node_modules", "/project/src"}, + }, + { + name: "case 
// TestSpecMatcher_MatchString cross-checks the regex-backed ("Old") and
// glob-backed ("New") SpecMatcher implementations: both must return the same
// MatchString result for every path in every case.
func TestSpecMatcher_MatchString(t *testing.T) {
	t.Parallel()

	implementations := []struct {
		name string
		new  func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher
	}{
		{
			name: "Old",
			new: func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher {
				return newRegexSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames)
			},
		},
		{
			name: "New",
			new: func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher {
				return newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames)
			},
		},
	}

	cases := []struct {
		name                      string
		specs                     []string
		basePath                  string
		usage                     Usage
		useCaseSensitiveFileNames bool
		paths                     []string
		// expected[i] is the MatchString result for paths[i].
		expected []bool
	}{
		{
			name:                      "simple wildcard files",
			specs:                     []string{"*.ts"},
			basePath:                  "/project",
			usage:                     UsageFiles,
			useCaseSensitiveFileNames: true,
			paths:                     []string{"/project/a.ts", "/project/sub/a.ts", "/project/a.js"},
			expected:                  []bool{true, false, false},
		},
		{
			name:                      "recursive wildcard files",
			specs:                     []string{"**/*.ts"},
			basePath:                  "/project",
			usage:                     UsageFiles,
			useCaseSensitiveFileNames: true,
			paths:                     []string{"/project/a.ts", "/project/sub/a.ts", "/project/a.js"},
			expected:                  []bool{true, true, false},
		},
		{
			// An exclude spec matches paths *under* the folder but not the
			// folder path itself, per these expectations.
			name:                      "exclude pattern matches prefix",
			specs:                     []string{"node_modules"},
			basePath:                  "/project",
			usage:                     UsageExclude,
			useCaseSensitiveFileNames: true,
			paths:                     []string{"/project/node_modules", "/project/node_modules/foo", "/project/src"},
			expected:                  []bool{false, true, false},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			// Guard against malformed cases before running implementations.
			assert.Equal(t, len(tc.paths), len(tc.expected))

			for _, impl := range implementations {
				t.Run(impl.name, func(t *testing.T) {
					t.Parallel()
					m := impl.new(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames)
					assert.Assert(t, m != nil)
					for i, path := range tc.paths {
						assert.Equal(t, m.MatchString(path), tc.expected[i], "path: %s", path)
					}
				})
			}
		})
	}
}
"/project/sub/a.ts", "/project/a.js"}, + expected: []bool{true, false, false}, + }, + { + name: "recursive wildcard files", + specs: []string{"**/*.ts"}, + basePath: "/project", + usage: UsageFiles, + useCaseSensitiveFileNames: true, + paths: []string{"/project/a.ts", "/project/sub/a.ts", "/project/a.js"}, + expected: []bool{true, true, false}, + }, + { + name: "exclude pattern matches prefix", + specs: []string{"node_modules"}, + basePath: "/project", + usage: UsageExclude, + useCaseSensitiveFileNames: true, + paths: []string{"/project/node_modules", "/project/node_modules/foo", "/project/src"}, + expected: []bool{false, true, false}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, len(tc.paths), len(tc.expected)) + + for _, impl := range implementations { + t.Run(impl.name, func(t *testing.T) { + t.Parallel() + m := impl.new(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + assert.Assert(t, m != nil) + for i, path := range tc.paths { + assert.Equal(t, m.MatchString(path), tc.expected[i], "path: %s", path) + } + }) + } + }) + } +} + +func TestSingleSpecMatcher_MatchString(t *testing.T) { + t.Parallel() + + implementations := []struct { + name string + new func(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher + }{ + { + name: "Old", + new: func(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { + return newRegexSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames) + }, + }, + { + name: "New", + new: func(spec string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatcher { + return newGlobSingleSpecMatcher(spec, basePath, usage, useCaseSensitiveFileNames) + }, + }, + } + + cases := []struct { + name string + spec string + basePath string + usage Usage + useCaseSensitiveFileNames bool + paths []string + expected []bool + }{ + { + name: "single spec wildcard", + spec: "*.ts", + 
// TestSpecMatchers_MatchIndex cross-checks MatchIndex between the
// regex-backed ("Old") and glob-backed ("New") multi-spec matchers.
// MatchIndex returns the index of the first spec that matches, or -1.
func TestSpecMatchers_MatchIndex(t *testing.T) {
	t.Parallel()

	implementations := []struct {
		name string
		new  func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers
	}{
		{
			name: "Old",
			new: func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers {
				return newRegexSpecMatchers(specs, basePath, usage, useCaseSensitiveFileNames)
			},
		},
		{
			// Note: the glob matcher implements both SpecMatcher and SpecMatchers.
			name: "New",
			new: func(specs []string, basePath string, usage Usage, useCaseSensitiveFileNames bool) SpecMatchers {
				return newGlobSpecMatcher(specs, basePath, usage, useCaseSensitiveFileNames)
			},
		},
	}

	cases := []struct {
		name                      string
		specs                     []string
		basePath                  string
		usage                     Usage
		useCaseSensitiveFileNames bool
		paths                     []string
		// expected[i] is the MatchIndex result for paths[i]; -1 means no spec matched.
		expected []int
	}{
		{
			name:                      "index lookup prefers first match",
			specs:                     []string{"*.ts", "*.tsx"},
			basePath:                  "/project",
			usage:                     UsageFiles,
			useCaseSensitiveFileNames: true,
			paths:                     []string{"/project/a.ts", "/project/a.tsx", "/project/a.js"},
			expected:                  []int{0, 1, -1},
		},
		{
			// Exclude folders match their contents, not the folder path itself.
			name:                      "exclude index lookup",
			specs:                     []string{"node_modules", "bower_components"},
			basePath:                  "/project",
			usage:                     UsageExclude,
			useCaseSensitiveFileNames: true,
			paths:                     []string{"/project/node_modules", "/project/node_modules/foo", "/project/bower_components", "/project/bower_components/bar", "/project/src"},
			expected:                  []int{-1, 0, -1, 1, -1},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			assert.Equal(t, len(tc.paths), len(tc.expected))
			for _, impl := range implementations {
				t.Run(impl.name, func(t *testing.T) {
					t.Parallel()
					m := impl.new(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames)
					assert.Assert(t, m != nil)
					for i, path := range tc.paths {
						assert.Equal(t, m.MatchIndex(path), tc.expected[i], "path: %s", path)
					}
				})
			}
		})
	}
}
+ }, + { + name: "exclude index lookup", + specs: []string{"node_modules", "bower_components"}, + basePath: "/project", + usage: UsageExclude, + useCaseSensitiveFileNames: true, + paths: []string{"/project/node_modules", "/project/node_modules/foo", "/project/bower_components", "/project/bower_components/bar", "/project/src"}, + expected: []int{-1, 0, -1, 1, -1}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, len(tc.paths), len(tc.expected)) + for _, impl := range implementations { + t.Run(impl.name, func(t *testing.T) { + t.Parallel() + m := impl.new(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + assert.Assert(t, m != nil) + for i, path := range tc.paths { + assert.Equal(t, m.MatchIndex(path), tc.expected[i], "path: %s", path) + } + }) + } + }) + } +} + +func TestSingleSpecMatcher(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + spec string + basePath string + usage Usage + useCaseSensitiveFileNames bool + expectNil bool + matchingPaths []string + nonMatchingPaths []string + }{ + { + name: "simple spec", + spec: "*.ts", + basePath: "/project", + usage: UsageFiles, + useCaseSensitiveFileNames: true, + matchingPaths: []string{"/project/a.ts"}, + nonMatchingPaths: []string{"/project/a.js"}, + }, + { + name: "trailing ** non-exclude returns nil", + spec: "**", + basePath: "/project", + usage: UsageFiles, + useCaseSensitiveFileNames: true, + expectNil: true, + }, + { + name: "trailing ** exclude works", + spec: "**", + basePath: "/project", + usage: UsageExclude, + useCaseSensitiveFileNames: true, + matchingPaths: []string{"/project/anything", "/project/deep/path"}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + matcher := NewSingleSpecMatcher(tc.spec, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + if tc.expectNil { + assert.Assert(t, matcher == nil, "should be nil") + return + } + if matcher == nil { + 
t.Fatal("matcher should not be nil") + } + for _, path := range tc.matchingPaths { + assert.Assert(t, matcher.MatchString(path), "should match: %s", path) + } + for _, path := range tc.nonMatchingPaths { + assert.Assert(t, !matcher.MatchString(path), "should not match: %s", path) + } + }) + } +} + +func TestSpecMatchers(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + specs []string + basePath string + usage Usage + useCaseSensitiveFileNames bool + expectNil bool + pathToIndex map[string]int + }{ + { + name: "multiple specs return correct index", + specs: []string{"*.ts", "*.tsx", "*.js"}, + basePath: "/project", + usage: UsageFiles, + useCaseSensitiveFileNames: true, + pathToIndex: map[string]int{ + "/project/a.ts": 0, + "/project/b.tsx": 1, + "/project/c.js": 2, + "/project/d.css": -1, // no match + }, + }, + { + name: "empty specs returns nil", + specs: []string{}, + basePath: "/project", + usage: UsageFiles, + useCaseSensitiveFileNames: true, + expectNil: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + matchers := NewSpecMatchers(tc.specs, tc.basePath, tc.usage, tc.useCaseSensitiveFileNames) + if tc.expectNil { + assert.Assert(t, matchers == nil, "should be nil") + return + } + if matchers == nil { + t.Fatal("matchers should not be nil") + } + for path, expectedIndex := range tc.pathToIndex { + gotIndex := matchers.MatchIndex(path) + assert.Equal(t, gotIndex, expectedIndex, "path: %s", path) + } + }) + } +} + +// TestGlobPatternInternals tests internal glob pattern matching logic +// to ensure edge cases are covered that may not be hit by ReadDirectory tests +func TestGlobPatternInternals(t *testing.T) { + t.Parallel() + + t.Run("nextPathPart handles consecutive slashes", func(t *testing.T) { + t.Parallel() + // Test path with consecutive slashes + path := "/dev//foo///bar" + + // First call - returns empty for root + part, offset, ok := nextPathPartParts(path, "", 0) + assert.Assert(t, 
// TestGlobPatternInternals tests internal glob pattern matching logic
// to ensure edge cases are covered that may not be hit by ReadDirectory tests.
//
// nextPathPartParts(prefix, suffix, offset) iterates path components over the
// logical concatenation prefix+suffix; it returns (component, nextOffset, ok).
// The exact contract is inferred from these assertions — see the
// implementation for authoritative behavior.
func TestGlobPatternInternals(t *testing.T) {
	t.Parallel()

	t.Run("nextPathPart handles consecutive slashes", func(t *testing.T) {
		t.Parallel()
		// Test path with consecutive slashes
		path := "/dev//foo///bar"

		// First call - returns empty for root
		part, offset, ok := nextPathPartParts(path, "", 0)
		assert.Assert(t, ok)
		assert.Equal(t, part, "")
		assert.Equal(t, offset, 1)

		// Second call - should skip consecutive slashes after /dev
		part, offset, ok = nextPathPartParts(path, "", 1)
		assert.Assert(t, ok)
		assert.Equal(t, part, "dev")

		// Third call - should skip the double slashes before foo
		part, offset, ok = nextPathPartParts(path, "", offset)
		assert.Assert(t, ok)
		assert.Equal(t, part, "foo")

		// Fourth call - should skip the triple slashes before bar
		part, _, ok = nextPathPartParts(path, "", offset)
		assert.Assert(t, ok)
		assert.Equal(t, part, "bar")
	})

	t.Run("nextPathPart handles path ending with slashes", func(t *testing.T) {
		t.Parallel()
		path := "/dev/"

		// Skip to after "dev"
		_, offset, ok := nextPathPartParts(path, "", 0) // root
		assert.Assert(t, ok)
		_, offset, ok = nextPathPartParts(path, "", offset) // dev
		assert.Assert(t, ok)
		// Now at trailing slash, should return not ok
		_, _, ok = nextPathPartParts(path, "", offset)
		assert.Assert(t, !ok)
	})

	t.Run("nextPathPartParts handles empty prefix", func(t *testing.T) {
		t.Parallel()
		// With an empty prefix, the whole path lives in the suffix argument.
		path := "/dev//foo"

		part, offset, ok := nextPathPartParts("", path, 0)
		assert.Assert(t, ok)
		assert.Equal(t, part, "")
		assert.Equal(t, offset, 1)

		part, offset, ok = nextPathPartParts("", path, offset)
		assert.Assert(t, ok)
		assert.Equal(t, part, "dev")

		part, _, ok = nextPathPartParts("", path, offset)
		assert.Assert(t, ok)
		assert.Equal(t, part, "foo")
	})

	t.Run("nextPathPartParts returns not ok when only slashes remain", func(t *testing.T) {
		t.Parallel()
		prefix := "/dev/"
		suffix := "foo"

		_, offset, ok := nextPathPartParts(prefix, suffix, 0) // root
		assert.Assert(t, ok)

		part, offset, ok := nextPathPartParts(prefix, suffix, offset) // dev
		assert.Assert(t, ok)
		assert.Equal(t, part, "dev")

		part, offset, ok = nextPathPartParts(prefix, suffix, offset) // foo
		assert.Assert(t, ok)
		assert.Equal(t, part, "foo")
		// Offsets index into the logical concatenation prefix+suffix.
		assert.Equal(t, offset, len(prefix)+len(suffix))

		_, _, ok = nextPathPartParts(prefix, suffix, offset)
		assert.Assert(t, !ok)
	})

	t.Run("nextPathPartParts parses from suffix region", func(t *testing.T) {
		t.Parallel()
		prefix := "/"
		suffix := "a"

		part, offset, ok := nextPathPartParts(prefix, suffix, 0) // root
		assert.Assert(t, ok)
		assert.Equal(t, part, "")
		assert.Equal(t, offset, 1)

		part, _, ok = nextPathPartParts(prefix, suffix, offset)
		assert.Assert(t, ok)
		assert.Equal(t, part, "a")
	})

	t.Run("question mark segment at end of string", func(t *testing.T) {
		t.Parallel()
		// Create pattern with question mark that should fail when string is exhausted
		p, ok := compileGlobPattern("a?", "/", UsageFiles, true)
		assert.Assert(t, ok)

		// Should match "ab"
		assert.Assert(t, p.matches("/ab"))

		// Should NOT match "a" (question mark requires a character)
		assert.Assert(t, !p.matches("/a"))
	})

	t.Run("star segment with complex pattern", func(t *testing.T) {
		t.Parallel()
		// Pattern like "a*b*c" requires backtracking in star matching
		p, ok := compileGlobPattern("a*b*c", "/", UsageFiles, true)
		assert.Assert(t, ok)

		// Should match "abc"
		assert.Assert(t, p.matches("/abc"))

		// Should match "aXbYc"
		assert.Assert(t, p.matches("/aXbYc"))

		// Should match "aXXXbYYYc"
		assert.Assert(t, p.matches("/aXXXbYYYc"))

		// Should NOT match "aXbY" (no trailing c)
		assert.Assert(t, !p.matches("/aXbY"))
	})

	t.Run("ensureTrailingSlash with existing slash", func(t *testing.T) {
		t.Parallel()
		// Test that ensureTrailingSlash doesn't double-add slashes
		result := ensureTrailingSlash("/dev/")
		assert.Equal(t, result, "/dev/")

		result = ensureTrailingSlash("/")
		assert.Equal(t, result, "/")
	})

	t.Run("ensureTrailingSlash with empty string", func(t *testing.T) {
		t.Parallel()
		result := ensureTrailingSlash("")
		assert.Equal(t, result, "")
	})

	t.Run("literal component with package folder in include", func(t *testing.T) {
		t.Parallel()
		// When a literal include path goes through a package folder,
		// the skipPackageFolders flag on literal components should not block it
		// because literal components in includes don't have skipPackageFolders=true
		host := vfstest.FromMap(map[string]string{
			"/dev/node_modules/pkg/index.ts": "",
		}, false)

		// Explicit literal path should work
		got := matchFilesNoRegex("/dev", []string{".ts"}, nil,
			[]string{"node_modules/pkg/index.ts"}, false, "/", UnlimitedDepth, host)
		assert.Assert(t, slices.Contains(got, "/dev/node_modules/pkg/index.ts"))
	})
}
t.Parallel() + // When a literal include path goes through a package folder, + // the skipPackageFolders flag on literal components should not block it + // because literal components in includes don't have skipPackageFolders=true + host := vfstest.FromMap(map[string]string{ + "/dev/node_modules/pkg/index.ts": "", + }, false) + + // Explicit literal path should work + got := matchFilesNoRegex("/dev", []string{".ts"}, nil, + []string{"node_modules/pkg/index.ts"}, false, "/", UnlimitedDepth, host) + assert.Assert(t, slices.Contains(got, "/dev/node_modules/pkg/index.ts")) + }) +} + +// TestMatchSegmentsEdgeCases tests edge cases in the matchSegments function +func TestMatchSegmentsEdgeCases(t *testing.T) { + t.Parallel() + + t.Run("question mark before slash in string", func(t *testing.T) { + t.Parallel() + // This tests the case where question mark encounters a slash character + // which should fail since ? doesn't match / + p, ok := compileGlobPattern("a?b", "/", UsageFiles, true) + assert.Assert(t, ok) + + // "a/b" should not match "a?b" pattern since ? shouldn't match / + // But this is a single component pattern, so / wouldn't be in the component + // We need to test this within the segment matching + + // Create a pattern that will exercise question mark matching edge cases + assert.Assert(t, p.matches("/aXb")) // X matches ? + assert.Assert(t, !p.matches("/ab")) // nothing to match ? + assert.Assert(t, !p.matches("/aXYb")) // XY is too many chars for ? 
+ }) + + t.Run("star with no trailing content", func(t *testing.T) { + t.Parallel() + // Test that star can match to end of string + p, ok := compileGlobPattern("a*", "/", UsageFiles, true) + assert.Assert(t, ok) + + assert.Assert(t, p.matches("/a")) + assert.Assert(t, p.matches("/abc")) + assert.Assert(t, p.matches("/aXYZ")) + }) + + t.Run("multiple stars in pattern", func(t *testing.T) { + t.Parallel() + // Test patterns with multiple stars that require backtracking + p, ok := compileGlobPattern("*a*", "/", UsageFiles, true) + assert.Assert(t, ok) + + assert.Assert(t, p.matches("/a")) + assert.Assert(t, p.matches("/Xa")) + assert.Assert(t, p.matches("/aX")) + assert.Assert(t, p.matches("/XaY")) + assert.Assert(t, !p.matches("/XYZ")) // no 'a' + }) + + t.Run("literal segment not matching", func(t *testing.T) { + t.Parallel() + // Test literal segment that's longer than remaining string + p, ok := compileGlobPattern("abcdefgh.ts", "/", UsageFiles, true) + assert.Assert(t, ok) + + assert.Assert(t, !p.matches("/abc.ts")) // different literal + assert.Assert(t, p.matches("/abcdefgh.ts")) // exact match + }) +} + +// TestReadDirectoryConsecutiveSlashes tests handling of paths with consecutive slashes +func TestReadDirectoryConsecutiveSlashes(t *testing.T) { + t.Parallel() + + host := vfstest.FromMap(map[string]string{ + "/dev/a.ts": "", + "/dev/x/b.ts": "", + }, false) + + // The matchFilesNoRegex function normalizes paths, but we can test internal handling + got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"**/*.ts"}, false, "/", UnlimitedDepth, host) + assert.Assert(t, len(got) >= 2, "should find files") + assert.Assert(t, slices.Contains(got, "/dev/a.ts")) + assert.Assert(t, slices.Contains(got, "/dev/x/b.ts")) +} + +// TestGlobPatternLiteralWithPackageFolders tests literal component behavior with package folders +func TestGlobPatternLiteralWithPackageFolders(t *testing.T) { + t.Parallel() + + t.Run("wildcard skips package folders", func(t *testing.T) 
{ + t.Parallel() + // Wildcard patterns should skip node_modules + host := vfstest.FromMap(map[string]string{ + "/dev/a.ts": "", + "/dev/node_modules/b.ts": "", + }, false) + + got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"*/*.ts"}, false, "/", UnlimitedDepth, host) + assert.Assert(t, !slices.Contains(got, "/dev/node_modules/b.ts"), "should skip node_modules with wildcard") + }) + + t.Run("explicit literal includes package folder", func(t *testing.T) { + t.Parallel() + // Explicit literal paths should include package folders + host := vfstest.FromMap(map[string]string{ + "/dev/node_modules/b.ts": "", + }, false) + + got := matchFilesNoRegex("/dev", []string{".ts"}, nil, []string{"node_modules/b.ts"}, false, "/", UnlimitedDepth, host) + assert.Assert(t, slices.Contains(got, "/dev/node_modules/b.ts"), "should include explicit node_modules path") + }) +}