From adc69d8c745d2f531eeb56d16f8bf88b08ed811d Mon Sep 17 00:00:00 2001 From: ksemenenko Date: Sat, 11 Oct 2025 17:49:39 +0200 Subject: [PATCH 1/4] Optimize zip detection search and reuse separators --- .github/workflows/mime-sync.yml | 43 ++ .../MimeTypeSourceGenerator.cs | 22 +- .../ManagedCode.MimeTypes.Sync.csproj | 12 + ManagedCode.MimeTypes.Sync/Program.cs | 338 ++++++++ .../ContentDetectionTests.cs | 117 +++ ManagedCode.MimeTypes.Tests/GeneratorTests.cs | 37 +- .../MimeCategoryTests.cs | 37 +- ManagedCode.MimeTypes.sln | 16 +- .../ManagedCode.MimeTypes.csproj | 2 +- ManagedCode.MimeTypes/MimeHelper.cs | 731 ++++++++++++++++-- ManagedCode.MimeTypes/MimeTypeCategory.cs | 8 +- ManagedCode.MimeTypes/mimeTypes.json | 83 +- README.md | 97 ++- 13 files changed, 1416 insertions(+), 127 deletions(-) create mode 100644 .github/workflows/mime-sync.yml create mode 100644 ManagedCode.MimeTypes.Sync/ManagedCode.MimeTypes.Sync.csproj create mode 100644 ManagedCode.MimeTypes.Sync/Program.cs create mode 100644 ManagedCode.MimeTypes.Tests/ContentDetectionTests.cs diff --git a/.github/workflows/mime-sync.yml b/.github/workflows/mime-sync.yml new file mode 100644 index 0000000..aec0b0d --- /dev/null +++ b/.github/workflows/mime-sync.yml @@ -0,0 +1,43 @@ +name: MIME database sync + +on: + schedule: + - cron: '0 3 * * 1' + workflow_dispatch: + +jobs: + sync: + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: 8.0.x + + - name: Restore dependencies + run: dotnet restore + + - name: Run MIME sync tool + run: dotnet run --project ManagedCode.MimeTypes.Sync --configuration Release -- --prefer-remote + + - name: Create Pull Request + uses: peter-evans/create-pull-request@v6 + with: + token: ${{ secrets.GITHUB_TOKEN }} + commit-message: "chore: sync MIME database" + title: "chore: sync 
MIME database" + body: | + Automated update of MIME database from configured sources. + branch: chore/sync-mime-database + delete-branch: true + labels: | + automation + dependencies diff --git a/ManagedCode.MimeTypes.Generator/MimeTypeSourceGenerator.cs b/ManagedCode.MimeTypes.Generator/MimeTypeSourceGenerator.cs index fdf7c2f..c73762b 100644 --- a/ManagedCode.MimeTypes.Generator/MimeTypeSourceGenerator.cs +++ b/ManagedCode.MimeTypes.Generator/MimeTypeSourceGenerator.cs @@ -65,13 +65,16 @@ public void Execute(GeneratorExecutionContext context) foreach (var item in properties) { - defineDictionaryBuilder.AppendLine($"MimeTypes.Add(string.Intern(\"{item.Name}\"),string.Intern(\"{item.Value}\"));"); - types[ParseKey(item.Name)] = item.Value.ToString(); + var extension = item.Name.Trim(); + var mimeValue = item.Value.ToString()?.Trim() ?? string.Empty; + + defineDictionaryBuilder.AppendLine($"RegisterMimeTypeInternal(\"{Escape(extension)}\", \"{Escape(mimeValue)}\");"); + types[ParseKey(extension)] = mimeValue; } foreach (var item in types) { - propertyBuilder.AppendLine($"public static string {item.Key} => \"{item.Value}\";"); + propertyBuilder.AppendLine($"public static string {item.Key} => \"{Escape(item.Value)}\";"); } context.AddSource("MimeHelper.Properties.cs", SourceText.From(@$" @@ -124,17 +127,24 @@ private string GetMimeTypesPath(GeneratorExecutionContext context) return possiblePaths.FirstOrDefault(File.Exists) ?? 
possiblePaths[0]; } - private string ParseKey(string key) + private static string ParseKey(string key) { if (char.IsDigit(key[0])) { key = "_" + key; } - - key = key.Replace("-", "_"); + + key = key.Replace("-", "_").Replace('.', '_'); return key.ToUpperInvariant(); } + + private static string Escape(string value) + { + return value + .Replace("\\", "\\\\") + .Replace("\"", "\\\""); + } } diff --git a/ManagedCode.MimeTypes.Sync/ManagedCode.MimeTypes.Sync.csproj b/ManagedCode.MimeTypes.Sync/ManagedCode.MimeTypes.Sync.csproj new file mode 100644 index 0000000..bf7db67 --- /dev/null +++ b/ManagedCode.MimeTypes.Sync/ManagedCode.MimeTypes.Sync.csproj @@ -0,0 +1,12 @@ + + + Exe + net8.0 + preview + enable + enable + + + + + diff --git a/ManagedCode.MimeTypes.Sync/Program.cs b/ManagedCode.MimeTypes.Sync/Program.cs new file mode 100644 index 0000000..184be6b --- /dev/null +++ b/ManagedCode.MimeTypes.Sync/Program.cs @@ -0,0 +1,338 @@ +using System.Net.Http; +using System.Globalization; +using System.Linq; +using System.Text; +using System.Text.Encodings.Web; +using System.Text.Json; + +var exitCode = await MimeTypeSyncTool.RunAsync(args); +return exitCode; + +internal static class MimeTypeSyncTool +{ + private static readonly string[] DefaultSources = + { + "https://raw.githubusercontent.com/jshttp/mime-db/master/db.json", + "https://raw.githubusercontent.com/apache/httpd/trunk/docs/conf/mime.types" + }; + + private static readonly char[] MimeTypeSeparators = new[] { ' ', '\t' }; + + public static async Task RunAsync(string[] args) + { + try + { + var options = SyncOptions.Parse(args); + var remoteData = await LoadRemoteAsync(options.Sources); + var existing = LoadExisting(options.OutputPath); + var merged = Merge(remoteData, existing, options); + + WriteOutput(options.OutputPath, merged); + + Console.WriteLine($"Updated {options.OutputPath} with {merged.Count.ToString("N0", CultureInfo.InvariantCulture)} entries (remote: {remoteData.Count.ToString("N0", 
CultureInfo.InvariantCulture)}, existing: {existing.Count.ToString("N0", CultureInfo.InvariantCulture)})."); + return 0; + } + catch (Exception ex) + { + Console.Error.WriteLine(ex); + return 1; + } + } + + private static async Task> LoadRemoteAsync(IReadOnlyList sources) + { + if (sources.Count == 0) + { + return new Dictionary(StringComparer.OrdinalIgnoreCase); + } + + using var client = new HttpClient(); + client.DefaultRequestHeaders.UserAgent.ParseAdd("ManagedCode.MimeTypes.Sync/1.0"); + + var aggregate = new Dictionary(StringComparer.OrdinalIgnoreCase); + + foreach (var source in sources) + { + var data = await LoadRawDataAsync(client, source); + var parsed = ParseRemoteData(source, data); + + foreach (var kvp in parsed) + { + aggregate[kvp.Key] = kvp.Value; + } + } + return aggregate; + } + + private static async Task LoadRawDataAsync(HttpClient client, string source) + { + if (Uri.TryCreate(source, UriKind.Absolute, out var uri) && uri.Scheme.StartsWith("http", StringComparison.OrdinalIgnoreCase)) + { + return await client.GetByteArrayAsync(uri); + } + + return await File.ReadAllBytesAsync(source); + } + + private static Dictionary ParseRemoteData(string source, byte[] data) + { + var firstNonWhitespace = data.FirstOrDefault(static b => !char.IsWhiteSpace((char)b)); + if (firstNonWhitespace == '{' || firstNonWhitespace == '[') + { + return ParseJsonSource(source, data); + } + + return ParseMimeTypesListing(source, data); + } + + private static Dictionary ParseJsonSource(string source, byte[] data) + { + using var document = JsonDocument.Parse(data); + var dictionary = new Dictionary(StringComparer.OrdinalIgnoreCase); + + if (document.RootElement.ValueKind == JsonValueKind.Object) + { + foreach (var property in document.RootElement.EnumerateObject()) + { + switch (property.Value.ValueKind) + { + case JsonValueKind.Object when property.Value.TryGetProperty("extensions", out var extensionsElement): + AddExtensions(dictionary, property.Name, extensionsElement); 
+ break; + case JsonValueKind.String: + AddExtension(dictionary, property.Name, property.Value.GetString()); + break; + case JsonValueKind.Array: + foreach (var item in property.Value.EnumerateArray()) + { + if (item.ValueKind == JsonValueKind.String) + { + AddExtension(dictionary, item.GetString(), property.Name); + } + } + break; + } + } + } + else if (document.RootElement.ValueKind == JsonValueKind.Array) + { + foreach (var element in document.RootElement.EnumerateArray()) + { + if (element.ValueKind == JsonValueKind.Object && + element.TryGetProperty("extension", out var extensionProperty) && + element.TryGetProperty("mime", out var mimeProperty)) + { + AddExtension(dictionary, extensionProperty.GetString(), mimeProperty.GetString()); + } + } + } + else + { + Console.WriteLine($"Warning: Unsupported JSON format from {source}." ); + } + + return dictionary; + } + + private static Dictionary ParseMimeTypesListing(string source, byte[] data) + { + var dictionary = new Dictionary(StringComparer.OrdinalIgnoreCase); + using var reader = new StreamReader(new MemoryStream(data), Encoding.UTF8, true); + + while (reader.ReadLine() is { } line) + { + line = line.Trim(); + if (line.Length == 0 || line.StartsWith('#')) + { + continue; + } + + var parts = line.Split(MimeTypeSeparators, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + if (parts.Length < 2) + { + continue; + } + + var mime = parts[0]; + for (var i = 1; i < parts.Length; i++) + { + AddExtension(dictionary, parts[i], mime); + } + } + + if (dictionary.Count == 0) + { + Console.WriteLine($"Warning: No MIME entries parsed from {source}." 
); + } + + return dictionary; + } + + private static void AddExtensions(Dictionary dictionary, string mime, JsonElement extensionsElement) + { + foreach (var extension in extensionsElement.EnumerateArray()) + { + if (extension.ValueKind == JsonValueKind.String) + { + AddExtension(dictionary, extension.GetString(), mime); + } + } + } + + private static void AddExtension(Dictionary dictionary, string? extension, string? mime) + { + var normalized = NormalizeExtension(extension); + if (string.IsNullOrEmpty(normalized) || string.IsNullOrWhiteSpace(mime)) + { + return; + } + + dictionary[normalized] = mime!; + } + + private static Dictionary LoadExisting(string outputPath) + { + if (!File.Exists(outputPath)) + { + return new Dictionary(StringComparer.OrdinalIgnoreCase); + } + + using var document = JsonDocument.Parse(File.ReadAllText(outputPath)); + var dictionary = new Dictionary(StringComparer.OrdinalIgnoreCase); + + foreach (var property in document.RootElement.EnumerateObject()) + { + var normalized = NormalizeExtension(property.Name); + var value = property.Value.GetString(); + if (string.IsNullOrEmpty(normalized) || string.IsNullOrEmpty(value)) + { + continue; + } + + dictionary[normalized] = value; + } + + return dictionary; + } + + private static Dictionary Merge(Dictionary remote, Dictionary existing, SyncOptions options) + { + var result = new Dictionary(remote, StringComparer.OrdinalIgnoreCase); + + foreach (var kvp in existing) + { + if (options.PreferRemote) + { + result.TryAdd(kvp.Key, kvp.Value); + } + else + { + result[kvp.Key] = kvp.Value; + } + } + + foreach (var kvp in CustomMappings()) + { + result[kvp.Key] = kvp.Value; + } + + return result; + } + + private static IEnumerable> CustomMappings() + { + yield return new KeyValuePair("tar.gz", "application/gzip"); + yield return new KeyValuePair("tar.bz2", "application/x-bzip2"); + yield return new KeyValuePair("tar.xz", "application/x-xz"); + yield return new KeyValuePair("tar.zst", "application/zstd"); 
+ yield return new KeyValuePair("d.ts", "application/typescript"); + yield return new KeyValuePair("cjs", "application/node"); + yield return new KeyValuePair("mjs", "text/javascript"); + yield return new KeyValuePair("wasm", "application/wasm"); + yield return new KeyValuePair("heic", "image/heic"); + yield return new KeyValuePair("heif", "image/heif"); + yield return new KeyValuePair("ics", "text/calendar"); + yield return new KeyValuePair("ps1", "application/x-powershell"); + yield return new KeyValuePair("appx", "application/vnd.ms-appx"); + } + + private static void WriteOutput(string outputPath, Dictionary data) + { + Directory.CreateDirectory(Path.GetDirectoryName(outputPath)!); + + using var stream = File.Create(outputPath); + using var writer = new Utf8JsonWriter(stream, new JsonWriterOptions + { + Indented = true, + Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping + }); + + writer.WriteStartObject(); + foreach (var kvp in data.OrderBy(static x => x.Key, StringComparer.OrdinalIgnoreCase)) + { + writer.WriteString(kvp.Key, kvp.Value); + } + + writer.WriteEndObject(); + } + + private static string NormalizeExtension(string? extension) + { + if (string.IsNullOrWhiteSpace(extension)) + { + return string.Empty; + } + + return extension.Trim().Trim('.').ToLowerInvariant(); + } + + private sealed record SyncOptions(IReadOnlyList Sources, string OutputPath, bool PreferRemote) + { + public static SyncOptions Parse(string[] args) + { + var sources = new List(DefaultSources); + string? 
output = null; + bool preferRemote = false; + var customSources = false; + + for (var i = 0; i < args.Length; i++) + { + switch (args[i]) + { + case "--source" when i + 1 < args.Length: + if (!customSources) + { + sources.Clear(); + customSources = true; + } + + foreach (var value in args[++i].Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)) + { + sources.Add(value); + } + break; + case "--add-source" when i + 1 < args.Length: + foreach (var value in args[++i].Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)) + { + sources.Add(value); + } + break; + case "--reset-sources": + sources.Clear(); + customSources = true; + break; + case "--output" when i + 1 < args.Length: + output = args[++i]; + break; + case "--prefer-remote": + preferRemote = true; + break; + } + } + + output ??= Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "ManagedCode.MimeTypes", "mimeTypes.json")); + + return new SyncOptions(sources, output, preferRemote); + } + } +} diff --git a/ManagedCode.MimeTypes.Tests/ContentDetectionTests.cs b/ManagedCode.MimeTypes.Tests/ContentDetectionTests.cs new file mode 100644 index 0000000..7270693 --- /dev/null +++ b/ManagedCode.MimeTypes.Tests/ContentDetectionTests.cs @@ -0,0 +1,117 @@ +using System; +using System.IO; +using System.Linq; +using System.Text; +using Shouldly; +using Xunit; + +namespace ManagedCode.MimeTypes.Tests; + +public class ContentDetectionTests +{ + [Fact] + public void PdfHeader_ShouldBeDetected() + { + var pdfBytes = Encoding.ASCII.GetBytes("%PDF-1.7\n"); + Detect(pdfBytes).ShouldBe("application/pdf"); + } + + [Fact] + public void PngHeader_ShouldBeDetected() + { + var pngBytes = new byte[] { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00 }; + Detect(pngBytes).ShouldBe("image/png"); + } + + [Fact] + public void WebpHeader_ShouldBeDetected() + { + var webpBytes = Combine( + new byte[] { 0x52, 0x49, 0x46, 0x46, 0x2A, 0x00, 0x00, 0x00 }, 
+ Encoding.ASCII.GetBytes("WEBP"), + new byte[] { 0x56, 0x50, 0x38, 0x4C }); + Detect(webpBytes).ShouldBe("image/webp"); + } + + [Fact] + public void Mp4Header_ShouldBeDetected() + { + var mp4Bytes = Combine( + new byte[] { 0x00, 0x00, 0x00, 0x18 }, + Encoding.ASCII.GetBytes("ftyp"), + Encoding.ASCII.GetBytes("isom"), + Encoding.ASCII.GetBytes("isom")); + Detect(mp4Bytes).ShouldBe("video/mp4"); + } + + [Fact] + public void ZipHeader_ShouldFallbackToZip() + { + var zipBytes = Combine( + new byte[] { 0x50, 0x4B, 0x03, 0x04 }, + Encoding.ASCII.GetBytes("hello.txt")); + Detect(zipBytes).ShouldBe("application/zip"); + } + + [Fact] + public void DocxHeader_ShouldBeDetected() + { + var docxBytes = Combine( + new byte[] { 0x50, 0x4B, 0x03, 0x04 }, + Encoding.ASCII.GetBytes("word/document.xml")); + Detect(docxBytes).ShouldBe("application/vnd.openxmlformats-officedocument.wordprocessingml.document"); + } + + [Fact] + public void ShortStream_ShouldReturnDefault() + { + using var stream = new MemoryStream(new byte[] { 0x01, 0x02 }); + MimeHelper.GetMimeTypeByContent(stream).ShouldBe("application/octet-stream"); + stream.Position.ShouldBe(0); + } + + [Fact] + public void EmptyStream_ShouldReturnDefault() + { + using var stream = new MemoryStream(); + MimeHelper.GetMimeTypeByContent(stream).ShouldBe("application/octet-stream"); + } + + [Fact] + public void FilePathOverload_ShouldDetect() + { + var pdfBytes = Encoding.ASCII.GetBytes("%PDF-2.0\n"); + var tempFile = Path.GetTempFileName(); + try + { + File.WriteAllBytes(tempFile, pdfBytes); + MimeHelper.GetMimeTypeByContent(tempFile).ShouldBe("application/pdf"); + } + finally + { + File.Delete(tempFile); + } + } + + private static string Detect(byte[] bytes) + { + using var stream = new MemoryStream(bytes); + var detected = MimeHelper.GetMimeTypeByContent(stream); + stream.Position.ShouldBe(0); + return detected; + } + + private static byte[] Combine(params byte[][] segments) + { + var totalLength = segments.Sum(static s => 
s.Length); + var buffer = new byte[totalLength]; + var offset = 0; + foreach (var segment in segments) + { + Buffer.BlockCopy(segment, 0, buffer, offset, segment.Length); + offset += segment.Length; + } + + return buffer; + } +} diff --git a/ManagedCode.MimeTypes.Tests/GeneratorTests.cs b/ManagedCode.MimeTypes.Tests/GeneratorTests.cs index 79d4121..bd06a76 100644 --- a/ManagedCode.MimeTypes.Tests/GeneratorTests.cs +++ b/ManagedCode.MimeTypes.Tests/GeneratorTests.cs @@ -13,8 +13,11 @@ public void ExtensionsTest() MimeHelper.GetMimeType(".gz").ShouldBe("application/gzip"); MimeHelper.GetMimeType("word.docx").ShouldBe("application/vnd.openxmlformats-officedocument.wordprocessingml.document"); MimeHelper.GetMimeType("C:\\\\users\\file.txt").ShouldBe("text/plain"); + MimeHelper.GetMimeType("https://cdn.example.com/assets/image.png?version=1").ShouldBe("image/png"); + MimeHelper.GetMimeType("ARCHIVE.TAR.GZ").ShouldBe("application/gzip"); + MimeHelper.GetMimeType("module.d.ts").ShouldBe("application/typescript"); } - + [Fact] public void EmptyExtensionsTest() { @@ -43,4 +46,36 @@ public void GeneratedDictionaryTest() MimeHelper.GetMimeType(".docx").ShouldBe("application/vnd.openxmlformats-officedocument.wordprocessingml.document"); MimeHelper.GetMimeType(".7z").ShouldBe("application/x-7z-compressed"); } + + [Fact] + public void GetExtensionsShouldReturnKnownExtensions() + { + var jpegExtensions = MimeHelper.GetExtensions("image/jpeg"); + jpegExtensions.ShouldContain(".jpg"); + jpegExtensions.ShouldContain(".jpeg"); + jpegExtensions.ShouldContain(".jpe"); + + MimeHelper.TryGetExtensions("application/x-unknown", out _).ShouldBeFalse(); + } + + [Fact] + public void RuntimeRegistrationShouldUpdateLookups() + { + const string extension = "customext"; + const string mime = "application/x-custom"; + + try + { + MimeHelper.RegisterMimeType(extension, mime); + MimeHelper.GetMimeType($"file.{extension}").ShouldBe(mime); + + MimeHelper.TryGetExtensions(mime, out var 
extensions).ShouldBeTrue(); + extensions.ShouldContain($".{extension}"); + } + finally + { + MimeHelper.UnregisterMimeType(extension).ShouldBeTrue(); + MimeHelper.GetMimeType(extension).ShouldBe("application/octet-stream"); + } + } } \ No newline at end of file diff --git a/ManagedCode.MimeTypes.Tests/MimeCategoryTests.cs b/ManagedCode.MimeTypes.Tests/MimeCategoryTests.cs index f88a951..61364aa 100644 --- a/ManagedCode.MimeTypes.Tests/MimeCategoryTests.cs +++ b/ManagedCode.MimeTypes.Tests/MimeCategoryTests.cs @@ -81,7 +81,7 @@ public void CertificateMimeTypes_ShouldBeCertificate(string mime) [InlineData("")] [InlineData(null)] [InlineData(" ")] - public void EmptyMimeTypes_ShouldBeUnknown(string mime) + public void EmptyMimeTypes_ShouldBeUnknown(string? mime) { MimeHelper.GetMimeCategory(mime).ShouldBe(MimeTypeCategory.Unknown); } @@ -116,6 +116,41 @@ public void PresentationMimeTypes_ShouldBePresentation(string mime) MimeHelper.IsPresentation(mime).ShouldBeTrue(); } + [Theory] + [InlineData("application/javascript")] + [InlineData("text/x-php")] + [InlineData("application/x-sh")] + public void ScriptMimeTypes_ShouldBeScript(string mime) + { + MimeHelper.GetMimeCategory(mime).ShouldBe(MimeTypeCategory.Script); + MimeHelper.IsScript(mime).ShouldBeTrue(); + } + + [Theory] + [InlineData("application/octet-stream")] + [InlineData("application/x-binary")] + public void BinaryMimeTypes_ShouldBeBinary(string mime) + { + MimeHelper.GetMimeCategory(mime).ShouldBe(MimeTypeCategory.Binary); + MimeHelper.IsBinary(mime).ShouldBeTrue(); + } + + [Theory] + [InlineData("multipart/form-data")] + [InlineData("multipart/related")] + public void MultipartMimeTypes_ShouldBeMultipart(string mime) + { + MimeHelper.GetMimeCategory(mime).ShouldBe(MimeTypeCategory.Multipart); + } + + [Theory] + [InlineData("message/rfc822")] + [InlineData("message/global")] + public void MessageMimeTypes_ShouldBeMessage(string mime) + { + MimeHelper.GetMimeCategory(mime).ShouldBe(MimeTypeCategory.Message); 
+ } + [Fact] public void InvalidMimeType_ShouldBeUnknown() { diff --git a/ManagedCode.MimeTypes.sln b/ManagedCode.MimeTypes.sln index 01ef955..2b16ea8 100644 --- a/ManagedCode.MimeTypes.sln +++ b/ManagedCode.MimeTypes.sln @@ -6,6 +6,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ManagedCode.MimeTypes.Gener EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ManagedCode.MimeTypes.Tests", "ManagedCode.MimeTypes.Tests\ManagedCode.MimeTypes.Tests.csproj", "{25FA73FF-FD66-4C66-9357-325AE3F2F709}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ManagedCode.MimeTypes.Sync", "ManagedCode.MimeTypes.Sync\ManagedCode.MimeTypes.Sync.csproj", "{272C6BCD-6A06-4ED5-AD04-035F471BCB02}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -20,9 +22,13 @@ Global {C4805F29-78B8-4BD8-86FE-4E322419E8DE}.Debug|Any CPU.Build.0 = Debug|Any CPU {C4805F29-78B8-4BD8-86FE-4E322419E8DE}.Release|Any CPU.ActiveCfg = Release|Any CPU {C4805F29-78B8-4BD8-86FE-4E322419E8DE}.Release|Any CPU.Build.0 = Release|Any CPU - {25FA73FF-FD66-4C66-9357-325AE3F2F709}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {25FA73FF-FD66-4C66-9357-325AE3F2F709}.Debug|Any CPU.Build.0 = Debug|Any CPU - {25FA73FF-FD66-4C66-9357-325AE3F2F709}.Release|Any CPU.ActiveCfg = Release|Any CPU - {25FA73FF-FD66-4C66-9357-325AE3F2F709}.Release|Any CPU.Build.0 = Release|Any CPU - EndGlobalSection + {25FA73FF-FD66-4C66-9357-325AE3F2F709}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {25FA73FF-FD66-4C66-9357-325AE3F2F709}.Debug|Any CPU.Build.0 = Debug|Any CPU + {25FA73FF-FD66-4C66-9357-325AE3F2F709}.Release|Any CPU.ActiveCfg = Release|Any CPU + {25FA73FF-FD66-4C66-9357-325AE3F2F709}.Release|Any CPU.Build.0 = Release|Any CPU + {272C6BCD-6A06-4ED5-AD04-035F471BCB02}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {272C6BCD-6A06-4ED5-AD04-035F471BCB02}.Debug|Any CPU.Build.0 = Debug|Any CPU + {272C6BCD-6A06-4ED5-AD04-035F471BCB02}.Release|Any CPU.ActiveCfg = 
Release|Any CPU + {272C6BCD-6A06-4ED5-AD04-035F471BCB02}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection EndGlobal diff --git a/ManagedCode.MimeTypes/ManagedCode.MimeTypes.csproj b/ManagedCode.MimeTypes/ManagedCode.MimeTypes.csproj index 84b4471..70bd5e5 100644 --- a/ManagedCode.MimeTypes/ManagedCode.MimeTypes.csproj +++ b/ManagedCode.MimeTypes/ManagedCode.MimeTypes.csproj @@ -2,7 +2,7 @@ enable - 13 + preview net8.0;net9.0 true $(BaseIntermediateOutputPath)Generated diff --git a/ManagedCode.MimeTypes/MimeHelper.cs b/ManagedCode.MimeTypes/MimeHelper.cs index dea12d8..f6fff54 100644 --- a/ManagedCode.MimeTypes/MimeHelper.cs +++ b/ManagedCode.MimeTypes/MimeHelper.cs @@ -1,15 +1,128 @@ using System; +using System.Buffers; using System.Collections.Generic; using System.IO; +using System.Linq; +using System.Text; using System.Text.RegularExpressions; +using System.Threading; namespace ManagedCode.MimeTypes; public static partial class MimeHelper { - private static readonly IDictionary MimeTypes = new Dictionary(StringComparer.InvariantCultureIgnoreCase); + private const string DefaultMimeType = "application/octet-stream"; + private const int ZipProbeLength = 560; + + private static readonly ReaderWriterLockSlim SyncRoot = new(LockRecursionPolicy.NoRecursion); + private static readonly Dictionary MimeTypes = new(StringComparer.OrdinalIgnoreCase); + private static readonly Dictionary> ExtensionsByMime = new(StringComparer.OrdinalIgnoreCase); + + private static readonly Regex XmlPattern = new(@"^(?:application|text)/.*?\+?xml$", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex JsonPattern = new(@"^application/(?:.*?\+)?json5?$", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex ArchivePattern = new(@"^application/(?:zip|x-(?:7z|rar|tar|bzip2|gzip)-compressed|gzip|vnd\.rar)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex ExecutablePattern = 
new(@"^application/(?:x-msdownload|x-executable|x-msi|x-apple-diskimage)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex CertificatePattern = new(@"^application/(?:x-x509-ca-cert|pkix-cert|x-pkcs12)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex CalendarPattern = new(@"^(?:text/calendar|application/ics)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex EmailPattern = new(@"^(?:message/(?:rfc822|global)|application/(?:mbox|x-msmessage))", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex WordPattern = new(@"^application/(?:msword|vnd\.openxmlformats-officedocument\.wordprocessingml\.|vnd\.ms-word\.|vnd\.oasis\.opendocument\.text)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex SpreadsheetPattern = new(@"^application/(?:vnd\.ms-excel|vnd\.openxmlformats-officedocument\.spreadsheetml\.|vnd\.oasis\.opendocument\.spreadsheet)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex PresentationPattern = new(@"^application/(?:vnd\.ms-powerpoint|vnd\.openxmlformats-officedocument\.presentationml\.|vnd\.oasis\.opendocument\.presentation)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex ScriptPattern = new(@"^(?:application|text)/(?:javascript|ecmascript|x-php|x-sh|x-shellscript|x-python|x-ruby|x-perl)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly HashSet ScriptMimeSet = new(StringComparer.OrdinalIgnoreCase) + { + "application/javascript", + "application/x-javascript", + "text/javascript", + "text/ecmascript", + "application/x-php", + "application/x-httpd-php", + "application/x-sh", + "application/x-shellscript", + "text/x-shellscript", + "text/x-python", + "application/x-python", + "text/x-ruby", + "application/x-ruby", + "text/x-perl", + "application/x-perl" + }; + + private readonly struct 
MagicSignature + { + public MagicSignature(byte[] signature, string mime, int offset = 0) + { + Signature = signature; + Mime = mime; + Offset = offset; + } + + public byte[] Signature { get; } + public string Mime { get; } + public int Offset { get; } + } + + private static readonly MagicSignature[] MagicSignatures = + { + new(new byte[] { 0x25, 0x50, 0x44, 0x46 }, "application/pdf"), + new(new byte[] { 0xFF, 0xD8, 0xFF }, "image/jpeg"), + new(new byte[] { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A }, "image/png"), + new(new byte[] { 0x47, 0x49, 0x46, 0x38 }, "image/gif"), + new(new byte[] { 0x42, 0x4D }, "image/bmp"), + new(new byte[] { 0x49, 0x49, 0x2A, 0x00 }, "image/tiff"), + new(new byte[] { 0x4D, 0x4D, 0x00, 0x2A }, "image/tiff"), + new(new byte[] { 0x00, 0x00, 0x01, 0x00 }, "image/x-icon"), + new(new byte[] { 0x38, 0x42, 0x50, 0x53 }, "image/vnd.adobe.photoshop"), + new(new byte[] { 0x1F, 0x8B }, "application/gzip"), + new(new byte[] { 0x42, 0x5A, 0x68 }, "application/x-bzip2"), + new(new byte[] { 0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00 }, "application/x-rar-compressed"), + new(new byte[] { 0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C }, "application/x-7z-compressed"), + new(new byte[] { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }, "application/x-xz"), + new(new byte[] { 0x4F, 0x67, 0x67, 0x53 }, "audio/ogg"), + new(new byte[] { 0x66, 0x4C, 0x61, 0x43 }, "audio/flac"), + new(new byte[] { 0x49, 0x44, 0x33 }, "audio/mpeg"), + new(new byte[] { 0x53, 0x51, 0x4C, 0x69, 0x74, 0x65, 0x20, 0x66, 0x6F, 0x72, 0x6D, 0x61, 0x74, 0x20, 0x33, 0x00 }, "application/x-sqlite3"), + new(new byte[] { 0x25, 0x21, 0x50, 0x53 }, "application/postscript"), + new(new byte[] { 0x7F, 0x45, 0x4C, 0x46 }, "application/x-executable"), + new(new byte[] { 0x25, 0x21, 0x50, 0x53, 0x2D }, "application/postscript"), + new(new byte[] { 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1 }, "application/vnd.ms-office"), + new(new byte[] { 0xFF, 0xFE, 0x3C, 0x00 }, "text/xml", 0), + new(new byte[] { 0x3C, 0x00, 
0x3F, 0x00 }, "text/xml", 0) + }; + + private static readonly int MaxSignatureLength = MagicSignatures.Max(static signature => signature.Offset + signature.Signature.Length); + private static readonly byte[] RiffSignature = { 0x52, 0x49, 0x46, 0x46 }; + private static readonly byte[] WebpFourCC = { 0x57, 0x45, 0x42, 0x50 }; + private static readonly byte[] AviFourCC = { 0x41, 0x56, 0x49, 0x20 }; + private static readonly byte[] WaveFourCC = { 0x57, 0x41, 0x56, 0x45 }; + private static readonly byte[] FtypFourCC = { 0x66, 0x74, 0x79, 0x70 }; + private static readonly byte[] QuickTimeBrand = Encoding.ASCII.GetBytes("qt "); + private static readonly byte[][] Mp4Brands = + { + Encoding.ASCII.GetBytes("isom"), + Encoding.ASCII.GetBytes("iso2"), + Encoding.ASCII.GetBytes("avc1"), + Encoding.ASCII.GetBytes("mp41"), + Encoding.ASCII.GetBytes("mp42"), + Encoding.ASCII.GetBytes("dash"), + Encoding.ASCII.GetBytes("mmp4"), + Encoding.ASCII.GetBytes("MSNV"), + Encoding.ASCII.GetBytes("M4V "), + Encoding.ASCII.GetBytes("MP4V"), + Encoding.ASCII.GetBytes("3gp4") + }; + private static readonly byte[] TorrentPrefix = Encoding.ASCII.GetBytes("d8:announce"); + private static readonly byte[] ZipSignature = { 0x50, 0x4B, 0x03, 0x04 }; + private static readonly byte[] ZipEmptySignature = { 0x50, 0x4B, 0x05, 0x06 }; + private static readonly byte[] RarSignature = { 0x52, 0x61, 0x72, 0x21 }; + private static readonly byte[] MzSignature = { 0x4D, 0x5A, 0x90, 0x00 }; + private static readonly byte[] RtfSignature = { 0x7B, 0x5C, 0x72, 0x74 }; + private static readonly int MaxContentSniffLength = Math.Max(MaxSignatureLength, ZipProbeLength); static partial void Init(); + static MimeHelper() { Init(); @@ -17,130 +130,257 @@ static MimeHelper() public static string GetMimeType(FileInfo file) { - return GetMimeType(file.Extension); + if (file == null) + { + throw new ArgumentNullException(nameof(file)); + } + + return GetMimeType(file.Name); } - public static string GetMimeType(string extension) 
+ public static string GetMimeType(string? value) { - if (string.IsNullOrWhiteSpace(extension)) + if (string.IsNullOrWhiteSpace(value)) { - return "application/octet-stream"; + return DefaultMimeType; } - - var parsedExtension = Path.GetExtension(extension); - if (!string.IsNullOrEmpty(parsedExtension)) + + foreach (var candidate in EnumerateExtensionCandidates(value!)) { - extension = parsedExtension; + var normalized = NormalizeExtensionKey(candidate); + if (normalized.Length == 0) + { + continue; + } + + SyncRoot.EnterReadLock(); + try + { + if (MimeTypes.TryGetValue(normalized, out var mime)) + { + return mime; + } + } + finally + { + SyncRoot.ExitReadLock(); + } } - - extension = extension.Replace('.', '\0'); - return MimeTypes.TryGetValue(extension, out var mime) ? mime : "application/octet-stream"; + + return DefaultMimeType; } - - private static readonly Regex XmlPattern = new(@"^(?:application|text)/.*?\+?xml$", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex JsonPattern = new(@"^application/(?:.*?\+)?json5?$", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex ArchivePattern = new(@"^application/(?:zip|x-(?:7z|rar|tar|bzip2|gzip)-compressed|gzip|vnd\.rar|octet-stream)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex ExecutablePattern = new(@"^application/(?:x-msdownload|x-executable|x-msi|x-apple-diskimage|octet-stream)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex CertificatePattern = new(@"^application/(?:x-x509-ca-cert|pkix-cert|x-pkcs12)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); - - // New patterns for specific document types - private static readonly Regex WordPattern = new(@"^application/(?:msword|vnd\.openxmlformats-officedocument\.wordprocessingml\.|vnd\.ms-word\.|vnd\.oasis\.opendocument\.text)", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex SpreadsheetPattern = 
new(@"^application/(?:vnd\.ms-excel|vnd\.openxmlformats-officedocument\.spreadsheetml\.|vnd\.oasis\.opendocument\.spreadsheet)", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex PresentationPattern = new(@"^application/(?:vnd\.ms-powerpoint|vnd\.openxmlformats-officedocument\.presentationml\.|vnd\.oasis\.opendocument\.presentation)", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex CalendarPattern = new(@"^(?:text/calendar|application/ics)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex EmailPattern = new(@"^(?:message/(?:rfc822|global)|application/(?:mbox|x-msmessage))", RegexOptions.Compiled | RegexOptions.IgnoreCase); - - private static readonly Dictionary MimeTypesForContent = new() - { - { new byte[] { 0x25, 0x50, 0x44, 0x46 }, "application/pdf" }, // PDF - { new byte[] { 0xFF, 0xD8, 0xFF }, "image/jpeg" }, // JPEG - { new byte[] { 0x89, 0x50, 0x4E, 0x47 }, "image/png" }, // PNG - { new byte[] { 0x47, 0x49, 0x46, 0x38 }, "image/gif" }, // GIF - { new byte[] { 0x50, 0x4B, 0x03, 0x04 }, "application/zip" }, // ZIP - { new byte[] { 0x1F, 0x8B }, "application/gzip" } // GZIP - }; - - public static string GetMimeTypeByContent(string filePath) + + public static IReadOnlyCollection GetExtensions(string mime) + { + return TryGetExtensions(mime, out var extensions) ? extensions : Array.Empty(); + } + + public static bool TryGetExtensions(string mime, out IReadOnlyCollection extensions) + { + extensions = Array.Empty(); + if (string.IsNullOrWhiteSpace(mime)) + { + return false; + } + + SyncRoot.EnterReadLock(); + try + { + if (ExtensionsByMime.TryGetValue(mime.Trim(), out var set) && set.Count > 0) + { + extensions = set.Select(static e => "." 
+ e).Distinct(StringComparer.OrdinalIgnoreCase).OrderBy(static e => e, StringComparer.OrdinalIgnoreCase).ToArray(); + return true; + } + } + finally + { + SyncRoot.ExitReadLock(); + } + + return false; + } + + public static void RegisterMimeType(string extension, string mime) { - byte[] fileHeader = new byte[4]; - using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read)) + RegisterMimeTypeInternal(extension, mime, overwrite: true); + } + + public static bool UnregisterMimeType(string extension) + { + if (string.IsNullOrWhiteSpace(extension)) { - fs.ReadExactly(fileHeader, 0, fileHeader.Length); + return false; } - foreach (var mime in MimeTypesForContent) + var normalized = NormalizeExtensionKey(extension); + if (normalized.Length == 0) { - if (fileHeader.AsSpan().Slice(0, mime.Key.Length).SequenceEqual(mime.Key.AsSpan())) + return false; + } + + SyncRoot.EnterWriteLock(); + try + { + if (!MimeTypes.TryGetValue(normalized, out var mime)) + { + return false; + } + + var removed = MimeTypes.Remove(normalized); + if (removed && ExtensionsByMime.TryGetValue(mime, out var set)) { - return mime.Value; + set.Remove(normalized); + if (set.Count == 0) + { + ExtensionsByMime.Remove(mime); + } } + + return removed; + } + finally + { + SyncRoot.ExitWriteLock(); + } + } + + public static string GetMimeTypeByContent(string filePath) + { + if (filePath == null) + { + throw new ArgumentNullException(nameof(filePath)); } - return "application/octet-stream"; // Default MIME type + using var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); + return GetMimeTypeByContent(fileStream); } public static string GetMimeTypeByContent(Stream fileStream) { - byte[] fileHeader = new byte[4]; - fileStream.ReadExactly(fileHeader, 0, fileHeader.Length); + if (fileStream == null) + { + throw new ArgumentNullException(nameof(fileStream)); + } + + var buffer = ArrayPool.Shared.Rent(MaxContentSniffLength); + long? 
position = null; - foreach (var mime in MimeTypesForContent) + try { - if (fileHeader.AsSpan().Slice(0, mime.Key.Length).SequenceEqual(mime.Key.AsSpan())) + if (fileStream.CanSeek) + { + position = fileStream.Position; + } + + var bytesRead = ReadUpTo(fileStream, buffer, MaxContentSniffLength); + + if (position.HasValue && fileStream.CanSeek) + { + fileStream.Seek(position.Value, SeekOrigin.Begin); + } + + if (bytesRead <= 0) { - return mime.Value; + return DefaultMimeType; } + + var header = new ReadOnlySpan(buffer, 0, bytesRead); + var detected = DetectMimeType(header); + return detected ?? DefaultMimeType; } + finally + { + if (position.HasValue && fileStream.CanSeek) + { + fileStream.Seek(position.Value, SeekOrigin.Begin); + } - return "application/octet-stream"; // Default MIME type + ArrayPool.Shared.Return(buffer, true); + } } - public static MimeTypeCategory GetMimeCategory(string mime) { if (string.IsNullOrWhiteSpace(mime)) + { return MimeTypeCategory.Unknown; - + } + var m = mime.ToLowerInvariant(); - - // Check formats that can appear in multiple primary types first + if (XmlPattern.IsMatch(m)) return MimeTypeCategory.Xml; if (JsonPattern.IsMatch(m)) return MimeTypeCategory.Json; - - // Primary types + if (m.StartsWith("video/")) return MimeTypeCategory.Video; if (m.StartsWith("audio/")) return MimeTypeCategory.Audio; if (m.StartsWith("image/")) return MimeTypeCategory.Image; if (m.StartsWith("font/")) return MimeTypeCategory.Font; if (m.StartsWith("model/")) return MimeTypeCategory.Model; - - // Text types (after checking for XML/JSON) - if (m.StartsWith("text/")) return MimeTypeCategory.Text; - - // Document types + if (m.StartsWith("multipart/")) return MimeTypeCategory.Multipart; + if (m.StartsWith("message/")) return MimeTypeCategory.Message; + + if (m.StartsWith("text/")) + { + if (ScriptPattern.IsMatch(m)) + { + return MimeTypeCategory.Script; + } + + return MimeTypeCategory.Text; + } + if (m == "application/pdf") return MimeTypeCategory.Pdf; if 
(SpreadsheetPattern.IsMatch(m)) return MimeTypeCategory.Spreadsheet; if (PresentationPattern.IsMatch(m)) return MimeTypeCategory.Presentation; if (WordPattern.IsMatch(m)) return MimeTypeCategory.Document; - - // Special types + if (ArchivePattern.IsMatch(m)) return MimeTypeCategory.Archive; if (ExecutablePattern.IsMatch(m)) return MimeTypeCategory.Executable; if (CertificatePattern.IsMatch(m)) return MimeTypeCategory.Certificate; if (CalendarPattern.IsMatch(m)) return MimeTypeCategory.Calendar; if (EmailPattern.IsMatch(m)) return MimeTypeCategory.Email; - - // Generic application type - return m.StartsWith("application/") ? MimeTypeCategory.Document : MimeTypeCategory.Unknown; + + if (ScriptMimeSet.Contains(m)) return MimeTypeCategory.Script; + + if (m == DefaultMimeType || m.EndsWith("/octet-stream", StringComparison.Ordinal)) + { + return MimeTypeCategory.Binary; + } + + if (m.StartsWith("application/")) + { + if (m.Contains("script", StringComparison.Ordinal) || m.Contains("powershell", StringComparison.Ordinal)) + { + return MimeTypeCategory.Script; + } + + if (m.Contains("document", StringComparison.Ordinal) || m.Contains("msword", StringComparison.Ordinal) || m.Contains("officedocument", StringComparison.Ordinal)) + { + return MimeTypeCategory.Document; + } + + if (m.Contains("spreadsheet", StringComparison.Ordinal)) + { + return MimeTypeCategory.Spreadsheet; + } + + if (m.Contains("presentation", StringComparison.Ordinal)) + { + return MimeTypeCategory.Presentation; + } + + return MimeTypeCategory.Binary; + } + + return MimeTypeCategory.Unknown; } - + public static bool IsVideo(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Video; public static bool IsAudio(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Audio; public static bool IsImage(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Image; public static bool IsDocument(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Document; public static bool IsPdf(string mime) => 
GetMimeCategory(mime) == MimeTypeCategory.Pdf; public static bool IsArchive(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Archive; - public static bool IsText(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Text; public static bool IsJson(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Json; public static bool IsXml(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Xml; @@ -152,4 +392,357 @@ public static MimeTypeCategory GetMimeCategory(string mime) public static bool IsPresentation(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Presentation; public static bool IsCalendar(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Calendar; public static bool IsEmail(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Email; -} \ No newline at end of file + public static bool IsScript(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Script; + public static bool IsBinary(string mime) => GetMimeCategory(mime) == MimeTypeCategory.Binary; + + private static IEnumerable EnumerateExtensionCandidates(string value) + { + var trimmed = value.Trim(); + if (trimmed.Length == 0) + { + yield break; + } + + var separatorIndex = trimmed.IndexOfAny(new[] { '?', '#' }); + if (separatorIndex >= 0) + { + trimmed = trimmed[..separatorIndex]; + } + + string fileName; + try + { + fileName = Path.GetFileName(trimmed); + } + catch (ArgumentException) + { + fileName = trimmed; + } + + if (string.IsNullOrEmpty(fileName)) + { + fileName = trimmed; + } + + fileName = fileName.Trim(); + if (fileName.Length == 0) + { + yield break; + } + + if (!fileName.Contains('.')) + { + var bare = fileName.Trim('.'); + if (bare.Length > 0) + { + yield return bare; + } + yield break; + } + + var yielded = new HashSet(StringComparer.OrdinalIgnoreCase); + var index = fileName.IndexOf('.'); + while (index >= 0 && index < fileName.Length - 1) + { + var candidate = fileName[(index + 1)..].Trim('.'); + if (candidate.Length > 0 && 
yielded.Add(candidate)) + { + yield return candidate; + } + + index = fileName.IndexOf('.', index + 1); + } + + var sanitized = fileName.Trim('.'); + if (sanitized.Length > 0 && yielded.Add(sanitized)) + { + yield return sanitized; + } + } + + private static string NormalizeExtensionKey(string extension) + { + if (string.IsNullOrWhiteSpace(extension)) + { + return string.Empty; + } + + var normalized = extension.Trim().TrimStart('.'); + return normalized.ToLowerInvariant(); + } + + private static void RegisterMimeTypeInternal(string extension, string mime) + { + RegisterMimeTypeInternal(extension, mime, overwrite: true); + } + + private static void RegisterMimeTypeInternal(string extension, string mime, bool overwrite) + { + if (string.IsNullOrWhiteSpace(extension) || string.IsNullOrWhiteSpace(mime)) + { + return; + } + + var normalizedExtension = NormalizeExtensionKey(extension); + if (normalizedExtension.Length == 0) + { + return; + } + + var normalizedMime = string.Intern(mime.Trim()); + + SyncRoot.EnterWriteLock(); + try + { + if (MimeTypes.TryGetValue(normalizedExtension, out var existingMime)) + { + if (!overwrite) + { + return; + } + + if (!string.Equals(existingMime, normalizedMime, StringComparison.Ordinal)) + { + if (ExtensionsByMime.TryGetValue(existingMime, out var existingSet)) + { + existingSet.Remove(normalizedExtension); + if (existingSet.Count == 0) + { + ExtensionsByMime.Remove(existingMime); + } + } + } + } + + MimeTypes[normalizedExtension] = normalizedMime; + + if (!ExtensionsByMime.TryGetValue(normalizedMime, out var set)) + { + set = new HashSet(StringComparer.OrdinalIgnoreCase); + ExtensionsByMime[normalizedMime] = set; + } + + set.Add(normalizedExtension); + } + finally + { + SyncRoot.ExitWriteLock(); + } + } + + private static int ReadUpTo(Stream stream, byte[] buffer, int count) + { + var totalRead = 0; + while (totalRead < count) + { + var read = stream.Read(buffer, totalRead, count - totalRead); + if (read <= 0) + { + break; + } + + 
totalRead += read; + } + + return totalRead; + } + + private static string? DetectMimeType(ReadOnlySpan header) + { + foreach (var signature in MagicSignatures) + { + if (header.Length < signature.Offset + signature.Signature.Length) + { + continue; + } + + if (header.Slice(signature.Offset, signature.Signature.Length).SequenceEqual(signature.Signature)) + { + return signature.Mime; + } + } + + return DetectComplexSignature(header); + } + + private static string? DetectComplexSignature(ReadOnlySpan header) + { + if (header.Length >= 12 && header[..4].SequenceEqual(RiffSignature)) + { + if (header.Length >= 12) + { + var format = header.Slice(8, 4); + if (format.SequenceEqual(WebpFourCC)) + { + return "image/webp"; + } + + if (format.SequenceEqual(AviFourCC)) + { + return "video/x-msvideo"; + } + + if (format.SequenceEqual(WaveFourCC)) + { + return "audio/wav"; + } + } + } + + if (header.Length >= 12 && header.Slice(4, Math.Min(4, header.Length - 4)).SequenceEqual(FtypFourCC)) + { + if (header.Length >= 12) + { + var brand = header.Slice(8, Math.Min(4, header.Length - 8)); + if (IsMp4Brand(brand)) + { + return "video/mp4"; + } + + if (brand.SequenceEqual(QuickTimeBrand)) + { + return "video/quicktime"; + } + } + } + + if (header.Length >= 4 && header[0] == 0xFF && (header[1] & 0xE0) == 0xE0) + { + return "audio/mpeg"; + } + + if (header.Length >= 4 && (header[..4].SequenceEqual(ZipSignature) || header[..4].SequenceEqual(ZipEmptySignature))) + { + return DetectZipBasedType(header) ?? 
"application/zip"; + } + + if (header.Length >= 4 && header[..4].SequenceEqual(RarSignature)) + { + return "application/x-rar-compressed"; + } + + if (header.Length >= 4 && header[..4].SequenceEqual(MzSignature)) + { + return "application/x-msdownload"; + } + + if (header.Length >= 4 && header[..4].SequenceEqual(RtfSignature)) + { + return "application/rtf"; + } + + if (header.Length >= TorrentPrefix.Length && header[..TorrentPrefix.Length].SequenceEqual(TorrentPrefix)) + { + return "application/x-bittorrent"; + } + + return null; + } + + private static bool IsMp4Brand(ReadOnlySpan brand) + { + foreach (var known in Mp4Brands) + { + if (brand.SequenceEqual(known)) + { + return true; + } + } + + return false; + } + + private static string? DetectZipBasedType(ReadOnlySpan header) + { + ReadOnlySpan epubPattern = "mimetypeapplication/epub+zip"u8; + if (ContainsAsciiIgnoreCase(header, epubPattern)) + { + return "application/epub+zip"; + } + + ReadOnlySpan odtPattern = "mimetypeapplication/vnd.oasis.opendocument.text"u8; + if (ContainsAsciiIgnoreCase(header, odtPattern)) + { + return "application/vnd.oasis.opendocument.text"; + } + + ReadOnlySpan odsPattern = "mimetypeapplication/vnd.oasis.opendocument.spreadsheet"u8; + if (ContainsAsciiIgnoreCase(header, odsPattern)) + { + return "application/vnd.oasis.opendocument.spreadsheet"; + } + + ReadOnlySpan odpPattern = "mimetypeapplication/vnd.oasis.opendocument.presentation"u8; + if (ContainsAsciiIgnoreCase(header, odpPattern)) + { + return "application/vnd.oasis.opendocument.presentation"; + } + + ReadOnlySpan wordPattern = "word/"u8; + if (ContainsAsciiIgnoreCase(header, wordPattern)) + { + return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"; + } + + ReadOnlySpan xlPattern = "xl/"u8; + if (ContainsAsciiIgnoreCase(header, xlPattern)) + { + return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"; + } + + ReadOnlySpan pptPattern = "ppt/"u8; + if (ContainsAsciiIgnoreCase(header, 
pptPattern)) + { + return "application/vnd.openxmlformats-officedocument.presentationml.presentation"; + } + + ReadOnlySpan androidManifestPattern = "AndroidManifest.xml"u8; + if (ContainsAsciiIgnoreCase(header, androidManifestPattern)) + { + return "application/vnd.android.package-archive"; + } + + return null; + } + + private static bool ContainsAsciiIgnoreCase(ReadOnlySpan span, ReadOnlySpan pattern) + { + if (pattern.Length == 0 || pattern.Length > span.Length) + { + return false; + } + + for (var i = 0; i <= span.Length - pattern.Length; i++) + { + var j = 0; + + for (; j < pattern.Length; j++) + { + byte a = span[i + j]; + byte b = pattern[j]; + + if ((uint)(a - 'A') <= 'Z' - 'A') + { + a = (byte)(a + 32); + } + + if ((uint)(b - 'A') <= 'Z' - 'A') + { + b = (byte)(b + 32); + } + + if (a != b) + { + break; + } + } + + if (j == pattern.Length) + { + return true; + } + } + + return false; + } +} diff --git a/ManagedCode.MimeTypes/MimeTypeCategory.cs b/ManagedCode.MimeTypes/MimeTypeCategory.cs index 109d615..1642a79 100644 --- a/ManagedCode.MimeTypes/MimeTypeCategory.cs +++ b/ManagedCode.MimeTypes/MimeTypeCategory.cs @@ -19,5 +19,9 @@ public enum MimeTypeCategory Executable, Certificate, Calendar, - Email -} \ No newline at end of file + Email, + Script, + Binary, + Multipart, + Message +} diff --git a/ManagedCode.MimeTypes/mimeTypes.json b/ManagedCode.MimeTypes/mimeTypes.json index 8d36ad0..f8a0623 100644 --- a/ManagedCode.MimeTypes/mimeTypes.json +++ b/ManagedCode.MimeTypes/mimeTypes.json @@ -1,6 +1,7 @@ { "123": "application/vnd.lotus-1-2-3", "1km": "application/vnd.1000minds.decision-model+xml", + "210": "model/step", "3dml": "text/vnd.in3d.3dml", "3ds": "image/x-3ds", "3g2": "video/3gpp2", @@ -19,6 +20,7 @@ "acu": "application/vnd.acucobol", "acutc": "application/vnd.acucorp", "adp": "audio/adpcm", + "adts": "audio/aac", "aep": "application/vnd.audiograph", "afm": "application/x-font-type1", "afp": "application/vnd.ibm.modcap", @@ -31,11 +33,16 @@ "air": 
"application/vnd.adobe.air-application-installer-package+zip", "ait": "application/vnd.dvb.ait", "ami": "application/vnd.amiga.ami", + "aml": "application/automationml-aml+xml", + "amlx": "application/automationml-amlx+zip", "amr": "audio/amr", "apk": "application/vnd.android.package-archive", "apng": "image/apng", "appcache": "text/cache-manifest", + "appinstaller": "application/appinstaller", "application": "application/x-ms-application", + "appx": "application/vnd.ms-appx", + "appxbundle": "application/appxbundle", "apr": "application/vnd.lotus-approach", "arc": "application/x-freearc", "arj": "application/x-arj", @@ -51,6 +58,8 @@ "atomsvc": "application/atomsvc+xml", "atx": "application/vnd.antix.game-component", "au": "audio/basic", + "avci": "image/avci", + "avcs": "image/avcs", "avi": "video/x-msvideo", "avif": "image/avif", "aw": "application/applixware", @@ -59,15 +68,18 @@ "azv": "image/vnd.airzip.accelerator.azv", "azw": "application/vnd.amazon.ebook", "b16": "image/vnd.pco.b16", + "bary": "model/vnd.bary", "bat": "application/x-msdownload", "bcpio": "application/x-bcpio", "bdf": "application/x-font-bdf", "bdm": "application/vnd.syncml.dm+wbxml", + "bdo": "application/vnd.nato.bindingdataobject+xml", "bdoc": "application/x-bdoc", "bed": "application/vnd.realvnc.bed", "bh2": "application/vnd.fujitsu.oasysprs", "bin": "application/octet-stream", "blb": "application/x-blorb", + "blend": "application/x-blender", "blorb": "application/x-blorb", "bmi": "application/vnd.bmi", "bmml": "application/vnd.balsamiq.bmml+xml", @@ -76,7 +88,10 @@ "box": "application/vnd.previewsystems.box", "boz": "application/x-bzip2", "bpk": "application/octet-stream", + "brush": "application/vnd.procreate.brush", + "brushset": "application/vnd.procrate.brushset", "bsp": "model/vnd.valve.source.compiled-map", + "btf": "image/prs.btif", "btif": "image/prs.btif", "buffer": "application/octet-stream", "bz": "application/x-bzip", @@ -127,6 +142,7 @@ "cjs": "application/node", "cla": 
"application/vnd.claymore", "class": "application/java-vm", + "cld": "model/vnd.cld", "clkk": "application/vnd.crick.clicker.keyboard", "clkp": "application/vnd.crick.clicker.palette", "clkt": "application/vnd.crick.clicker.template", @@ -143,6 +159,7 @@ "com": "application/x-msdownload", "conf": "text/plain", "cpio": "application/x-cpio", + "cpl": "application/cpl+xml", "cpp": "text/x-c", "cpt": "application/mac-compactpro", "crd": "application/x-mscardfile", @@ -159,9 +176,11 @@ "csv": "text/csv", "cu": "application/cu-seeme", "curl": "text/vnd.curl", + "cwl": "application/cwl", "cww": "application/prs.cww", "cxt": "application/x-director", "cxx": "text/x-c", + "d.ts": "application/typescript", "dae": "model/vnd.collada+xml", "daf": "application/vnd.mobius.daf", "dart": "application/vnd.dart", @@ -169,6 +188,8 @@ "davmount": "application/davmount+xml", "dbf": "application/vnd.dbf", "dbk": "application/docbook+xml", + "dcm": "application/dicom", + "dcmp": "application/vnd.dcmp+xml", "dcr": "application/x-director", "dcurl": "text/vnd.curl.dcurl", "dd2": "application/vnd.oma.dd2+xml", @@ -181,6 +202,7 @@ "der": "application/x-x509-ca-cert", "dfac": "application/vnd.dreamfactory", "dgc": "application/x-dgc-compressed", + "dib": "image/bmp", "dic": "text/x-c", "dir": "application/x-director", "dis": "application/vnd.mobius.dis", @@ -194,6 +216,7 @@ "dmp": "application/vnd.tcpdump.pcap", "dms": "application/octet-stream", "dna": "application/vnd.dna", + "dng": "image/x-adobe-dng", "doc": "application/msword", "docm": "application/vnd.ms-word.document.macroenabled.12", "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", @@ -202,8 +225,10 @@ "dotx": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "dp": "application/vnd.osgi.dp", "dpg": "application/vnd.dpgraph", + "dpx": "image/dpx", "dra": "audio/vnd.dra", "drle": "image/dicom-rle", + "drm": "application/vnd.procreate.dream", "dsc": "text/prs.lines.tag", "dssc": 
"application/dssc+der", "dtb": "application/x-dtbook+xml", @@ -245,6 +270,7 @@ "et3": "application/vnd.eszigno3+xml", "etx": "text/x-setext", "eva": "application/x-eva", + "event_stream": "text/event-stream", "evy": "application/x-envoy", "exe": "application/x-msdownload", "exi": "application/exi", @@ -258,7 +284,9 @@ "f4v": "video/x-f4v", "f77": "text/x-fortran", "f90": "text/x-fortran", + "facti": "image/vnd.blockfact.facti", "fbs": "image/vnd.fastbidsheet", + "fbx": "application/vnd.autodesk.fbx", "fcdt": "application/vnd.adobe.formscentral.fcdt", "fcs": "application/vnd.isac.fcs", "fdf": "application/vnd.fdf", @@ -303,17 +331,22 @@ "gca": "application/x-gca-compressed", "gdl": "model/vnd.gdl", "gdoc": "application/vnd.google-apps.document", + "gdraw": "application/vnd.google-apps.drawing", "ged": "text/vnd.familysearch.gedcom", "geo": "application/vnd.dynageo", "geojson": "application/geo+json", "gex": "application/vnd.geometry-explorer", + "gform": "application/vnd.google-apps.form", "ggb": "application/vnd.geogebra.file", + "ggs": "application/vnd.geogebra.slides", "ggt": "application/vnd.geogebra.tool", "ghf": "application/vnd.groove-help", "gif": "image/gif", "gim": "application/vnd.groove-identity-message", + "gjam": "application/vnd.google-apps.jam", "glb": "model/gltf-binary", "gltf": "model/gltf+json", + "gmap": "application/vnd.google-apps.map", "gml": "application/gml+xml", "gmx": "application/vnd.gmx", "gnumeric": "application/x-gnumeric", @@ -326,8 +359,10 @@ "gre": "application/vnd.geometry-explorer", "grv": "application/vnd.groove-injector", "grxml": "application/srgs+xml", + "gscript": "application/vnd.google-apps.script", "gsf": "application/x-font-ghostscript", "gsheet": "application/vnd.google-apps.spreadsheet", + "gsite": "application/vnd.google-apps.site", "gslides": "application/vnd.google-apps.presentation", "gtar": "application/x-gtar", "gtm": "application/vnd.groove-tool-message", @@ -392,6 +427,7 @@ "iota": 
"application/vnd.astraea-software.iota", "ipfix": "application/ipfix", "ipk": "application/vnd.shana.informed.package", + "ipynb": "application/x-ipynb+json", "irm": "application/vnd.ibm.rights-management", "irp": "application/vnd.irepository.package+xml", "iso": "application/x-iso9660-image", @@ -401,10 +437,13 @@ "ivu": "application/vnd.immervision-ivu", "jad": "text/vnd.sun.j2me.app-descriptor", "jade": "text/jade", + "jaii": "image/jaii", + "jais": "image/jais", "jam": "application/vnd.jam", "jar": "application/java-archive", "jardiff": "application/x-java-archive-diff", "java": "text/x-java-source", + "jfif": "image/pjpeg", "jhc": "image/jphc", "jisp": "application/vnd.jisp", "jls": "image/jls", @@ -429,6 +468,8 @@ "jsonld": "application/ld+json", "jsonml": "application/jsonml+json", "jsx": "text/jsx", + "jt": "model/jt", + "jxl": "image/jxl", "jxr": "image/jxr", "jxra": "image/jxra", "jxrs": "image/jxrs", @@ -473,6 +514,7 @@ "lnk": "application/x-ms-shortcut", "log": "text/plain", "lostxml": "application/lost+xml", + "lottie": "application/zip+dotlottie", "lrf": "application/octet-stream", "lrm": "application/vnd.ms-lrm", "ltf": "application/vnd.frogans.ltf", @@ -486,11 +528,14 @@ "m1v": "video/mpeg", "m21": "application/mp21", "m2a": "audio/mpeg", + "m2t": "video/mp2t", + "m2ts": "video/mp2t", "m2v": "video/mpeg", "m3a": "audio/mpeg", "m3u": "audio/x-mpegurl", "m3u8": "application/vnd.apple.mpegurl", "m4a": "audio/x-m4a", + "m4b": "audio/mp4", "m4p": "application/mp4", "m4s": "video/iso.segment", "m4u": "video/vnd.mpegurl", @@ -525,6 +570,8 @@ "mft": "application/rpki-manifest", "mgp": "application/vnd.osgeo.mapguide.package", "mgz": "application/vnd.proteus.magazine", + "mht": "message/rfc822", + "mhtml": "message/rfc822", "mid": "audio/midi", "midi": "audio/midi", "mie": "application/x-mie", @@ -532,7 +579,7 @@ "mime": "message/rfc822", "mj2": "video/mj2", "mjp2": "video/mj2", - "mjs": "application/javascript", + "mjs": "text/javascript", "mk3d": 
"video/x-matroska", "mka": "audio/x-matroska", "mkd": "text/x-markdown", @@ -561,6 +608,7 @@ "mpd": "application/dash+xml", "mpe": "video/mpeg", "mpeg": "video/mpeg", + "mpf": "application/media-policy-dataset+xml", "mpg": "video/mpeg", "mpg4": "video/mp4", "mpga": "audio/mpeg", @@ -581,6 +629,8 @@ "msg": "application/vnd.ms-outlook", "msh": "model/mesh", "msi": "application/x-msdownload", + "msix": "application/msix", + "msixbundle": "application/msixbundle", "msl": "application/vnd.mobius.msl", "msm": "application/octet-stream", "msp": "application/octet-stream", @@ -644,6 +694,8 @@ "ogv": "video/ogg", "ogx": "application/ogg", "omdoc": "application/omdoc+xml", + "one": "application/onenote", + "onea": "application/onenote", "onepkg": "application/onenote", "onetmp": "application/onenote", "onetoc": "application/onenote", @@ -672,6 +724,7 @@ "p": "text/x-pascal", "p10": "application/pkcs10", "p12": "application/x-pkcs12", + "p21": "model/step", "p7b": "application/x-pkcs7-certificates", "p7c": "application/pkcs7-mime", "p7m": "application/pkcs7-mime", @@ -737,6 +790,7 @@ "prf": "application/pics-rules", "provx": "application/provenance+xml", "ps": "application/postscript", + "ps1": "application/x-powershell", "psb": "application/vnd.3gpp.pic-bw-small", "psd": "image/vnd.adobe.photoshop", "psf": "application/x-font-linux-psf", @@ -747,6 +801,8 @@ "pvb": "application/vnd.3gpp.pic-bw-var", "pwn": "application/vnd.3m.post-it-notes", "pya": "audio/vnd.ms-playready.media.pya", + "pyo": "model/vnd.pytha.pyox", + "pyox": "model/vnd.pytha.pyox", "pyv": "video/vnd.ms-playready.media.pyv", "qam": "application/vnd.epson.quickanime", "qbo": "application/vnd.intu.qbo", @@ -892,10 +948,13 @@ "st": "application/vnd.sailingtracker.track", "stc": "application/vnd.sun.xml.calc.template", "std": "application/vnd.sun.xml.draw.template", + "step": "model/step", "stf": "application/vnd.wt.stf", "sti": "application/vnd.sun.xml.impress.template", "stk": "application/hyperstudio", "stl": 
"model/stl", + "stp": "model/step", + "stpnc": "model/step", "stpx": "model/step+xml", "stpxz": "model/step-xml+zip", "stpz": "model/step+zip", @@ -929,6 +988,10 @@ "tao": "application/vnd.tao.intent-module-archive", "tap": "image/vnd.tencent.tap", "tar": "application/x-tar", + "tar.bz2": "application/x-bzip2", + "tar.gz": "application/gzip", + "tar.xz": "application/x-xz", + "tar.zst": "application/zstd", "tcap": "application/vnd.3gpp2.tcap", "tcl": "application/x-tcl", "td": "application/urc-targetdesc+xml", @@ -969,6 +1032,7 @@ "txf": "application/vnd.mobius.txf", "txt": "text/plain", "u32": "application/x-authorware-bin", + "u3d": "model/u3d", "u8dsn": "message/global-delivery-status", "u8hdr": "message/global-headers", "u8mdn": "message/global-disposition-notification", @@ -980,10 +1044,12 @@ "ulx": "application/x-glulx", "umj": "application/vnd.umajin", "unityweb": "application/vnd.unity", + "uo": "application/vnd.uoml+xml", "uoml": "application/vnd.uoml+xml", "uri": "text/uri-list", "uris": "text/uri-list", "urls": "text/uri-list", + "usda": "model/vnd.usda", "usdz": "model/vnd.usdz+zip", "ustar": "application/x-ustar", "utz": "application/vnd.uiq.theme", @@ -1026,6 +1092,7 @@ "vcx": "application/vnd.vcx", "vdi": "application/x-virtualbox-vdi", "vds": "model/vnd.sap.vds", + "vdx": "application/vnd.ms-visio.viewer", "vhd": "application/x-virtualbox-vhd", "vis": "application/vnd.visionary", "viv": "video/vnd.vivo", @@ -1035,6 +1102,7 @@ "vox": "application/x-authorware-bin", "vrml": "model/vrml", "vsd": "application/vnd.visio", + "vsdx": "application/vnd.visio", "vsf": "application/vnd.vsf", "vss": "application/vnd.visio", "vst": "application/vnd.visio", @@ -1042,6 +1110,7 @@ "vtf": "image/vnd.valve.source.texture", "vtt": "text/vtt", "vtu": "model/vnd.vtu", + "vtx": "application/vnd.visio", "vxml": "application/voicexml+xml", "w3d": "application/x-director", "wad": "application/x-doom", @@ -1062,7 +1131,9 @@ "webmanifest": "application/manifest+json", "webp": 
"image/webp", "wg": "application/vnd.pmi.widget", + "wgsl": "text/wgsl", "wgt": "application/widget", + "wif": "application/watcherinfo+xml", "wks": "application/vnd.ms-works", "wm": "video/x-ms-wm", "wma": "audio/x-ms-wma", @@ -1095,8 +1166,6 @@ "x3dv": "model/x3d-vrml", "x3dvz": "model/x3d+vrml", "x3dz": "model/x3d+xml", - "x_b": "model/vnd.parasolid.transmit.binary", - "x_t": "model/vnd.parasolid.transmit.text", "xaml": "application/xaml+xml", "xap": "application/x-silverlight-app", "xar": "application/vnd.xara", @@ -1106,6 +1175,7 @@ "xbm": "image/x-xbitmap", "xca": "application/xcap-caps+xml", "xcs": "application/calendar+xml", + "xdcf": "application/vnd.gov.sk.xmldatacontainer+xml", "xdf": "application/xcap-diff+xml", "xdm": "application/vnd.syncml.dm+xml", "xdp": "application/vnd.adobe.xdp+xml", @@ -1117,6 +1187,7 @@ "xfdf": "application/vnd.adobe.xfdf", "xfdl": "application/vnd.xfdl", "xht": "application/xhtml+xml", + "xhtm": "application/vnd.pwg-xhtml-print+xml", "xhtml": "application/xhtml+xml", "xhvml": "application/xv+xml", "xif": "image/vnd.xiff", @@ -1146,6 +1217,7 @@ "xpw": "application/vnd.intercon.formnet", "xpx": "application/vnd.intercon.formnet", "xsd": "application/xml", + "xsf": "application/prs.xsf+xml", "xsl": "application/xslt+xml", "xslt": "application/xslt+xml", "xsm": "application/vnd.syncml+xml", @@ -1156,6 +1228,8 @@ "xwd": "image/x-xwindowdump", "xyz": "chemical/x-xyz", "xz": "application/x-xz", + "x_b": "model/vnd.parasolid.transmit.binary", + "x_t": "model/vnd.parasolid.transmit.text", "yaml": "text/yaml", "yang": "application/yang", "yin": "application/yin+xml", @@ -1173,6 +1247,5 @@ "zip": "application/zip", "zir": "application/vnd.zul", "zirz": "application/vnd.zul", - "zmm": "application/vnd.handheld-entertainment+xml", - "event_stream":"text/event-stream" + "zmm": "application/vnd.handheld-entertainment+xml" } \ No newline at end of file diff --git a/README.md b/README.md index 2506e42..71d4bff 100644 --- a/README.md +++ 
b/README.md @@ -7,58 +7,81 @@ [![nuget](https://github.com/managedcode/MimeTypes/actions/workflows/nuget.yml/badge.svg?branch=main)](https://github.com/managedcode/MimeTypes/actions/workflows/nuget.yml) [![CodeQL](https://github.com/managedcode/MimeTypes/actions/workflows/codeql-analysis.yml/badge.svg?branch=main)](https://github.com/managedcode/MimeTypes/actions/workflows/codeql-analysis.yml) -| Version | Package | Description | -| ------- |-------------------------------------------------------------------------------------------------------------------------------------|-----------------| -|[![NuGet Package](https://img.shields.io/nuget/v/ManagedCode.MimeTypes.svg)](https://www.nuget.org/packages/ManagedCode.MimeTypes) | [ManagedCode.MimeTypes](https://www.nuget.org/packages/ManagedCode.MimeTypes) | Core | +| Version | Package | Description | +| ------- | ------- | ----------- | +|[![NuGet Package](https://img.shields.io/nuget/v/ManagedCode.MimeTypes.svg)](https://www.nuget.org/packages/ManagedCode.MimeTypes)|[ManagedCode.MimeTypes](https://www.nuget.org/packages/ManagedCode.MimeTypes)|Core library| --- +## Why MimeTypes? +MIME (Multipurpose Internet Mail Extensions) values describe the media type of a payload. They appear in HTTP headers, file upload + workflows, messaging protocols and countless integrations. Unfortunately the canonical values are long strings, which makes code + prone to typos and hard to validate. + +`ManagedCode.MimeTypes` ships a generated helper with more than **1,200** extensions sourced from the [jshttp/mime-db](https://github.com/jshttp/mime-db) +project, [Apache's canonical `mime.types` list](https://github.com/apache/httpd/blob/trunk/docs/conf/mime.types) and curated overrides, smart heuristics for multi-part extensions (such as `.tar.gz`), runtime registration APIs and rich helpers for detecting and + categorising data by content. 
+ +## Feature overview +* Generated extension → MIME map based on the latest mime-db dataset (plus curated compound extensions such as `tar.gz`, `d.ts`, `ps1`, …). +* Reverse lookup API that returns the extensions known for a given MIME value. +* Runtime registration/unregistration so applications can plug in custom corporate formats. +* Content sniffing for common file signatures (PDF, PNG, JPEG, GIF, WebP, MP4, ZIP/OOXML, ODF, APK, etc.) with graceful handling of short or empty streams. +* Extended categorisation enum covering document, audio/video, script, binary, multipart and message families with convenience predicates. +* CLI utility to refresh `mimeTypes.json` directly from `mime-db` or a custom source. + +## Quick start +```csharp +using ManagedCode.MimeTypes; -## Motivation -MIME (Multipurpose Internet Mail Extensions) types are used to specify the type of data that a file contains, such as text, images, or video. These types are often used in web development to indicate the type of content in HTTP responses. - -Working with MIME types in C# can be cumbersome, as they are typically represented as strings. This can make it difficult to ensure the correct usage and spelling of MIME types, and can lead to errors and inconsistencies in your code. +// Extension based lookup (handles multi-part extensions automatically) +var gzip = MimeHelper.GetMimeType("archive.tar.gz"); // application/gzip +var typeScript = MimeHelper.GetMimeType("module.d.ts"); // application/typescript -Our project, MimeType, provides a convenient way to work with MIME types in C#. It defines a set of properties for each MIME type, allowing you to use properties instead of strings in your code. This makes it easy to ensure the correct usage and spelling of MIME types, and can make your code more readable and maintainable. 
+// Content-based detection +using var stream = File.OpenRead("report.pdf"); +var detected = MimeHelper.GetMimeTypeByContent(stream); // application/pdf -## Features -Defines a set of properties for each MIME type, allowing you to use properties instead of strings in your code. -Makes it easy to ensure the correct usage and spelling of MIME types. -Improves the readability and maintainability of your code. +// Categorisation helpers +if (MimeHelper.IsDocument(detected)) +{ + // do something useful +} -## Example -Here's an example of how you might use the MimeType project to specify the content type of an HTTP response in C#: +// Reverse lookup +var jpegExtensions = MimeHelper.GetExtensions("image/jpeg"); // .jpeg, .jpg, .jpe -``` csharp -using ManagedCode.MimeTypes; -``` -``` csharp -// Set the content type of the response to "text/plain". -response.ContentType = MimeType.TextPlain; +// Runtime registration (and clean-up) +MimeHelper.RegisterMimeType("acme", "application/x-acme"); +var custom = MimeHelper.GetMimeType("invoice.acme"); +MimeHelper.UnregisterMimeType("acme"); ``` -## Installation -To install the MimeType project, you can use NuGet: +## Keeping the database fresh +A small console utility is included to synchronise `mimeTypes.json` with upstream datasets and our curated overrides. The repository also +ships a scheduled GitHub Actions workflow that runs the sync tool weekly and opens a pull request whenever new MIME definitions are published. 
-``` csharp -dotnet add package ManagedCode.MimeTypes -``` +```bash +# Update the data file in-place +dotnet run --project ManagedCode.MimeTypes.Sync -## Usage -To use the MimeType project, you will need to add a reference to the MimeType namespace in your C# code: +# Provide custom sources or output +DOTNET_CLI_TELEMETRY_OPTOUT=1 dotnet run --project ManagedCode.MimeTypes.Sync -- \ + --source https://example.com/primary-mime-db.json \ + --add-source https://example.com/additional-mime-map.json \ + --output ./artifacts/mimeTypes.json -``` csharp -using MimeType; +# Start with a clean slate and prefer remote definitions +dotnet run --project ManagedCode.MimeTypes.Sync -- --reset-sources --prefer-remote ``` -Then, you can use the properties defined by the MimeType class to specify MIME types in your code. For example: -``` csharp -// Set the content type of the response to "application/pdf". -response.ContentType = MimeHelper.PDF; +Running the tool re-generates the JSON file, which in turn updates the generated helper during the next build. -// Set the content type of the response to ""text/plain"". -response.ContentType = MimeHelper.GetMimeType("file.txt"); +## Installation +```bash +dotnet add package ManagedCode.MimeTypes ``` -## Conclusion -In summary, the MimeType project provides a convenient and easy-to-use way to work with MIME types in C#. Its properties make it easy to ensure the correct usage and spelling of MIME types, and can improve the readability and maintainability of your code. We hope you find it useful in your own projects! +## Contributing +Issues and PRs are welcome! Run `dotnet test` before sending a contribution, and feel free to use the sync utility to keep the MIME +catalogue current. 
From 0c8ea83ffa3763cd0c327ee7986975a81b13091b Mon Sep 17 00:00:00 2001 From: ksemenenko Date: Sat, 11 Oct 2025 18:03:15 +0200 Subject: [PATCH 2/4] Improve extension normalization performance --- ManagedCode.MimeTypes/MimeHelper.cs | 46 +++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/ManagedCode.MimeTypes/MimeHelper.cs b/ManagedCode.MimeTypes/MimeHelper.cs index f6fff54..94712f8 100644 --- a/ManagedCode.MimeTypes/MimeHelper.cs +++ b/ManagedCode.MimeTypes/MimeHelper.cs @@ -31,6 +31,8 @@ public static partial class MimeHelper private static readonly Regex ScriptPattern = new(@"^(?:application|text)/(?:javascript|ecmascript|x-php|x-sh|x-shellscript|x-python|x-ruby|x-perl)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly SearchValues QueryFragmentSeparators = SearchValues.Create("?#"); + private static readonly HashSet ScriptMimeSet = new(StringComparer.OrdinalIgnoreCase) { "application/javascript", @@ -403,7 +405,7 @@ private static IEnumerable EnumerateExtensionCandidates(string value) yield break; } - var separatorIndex = trimmed.IndexOfAny(new[] { '?', '#' }); + var separatorIndex = trimmed.AsSpan().IndexOfAny(QueryFragmentSeparators); if (separatorIndex >= 0) { trimmed = trimmed[..separatorIndex]; @@ -441,10 +443,12 @@ private static IEnumerable EnumerateExtensionCandidates(string value) } var yielded = new HashSet(StringComparer.OrdinalIgnoreCase); + var sanitized = fileName.Trim('.'); var index = fileName.IndexOf('.'); while (index >= 0 && index < fileName.Length - 1) { - var candidate = fileName[(index + 1)..].Trim('.'); + var remainder = fileName[(index + 1)..]; + var candidate = remainder.Trim('.'); if (candidate.Length > 0 && yielded.Add(candidate)) { yield return candidate; @@ -453,7 +457,6 @@ private static IEnumerable EnumerateExtensionCandidates(string value) index = fileName.IndexOf('.', index + 1); } - var sanitized = fileName.Trim('.'); if (sanitized.Length > 0 && 
yielded.Add(sanitized)) { yield return sanitized; @@ -467,8 +470,41 @@ private static string NormalizeExtensionKey(string extension) return string.Empty; } - var normalized = extension.Trim().TrimStart('.'); - return normalized.ToLowerInvariant(); + var span = extension.AsSpan(); + + var start = 0; + var end = span.Length - 1; + + while (start <= end && char.IsWhiteSpace(span[start])) + { + start++; + } + + while (end >= start && char.IsWhiteSpace(span[end])) + { + end--; + } + + while (start <= end && span[start] == '.') + { + start++; + } + + if (start > end) + { + return string.Empty; + } + + var length = end - start + 1; + + return string.Create(length, (extension, start, length), static (destination, state) => + { + var (source, offset, count) = state; + for (var i = 0; i < count; i++) + { + destination[i] = char.ToLowerInvariant(source[offset + i]); + } + }); } private static void RegisterMimeTypeInternal(string extension, string mime) From 195f0cfcf392242a650b378dbb2d05160024a21e Mon Sep 17 00:00:00 2001 From: ksemenenko Date: Sat, 11 Oct 2025 18:17:38 +0200 Subject: [PATCH 3/4] Restore ArrayPool import --- ManagedCode.MimeTypes.Sync/Program.cs | 4 +- ManagedCode.MimeTypes.Tests/GeneratorTests.cs | 17 +++---- ManagedCode.MimeTypes/MimeHelper.cs | 46 ++++++++----------- 3 files changed, 28 insertions(+), 39 deletions(-) diff --git a/ManagedCode.MimeTypes.Sync/Program.cs b/ManagedCode.MimeTypes.Sync/Program.cs index 184be6b..b8f6dbc 100644 --- a/ManagedCode.MimeTypes.Sync/Program.cs +++ b/ManagedCode.MimeTypes.Sync/Program.cs @@ -16,8 +16,6 @@ internal static class MimeTypeSyncTool "https://raw.githubusercontent.com/apache/httpd/trunk/docs/conf/mime.types" }; - private static readonly char[] MimeTypeSeparators = new[] { ' ', '\t' }; - public static async Task RunAsync(string[] args) { try @@ -147,7 +145,7 @@ private static Dictionary ParseMimeTypesListing(string source, b continue; } - var parts = line.Split(MimeTypeSeparators, 
StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + var parts = line.Split(' ', '\t', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); if (parts.Length < 2) { continue; diff --git a/ManagedCode.MimeTypes.Tests/GeneratorTests.cs b/ManagedCode.MimeTypes.Tests/GeneratorTests.cs index bd06a76..7a2222f 100644 --- a/ManagedCode.MimeTypes.Tests/GeneratorTests.cs +++ b/ManagedCode.MimeTypes.Tests/GeneratorTests.cs @@ -61,21 +61,18 @@ public void GetExtensionsShouldReturnKnownExtensions() [Fact] public void RuntimeRegistrationShouldUpdateLookups() { - const string extension = "customext"; - const string mime = "application/x-custom"; - try { - MimeHelper.RegisterMimeType(extension, mime); - MimeHelper.GetMimeType($"file.{extension}").ShouldBe(mime); + MimeHelper.RegisterMimeType("customext", "application/x-custom"); + MimeHelper.GetMimeType("file.customext").ShouldBe("application/x-custom"); - MimeHelper.TryGetExtensions(mime, out var extensions).ShouldBeTrue(); - extensions.ShouldContain($".{extension}"); + MimeHelper.TryGetExtensions("application/x-custom", out var extensions).ShouldBeTrue(); + extensions.ShouldContain(".customext"); } finally { - MimeHelper.UnregisterMimeType(extension).ShouldBeTrue(); - MimeHelper.GetMimeType(extension).ShouldBe("application/octet-stream"); + MimeHelper.UnregisterMimeType("customext").ShouldBeTrue(); + MimeHelper.GetMimeType("customext").ShouldBe("application/octet-stream"); } } -} \ No newline at end of file +} diff --git a/ManagedCode.MimeTypes/MimeHelper.cs b/ManagedCode.MimeTypes/MimeHelper.cs index 94712f8..5ca5002 100644 --- a/ManagedCode.MimeTypes/MimeHelper.cs +++ b/ManagedCode.MimeTypes/MimeHelper.cs @@ -31,7 +31,7 @@ public static partial class MimeHelper private static readonly Regex ScriptPattern = new(@"^(?:application|text)/(?:javascript|ecmascript|x-php|x-sh|x-shellscript|x-python|x-ruby|x-perl)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly 
SearchValues QueryFragmentSeparators = SearchValues.Create("?#"); + private static readonly char[] QueryFragmentSeparators = { '?', '#' }; private static readonly HashSet ScriptMimeSet = new(StringComparer.OrdinalIgnoreCase) { @@ -605,40 +605,34 @@ private static int ReadUpTo(Stream stream, byte[] buffer, int count) { if (header.Length >= 12 && header[..4].SequenceEqual(RiffSignature)) { - if (header.Length >= 12) + var format = header.Slice(8, 4); + if (format.SequenceEqual(WebpFourCC)) { - var format = header.Slice(8, 4); - if (format.SequenceEqual(WebpFourCC)) - { - return "image/webp"; - } + return "image/webp"; + } - if (format.SequenceEqual(AviFourCC)) - { - return "video/x-msvideo"; - } + if (format.SequenceEqual(AviFourCC)) + { + return "video/x-msvideo"; + } - if (format.SequenceEqual(WaveFourCC)) - { - return "audio/wav"; - } + if (format.SequenceEqual(WaveFourCC)) + { + return "audio/wav"; } } - if (header.Length >= 12 && header.Slice(4, Math.Min(4, header.Length - 4)).SequenceEqual(FtypFourCC)) + if (header.Length >= 12 && header.Slice(4, 4).SequenceEqual(FtypFourCC)) { - if (header.Length >= 12) + var brand = header.Slice(8, 4); + if (IsMp4Brand(brand)) { - var brand = header.Slice(8, Math.Min(4, header.Length - 8)); - if (IsMp4Brand(brand)) - { - return "video/mp4"; - } + return "video/mp4"; + } - if (brand.SequenceEqual(QuickTimeBrand)) - { - return "video/quicktime"; - } + if (brand.SequenceEqual(QuickTimeBrand)) + { + return "video/quicktime"; } } From 4d46481778af59520e1233d55b8f145da265ee37 Mon Sep 17 00:00:00 2001 From: ksemenenko Date: Sat, 11 Oct 2025 18:26:52 +0200 Subject: [PATCH 4/4] Update ManagedCode.MimeTypes/MimeHelper.cs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- ManagedCode.MimeTypes/MimeHelper.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ManagedCode.MimeTypes/MimeHelper.cs b/ManagedCode.MimeTypes/MimeHelper.cs index 5ca5002..22b90e9 100644 --- a/ManagedCode.MimeTypes/MimeHelper.cs 
+++ b/ManagedCode.MimeTypes/MimeHelper.cs @@ -31,7 +31,7 @@ public static partial class MimeHelper private static readonly Regex ScriptPattern = new(@"^(?:application|text)/(?:javascript|ecmascript|x-php|x-sh|x-shellscript|x-python|x-ruby|x-perl)$", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly char[] QueryFragmentSeparators = { '?', '#' }; + private static readonly SearchValues QueryFragmentSeparators = SearchValues.Create(new[] { '?', '#' }); private static readonly HashSet ScriptMimeSet = new(StringComparer.OrdinalIgnoreCase) {