diff --git a/PathReduxTests/HashCodes/ControllableHashCode.cs b/PathReduxTests/HashCodes/ControllableHashCode.cs new file mode 100644 index 0000000..13e80d5 --- /dev/null +++ b/PathReduxTests/HashCodes/ControllableHashCode.cs @@ -0,0 +1,44 @@ +using System; +using System.Collections.Generic; +using System.Text; +using YellowCounter.FileSystemState.HashCodes; + +namespace PathReduxTests.HashCodes +{ + public class ControllableHashCode : IHashCode + { + private StringBuilder stringBuilder = new StringBuilder(); + private bool dead = false; + + public void Add(char value) + { + stringBuilder.Append(value); + } + + public int ToHashCode() + { + deadCheck(); + + string arg = stringBuilder.ToString(); + + // Use comma as delimiter between desired hash number and remaining text. + int commaPos = arg.IndexOf(','); + + if(commaPos == -1) + throw new Exception($"{nameof(ControllableHashCode)} requires , in each string"); + + if(int.TryParse(arg.Substring(0, commaPos), out int result)) + return result; + + throw new Exception("Text before , must be an integer"); + } + + private void deadCheck() + { + if(dead) + throw new Exception("Cannot call ToHashCode() twice"); + + dead = true; + } + } +} diff --git a/PathReduxTests/HashCodes/DeterministicHashCode.cs b/PathReduxTests/HashCodes/DeterministicHashCode.cs new file mode 100644 index 0000000..43cf462 --- /dev/null +++ b/PathReduxTests/HashCodes/DeterministicHashCode.cs @@ -0,0 +1,55 @@ +using System; +using System.Collections.Generic; +using System.Text; +using YellowCounter.FileSystemState.HashCodes; + +namespace PathReduxTests.HashCodes +{ + // Want a deterministic hash function so our tests are repeatable. + // https://andrewlock.net/why-is-string-gethashcode-different-each-time-i-run-my-program-in-net-core/ + + public class DeterministicHashCode : IHashCode + { + private bool dead = false; + private bool odd = false; + private int hash1 = 352654597; //(5381 << 16) + 5381; + private int hash2 = 352654597; + + public void Add(char value) + { + unchecked + { + if(!odd) + { + hash1 = ((hash1 << 5) + hash1) ^ value; + + } + else + { + hash2 = ((hash2 << 5) + hash2) ^ value; + + } + } + + odd = !odd; + } + + public int ToHashCode() + { + deadCheck(); + + unchecked + { + return hash1 + (hash2 * 1566083941); + } + } + + private void deadCheck() + { + if(dead) + throw new Exception("Cannot call ToHashCode() twice"); + + dead = true; + } + } +} diff --git a/PathReduxTests/PathRedux/CharBufferTests.cs b/PathReduxTests/PathRedux/CharBufferTests.cs new file mode 100644 index 0000000..2a7d9d1 --- /dev/null +++ b/PathReduxTests/PathRedux/CharBufferTests.cs @@ -0,0 +1,109 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.Collections.Generic; +using System.Text; +using YellowCounter.FileSystemState.PathRedux; +using Shouldly; + +namespace YellowCounter.FileSystemState.Tests.PathRedux +{ + [TestClass] + public class CharBufferTests + { + [TestMethod] + public void CharBuffer1() + { + var charBuffer = new CharBuffer(100); + + int idx1 = charBuffer.Store("Hello"); + int idx2 = charBuffer.Store("World"); + + charBuffer.Retrieve(idx1).ToString().ShouldBe("Hello"); + charBuffer.Retrieve(idx2).ToString().ShouldBe("World"); + } + + [TestMethod] + public void CharBuffer2() + { + var charBuffer = new CharBuffer(100); + + int idx1 = charBuffer.Store("Hello"); + int idx2 = charBuffer.Store("World"); + + charBuffer.Retrieve(new[] { idx1, idx2 }).ToString().ShouldBe("HelloWorld"); + } + + [TestMethod] + public void CharBufferRealloc() + { + var charBuffer = new CharBuffer(13); + + int idx1 = charBuffer.Store("Hello"); + int idx2 = charBuffer.Store("World"); + + var helloSpan = charBuffer.Retrieve(idx1); + + var worldSpan = charBuffer.Retrieve(idx2); + + charBuffer.Resize(25); + + // These spans are still pointing at the old buffer - how does it avoid + // freeing up the memory? + helloSpan.ToString().ShouldBe("Hello"); + worldSpan.ToString().ShouldBe("World"); + + var hello2Span = charBuffer.Retrieve(idx1); + var world2Span = charBuffer.Retrieve(idx2); + + hello2Span.ToString().ShouldBe("Hello"); + world2Span.ToString().ShouldBe("World"); + } + + [TestMethod] + public void CharBufferEnumerate() + { + var charBuffer = new CharBuffer(100); + + int idx1 = charBuffer.Store("Hello"); + int idx2 = charBuffer.Store("World"); + + var results = new List(); + foreach(var item in charBuffer) + { + results.Add(item.Span.ToString()); + } + + results.ShouldBe(new[] { "Hello", "World" }); + } + + [TestMethod] + public void CharBufferMaxCapacity() + { + // To store the text "Hello" without expanding, we need 5 chars for Hello, + // 1 char for the null terminator of Hello, and 1 char for the null terminator + // of the overall buffer. + var charBuffer = new CharBuffer(7); + + int idx1 = charBuffer.Store("Hello"); + idx1.ShouldNotBe(-1); + charBuffer.Capacity.ShouldBe(7); + + charBuffer.Retrieve(idx1).ToString().ShouldBe("Hello"); + + int c = 0; + foreach(var itm in charBuffer) + { + if(c == 0) + { + itm.Pos.ShouldBe(0); + itm.Span.ToString().ShouldBe("Hello"); + } + else + { + throw new Exception("Should only have one item"); + } + c++; + } + } + } +} diff --git a/PathReduxTests/PathRedux/HashBucketTests.cs b/PathReduxTests/PathRedux/HashBucketTests.cs new file mode 100644 index 0000000..19a906a --- /dev/null +++ b/PathReduxTests/PathRedux/HashBucketTests.cs @@ -0,0 +1,106 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.Collections.Generic; +using System.Text; +using YellowCounter.FileSystemState.PathRedux; +using Shouldly; + +namespace PathReduxTests.PathRedux +{ + [TestClass] + public class HashBucketTests + { + [TestMethod] + public void HashBucketStoreRetrieve() + { + var m = new HashBucket(2, 2); + + m.Store(0, 123456).ShouldBe(true); + m.Store(0, 765432).ShouldBe(true); + + var result = m.Retrieve(0); + + result.ToArray().ShouldBe(new[] { 123456, 765432 }); + } + + [TestMethod] + public void HashBucketStoreFlowpast() + { + var m = new HashBucket(2, 2); + + m.Store(1, 123456).ShouldBe(true); + m.Store(1, 765432).ShouldBe(true); + + var result = m.Retrieve(1); + + result.ToArray().ShouldBe(new[] { 123456, 765432 }); + } + + [TestMethod] + public void HashBucketStoreZero() + { + var m = new HashBucket(2, 2); + + // It can store a zero + m.Store(0, 0).ShouldBe(true); + + var result = m.Retrieve(0); + result.ToArray().ShouldBe(new[] { 0 }); + } + + [TestMethod] + public void HashBucketChainLimit() + { + var m = new HashBucket(8, 2); + + m.Store(0, 100).ShouldBe(true); + m.Store(0, 200).ShouldBe(true); + m.Store(0, 300).ShouldBe(false); + + var result = m.Retrieve(0); + + result.ToArray().ShouldBe(new[] { 100, 200 }); + } + + [TestMethod] + public void HashBucketOverlap() + { + var m = new HashBucket(8, 8); + + // The values are going to overlap. + m.Store(0, 100).ShouldBe(true); + m.Store(1, 200).ShouldBe(true); + m.Store(0, 300).ShouldBe(true); + + var result = m.Retrieve(0); + + result.ToArray().ShouldBe(new[] { 100, 200, 300 }); + } + + [TestMethod] + public void HashBucketOverlapLimited() + { + var m = new HashBucket(8, 2); + + // If we set the max chain to a lower value then the overlap + // won't occur. + m.Store(0, 100).ShouldBe(true); + m.Store(1, 200).ShouldBe(true); + m.Store(0, 300).ShouldBe(false); + + m.Retrieve(0).ToArray().ShouldBe(new[] { 100, 200 }); + m.Retrieve(1).ToArray().ShouldBe(new[] { 200 }); + } + + [TestMethod] + public void HashBucketWraparound() + { + var m = new HashBucket(4, 2); + + m.Store(3, 100).ShouldBe(true); + m.Store(3, 200).ShouldBe(true); + + m.Retrieve(3).ToArray().ShouldBe(new[] { 100, 200 }); + } + } +} diff --git a/PathReduxTests/PathRedux/HashedCharBufferTests.cs b/PathReduxTests/PathRedux/HashedCharBufferTests.cs new file mode 100644 index 0000000..d435c3b --- /dev/null +++ b/PathReduxTests/PathRedux/HashedCharBufferTests.cs @@ -0,0 +1,114 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using NSubstitute; +using PathReduxTests.HashCodes; +using Shouldly; +using System; +using System.Collections.Generic; +using System.Text; +using YellowCounter.FileSystemState.PathRedux; + +namespace PathReduxTests.PathRedux +{ + [TestClass] + public class HashedCharBufferTests + { + + [TestMethod] + public void HashedCharBufferAddAndRetrieveNoClash() + { + var buf = new HashedCharBuffer(new HashedCharBufferOptions() + { + NewHashCode = () => new DeterministicHashCode(), + InitialCharCapacity = 20, + InitialHashCapacity = 16, + LinearSearchLimit = 3 + }); + + buf.Store("Hello"); + buf.Store("World"); + + buf.Find("Hello").ShouldBe(0); + buf.Find("World").ShouldBe(6); + + buf.Retrieve(0).ToString().ShouldBe("Hello"); + buf.Retrieve(6).ToString().ShouldBe("World"); + } + + [TestMethod] + public void HashedCharBufferAddAndRetrieveClash() + { + var buf = new HashedCharBuffer(new HashedCharBufferOptions() + { + NewHashCode = () => new ControllableHashCode(), + InitialCharCapacity = 20, + InitialHashCapacity = 16, + LinearSearchLimit = 3 + }); + + buf.Store("1,Hello"); + buf.Store("1,World"); + + //// Confirm that both strings the same hashcode. + //buf.HashFunction.HashSequence("1,Hello").ShouldBe(1); + //buf.HashFunction.HashSequence("1,World").ShouldBe(1); + + buf.Find("1,Hello").ShouldBe(0); + buf.Find("1,World").ShouldBe(8); + + buf.Retrieve(0).ToString().ShouldBe("1,Hello"); + buf.Retrieve(8).ToString().ShouldBe("1,World"); + } + + [TestMethod] + public void HashedCharBufferHashCollision() + { + // Allow only 1 item in the linear search phase + var buf = new HashedCharBuffer(new HashedCharBufferOptions() + { + NewHashCode = () => new ControllableHashCode(), + InitialCharCapacity = 20, + InitialHashCapacity = 16, + LinearSearchLimit = 1 + }); + + buf.Store("1,Hello"); + + Should.Throw(() => + { + buf.Store("1,World"); + }, typeof(Exception)).Message.ShouldBe("Too many hash collisions. Increase LinearSearchLimit to overcome."); + } + + [TestMethod] + public void HashedCharBufferAddAndRetrieveClashRunOutX() + { + + // Allow 1 items in the linear search phase + var buf = new HashedCharBuffer(new HashedCharBufferOptions() + { + NewHashCode = () => new ControllableHashCode(), + InitialCharCapacity = 20, + InitialHashCapacity = 8, + LinearSearchLimit = 1 + }); + + buf.HashCapacity.ShouldBe(8); + + // Fix the hash codes to the same value modulo 8 + + buf.Store("1,Hello"); + buf.Store("9,World"); + + buf.Find("1,Hello").ShouldBe(0); + buf.Find("9,World").ShouldBe(8); + + buf.Retrieve(0).ToString().ShouldBe("1,Hello"); + buf.Retrieve(8).ToString().ShouldBe("9,World"); + + // Hash capacity will have doubled to avoid clash of hashes + // 1 % 8 and 9 % 8 + // Once we double, we get 16 hash buckets so clash avoided. + buf.HashCapacity.ShouldBe(16); + } + } +} diff --git a/PathReduxTests/PathRedux/PathStorageTests.cs b/PathReduxTests/PathRedux/PathStorageTests.cs new file mode 100644 index 0000000..f8496a4 --- /dev/null +++ b/PathReduxTests/PathRedux/PathStorageTests.cs @@ -0,0 +1,67 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.Collections.Generic; +using System.Text; +using YellowCounter.FileSystemState.PathRedux; +using Shouldly; +using PathReduxTests.HashCodes; + +namespace PathReduxTests.PathRedux +{ + [TestClass] + public class PathStorageTests + { + [TestMethod] + public void PathStorage1() + { + // Trying to trigger it rebuilding the text -> character buffer + var ps = new PathStorage(new PathStorageOptions() + { + NewHashCode = () => new DeterministicHashCode(), + InitialCharCapacity = 4, + InitialHashCapacity = 2, + LinearSearchLimit = 128, + HashBucketMaxChain = 128, + HashBucketInitialCapacity = 2, + }); + + var results = new List(); + + results.Add(ps.Store(@"C:\abc")); + results.Add(ps.Store(@"C:\abc\xyz")); + results.Add(ps.Store(@"C:\abc\cde")); + results.Add(ps.Store(@"C:\mmm\cde")); + results.Add(ps.Store(@"C:\abc")); + + ps.CreateString(results[0]).ShouldBe(@"C:\abc"); + ps.CreateString(results[1]).ShouldBe(@"C:\abc\xyz"); + ps.CreateString(results[2]).ShouldBe(@"C:\abc\cde"); + ps.CreateString(results[3]).ShouldBe(@"C:\mmm\cde"); + results[4].ShouldBe(results[0]); + } + + [TestMethod] + public void PathStorage2() + { + // Trying to trigger it rebuilding the text -> character buffer + var ps = new PathStorage(new PathStorageOptions() + { + NewHashCode = () => new DeterministicHashCode(), + InitialCharCapacity = 4, + InitialHashCapacity = 2, + LinearSearchLimit = 128, + HashBucketMaxChain = 128, + HashBucketInitialCapacity = 2, + }); + + var results = new List(); + + results.Add(ps.Store(@"C:\abc")); + results.Add(ps.Store(@"C:\abc\xyz")); + results.Add(ps.Store(@"C:\abc")); + + ps.CreateString(results[0]).ShouldBe(@"C:\abc"); + results[2].ShouldBe(results[0]); + } + } +} diff --git a/PathReduxTests/PathReduxTests.csproj b/PathReduxTests/PathReduxTests.csproj new file mode 100644 index 0000000..082d74e --- /dev/null +++ b/PathReduxTests/PathReduxTests.csproj @@ -0,0 +1,23 @@ + + + + netcoreapp3.1 + + false + + + + + + + + + + + + + + + + + diff --git a/PathReduxTests/Watcher/WatcherTests.cs b/PathReduxTests/Watcher/WatcherTests.cs new file mode 100644 index 0000000..31796d9 --- /dev/null +++ b/PathReduxTests/Watcher/WatcherTests.cs @@ -0,0 +1,45 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; +using YellowCounter.FileSystemState; +using Shouldly; + +namespace PathReduxTests.Watcher +{ + [TestClass] + public class WatcherTests + { + [TestMethod] + public void FileSystemWatcherNoChange() + { + var dir = GetRandomDirectory(); + + try + { + + File.WriteAllText(Path.Combine(dir, "text1.txt"), "Hello"); + File.WriteAllText(Path.Combine(dir, "blah.txt"), "Hello"); + + var watcher = new FileSystemState(dir, options: new EnumerationOptions { RecurseSubdirectories = true }); + watcher.LoadState(); + + var q = watcher.GetChanges(); + q.Count.ShouldBe(0); + + } + finally + { + Directory.Delete(dir, true); + } + } + + private string GetRandomDirectory() + { + var path = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); + Directory.CreateDirectory(path); + return path; + } + } +} diff --git a/YellowCounter.FileSystemState.Tests/ReadOnlySpanCharHashing.cs b/YellowCounter.FileSystemState.Tests/ReadOnlySpanCharHashing.cs new file mode 100644 index 0000000..c42bd8a --- /dev/null +++ b/YellowCounter.FileSystemState.Tests/ReadOnlySpanCharHashing.cs @@ -0,0 +1,23 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Xunit; +using System.Linq; +using YellowCounter.FileSystemState; + +namespace YellowCounter.FileSystemState.Tests +{ + public class ReadOnlySpanCharHashing + { + [Fact] + public void Test1() + { + var x = new ReadOnlySpan("Hello".ToCharArray()); + var y = new ReadOnlySpan("Hello".ToCharArray()); + + // Note that each run of the program gets a new key so we can't rely + // on a specific fixed value. + Assert.Equal(x.GetHashOfContents(), y.GetHashOfContents()); + } + } +} diff --git a/YellowCounter.FileSystemState.Tests/SerializableTests.cs b/YellowCounter.FileSystemState.Tests/SerializableTests.cs index 49f8dbe..cc4be45 100644 --- a/YellowCounter.FileSystemState.Tests/SerializableTests.cs +++ b/YellowCounter.FileSystemState.Tests/SerializableTests.cs @@ -4,83 +4,83 @@ public class FileSystemStateSerializableTests { - [Fact] - public void RoundTripDoesNotAffectOriginalTest() - { - string currentDir = Utility.GetRandomDirectory(); - string fileName = Path.GetRandomFileName() + ".txt"; - string fullName = Path.Combine(currentDir, fileName); + //[Fact] + //public void RoundTripDoesNotAffectOriginalTest() + //{ + // string currentDir = Utility.GetRandomDirectory(); + // string fileName = Path.GetRandomFileName() + ".txt"; + // string fullName = Path.Combine(currentDir, fileName); - FileSystemState state = new FileSystemState(currentDir, "*.csv"); - FileSystemState state2 = new FileSystemState(currentDir, "*.txt"); + // FileSystemState state = new FileSystemState(currentDir, "*.csv"); + // FileSystemState state2 = new FileSystemState(currentDir, "*.txt"); - state.LoadState(); - RoundTrip(state, state2); + // state.LoadState(); + // RoundTrip(state, state2); - using (var file = File.Create(fullName)) { } + // using (var file = File.Create(fullName)) { } - try - { - Assert.Empty(state.GetChanges()); - Assert.Single(state2.GetChanges()); - } - finally - { - Directory.Delete(currentDir, true); - } - } + // try + // { + // Assert.Empty(state.GetChanges()); + // Assert.Single(state2.GetChanges()); + // } + // finally + // { + // Directory.Delete(currentDir, true); + // } + //} - [Fact] - public void RoundTripVersionReset_NoChanges_Test() - { - string currentDir = Utility.GetRandomDirectory(); - string fileName = Path.GetRandomFileName(); - string fullName = Path.Combine(currentDir, fileName); - using (var file = File.Create(fullName)) { } + //[Fact] + //public void RoundTripVersionReset_NoChanges_Test() + //{ + // string currentDir = Utility.GetRandomDirectory(); + // string fileName = Path.GetRandomFileName(); + // string fullName = Path.Combine(currentDir, fileName); + // using (var file = File.Create(fullName)) { } - FileSystemState state = new FileSystemState(currentDir); - state.LoadState(); - state.GetChanges(); + // FileSystemState state = new FileSystemState(currentDir); + // state.LoadState(); + // state.GetChanges(); - FileSystemState state2 = new FileSystemState(currentDir); - RoundTrip(state, state2); + // FileSystemState state2 = new FileSystemState(currentDir); + // RoundTrip(state, state2); - try - { - Assert.Empty(state.GetChanges()); - Assert.Empty(state2.GetChanges()); - } - finally - { - Directory.Delete(currentDir, true); - } - } + // try + // { + // Assert.Empty(state.GetChanges()); + // Assert.Empty(state2.GetChanges()); + // } + // finally + // { + // Directory.Delete(currentDir, true); + // } + //} - [Fact] - public void RoundTripVersionReset_Deletion_Test() - { - string currentDir = Utility.GetRandomDirectory(); - string fileName = Path.GetRandomFileName(); - string fullName = Path.Combine(currentDir, fileName); - using (var file = File.Create(fullName)) { } + //[Fact] + //public void RoundTripVersionReset_Deletion_Test() + //{ + // string currentDir = Utility.GetRandomDirectory(); + // string fileName = Path.GetRandomFileName(); + // string fullName = Path.Combine(currentDir, fileName); + // using (var file = File.Create(fullName)) { } - FileSystemState state = new FileSystemState(currentDir); - state.LoadState(); + // FileSystemState state = new FileSystemState(currentDir); + // state.LoadState(); - FileSystemState state2 = new FileSystemState(currentDir); - RoundTrip(state, state2); - File.Delete(fullName); + // FileSystemState state2 = new FileSystemState(currentDir); + // RoundTrip(state, state2); + // File.Delete(fullName); - try - { - Assert.Single(state.GetChanges()); - Assert.Single(state2.GetChanges()); - } - finally - { - Directory.Delete(currentDir, true); - } - } + // try + // { + // Assert.Single(state.GetChanges()); + // Assert.Single(state2.GetChanges()); + // } + // finally + // { + // Directory.Delete(currentDir, true); + // } + //} private static void RoundTrip(FileSystemState source, FileSystemState destination) { diff --git a/YellowCounter.FileSystemState.Tests/UnitTests.cs b/YellowCounter.FileSystemState.Tests/UnitTests.cs index bffd142..7044f41 100644 --- a/YellowCounter.FileSystemState.Tests/UnitTests.cs +++ b/YellowCounter.FileSystemState.Tests/UnitTests.cs @@ -11,7 +11,7 @@ public static void FileSystemWatcher_ctor_Defaults() string path = Environment.CurrentDirectory; var watcher = new FileSystemState(path); - Assert.Equal(path, watcher.Path); + Assert.Equal(path, watcher.RootDir); Assert.Equal("*", watcher.Filter); Assert.NotNull(watcher.EnumerationOptions); } @@ -23,7 +23,7 @@ public static void FileSystemWatcher_ctor_OptionalParams() const string filter = "*.csv"; var watcher = new FileSystemState(currentDir, filter, new EnumerationOptions { RecurseSubdirectories = true }); - Assert.Equal(currentDir, watcher.Path); + Assert.Equal(currentDir, watcher.RootDir); Assert.Equal(filter, watcher.Filter); Assert.True(watcher.EnumerationOptions.RecurseSubdirectories); } @@ -32,7 +32,7 @@ public static void FileSystemWatcher_ctor_OptionalParams() public static void FileSystemWatcher_ctor_Null() { // Not valid - Assert.Throws("path", () => new FileSystemState(null)); + Assert.Throws("rootDir", () => new FileSystemState(null)); Assert.Throws("filter", () => new FileSystemState(Environment.CurrentDirectory, null)); // Valid @@ -126,6 +126,79 @@ public static void FileSystemWatcher_Changed_File() } } + + + [Fact] + public static void FileSystemWatcher_Renamed_File() + { + string currentDir = Utility.GetRandomDirectory(); + string fileName = Path.GetRandomFileName(); + string newName = Path.GetRandomFileName(); + string fullName = Path.Combine(currentDir, fileName); + + + FileSystemState watcher = new FileSystemState(currentDir); + + using(FileStream file = File.Create(fullName)) { } + watcher.LoadState(); + + File.Move(fullName, Path.Combine(currentDir, newName)); + + var changes = watcher.GetChanges(); + + try + { + Assert.Single(changes); + FileChange change = changes[0]; + Assert.Equal(WatcherChangeTypes.Renamed, change.ChangeType); + Assert.Equal(fileName, change.OldName); + Assert.Equal(currentDir, change.OldDirectory); + Assert.Equal(newName, change.Name); + Assert.Equal(currentDir, change.Directory); + } + finally + { + Directory.Delete(currentDir, true); + } + } + + [Fact] + public static void FileSystemWatcher_Renamed_Directory() + { + string currentDir = Utility.GetRandomDirectory(); + string fileName = Path.GetRandomFileName(); + string subDir = Path.Combine(currentDir, "subdir"); + string fullName = Path.Combine(currentDir, fileName); + string newName = Path.Combine(subDir, fileName); + + FileSystemState watcher = new FileSystemState(currentDir, options: new EnumerationOptions() { RecurseSubdirectories = true }); + + Directory.CreateDirectory(subDir); + + using(FileStream file = File.Create(fullName)) { } + watcher.LoadState(); + + File.Move(fullName, Path.Combine(currentDir, newName)); + + var changes = watcher.GetChanges(); + + try + { + Assert.Single(changes); + FileChange change = changes[0]; + Assert.Equal(WatcherChangeTypes.Renamed, change.ChangeType); + Assert.Equal(fileName, change.OldName); + Assert.Equal(currentDir, change.OldDirectory); + Assert.Equal(fileName, change.Name); + Assert.Equal(subDir, change.Directory); + } + finally + { + Directory.Delete(subDir, true); + Directory.Delete(currentDir, true); + } + } + [Fact] public static void FileSystemWatcher_Filter() { @@ -223,4 +296,17 @@ public static void FileSystemWatcher_Recursive() Directory.Delete(currentDir, true); } } + + + [Fact] + public static void FileSystemWatcher_BigDir() + { + string currentDir = @"C:\Users\SpanWork\Documents"; + //string currentDir = @"C:\Users\SpanWork\Documents\Olleco\Scrapbook\DBAzure"; + FileSystemState watcher = new FileSystemState(currentDir, options: new EnumerationOptions { RecurseSubdirectories = true }); + watcher.LoadState(); + + var q = watcher.GetChanges(); + Assert.Empty(q); + } } diff --git a/YellowCounter.FileSystemState.Tests/YellowCounter.FileSystemState.Tests.csproj b/YellowCounter.FileSystemState.Tests/YellowCounter.FileSystemState.Tests.csproj index d42f712..757f763 100644 --- a/YellowCounter.FileSystemState.Tests/YellowCounter.FileSystemState.Tests.csproj +++ b/YellowCounter.FileSystemState.Tests/YellowCounter.FileSystemState.Tests.csproj @@ -8,6 +8,9 @@ + + + diff --git a/YellowCounter.FileSystemState.sln b/YellowCounter.FileSystemState.sln index a4f00e3..a038a13 100644 --- a/YellowCounter.FileSystemState.sln +++ b/YellowCounter.FileSystemState.sln @@ -1,12 +1,14 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.27703.2026 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29728.190 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "YellowCounter.FileSystemState", "YellowCounter.FileSystemState\YellowCounter.FileSystemState.csproj", "{8C085D5D-AC6F-48D9-A547-B6C92E18D2FB}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "YellowCounter.FileSystemState", "YellowCounter.FileSystemState\YellowCounter.FileSystemState.csproj", "{8C085D5D-AC6F-48D9-A547-B6C92E18D2FB}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "YellowCounter.FileSystemState.Tests", "YellowCounter.FileSystemState.Tests\YellowCounter.FileSystemState.Tests.csproj", "{EE22E810-4ADC-4399-9C72-B2B70831EB05}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PathReduxTests", "PathReduxTests\PathReduxTests.csproj", "{33F0288C-B927-4145-84E1-321BD5AD8996}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -21,6 +23,10 @@ Global {EE22E810-4ADC-4399-9C72-B2B70831EB05}.Debug|Any CPU.Build.0 = Debug|Any CPU {EE22E810-4ADC-4399-9C72-B2B70831EB05}.Release|Any CPU.ActiveCfg = Release|Any CPU {EE22E810-4ADC-4399-9C72-B2B70831EB05}.Release|Any CPU.Build.0 = Release|Any CPU + {33F0288C-B927-4145-84E1-321BD5AD8996}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {33F0288C-B927-4145-84E1-321BD5AD8996}.Debug|Any CPU.Build.0 = Debug|Any CPU + {33F0288C-B927-4145-84E1-321BD5AD8996}.Release|Any CPU.ActiveCfg = Release|Any CPU + {33F0288C-B927-4145-84E1-321BD5AD8996}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/YellowCounter.FileSystemState/FileChange.cs b/YellowCounter.FileSystemState/FileChange.cs index 5baa7e5..a87bceb 100644 --- a/YellowCounter.FileSystemState/FileChange.cs +++ b/YellowCounter.FileSystemState/FileChange.cs @@ -14,9 +14,19 @@ internal FileChange(string directory, string path, WatcherChangeTypes type) Name = path; ChangeType = type; } + internal FileChange(string directory, string path, WatcherChangeTypes type, string oldDirectory, string oldName) + { + Directory = directory; + Name = path; + ChangeType = type; + OldDirectory = oldDirectory; + OldName = oldName; + } public string Directory { get; } public string Name { get; } + public string OldDirectory { get; } + public string OldName { get; } public WatcherChangeTypes ChangeType { get; } } } diff --git a/YellowCounter.FileSystemState/FileChangeList.cs b/YellowCounter.FileSystemState/FileChangeList.cs index a911185..5a8d64f 100644 --- a/YellowCounter.FileSystemState/FileChangeList.cs +++ b/YellowCounter.FileSystemState/FileChangeList.cs @@ -10,5 +10,7 @@ public class FileChangeList : List internal void AddChanged(string directory, string path) => Add(new FileChange(directory, path, WatcherChangeTypes.Changed)); internal void AddRemoved(string directory, string path) => Add(new FileChange(directory, path, WatcherChangeTypes.Deleted)); + internal void AddRenamed(string directory, string path, string oldDirectory, string oldPath) => + Add(new FileChange(directory, path, WatcherChangeTypes.Renamed, oldDirectory, oldPath)); } } diff --git a/YellowCounter.FileSystemState/FileState.cs b/YellowCounter.FileSystemState/FileState.cs index ddd27a0..0c624a4 100644 --- a/YellowCounter.FileSystemState/FileState.cs +++ b/YellowCounter.FileSystemState/FileState.cs @@ -8,12 +8,20 @@ namespace YellowCounter.FileSystemState [Serializable] internal class FileState { - [NonSerialized] - public long Version; // removal notification are implemented something similar to "mark and sweep". This value is incremented in the mark phase - - public string Directory; - public string Path; + //[NonSerialized] + public FileStateFlags Flags; + public int DirectoryRef; + public int FilenameRef; public DateTimeOffset LastWriteTimeUtc; public long Length; } + + [Flags] + public enum FileStateFlags : byte + { + None = 0, + Seen = 1, + Created = 2, + Changed = 4, + } } diff --git a/YellowCounter.FileSystemState/FileSystemChangesEnumerator.cs b/YellowCounter.FileSystemState/FileSystemChangesEnumerator.cs index 93444a4..03ad864 100644 --- a/YellowCounter.FileSystemState/FileSystemChangesEnumerator.cs +++ b/YellowCounter.FileSystemState/FileSystemChangesEnumerator.cs @@ -2,46 +2,72 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. using System; +using System.Collections.Generic; +using System.IO; using System.IO.Enumeration; +using System.Runtime.InteropServices; namespace YellowCounter.FileSystemState { - internal class FileSystemChangeEnumerator: FileSystemEnumerator + internal class FileSystemChangeEnumerator : FileSystemEnumerator { - private FileChangeList _changes = new FileChangeList(); - private string _currentDirectory; - private FileSystemState _watcher; + private readonly string filter; + private IAcceptFileSystemEntry acceptFileSystemEntry; + //private string currentDirectory; - public FileSystemChangeEnumerator(FileSystemState watcher) - : base(watcher.Path, watcher.EnumerationOptions) + private static bool ignoreCase; + + static FileSystemChangeEnumerator() { - _watcher = watcher; + ignoreCase = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) + || RuntimeInformation.IsOSPlatform(OSPlatform.OSX); } - public FileChangeList Changes => _changes; + public FileSystemChangeEnumerator( + string filter, + string path, + EnumerationOptions enumerationOptions, + IAcceptFileSystemEntry acceptFileSystemEntry) + : base(path, enumerationOptions) + { + this.filter = filter; + this.acceptFileSystemEntry = acceptFileSystemEntry; + } + + public void Scan() + { + // Enumerating causes TransformEntry() to be called repeatedly + while(MoveNext()) { } + } protected override void OnDirectoryFinished(ReadOnlySpan directory) - => _currentDirectory = null; + { + //currentDirectory = null; - protected override string TransformEntry(ref FileSystemEntry entry) + base.OnDirectoryFinished(directory); + } + + protected override object TransformEntry(ref FileSystemEntry entry) { - _watcher.DetermineChange(_currentDirectory, ref _changes, ref entry); + acceptFileSystemEntry.Accept(ref entry); return null; } protected override bool ShouldIncludeEntry(ref FileSystemEntry entry) { - // Don't want to convert this to string every time - if (_currentDirectory == null) - _currentDirectory = entry.Directory.ToString(); + //if(currentDirectory == null) + // currentDirectory = entry.Directory.ToString(); - return _watcher.ShouldIncludeEntry(ref entry); - } + if(entry.IsDirectory) + return false; - protected override bool ShouldRecurseIntoEntry(ref FileSystemEntry entry) - { - return _watcher.ShouldRecurseIntoEntry(ref entry); + if(FileSystemName.MatchesSimpleExpression(filter, entry.FileName, ignoreCase: ignoreCase)) + return true; + + return false; } + + protected override bool ShouldRecurseIntoEntry(ref FileSystemEntry entry) => true; } } diff --git a/YellowCounter.FileSystemState/FileSystemState.cs b/YellowCounter.FileSystemState/FileSystemState.cs index 7191809..d7dc5df 100644 --- a/YellowCounter.FileSystemState/FileSystemState.cs +++ b/YellowCounter.FileSystemState/FileSystemState.cs @@ -4,111 +4,255 @@ using System.IO.Enumeration; using System.Runtime.InteropServices; using System.Runtime.Serialization.Formatters.Binary; +using System.Linq; +using YellowCounter.FileSystemState.PathRedux; +using YellowCounter.FileSystemState.HashCodes; namespace YellowCounter.FileSystemState { - public class FileSystemState + public class FileSystemState : IAcceptFileSystemEntry { - private long _version = default; - private PathToFileStateHashtable _state = new PathToFileStateHashtable(); + private PathToFileStateHashtable _state; - public FileSystemState(string path, string filter = "*", EnumerationOptions options = null) + public FileSystemState(string rootDir, string filter = "*", EnumerationOptions options = null) { - Path = path ?? throw new ArgumentNullException(nameof(path)); - Filter = filter ?? throw new ArgumentNullException(nameof(filter)); + this.RootDir = rootDir ?? throw new ArgumentNullException(nameof(rootDir)); + this.Filter = filter ?? throw new ArgumentNullException(nameof(filter)); - if (!Directory.Exists(path)) + if(!Directory.Exists(rootDir)) throw new DirectoryNotFoundException(); EnumerationOptions = options ?? new EnumerationOptions(); + + this.pathStorage = new PathStorage(new PathStorageOptions() + { + NewHashCode = () => new StandardHashCode(), + InitialCharCapacity = 1024, + InitialHashCapacity = 256, + LinearSearchLimit = 128, + HashBucketMaxChain = 128, + HashBucketInitialCapacity = 64 + }); + + _state = new PathToFileStateHashtable(this.pathStorage); } - public string Path { get; set; } + public string RootDir { get; set; } public string Filter { get; set; } public EnumerationOptions EnumerationOptions { get; set; } + private readonly PathStorage pathStorage; + public void LoadState() { - GetChanges(); + // Set initial baseline by reading current directory state without returning + // every file as a change. + gatherChanges(); + acceptChanges(); } public void LoadState(Stream stream) { - BinaryFormatter serializer = new BinaryFormatter(); - _state = (PathToFileStateHashtable)serializer.Deserialize(stream); + //BinaryFormatter serializer = new BinaryFormatter(); + //_state = (PathToFileStateHashtable)serializer.Deserialize(stream); } public void SaveState(Stream stream) { - BinaryFormatter serializer = new BinaryFormatter(); - serializer.Serialize(stream, _state); + //BinaryFormatter serializer = new BinaryFormatter(); + //serializer.Serialize(stream, _state); } // This function walks all watched files, collects changes, and updates state public FileChangeList GetChanges() { - _version++; + // Get the raw file changes, either create, file change or removal. + var (creates, changes, removals) = getFileChanges(); + + // Match up the creates and removals to get the renames + var renames = matchRenames(creates, removals); + + // Convert to the output format. + var result = convertToFileChanges(creates, changes, removals, renames); + + return result; + } + + + private void gatherChanges() + { + var enumerator = new FileSystemChangeEnumerator( + this.Filter, + this.RootDir, + this.EnumerationOptions, + this); + + enumerator.Scan(); + } + + public void Accept(ref FileSystemEntry fileSystemEntry) + { + _state.Mark(ref fileSystemEntry); + } + + private void acceptChanges() + { + // Clear out the files that have been removed or renamed from our state. + _state.Sweep(); + } + + private FileChangeList convertToFileChanges( + IEnumerable creates, + IEnumerable changes, + IEnumerable removals, + IEnumerable<(FileState NewFile, FileState OldFile)> renames) + { + var createResults = creates + .Except(renames.Select(x => x.NewFile)) + .Select(x => newFileChange(x.DirectoryRef, x.FilenameRef, WatcherChangeTypes.Created)) + ; + + var changeResults = changes + .Select(x => newFileChange(x.DirectoryRef, x.FilenameRef, WatcherChangeTypes.Changed)) + ; + + var removeResults = removals + .Except(renames.Select(x => x.OldFile)) + .Select(x => newFileChange(x.DirectoryRef, x.FilenameRef, WatcherChangeTypes.Deleted)) + ; + + var renameResults = renames.Select(x => newFileChange2( + x.NewFile.DirectoryRef, + x.NewFile.FilenameRef, + WatcherChangeTypes.Renamed, + x.OldFile.DirectoryRef, + x.OldFile.FilenameRef)) + ; + + var result = new FileChangeList(); + + result.AddRange(createResults); + result.AddRange(changeResults); + result.AddRange(removeResults); + result.AddRange(renameResults); - var enumerator = new FileSystemChangeEnumerator(this); - while (enumerator.MoveNext()) + return result; + + FileChange newFileChange( + int directoryRef, + int filenameRef, + WatcherChangeTypes changeType) { - // Ignore `.Current` + return new FileChange( + pathStorage.CreateString(directoryRef), + pathStorage.CreateString(filenameRef), + changeType); } - var changes = enumerator.Changes; - List<(string directory, string path)> removals = GetRemovals(); - foreach (var (directory, path) in removals) + FileChange newFileChange2( + int newDirectoryRef, + int newFilenameRef, + WatcherChangeTypes changeType, + int oldDirectoryRef, + int oldFilenameRef + ) { - changes.AddRemoved(directory, path); - _state.Remove(directory, path); + return new FileChange( + pathStorage.CreateString(newDirectoryRef), + pathStorage.CreateString(newFilenameRef), + changeType, + pathStorage.CreateString(oldDirectoryRef), + pathStorage.CreateString(oldFilenameRef) + ); } - - return changes; } - private List<(string directory, string path)> GetRemovals() + private ( + IEnumerable creates, + IEnumerable changes, + IEnumerable removals) getFileChanges() { - List<(string, string)> removals = new List<(string, string)>(); - foreach (var value in _state.Values) + var creates = new List(); + var changes = new List(); + var removals = new List(); + + gatherChanges(); + + foreach(var x in _state.Read()) { - if (value.Version != _version) + if(x.Flags.HasFlag(FileStateFlags.Seen)) { - removals.Add((value.Directory, value.Path)); + if(x.Flags.HasFlag(FileStateFlags.Created)) + creates.Add(x); + else if(x.Flags.HasFlag(FileStateFlags.Changed)) + changes.Add(x); } + else + removals.Add(x); } - return removals; + acceptChanges(); + + return (creates, changes, removals); } - protected internal virtual void DetermineChange(string directory, ref FileChangeList changes, ref FileSystemEntry file) + private IEnumerable<(FileState NewFile, FileState OldFile)> matchRenames( + IEnumerable creates, + IEnumerable removals) { - string path = file.FileName.ToString(); + // Want to match creates and removals to convert to renames either by: + // Same directory, different name + // or different directory, same name. + return matchRenames(creates, removals, false) + .Concat(matchRenames(creates, removals, true)); + } - FileState fileState = _state.Get(directory, path); - if (fileState == null) // file added - { - fileState = new FileState(); - fileState.Directory = directory; - fileState.Path = path; - fileState.LastWriteTimeUtc = file.LastWriteTimeUtc; - fileState.Length = file.Length; - fileState.Version = _version; - _state.Add(directory, path, fileState); - changes.AddAdded(directory, path); - return; - } + private IEnumerable<(FileState NewFile, FileState OldFile)> matchRenames( + IEnumerable creates, + IEnumerable removals, + bool byName) + { + var createsByTime = creates + .GroupBy(x => new + { + // Group by last write time, length and directory or filename + x.LastWriteTimeUtc, + x.Length, + Name = byName ? x.DirectoryRef : x.FilenameRef + }, + (x, y) => new + { + // Return key fields, and list of all created files for the + // given (time, length, path) key + x.LastWriteTimeUtc, + x.Length, + x.Name, + Creates = y.ToList() + }) + .ToList(); - fileState.Version = _version; + var removesByTime = removals + .GroupBy(x => new { x.LastWriteTimeUtc, x.Length, Name = byName ? x.DirectoryRef : x.FilenameRef }, + (x, y) => new { x.LastWriteTimeUtc, x.Length, x.Name, Removes = y.ToList() }) + .ToList(); - var previousState = fileState; - if (file.LastWriteTimeUtc != fileState.LastWriteTimeUtc || file.Length != fileState.Length) - { - changes.AddChanged(directory, fileState.Path); - fileState.LastWriteTimeUtc = file.LastWriteTimeUtc; - fileState.Length = file.Length; - } + // Join creates and removes by (time, length, directory), then filter to + // only those matches which are unambiguous. + return createsByTime.Join(removesByTime, + x => new { x.LastWriteTimeUtc, x.Length, x.Name }, + x => new { x.LastWriteTimeUtc, x.Length, x.Name }, + (x, y) => new { x.Creates, y.Removes } + ) + .Where(x => x.Creates.Count == 1 && x.Removes.Count == 1) + .Select(x => ( + NewFile: x.Creates[0], + OldFile: x.Removes[0] + )) + .ToList(); } + + protected internal virtual bool ShouldIncludeEntry(ref FileSystemEntry entry) { if (entry.IsDirectory) return false; @@ -121,5 +265,6 @@ protected internal virtual bool ShouldIncludeEntry(ref FileSystemEntry entry) } protected internal virtual bool ShouldRecurseIntoEntry(ref FileSystemEntry entry) => true; + } } diff --git a/YellowCounter.FileSystemState/HashCodes/HashCodeExtensions.cs b/YellowCounter.FileSystemState/HashCodes/HashCodeExtensions.cs new file mode 100644 index 0000000..fc24afa --- /dev/null +++ b/YellowCounter.FileSystemState/HashCodes/HashCodeExtensions.cs @@ -0,0 +1,33 @@ +using System; +using System.Buffers; +using System.Collections.Generic; +using System.Text; + +namespace YellowCounter.FileSystemState.HashCodes +{ + public static class HashCodeExtensions + { + public static int HashSequence(this IHashCode hashCode, ReadOnlySpan span) + { + foreach(var elem in span) + { + hashCode.Add(elem); + } + + return hashCode.ToHashCode(); + } + + public static int HashSequence(this IHashCode hashCode, ReadOnlySequence seq) + { + foreach(var mem in seq) + { + foreach(var elem in mem.Span) + { + hashCode.Add(elem); + } + } + + return hashCode.ToHashCode(); + } + } +} diff --git a/YellowCounter.FileSystemState/HashCodes/IHashCode.cs b/YellowCounter.FileSystemState/HashCodes/IHashCode.cs new file mode 100644 index 0000000..00104dc --- /dev/null +++ b/YellowCounter.FileSystemState/HashCodes/IHashCode.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace YellowCounter.FileSystemState.HashCodes +{ + public interface IHashCode + { + void Add(char value); + int ToHashCode(); + } +} diff --git a/YellowCounter.FileSystemState/HashCodes/StandardHashCode.cs b/YellowCounter.FileSystemState/HashCodes/StandardHashCode.cs new file mode 100644 index 0000000..70c8672 --- /dev/null +++ b/YellowCounter.FileSystemState/HashCodes/StandardHashCode.cs @@ -0,0 +1,17 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace YellowCounter.FileSystemState.HashCodes +{ + public struct StandardHashCode : IHashCode + { + private HashCode hashCode; + public void Add(char value) + { + hashCode.Add(value); + } + + public int ToHashCode() => hashCode.ToHashCode(); + } +} diff --git a/YellowCounter.FileSystemState/IAcceptFileSystemEntry.cs b/YellowCounter.FileSystemState/IAcceptFileSystemEntry.cs new file mode 100644 index 0000000..7e395f9 --- /dev/null +++ b/YellowCounter.FileSystemState/IAcceptFileSystemEntry.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using System.IO.Enumeration; +using System.Text; + +namespace YellowCounter.FileSystemState +{ + public interface IAcceptFileSystemEntry + { + void Accept(ref FileSystemEntry fileSystemEntry); + } +} diff --git a/YellowCounter.FileSystemState/IStringInternPool.cs b/YellowCounter.FileSystemState/IStringInternPool.cs new file mode 100644 index 0000000..1f43dc3 --- /dev/null +++ b/YellowCounter.FileSystemState/IStringInternPool.cs @@ -0,0 +1,9 @@ +using System; + +namespace YellowCounter.FileSystemState +{ + public interface IStringInternPool + { + string Intern(ref ReadOnlySpan span); + } +} \ No newline at end of file diff --git a/YellowCounter.FileSystemState/PathRedux/CharBuffer.cs b/YellowCounter.FileSystemState/PathRedux/CharBuffer.cs new file mode 100644 index 0000000..f2c87c9 --- /dev/null +++ b/YellowCounter.FileSystemState/PathRedux/CharBuffer.cs @@ -0,0 +1,229 @@ +using System; +using System.Buffers; +using System.Collections; +using System.Collections.Generic; +using System.Text; + +namespace YellowCounter.FileSystemState.PathRedux +{ + public class CharBuffer + { + + private Memory buffer; + private int pos; + + public CharBuffer(int capacity) + { + buffer = new char[capacity]; + } + + public int Capacity => buffer.Length; + + public void Resize(int capacity) + { + if(capacity < pos) + throw new Exception("Cannot resize because data truncation would occur"); + + var newBuffer = new char[capacity]; + + this.buffer.CopyTo(newBuffer); + + this.buffer = newBuffer; + } + + public int Store(ReadOnlySpan input) + { + // We need space for our text, our null terminator, and an extra + // null terminator for the end of the buffer. + if(input.Length + pos + 1 >= buffer.Length) + return -1; + + var bufSpan = buffer.Span; + + // Return current buffer start position as the result. + var result = pos; + + // Write the text into our buffer + input.CopyTo(bufSpan.Slice(pos, input.Length)); + pos += input.Length; + + // Null terminate + bufSpan[pos] = '\0'; + pos++; + + return result; + } + + public int Match(ReadOnlySpan arg, ReadOnlySpan indices) + { + var bufSpan = buffer.Span; + + foreach(int idx in indices) + { + if(bufSpan.Slice(idx, arg.Length).SequenceEqual(arg)) + { + // Check for null terminator so we don't match to a + // longer string. + if(bufSpan[idx + arg.Length] == '\0') + return idx; + } + } + + // -1 for not found. + return -1; + } + + public ReadOnlySpan Retrieve(int index) + { + var bufSpan = buffer.Span; + + var begin = bufSpan.Slice(index); + + int len = begin.IndexOf('\0'); + + return begin.Slice(0, len); + } + + public string CreateString(IEnumerable indices) + { + int totalLen = 0; + var posLens = new List(); + // Gather up pos / lens + + var bufSpan = buffer.Span; + + foreach(var idx in indices) + { + var tail = bufSpan.Slice(idx); + var len = tail.IndexOf('\0'); + + totalLen += len; + posLens.Add(new PosLen(idx, len)); + //var text = tail.Slice(0, len); + } + + // String in REVERSE ORDER of indices - this is because we start at + // the end and then point back to the parent, grandparent etc. + return String.Create(totalLen, (buffer, posLens, totalLen), + (chars, state) => + { + var span = state.buffer.Span; + var pos = state.totalLen; + + foreach(var posLen in posLens) + { + var text = span.Slice(posLen.Pos, posLen.Len); + + pos -= posLen.Len; + + text.CopyTo(chars.Slice(pos, posLen.Len)); + } + }); + } + + private readonly struct PosLen + { + public PosLen(int pos, int len) + { + this.Pos = pos; + this.Len = len; + } + public int Pos { get; } + public int Len { get; } + } + + public Enumerator GetEnumerator() + { + var bufSpan = buffer.Span; + + return new Enumerator(bufSpan); + } + + public ref struct Enumerator + { + private int pos; + private int len; + ReadOnlySpan bufSpan; + Item current; + + public Enumerator(ReadOnlySpan bufSpan) + { + pos = -1; + len = 0; + this.bufSpan = bufSpan; + current = new Item(); + } + + public readonly Item Current => current; + public bool MoveNext() + { + // Advance past zero terminator and previous string. + pos += 1 + len; + + var tail = bufSpan.Slice(pos); + + // Reached the end? End enumerating. The end of the buffer + // has a double null terminator \0\0. + if(tail[0] == '\0') + return false; + + len = tail.IndexOf('\0'); + + this.current.Span = tail.Slice(0, len); + this.current.Pos = pos; + + return true; + } + } + + public ref struct Item + { + public ReadOnlySpan Span; + public int Pos; + } + + + public ReadOnlySequence Retrieve(IEnumerable indices) + { + Segment root = null; + Segment current = null; + + int len = 0; + + foreach(var idx in indices) + { + var tail = buffer.Slice(idx); + len = tail.Span.IndexOf('\0'); + var text = tail.Slice(0, len); + + if(root == null) + { + root = new Segment(text); + current = root; + } + else + { + current = current.Add(text); + } + } + + return new ReadOnlySequence(root, 0, current, len); + } + + class Segment : ReadOnlySequenceSegment + { + public Segment(ReadOnlyMemory memory) + => Memory = memory; + public Segment Add(ReadOnlyMemory mem) + { + var segment = new Segment(mem); + segment.RunningIndex = RunningIndex + + Memory.Length; + Next = segment; + return segment; + } + } + + } + +} diff --git a/YellowCounter.FileSystemState/PathRedux/HashBucket.cs b/YellowCounter.FileSystemState/PathRedux/HashBucket.cs new file mode 100644 index 0000000..e1bb2b1 --- /dev/null +++ b/YellowCounter.FileSystemState/PathRedux/HashBucket.cs @@ -0,0 +1,95 @@ +using System; +using System.Buffers; +using System.Collections; +using System.Collections.Generic; +using System.Collections.Specialized; +using System.Text; + +namespace YellowCounter.FileSystemState.PathRedux +{ + public class HashBucket + { + private Memory mem; + private readonly int capacity; + private readonly int maxChain; + private BitArray usage; + + public HashBucket(int capacity, int maxChain) + { + mem = new int[capacity + maxChain]; + usage = new BitArray(capacity); + + this.capacity = capacity; + this.maxChain = maxChain; + } + + public int Capacity => this.capacity; + public int MaxChain => this.maxChain; + + public bool Store(int hash, int value) + { + int bucket = bucketFromHash(hash); + + var span = mem.Span; + + for(int c = 0; c < maxChain; c++) + { + int i = bucket + c; + int j = i % capacity; + + bool wrapAround = i != j; + + if(!usage[j]) + { + span[j] = value; + usage[j] = true; + + // If wrapping around we have two copies of the values, + // one at the normal position and one in the runoff area + // at the end of the memory buffer. + // This so we have a contiguous span to slice for the + // return. + if(wrapAround) + { + span[i] = value; + } + + return true; + } + + } + + return false; + } + + /// + /// Modulo divide the hash by our capacity + /// + /// + /// + private int bucketFromHash(int hash) => (int)unchecked((uint)hash % (uint)Capacity); + + + public ReadOnlySpan Retrieve(int hash) + { + int bucket = bucketFromHash(hash); + + var span = mem.Span; + + int c = 0; + + while(c < maxChain) + { + int j = (bucket + c) % capacity; + + if(!usage[j]) + break; + + c++; + } + + return span.Slice(bucket, c); + } + + } +} diff --git a/YellowCounter.FileSystemState/PathRedux/HashedCharBuffer.cs b/YellowCounter.FileSystemState/PathRedux/HashedCharBuffer.cs new file mode 100644 index 0000000..c057930 --- /dev/null +++ b/YellowCounter.FileSystemState/PathRedux/HashedCharBuffer.cs @@ -0,0 +1,121 @@ +using System; +using System.Buffers; +using System.Collections.Generic; +using System.Text; +using YellowCounter.FileSystemState.HashCodes; + +namespace YellowCounter.FileSystemState.PathRedux +{ + public class HashedCharBuffer + { + private readonly int linearSearchLimit; + private CharBuffer charBuffer; + private HashBucket chainedLookup; + private readonly Func newHashCode; + + public HashedCharBuffer(HashedCharBufferOptions options) + { + charBuffer = new CharBuffer(options.InitialCharCapacity); + chainedLookup = new HashBucket(options.InitialHashCapacity, options.LinearSearchLimit); + + this.newHashCode = options.NewHashCode; + this.linearSearchLimit = options.LinearSearchLimit; + } + + public int LinearSearchLimit => this.linearSearchLimit; + public int CharCapacity => charBuffer.Capacity; + public int HashCapacity => chainedLookup.Capacity; + + /// + /// Returns index position + /// + /// + /// + public int Store(ReadOnlySpan text) + { + int hash = hashSequence(text); + int foundPos = findByHash(hash, text); + + if(foundPos != -1) + return foundPos; + + int pos = charBuffer.Store(text); + if(pos == -1) + { + int newSize = charBuffer.Capacity * 2; + if(newSize < text.Length + charBuffer.Capacity + 2) // Allow 2 for null terminators + newSize = charBuffer.Capacity + text.Length + 2; + + charBuffer.Resize(newSize); + + pos = charBuffer.Store(text); + if(pos == -1) + throw new Exception("Resizing charBuffer didn't give us enough space"); + } + + if(!chainedLookup.Store(hash, pos)) + { + rebuildLookup(); + chainedLookup.Store(hash, pos); + } + + return pos; + } + + public ReadOnlySpan Retrieve(int pos) => charBuffer.Retrieve(pos); + public ReadOnlySequence Retrieve(IEnumerable indices) => charBuffer.Retrieve(indices); + + public string CreateString(IEnumerable indices) => charBuffer.CreateString(indices); + + public int Find(ReadOnlySpan text) + { + int hash = hashSequence(text); + return findByHash(hash, text); + } + + private int findByHash(int hash, ReadOnlySpan text) + { + var indices = chainedLookup.Retrieve(hash); + return charBuffer.Match(text, indices); + } + + private int hashSequence(ReadOnlySpan text) => newHashCode().HashSequence(text); + + private void rebuildLookup() + { + // Doubling capacity will halve the number of moduloed hash collisions + var newLookup = new HashBucket(chainedLookup.Capacity * 2, linearSearchLimit); + + // Populate a new lookup from our existing data. + foreach(var itm in charBuffer) + { + if(!newLookup.Store(hashSequence(itm.Span), itm.Pos)) + throw new Exception($"Too many hash collisions. Increase {nameof(LinearSearchLimit)} to overcome."); + } + + // Use the new lookup + chainedLookup = newLookup; + } + } +} + + + + + +// Split by backslash / slash + +// Starting at the longest sequence, +// e.g. C:\abc\cde\efg\ghi\ +// then going backwards as +// C:\abc\cde\efg\ +// C:\abc\cde\ +// C:\abc\ + +// Generate the hashcode of the text. +// Look up the hashcode in the dictionary +// If we found it, we will get two things: +// Index of the tail entry +// Index of the parent + +// Create a new record \ No newline at end of file diff --git a/YellowCounter.FileSystemState/PathRedux/HashedCharBufferOptions.cs b/YellowCounter.FileSystemState/PathRedux/HashedCharBufferOptions.cs new file mode 100644 index 0000000..0823c7c --- /dev/null +++ b/YellowCounter.FileSystemState/PathRedux/HashedCharBufferOptions.cs @@ -0,0 +1,15 @@ +using System; +using System.Collections.Generic; +using System.Text; +using YellowCounter.FileSystemState.HashCodes; + +namespace YellowCounter.FileSystemState.PathRedux +{ + public class HashedCharBufferOptions + { + public Func NewHashCode { get; set; } + public int InitialCharCapacity { get; set; } + public int InitialHashCapacity { get; set; } + public int LinearSearchLimit { get; set; } + } +} diff --git a/YellowCounter.FileSystemState/PathRedux/IPathStorage.cs b/YellowCounter.FileSystemState/PathRedux/IPathStorage.cs new file mode 100644 index 0000000..b39cc01 --- /dev/null +++ b/YellowCounter.FileSystemState/PathRedux/IPathStorage.cs @@ -0,0 +1,10 @@ +using System; + +namespace YellowCounter.FileSystemState.PathRedux +{ + public interface IPathStorage + { + string CreateString(int idx); + int Store(ReadOnlySpan arg); + } +} \ No newline at end of file diff --git a/YellowCounter.FileSystemState/PathRedux/PathStorage.cs b/YellowCounter.FileSystemState/PathRedux/PathStorage.cs new file mode 100644 index 0000000..3ba5eb2 --- /dev/null +++ b/YellowCounter.FileSystemState/PathRedux/PathStorage.cs @@ -0,0 +1,201 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using YellowCounter.FileSystemState.HashCodes; + +namespace YellowCounter.FileSystemState.PathRedux +{ + /// + /// Storing a long list of full paths from a recursive directory search involves + /// a lot of repeats: + /// C:\abc\def + /// C:\abc\def\ghi + /// C:\abc\def\jkl + /// C:\abc\def\mno + /// + /// This class implements a Parent Pointer Tree, it splits the path by the directory + /// separator, stores the final text after the \, then a pointer to the entry for + /// the parent directory. This occurs recursively so we only store the text for each + /// folder name once. + /// + public class PathStorage : IPathStorage + { + private HashedCharBuffer buf; + private HashBucket buckets; + private List entries; + private const int Root = -1; // The root entry's ParentIdx is set to this. + + private Func newHashCode; + + public PathStorage(PathStorageOptions options) + { + this.newHashCode = options.NewHashCode; + + buf = new HashedCharBuffer(new HashedCharBufferOptions() + { + NewHashCode = options.NewHashCode, + InitialCharCapacity = options.InitialCharCapacity, + InitialHashCapacity = options.InitialHashCapacity, + LinearSearchLimit = options.LinearSearchLimit + }); + + buckets = new HashBucket( + options.HashBucketInitialCapacity, + options.HashBucketMaxChain); + + entries = new List(); + } + + public int Store(ReadOnlySpan arg) + { + var hash = newHashCode().HashSequence(arg); + + foreach(var idx in buckets.Retrieve(hash)) + { + if(match(idx, arg)) + { + return idx; + } + } + + // Find a slash or backslash. + int slashPos = arg.LastIndexOfAny(new[] { '\\', '/' }); + + int parentIdx; + int textRef; + + // No more slash delimiters, so store a root entry (parent index -1). + if(slashPos == -1) + { + parentIdx = Root; + textRef = buf.Store(arg); + } + else + { + // Recursively call back to ourselves to store all text + // up to the parent directory name. This might find an + // existing entry or need to create one. + parentIdx = this.Store(arg.Slice(0, slashPos)); + + // Store the text from the slash onwards as our entry. + textRef = buf.Store(arg.Slice(slashPos)); + } + + int result = entries.Count; + entries.Add(new Entry(textRef, parentIdx)); + + if(!buckets.Store(hash, result)) + { + // Rebuild buckets from List twice as big + rebuildBuckets(); + + if(!buckets.Store(hash, result)) + throw new Exception($"Too many hash collisions in {nameof(PathStorage)}"); + } + + return result; + } + + private void rebuildBuckets() + { + var newBuckets = new HashBucket(buckets.Capacity * 2, buckets.MaxChain); + + for(int idx = 0; idx < entries.Count; idx++) + { + var hashCode = newHashCode(); + + foreach(var textRef in chain(idx).Reverse()) + { + var text = buf.Retrieve(textRef); + foreach(var elem in text) + { + hashCode.Add(elem); + } + } + + int hash = hashCode.ToHashCode(); + + newBuckets.Store(hash, idx); + } + + this.buckets = newBuckets; + } + + public int HashEntry(int idx) + { + var text = buf.Retrieve(chain(idx)); + + return 0; + } + + public string CreateString(int idx) + { + return buf.CreateString(chain(idx)); + } + + private IEnumerable chain(int idx) + { + int cursorIdx = idx; + + while(cursorIdx != Root) + { + var entry = entries[cursorIdx]; + + yield return entry.TextRef; + cursorIdx = entry.ParentIdx; + } + } + + private bool match(int idx, ReadOnlySpan arg) + { + int argStart = arg.Length; + int cursorIdx = idx; + + while(true) + { + var entry = entries[cursorIdx]; + + var text = buf.Retrieve(entry.TextRef); + + argStart -= text.Length; + + if(argStart < 0) + return false; + + var argSlice = arg.Slice(argStart, text.Length); + + if(!text.SequenceEqual(argSlice)) + return false; + + // Loop round to our parent entry + cursorIdx = entry.ParentIdx; + + if(cursorIdx == Root) + { + // If the target has no parent, and we've examined all of arg + // then we've got a correct match + if(argStart == 0) + return true; + + return false; + } + } + + } + + private readonly struct Entry + { + public Entry(int textRef, int parentIdx) + { + this.TextRef = textRef; + this.ParentIdx = parentIdx; + } + + public int TextRef { get; } + public int ParentIdx { get; } + } + + } +} diff --git a/YellowCounter.FileSystemState/PathRedux/PathStorageOptions.cs b/YellowCounter.FileSystemState/PathRedux/PathStorageOptions.cs new file mode 100644 index 0000000..a6a06ed --- /dev/null +++ b/YellowCounter.FileSystemState/PathRedux/PathStorageOptions.cs @@ -0,0 +1,17 @@ +using System; +using System.Collections.Generic; +using System.Text; +using YellowCounter.FileSystemState.HashCodes; + +namespace YellowCounter.FileSystemState.PathRedux +{ + public class PathStorageOptions + { + public int HashBucketInitialCapacity { get; set; } + public int HashBucketMaxChain { get; set; } + public Func NewHashCode { get; set; } + public int InitialCharCapacity { get; set; } + public int InitialHashCapacity { get; set; } + public int LinearSearchLimit { get; set; } + } +} diff --git a/YellowCounter.FileSystemState/PathToFileStateHashtable.cs b/YellowCounter.FileSystemState/PathToFileStateHashtable.cs index c544c2a..2f69514 100644 --- a/YellowCounter.FileSystemState/PathToFileStateHashtable.cs +++ b/YellowCounter.FileSystemState/PathToFileStateHashtable.cs @@ -1,20 +1,143 @@ using System; using System.Collections.Generic; using System.Runtime.Serialization; +using System.Linq; +using System.IO.Enumeration; +using YellowCounter.FileSystemState.PathRedux; +using System.Diagnostics; namespace YellowCounter.FileSystemState { [Serializable] - internal class PathToFileStateHashtable : Dictionary<(string directory, string file), FileState>, ISerializable + internal class PathToFileStateHashtable { - public PathToFileStateHashtable() { } + Dictionary> dict; + private readonly IPathStorage pathStorage; - public void Add(string directory, string file, FileState value) => Add((directory, file), value); + public PathToFileStateHashtable(IPathStorage pathStorage) + { + dict = new Dictionary>(); - public void Remove(string directory, string file) => Remove((directory, file)); + this.pathStorage = pathStorage; + } - public FileState Get(string directory, string file) => this.GetValueOrDefault((directory, file)); + internal void Mark(ref FileSystemEntry input) + { + int dirRef = pathStorage.Store(input.Directory); + int filenameRef = pathStorage.Store(input.FileName); - protected PathToFileStateHashtable(SerializationInfo info, StreamingContext context) : base(info, context) { } + int hashCode = HashCode.Combine(dirRef.GetHashCode(), filenameRef.GetHashCode()); + + if(dict.TryGetValue(hashCode, out var fileStates)) + { + bool found = false; + + // Normally there will only be 1 but we could get a hash collision. + foreach(var existing in fileStates) + { + // We've only matched on hashcode so far, so there could be false + // matches in here. Do a proper comparision on filename/directory. + if(existing.FilenameRef == filenameRef && existing.DirectoryRef == dirRef) + { + // Found the file; compare to our existing record so we can + // detect if it has been modified. + markExisting(existing, input); + + found = true; + break; + } + } + + // Hash collision! Add on the end of the list. + if(!found) + { + fileStates.Add(newFileState(input)); + } + } + else + { + // Not seen before, create a 1-element list and add to the dictionary. + dict.Add(hashCode, new List() { newFileState(input) }); + } + + FileState newFileState(FileSystemEntry input) + { + var fileState = new FileState(); + + fileState.Flags = FileStateFlags.Created | FileStateFlags.Seen; + + fileState.DirectoryRef = dirRef; + fileState.FilenameRef = filenameRef; + + fileState.LastWriteTimeUtc = input.LastWriteTimeUtc; + fileState.Length = input.Length; + + return fileState; + } + } + + private void markExisting(FileState fs, FileSystemEntry input) + { + // Mark that we've seen the file. + fs.Flags |= FileStateFlags.Seen; + + // Has it changed since we last saw it? + if(fs.LastWriteTimeUtc != input.LastWriteTimeUtc + || fs.Length != input.Length) + { + fs.Flags |= FileStateFlags.Changed; + + // Update the last write time / file length. + fs.LastWriteTimeUtc = input.LastWriteTimeUtc; + fs.Length = input.Length; + } + } + + + + public IEnumerable Read() + { + foreach(var x in dict.Values.SelectMany(y => y)) + { + yield return x; + } + } + + public void Sweep() + { + var toRemove = new List(); + + // Go through every list of filestates in our state dictionary + foreach(var (hash, list) in dict) + { + // Remove any item in the list which we didn't see on the last mark + // phase (every item that is seen gets the LastSeenVersion updated) + //list.RemoveAll(x => x.LastSeenVersion != version); + + list.RemoveAll(x => !x.Flags.HasFlag(FileStateFlags.Seen)); + + // In the normal case where there are no hash collisions, this will + // remove the one and only item from the list. We can then remove + // the hash entry from the dictionary. + // If there was a hash collision, the reduced-size list would remain. + if(list.Count == 0) + { + toRemove.Add(hash); + } + + // Clear the flags on all remaining items. + foreach(var x in list) + { + x.Flags = FileStateFlags.None; + } + } + + // We can't remove the items while iterating so remove here instead. + foreach(var hash in toRemove) + { + dict.Remove(hash); + } + } } + } diff --git a/YellowCounter.FileSystemState/ReadOnlySpanExtensions.cs b/YellowCounter.FileSystemState/ReadOnlySpanExtensions.cs new file mode 100644 index 0000000..7e72bc8 --- /dev/null +++ b/YellowCounter.FileSystemState/ReadOnlySpanExtensions.cs @@ -0,0 +1,28 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace YellowCounter.FileSystemState +{ + public static class ReadOnlySpanExtensions + { + /// + /// Combine hashcodes of each element in the ReadOnlySpan + /// + /// + /// + /// + public static int GetHashOfContents(this ReadOnlySpan span) + { + // struct so allocated on stack + var hash = new HashCode(); + + foreach(var elem in span) + { + hash.Add(elem); + } + + return hash.ToHashCode(); + } + } +} diff --git a/YellowCounter.FileSystemState/StringInternPool.cs b/YellowCounter.FileSystemState/StringInternPool.cs new file mode 100644 index 0000000..15c9483 --- /dev/null +++ b/YellowCounter.FileSystemState/StringInternPool.cs @@ -0,0 +1,48 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace YellowCounter.FileSystemState +{ + /// + /// Not thread-safe string interning. + /// Probably needs a garbage collector at some point? + /// + public class StringInternPool : IStringInternPool + { + public Dictionary> dict = new Dictionary>(); + + public string Intern(ref ReadOnlySpan span) + { + int hash = span.GetHashOfContents(); + + if(dict.TryGetValue(hash, out var strings)) + { + foreach(var s in strings) + { + // Interned case - found existing string which matches. + if(span.Equals(s, StringComparison.Ordinal)) + return s; + } + + // Hash collision + string newString = span.ToString(); + strings.Add(newString); + + return newString; + } + else + { + // Add new item + string newString = span.ToString(); + + var newList = new List(); + newList.Add(newString); + + dict.Add(hash, newList); + + return newString; + } + } + } +}