diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c9019053..c8a45416f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,12 @@ The following emojis are used to highlight certain changes: ### Added +- `ipld/unixfs/io/directory`: We have made some changes to unixfs directory tooling: + - We have exposed creator methods for new `BasicDirectory` and `HAMTDirectory`, that complement the existing `NewDirectory()` which creates dynamic directories. + - We have added `WithCidBuilder(...)` and `WithMaxLinks(...)` as options to these new methods so that empty directories can be initilized as wished from the get-go. + - `WithMaxLinks(...)` is a new option that allows to set a limit to the number of children that a directory DAG node can have. For details on what exactly it does for each of the directory types, please check the documentation. + - `WithStats(...)` is a new option that allows to set the Stat information for the new directory node. + ### Changed - upgrade to `go-libp2p` [v0.41.1](https://github.com/libp2p/go-libp2p/releases/tag/v0.41.1) diff --git a/ipld/unixfs/io/directory.go b/ipld/unixfs/io/directory.go index edac04b22..7858bd9ef 100644 --- a/ipld/unixfs/io/directory.go +++ b/ipld/unixfs/io/directory.go @@ -4,6 +4,7 @@ import ( "context" "errors" "os" + "time" "github.com/alecthomas/units" mdag "github.com/ipfs/boxo/ipld/merkledag" @@ -69,6 +70,79 @@ type Directory interface { // GetCidBuilder returns the CID Builder used. GetCidBuilder() cid.Builder + + // setMaxLinks sets the max width of the Directory. + setMaxLinks(n int) +} + +type directoryWithOptions interface { + Directory + // setMaxLinks sets the max width of the Directory. + setMaxLinks(n int) + // used for hamt directories only + setHAMTMaxFanout(n int) + setStats(os.FileMode, time.Time) +} + +type DirectoryOption func(directoryWithOptions) + +// WithMaxLinks stablishes the max number of links allowed for a Basic +// directory or a Dynamic directory with an underlying Basic directory: +// +// - On Dynamic directories using a BasicDirectory, it can trigger conversion +// to HAMT when set and exceeded. The subsequent HAMT nodes will use +// MaxHAMTFanout as ShardWidth when set, or DefaultShardWidth +// otherwise. Conversion can be triggered too based on HAMTShardingSize. +// +// - On Dynamic directories using a HAMTDirectory, it can trigger conversion +// to BasicDirectory when the number of directory entries is below MaxLinks (and +// HAMTShardingSize allows). +// +// - On pure Basic directories, it causes an error when adding more than +// MaxLinks children. +func WithMaxLinks(n int) DirectoryOption { + return func(d directoryWithOptions) { + d.setMaxLinks(n) + } +} + +// WithMaxHAMTFanout stablishes the maximum fanout factor (number of links) for +// a HAMT directory or a Dynamic directory with an underlying HAMT directory: +// +// - On Dynamic directories, it specifies the HAMT fanout when a HAMT +// used. When unset, DefaultShardWidth applies. +// +// - On pure HAMT directories, it sets the ShardWidth, otherwise DefaultShardWidth +// is used. +// +// HAMT directories require this value to be a power of 2 and a multiple of +// 8. If it is not the case, it will not be used and DefaultHAMTWidth will be +// used instead. +func WithMaxHAMTFanout(n int) DirectoryOption { + if n > 0 && !validShardWidth(n) { + log.Warnf("Invalid HAMTMaxFanout: %d. Using default (%d)", n, DefaultShardWidth) + n = DefaultShardWidth + } + if n == 0 { + n = DefaultShardWidth + } + return func(d directoryWithOptions) { + d.setHAMTMaxFanout(n) + } +} + +// WithCidBuilder sets the CidBuilder for new Directories. +func WithCidBuilder(cb cid.Builder) DirectoryOption { + return func(d directoryWithOptions) { + d.SetCidBuilder(cb) + } +} + +// WithStat can be used to set the empty directory node permissions. +func WithStat(mode os.FileMode, mtime time.Time) DirectoryOption { + return func(d directoryWithOptions) { + d.setStats(mode, mtime) + } } // TODO: Evaluate removing `dserv` from this layer and providing it in MFS. @@ -84,6 +158,12 @@ func init() { linksize.LinkSizeFunction = productionLinkSize } +var ( + _ directoryWithOptions = (*DynamicDirectory)(nil) + _ directoryWithOptions = (*BasicDirectory)(nil) + _ directoryWithOptions = (*HAMTDirectory)(nil) +) + // BasicDirectory is the basic implementation of `Directory`. All the entries // are stored in a single node. type BasicDirectory struct { @@ -96,6 +176,15 @@ type BasicDirectory struct { // (We maintain this value up to date even if the HAMTShardingSize is off // since potentially the option could be activated on the fly.) estimatedSize int + totalLinks int + + // opts + // maxNumberOfLinks. If set, can trigger conversion to HAMT directory. + maxLinks int + maxHAMTFanout int + cidBuilder cid.Builder + mode os.FileMode + mtime time.Time } // HAMTDirectory is the HAMT implementation of `Directory`. @@ -104,30 +193,103 @@ type HAMTDirectory struct { shard *hamt.Shard dserv ipld.DAGService + // opts + maxLinks int + maxHAMTFanout int + cidBuilder cid.Builder + mode os.FileMode + mtime time.Time + // Track the changes in size by the AddChild and RemoveChild calls // for the HAMTShardingSize option. sizeChange int + totalLinks int } -func newEmptyBasicDirectory(dserv ipld.DAGService) *BasicDirectory { - return newBasicDirectoryFromNode(dserv, format.EmptyDirNode()) +// NewBasicDirectory creates an empty basic directory with the given options. +func NewBasicDirectory(dserv ipld.DAGService, opts ...DirectoryOption) *BasicDirectory { + basicDir := &BasicDirectory{ + dserv: dserv, + maxHAMTFanout: DefaultShardWidth, + } + + for _, o := range opts { + o(basicDir) + } + + var node *mdag.ProtoNode + if basicDir.mode > 0 { + node = format.EmptyDirNodeWithStat(basicDir.mode, basicDir.mtime) + } else { + node = format.EmptyDirNode() + } + basicDir.node = node + basicDir.node.SetCidBuilder(basicDir.cidBuilder) + + // Scan node links (if any) to restore estimated size. + basicDir.computeEstimatedSizeAndTotalLinks() + + return basicDir } -func newBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *BasicDirectory { +// NewBasicDirectoryFromNode creates a basic directory wrapping the given +// ProtoNode. +func NewBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *BasicDirectory { basicDir := new(BasicDirectory) basicDir.node = node basicDir.dserv = dserv // Scan node links (if any) to restore estimated size. - basicDir.computeEstimatedSize() + basicDir.computeEstimatedSizeAndTotalLinks() return basicDir } -// NewDirectory returns a Directory implemented by DynamicDirectory -// containing a BasicDirectory that can be converted to a HAMTDirectory. -func NewDirectory(dserv ipld.DAGService) Directory { - return &DynamicDirectory{newEmptyBasicDirectory(dserv)} +// NewHAMTDirectory creates an empty HAMT directory with the given options. +func NewHAMTDirectory(dserv ipld.DAGService, sizeChange int, opts ...DirectoryOption) (*HAMTDirectory, error) { + dir := &HAMTDirectory{ + dserv: dserv, + sizeChange: sizeChange, + maxHAMTFanout: DefaultShardWidth, + } + + for _, opt := range opts { + opt(dir) + } + + shard, err := hamt.NewShard(dir.dserv, dir.maxHAMTFanout) + if err != nil { + return nil, err + } + + shard.SetCidBuilder(dir.cidBuilder) + dir.shard = shard + + return dir, nil +} + +// NewHAMTDirectoryFromNode creates a HAMT directory from the given node, +// which must correspond to an existing HAMT. +func NewHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirectory, error) { + dir := &HAMTDirectory{ + dserv: dserv, + } + + shard, err := hamt.NewHamtFromDag(dserv, node) + if err != nil { + return nil, err + } + dir.shard = shard + dir.totalLinks = len(node.Links()) + + return dir, nil +} + +// NewDirectory returns a Directory implemented by DynamicDirectory containing +// a BasicDirectory that automatically converts to a from a HAMTDirectory +// based on HAMTShardingSize, MaxLinks and MaxHAMTFanout (when set). +func NewDirectory(dserv ipld.DAGService, opts ...DirectoryOption) Directory { + return &DynamicDirectory{NewBasicDirectory(dserv, opts...)} } // ErrNotADir implies that the given node was not a unixfs directory @@ -148,23 +310,37 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err switch fsNode.Type() { case format.TDirectory: - return &DynamicDirectory{newBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil + return &DynamicDirectory{NewBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil case format.THAMTShard: - shard, err := hamt.NewHamtFromDag(dserv, node) + hamtDir, err := NewHAMTDirectoryFromNode(dserv, node) if err != nil { return nil, err } - return &DynamicDirectory{&HAMTDirectory{shard, dserv, 0}}, nil + return &DynamicDirectory{hamtDir}, nil } return nil, ErrNotADir } -func (d *BasicDirectory) computeEstimatedSize() { +func (d *BasicDirectory) setMaxLinks(n int) { + d.maxLinks = n +} + +func (d *BasicDirectory) setHAMTMaxFanout(n int) { + d.maxHAMTFanout = n +} + +func (d *BasicDirectory) setStats(mode os.FileMode, mtime time.Time) { + d.mode = mode + d.mtime = mtime +} + +func (d *BasicDirectory) computeEstimatedSizeAndTotalLinks() { d.estimatedSize = 0 // err is just breaking the iteration and we always return nil _ = d.ForEachLink(context.TODO(), func(l *ipld.Link) error { d.addToEstimatedSize(l.Name, l.Cid) + d.totalLinks++ return nil }) // ForEachLink will never fail traversing the BasicDirectory @@ -181,13 +357,16 @@ func (d *BasicDirectory) removeFromEstimatedSize(name string, linkCid cid.Cid) { // Something has gone very wrong. Log an error and recompute the // size from scratch. log.Error("BasicDirectory's estimatedSize went below 0") - d.computeEstimatedSize() + d.computeEstimatedSizeAndTotalLinks() } } // SetCidBuilder implements the `Directory` interface. func (d *BasicDirectory) SetCidBuilder(builder cid.Builder) { - d.node.SetCidBuilder(builder) + d.cidBuilder = builder + if d.node != nil { + d.node.SetCidBuilder(builder) + } } // AddChild implements the `Directory` interface. It adds (or replaces) @@ -219,7 +398,15 @@ func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid()) } - return d.estimatedSize+operationSizeChange >= HAMTShardingSize, nil + switchShardingSize := d.estimatedSize+operationSizeChange >= HAMTShardingSize + switchMaxLinks := false + // We should switch if maxLinks is set, we have reached it and a new link is being + // added. + if nodeToAdd != nil && entryToRemove == nil && d.maxLinks > 0 && + (d.totalLinks+1) > d.maxLinks { + switchMaxLinks = true + } + return switchShardingSize || switchMaxLinks, nil } // addLinkChild adds the link as an entry to this directory under the given @@ -228,8 +415,16 @@ func (d *BasicDirectory) addLinkChild(ctx context.Context, name string, link *ip // Remove old link and account for size change (if it existed; ignore // `ErrNotExist` otherwise). err := d.RemoveChild(ctx, name) - if err != nil && err != os.ErrNotExist { - return err + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + return err + } + } else { // existed + d.totalLinks-- + } + + if d.maxLinks > 0 && d.totalLinks+1 > d.maxLinks { + return errors.New("BasicDirectory: cannot add child: maxLinks reached") } err = d.node.AddRawLink(name, link) @@ -237,6 +432,7 @@ func (d *BasicDirectory) addLinkChild(ctx context.Context, name string, link *ip return err } d.addToEstimatedSize(name, link.Cid) + d.totalLinks++ return nil } @@ -304,6 +500,7 @@ func (d *BasicDirectory) RemoveChild(ctx context.Context, name string) error { // The name actually existed so we should update the estimated size. d.removeFromEstimatedSize(link.Name, link.Cid) + d.totalLinks-- return d.node.RemoveNodeLink(name) // GetNodeLink didn't return ErrLinkNotFound so this won't fail with that @@ -321,22 +518,18 @@ func (d *BasicDirectory) GetCidBuilder() cid.Builder { } // switchToSharding returns a HAMT implementation of this directory. -func (d *BasicDirectory) switchToSharding(ctx context.Context) (*HAMTDirectory, error) { - hamtDir := new(HAMTDirectory) - hamtDir.dserv = d.dserv - - shard, err := hamt.NewShard(d.dserv, DefaultShardWidth) +func (d *BasicDirectory) switchToSharding(ctx context.Context, opts ...DirectoryOption) (*HAMTDirectory, error) { + hamtDir, err := NewHAMTDirectory(d.dserv, 0, opts...) if err != nil { return nil, err } - shard.SetCidBuilder(d.node.CidBuilder()) - hamtDir.shard = shard for _, lnk := range d.node.Links() { err = hamtDir.shard.SetLink(ctx, lnk.Name, lnk) if err != nil { return nil, err } + hamtDir.totalLinks++ } return hamtDir, nil @@ -344,7 +537,23 @@ func (d *BasicDirectory) switchToSharding(ctx context.Context) (*HAMTDirectory, // SetCidBuilder implements the `Directory` interface. func (d *HAMTDirectory) SetCidBuilder(builder cid.Builder) { - d.shard.SetCidBuilder(builder) + d.cidBuilder = builder + if d.shard != nil { + d.shard.SetCidBuilder(builder) + } +} + +func (d *HAMTDirectory) setMaxLinks(n int) { + d.maxLinks = n +} + +func (d *HAMTDirectory) setHAMTMaxFanout(n int) { + d.maxHAMTFanout = n +} + +func (d *HAMTDirectory) setStats(mode os.FileMode, mtime time.Time) { + d.mode = mode + d.mtime = mtime } // AddChild implements the `Directory` interface. @@ -358,6 +567,9 @@ func (d *HAMTDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) d.removeFromSizeChange(oldChild.Name, oldChild.Cid) } d.addToSizeChange(name, nd.Cid()) + if oldChild == nil { + d.totalLinks++ + } return nil } @@ -396,6 +608,7 @@ func (d *HAMTDirectory) RemoveChild(ctx context.Context, name string) error { if oldChild != nil { d.removeFromSizeChange(oldChild.Name, oldChild.Cid) + d.totalLinks-- } return nil @@ -412,9 +625,10 @@ func (d *HAMTDirectory) GetCidBuilder() cid.Builder { } // switchToBasic returns a BasicDirectory implementation of this directory. -func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, error) { - basicDir := newEmptyBasicDirectory(d.dserv) - basicDir.SetCidBuilder(d.GetCidBuilder()) +func (d *HAMTDirectory) switchToBasic(ctx context.Context, opts ...DirectoryOption) (*BasicDirectory, error) { + // needsoSwichToBasicDir checks d.maxLinks is appropiate. No check is + // performed here. + basicDir := NewBasicDirectory(d.dserv, opts...) err := d.ForEachLink(ctx, func(lnk *ipld.Link) error { err := basicDir.addLinkChild(ctx, lnk.Name, lnk) @@ -472,14 +686,33 @@ func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid()) } - if d.sizeChange+operationSizeChange >= 0 { - // We won't have reduced the HAMT net size. - return false, nil + // We must switch if size and maxlinks are below threshold + canSwitchSize := false + // Directory size reduced, perhaps below limit. + if d.sizeChange+operationSizeChange < 0 { + canSwitchSize, err = d.sizeBelowThreshold(ctx, operationSizeChange) + if err != nil { + return false, err + } + } + + canSwitchMaxLinks := true + if d.maxLinks > 0 { + total := d.totalLinks + if nodeToAdd != nil { + total++ + } + if entryToRemove != nil { + total-- + } + if total > d.maxLinks { + // prevent switching as we would end up with too many links + canSwitchMaxLinks = false + } } - // We have reduced the directory size, check if went below the - // HAMTShardingSize threshold to trigger a switch. - return d.sizeBelowThreshold(ctx, operationSizeChange) + return canSwitchSize && canSwitchMaxLinks, nil + } // Evaluate directory size and a future sizeChange and check if it will be below @@ -535,7 +768,7 @@ func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int) // to switch from BasicDirectory to HAMTDirectory and backwards based on // size. type DynamicDirectory struct { - Directory + directoryWithOptions } var _ Directory = (*DynamicDirectory)(nil) @@ -543,7 +776,7 @@ var _ Directory = (*DynamicDirectory)(nil) // AddChild implements the `Directory` interface. We check when adding new entries // if we should switch to HAMTDirectory according to global option(s). func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { - hamtDir, ok := d.Directory.(*HAMTDirectory) + hamtDir, ok := d.directoryWithOptions.(*HAMTDirectory) if ok { // We evaluate a switch in the HAMTDirectory case even for an AddChild // as it may overwrite an existing entry and end up actually reducing @@ -554,7 +787,7 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No } if switchToBasic { - basicDir, err := hamtDir.switchToBasic(ctx) + basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(hamtDir.maxLinks), WithMaxHAMTFanout(hamtDir.maxHAMTFanout), WithCidBuilder(hamtDir.GetCidBuilder())) if err != nil { return err } @@ -562,15 +795,15 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No if err != nil { return err } - d.Directory = basicDir + d.directoryWithOptions = basicDir return nil } - return d.Directory.AddChild(ctx, name, nd) + return d.directoryWithOptions.AddChild(ctx, name, nd) } // BasicDirectory - basicDir := d.Directory.(*BasicDirectory) + basicDir := d.directoryWithOptions.(*BasicDirectory) switchToHAMT, err := basicDir.needsToSwitchToHAMTDir(name, nd) if err != nil { return err @@ -578,7 +811,14 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No if !switchToHAMT { return basicDir.AddChild(ctx, name, nd) } - hamtDir, err = basicDir.switchToSharding(ctx) + + hamtFanout := DefaultShardWidth + // Verify that our maxLinks is usuable for ShardWidth (power of 2, multiple of 8) + if validShardWidth(basicDir.maxHAMTFanout) { + hamtFanout = basicDir.maxHAMTFanout + } + + hamtDir, err = basicDir.switchToSharding(ctx, WithMaxHAMTFanout(hamtFanout), WithMaxLinks(basicDir.maxLinks), WithCidBuilder(basicDir.GetCidBuilder())) if err != nil { return err } @@ -586,7 +826,7 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No if err != nil { return err } - d.Directory = hamtDir + d.directoryWithOptions = hamtDir return nil } @@ -594,9 +834,9 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No // a HAMTDirectory that might need to be downgraded to a BasicDirectory. The // upgrade path is in AddChild. func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { - hamtDir, ok := d.Directory.(*HAMTDirectory) + hamtDir, ok := d.directoryWithOptions.(*HAMTDirectory) if !ok { - return d.Directory.RemoveChild(ctx, name) + return d.directoryWithOptions.RemoveChild(ctx, name) } switchToBasic, err := hamtDir.needsToSwitchToBasicDir(ctx, name, nil) @@ -608,14 +848,31 @@ func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { return hamtDir.RemoveChild(ctx, name) } - basicDir, err := hamtDir.switchToBasic(ctx) + maxLinks := hamtDir.maxLinks + // We have not removed the element that violates MaxLinks, so we have to +1 the limit. We -1 below. + if maxLinks > 0 { + maxLinks++ + } + + basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(maxLinks), WithCidBuilder(hamtDir.GetCidBuilder())) if err != nil { return err } + err = basicDir.RemoveChild(ctx, name) if err != nil { return err } - d.Directory = basicDir + + if maxLinks > 0 { + basicDir.maxLinks-- + } + d.directoryWithOptions = basicDir return nil } + +// validShardWidth verifies that the given number is positive, a power of 2 +// and a multiple of 8. +func validShardWidth(n int) bool { + return n > 0 && (n&(n-1)) == 0 && n&7 == 0 +} diff --git a/ipld/unixfs/io/directory_test.go b/ipld/unixfs/io/directory_test.go index f69ad0b27..f5301b8c8 100644 --- a/ipld/unixfs/io/directory_test.go +++ b/ipld/unixfs/io/directory_test.go @@ -17,7 +17,6 @@ import ( mdag "github.com/ipfs/boxo/ipld/merkledag" mdtest "github.com/ipfs/boxo/ipld/merkledag/test" ft "github.com/ipfs/boxo/ipld/unixfs" - "github.com/ipfs/boxo/ipld/unixfs/hamt" "github.com/ipfs/boxo/ipld/unixfs/internal" "github.com/ipfs/boxo/ipld/unixfs/private/linksize" blocks "github.com/ipfs/go-block-format" @@ -26,6 +25,7 @@ import ( dssync "github.com/ipfs/go-datastore/sync" ipld "github.com/ipfs/go-ipld-format" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestEmptyNode(t *testing.T) { @@ -117,7 +117,7 @@ func TestDuplicateAddDir(t *testing.T) { func TestBasicDirectory_estimatedSize(t *testing.T) { ds := mdtest.Mock() - basicDir := newEmptyBasicDirectory(ds) + basicDir := NewBasicDirectory(ds) testDirectorySizeEstimation(t, basicDir, ds, func(dir Directory) int { return dir.(*BasicDirectory).estimatedSize @@ -126,7 +126,7 @@ func TestBasicDirectory_estimatedSize(t *testing.T) { func TestHAMTDirectory_sizeChange(t *testing.T) { ds := mdtest.Mock() - hamtDir, err := newEmptyHAMTDirectory(ds, DefaultShardWidth) + hamtDir, err := NewHAMTDirectory(ds, 0, WithMaxLinks(DefaultShardWidth)) assert.NoError(t, err) testDirectorySizeEstimation(t, hamtDir, ds, func(dir Directory) int { @@ -205,13 +205,15 @@ func mockLinkSizeFunc(fixedSize int) func(linkName string, linkCid cid.Cid) int } func checkBasicDirectory(t *testing.T, dir Directory, errorMessage string) { - if _, ok := dir.(*DynamicDirectory).Directory.(*BasicDirectory); !ok { + t.Helper() + if _, ok := dir.(*DynamicDirectory).directoryWithOptions.(*BasicDirectory); !ok { t.Fatal(errorMessage) } } func checkHAMTDirectory(t *testing.T, dir Directory, errorMessage string) { - if _, ok := dir.(*DynamicDirectory).Directory.(*HAMTDirectory); !ok { + t.Helper() + if _, ok := dir.(*DynamicDirectory).directoryWithOptions.(*HAMTDirectory); !ok { t.Fatal(errorMessage) } } @@ -228,7 +230,7 @@ func TestProductionLinkSize(t *testing.T) { assert.Equal(t, 48, productionLinkSize(link.Name, link.Cid)) ds := mdtest.Mock() - basicDir := newEmptyBasicDirectory(ds) + basicDir := NewBasicDirectory(ds) assert.NoError(t, err) for i := 0; i < 10; i++ { basicDir.AddChild(context.Background(), strconv.FormatUint(uint64(i), 10), ft.EmptyFileNode()) @@ -297,8 +299,8 @@ func TestIntegrityOfDirectorySwitch(t *testing.T) { err := ds.Add(ctx, child) assert.NoError(t, err) - basicDir := newEmptyBasicDirectory(ds) - hamtDir, err := newEmptyHAMTDirectory(ds, DefaultShardWidth) + basicDir := NewBasicDirectory(ds) + hamtDir, err := NewHAMTDirectory(ds, 0, WithMaxLinks(DefaultShardWidth)) assert.NoError(t, err) for i := 0; i < 1000; i++ { basicDir.AddChild(ctx, strconv.FormatUint(uint64(i), 10), child) @@ -307,9 +309,9 @@ func TestIntegrityOfDirectorySwitch(t *testing.T) { compareDirectoryEntries(t, basicDir, hamtDir) hamtDirFromSwitch, err := basicDir.switchToSharding(ctx) - assert.NoError(t, err) + require.NoError(t, err) basicDirFromSwitch, err := hamtDir.switchToBasic(ctx) - assert.NoError(t, err) + require.NoError(t, err) compareDirectoryEntries(t, basicDir, basicDirFromSwitch) compareDirectoryEntries(t, hamtDir, hamtDirFromSwitch) } @@ -391,7 +393,7 @@ func TestHAMTEnumerationWhenComputingSize(t *testing.T) { // the HAMTShardingSize threshold. nodesToFetch += thresholdToWidthRatio - hamtDir, err := newHAMTDirectoryFromNode(dsrv, completeHAMTRoot) + hamtDir, err := NewHAMTDirectoryFromNode(dsrv, completeHAMTRoot) assert.NoError(t, err) countGetsDS.resetCounter() @@ -527,27 +529,130 @@ func TestDirBuilder(t *testing.T) { } } -func newHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirectory, error) { - shard, err := hamt.NewHamtFromDag(dserv, node) - if err != nil { - return nil, err +func TestBasicDirectoryWithMaxLinks(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + dir := NewBasicDirectory(ds, WithMaxLinks(2)) + + child1 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child1)) + + err := dir.AddChild(ctx, "entry1", child1) + require.NoError(t, err) + + child2 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child2)) + + err = dir.AddChild(ctx, "entry2", child2) + require.NoError(t, err) + + child3 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child3)) + + err = dir.AddChild(ctx, "entry3", child3) + require.Error(t, err) +} + +// TestHAMTDirectoryWithMaxLinks tests that no HAMT shard as more than MaxLinks. +func TestHAMTDirectoryWithMaxLinks(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + dir, err := NewHAMTDirectory(ds, 0, WithMaxHAMTFanout(8)) + require.NoError(t, err) + + // Ensure we have at least 2 levels of HAMT by adding many nodes + for i := 0; i < 300; i++ { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + err := dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + dirnd, err := dir.GetNode() + require.NoError(t, err) + + require.Equal(t, 8, len(dirnd.Links()), "Node should not have more than 8 links") + + for _, l := range dirnd.Links() { + childNd, err := ds.Get(ctx, l.Cid) + if err != nil { + t.Errorf("Failed to get node: %s", err) + continue + } + assert.Equal(t, 8, len(childNd.Links()), "Node does not have 8 links") } - return &HAMTDirectory{ - dserv: dserv, - shard: shard, - }, nil } -func newEmptyHAMTDirectory(dserv ipld.DAGService, shardWidth int) (*HAMTDirectory, error) { - shard, err := hamt.NewShard(dserv, shardWidth) - if err != nil { - return nil, err +func TestDynamicDirectoryWithMaxLinks(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + dir := NewDirectory(ds, WithMaxLinks(8), WithMaxHAMTFanout(16)) + + for i := 0; i < 8; i++ { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + err := dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + checkBasicDirectory(t, dir, "directory should be basic because it has less children than MaxLinks") } - return &HAMTDirectory{ - dserv: dserv, - shard: shard, - }, nil + for i := 8; i < 58; i++ { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + err := dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + checkHAMTDirectory(t, dir, "directory should be sharded because more children than MaxLinks") + } + + // Check that the directory root node has 8 links + dirnd, err := dir.GetNode() + require.NoError(t, err) + assert.Equal(t, 16, len(dirnd.Links()), "HAMT Directory root node should have 16 links") + + // Continue the code by removing 50 elements while checking that the underlying directory is a HAMT directory. + for i := 57; i >= 9; i-- { + err := dir.RemoveChild(ctx, fmt.Sprintf("entry%d", i)) + require.NoError(t, err) + checkHAMTDirectory(t, dir, "directory should still be sharded while more than 8 children") + } + + // Continue removing elements until the directory is a basic directory again + for i := 8; i >= 0; i-- { + err := dir.RemoveChild(ctx, fmt.Sprintf("entry%d", i)) + require.NoError(t, err) + checkBasicDirectory(t, dir, "directory should be basic when less than 8 children") + } +} + +// add a test that tests that validShardWidth(n) only returns true with positive numbers that are powers of 8 and multiples of 8. +func TestValidShardWidth(t *testing.T) { + testCases := []struct { + width int + expect bool + }{ + {0, false}, + {-1, false}, + {1, false}, + {2, false}, + {4, false}, + {8, true}, + {16, true}, + {32, true}, + {64, true}, + {512, true}, + {1024, true}, + {4096, true}, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("Width%d", tc.width), func(t *testing.T) { + result := validShardWidth(tc.width) + assert.Equal(t, tc.expect, result, "Expected %v for width %d", tc.expect, tc.width) + }) + } } // countGetsDS is a DAG service that keeps track of the number of