From e1291b4f5bfd2d627b05398b01d179086e47d5be Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Tue, 25 Mar 2025 23:41:16 +0100 Subject: [PATCH 01/13] ipld/unixfs/io: better initialization and options for Directories WithMaxWidth() -------------- The new option sets effectively a maximum width to the directories. Currently, on a dynamic directory, the switching to-from Basic to HAMT is controlled by HAMTShardingSize, and the width of the HAMT shards by DefaultShardWidth. When WithMaxLinks() is specified, the switching is additionally controlled by the number of links exceeding the limit. In that case, MaxLinks is used as ShardWidth. The directory can only be converted back to BasicDirectory when the total number of links is below the limit. Backwards compatibility is kept and tests have been added. Note that when setting MaxLinks to a high number, it is possible that we still suffer automatic conversion to HAMT before hitting MaxLinks, if the estimated directory size is above 256KiB (as before). WithStat() ---------- Allows to set Stat on a new directory. WithCidBuilder() --------------- Allows to set the CidBuilder on a new directory. --- CHANGELOG.md | 6 + ipld/unixfs/io/directory.go | 310 ++++++++++++++++++++++++++----- ipld/unixfs/io/directory_test.go | 129 ++++++++++--- 3 files changed, 374 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index adb5a03d5..b99838bbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,12 @@ The following emojis are used to highlight certain changes: ### Added +- `ipld/unixfs/io/directory`: We have made some changes to unixfs directory tooling: + - We have exposed creator methods for new `BasicDirectory` and `HAMTDirectory`, that complement the existing `NewDirectory()` which creates dynamic directories. + - We have added `WithCidBuilder(...)` and `WithMaxLinks(...)` as options to these new methods so that empty directories can be initilized as wished from the get-go. + - `WithMaxLinks(...)` is a new option that allows to set a limit to the number of children that a directory DAG node can have. For details on what exactly it does for each of the directory types, please check the documentation. + - `WithStats(...)` is a new option that allows to set the Stat information for the new directory node. + ### Changed - upgrade to `go-libp2p` [v0.41.1](https://github.com/libp2p/go-libp2p/releases/tag/v0.41.1) diff --git a/ipld/unixfs/io/directory.go b/ipld/unixfs/io/directory.go index edac04b22..e3ec3b093 100644 --- a/ipld/unixfs/io/directory.go +++ b/ipld/unixfs/io/directory.go @@ -4,6 +4,7 @@ import ( "context" "errors" "os" + "time" "github.com/alecthomas/units" mdag "github.com/ipfs/boxo/ipld/merkledag" @@ -69,6 +70,57 @@ type Directory interface { // GetCidBuilder returns the CID Builder used. GetCidBuilder() cid.Builder + + // setMaxLinks sets the max width of the Directory. + setMaxLinks(n int) +} + +type directoryWithOptions interface { + Directory + // setMaxLinks sets the max width of the Directory. + setMaxLinks(n int) + setStats(os.FileMode, time.Time) +} + +type DirectoryOption func(directoryWithOptions) + +// WithMaxLinks stablishes the max number of links allowed for a directory: +// +// - On Dynamic directories using a BasicDirectory, it can trigger conversion +// to HAMT when set and exceeded. The subsequent HAMT nodes will use MaxLinks +// as ShardWidth. Conversion can happen too based on HAMTShardingSize. +// +// - On Dynamic directories using a HAMTDirectory, it can trigger conversion +// to BasicDirectory when the number of links is below MaxLinks (and +// HAMTShardingSize allows). +// +// - On Basic directories, it causes an error when adding more than MaxLinks +// children. +// +// - On HAMT directories, it sets the ShardWidth, otherwise DefaultShardWidth +// is used. +// +// HAMT directories require this value to be a power of 2 and a multiple of 8. +// If it is not the case, it will not be used and DefaultHAMTWidth be used +// instead. +func WithMaxLinks(n int) DirectoryOption { + return func(d directoryWithOptions) { + d.setMaxLinks(n) + } +} + +// WithCidBuilder sets the CidBuilder for new Directories. +func WithCidBuilder(cb cid.Builder) DirectoryOption { + return func(d directoryWithOptions) { + d.SetCidBuilder(cb) + } +} + +// WithStat can be used to set the empty directory node permissions. +func WithStat(mode os.FileMode, mtime time.Time) DirectoryOption { + return func(d directoryWithOptions) { + d.setStats(mode, mtime) + } } // TODO: Evaluate removing `dserv` from this layer and providing it in MFS. @@ -84,6 +136,12 @@ func init() { linksize.LinkSizeFunction = productionLinkSize } +var ( + _ directoryWithOptions = (*DynamicDirectory)(nil) + _ directoryWithOptions = (*BasicDirectory)(nil) + _ directoryWithOptions = (*HAMTDirectory)(nil) +) + // BasicDirectory is the basic implementation of `Directory`. All the entries // are stored in a single node. type BasicDirectory struct { @@ -96,6 +154,14 @@ type BasicDirectory struct { // (We maintain this value up to date even if the HAMTShardingSize is off // since potentially the option could be activated on the fly.) estimatedSize int + totalLinks int + + // opts + // maxNumberOfLinks. If set, can trigger conversion to HAMT directory. + maxLinks int + cidBuilder cid.Builder + mode os.FileMode + mtime time.Time } // HAMTDirectory is the HAMT implementation of `Directory`. @@ -104,30 +170,107 @@ type HAMTDirectory struct { shard *hamt.Shard dserv ipld.DAGService + // opts + maxLinks int + cidBuilder cid.Builder + mode os.FileMode + mtime time.Time + // Track the changes in size by the AddChild and RemoveChild calls // for the HAMTShardingSize option. sizeChange int + totalLinks int } -func newEmptyBasicDirectory(dserv ipld.DAGService) *BasicDirectory { - return newBasicDirectoryFromNode(dserv, format.EmptyDirNode()) +// NewBasicDirectory creates an empty basic directory with the given options. +func NewBasicDirectory(dserv ipld.DAGService, opts ...DirectoryOption) *BasicDirectory { + basicDir := new(BasicDirectory) + basicDir.dserv = dserv + + for _, o := range opts { + o(basicDir) + } + + var node *mdag.ProtoNode + if basicDir.mode > 0 { + node = format.EmptyDirNodeWithStat(basicDir.mode, basicDir.mtime) + } else { + node = format.EmptyDirNode() + } + basicDir.node = node + basicDir.node.SetCidBuilder(basicDir.cidBuilder) + + // Scan node links (if any) to restore estimated size. + basicDir.computeEstimatedSizeAndTotalLinks() + + return basicDir } -func newBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *BasicDirectory { +// NewBasicDirectoryFromNode creates a basic directory wrapping the given +// ProtoNode. +func NewBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *BasicDirectory { basicDir := new(BasicDirectory) basicDir.node = node basicDir.dserv = dserv // Scan node links (if any) to restore estimated size. - basicDir.computeEstimatedSize() + basicDir.computeEstimatedSizeAndTotalLinks() return basicDir } -// NewDirectory returns a Directory implemented by DynamicDirectory -// containing a BasicDirectory that can be converted to a HAMTDirectory. -func NewDirectory(dserv ipld.DAGService) Directory { - return &DynamicDirectory{newEmptyBasicDirectory(dserv)} +// NewHAMTDirectory creates an empty HAMT directory with the given options. +func NewHAMTDirectory(dserv ipld.DAGService, sizeChange int, opts ...DirectoryOption) (*HAMTDirectory, error) { + dir := new(HAMTDirectory) + dir.dserv = dserv + dir.sizeChange = sizeChange + dir.maxLinks = 0 + + for _, opt := range opts { + opt(dir) + } + + shardWidth := DefaultShardWidth + if dir.maxLinks > 0 { + if !validShardWidth(dir.maxLinks) { + return nil, errors.New("MaxLinks must be a power of 2 and multiple of 8") + } + shardWidth = dir.maxLinks + } + + shard, err := hamt.NewShard(dir.dserv, shardWidth) + if err != nil { + return nil, err + } + + shard.SetCidBuilder(dir.cidBuilder) + dir.shard = shard + + return dir, nil +} + +// NewHAMTDirectoryFromNode creates a HAMT directory from the given node, +// which must correspond to an existing HAMT. +func NewHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirectory, error) { + dir := new(HAMTDirectory) + dir.dserv = dserv + dir.sizeChange = 0 + + shard, err := hamt.NewHamtFromDag(dserv, node) + if err != nil { + return nil, err + } + dir.shard = shard + dir.totalLinks = len(node.Links()) + + return dir, nil +} + +// NewDirectory returns a Directory implemented by DynamicDirectory containing +// a BasicDirectory that automatically converts to a from a HAMTDirectory +// based on HAMTShardingSize and MaxLinks (when set). +func NewDirectory(dserv ipld.DAGService, opts ...DirectoryOption) Directory { + return &DynamicDirectory{NewBasicDirectory(dserv, opts...)} } // ErrNotADir implies that the given node was not a unixfs directory @@ -148,23 +291,33 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err switch fsNode.Type() { case format.TDirectory: - return &DynamicDirectory{newBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil + return &DynamicDirectory{NewBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil case format.THAMTShard: - shard, err := hamt.NewHamtFromDag(dserv, node) + hamtDir, err := NewHAMTDirectoryFromNode(dserv, node) if err != nil { return nil, err } - return &DynamicDirectory{&HAMTDirectory{shard, dserv, 0}}, nil + return &DynamicDirectory{hamtDir}, nil } return nil, ErrNotADir } -func (d *BasicDirectory) computeEstimatedSize() { +func (d *BasicDirectory) setMaxLinks(n int) { + d.maxLinks = n +} + +func (d *BasicDirectory) setStats(mode os.FileMode, mtime time.Time) { + d.mode = mode + d.mtime = mtime +} + +func (d *BasicDirectory) computeEstimatedSizeAndTotalLinks() { d.estimatedSize = 0 // err is just breaking the iteration and we always return nil _ = d.ForEachLink(context.TODO(), func(l *ipld.Link) error { d.addToEstimatedSize(l.Name, l.Cid) + d.totalLinks++ return nil }) // ForEachLink will never fail traversing the BasicDirectory @@ -181,13 +334,16 @@ func (d *BasicDirectory) removeFromEstimatedSize(name string, linkCid cid.Cid) { // Something has gone very wrong. Log an error and recompute the // size from scratch. log.Error("BasicDirectory's estimatedSize went below 0") - d.computeEstimatedSize() + d.computeEstimatedSizeAndTotalLinks() } } // SetCidBuilder implements the `Directory` interface. func (d *BasicDirectory) SetCidBuilder(builder cid.Builder) { - d.node.SetCidBuilder(builder) + d.cidBuilder = builder + if d.node != nil { + d.node.SetCidBuilder(builder) + } } // AddChild implements the `Directory` interface. It adds (or replaces) @@ -219,7 +375,15 @@ func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid()) } - return d.estimatedSize+operationSizeChange >= HAMTShardingSize, nil + switchShardingSize := d.estimatedSize+operationSizeChange >= HAMTShardingSize + switchMaxLinks := false + // We should switch if we have reached maxLinks and a new link is being + // added and maxLinks is valid for shardWidth. + if nodeToAdd != nil && entryToRemove == nil && validShardWidth(d.maxLinks) && + (d.totalLinks+1) > d.maxLinks { + switchMaxLinks = true + } + return switchShardingSize || switchMaxLinks, nil } // addLinkChild adds the link as an entry to this directory under the given @@ -231,12 +395,20 @@ func (d *BasicDirectory) addLinkChild(ctx context.Context, name string, link *ip if err != nil && err != os.ErrNotExist { return err } + if err == nil { // existed + d.totalLinks-- + } + + if d.maxLinks > 0 && d.totalLinks+1 > d.maxLinks { + return errors.New("BasicDirectory: cannot add child: maxLinks reached") + } err = d.node.AddRawLink(name, link) if err != nil { return err } d.addToEstimatedSize(name, link.Cid) + d.totalLinks++ return nil } @@ -304,6 +476,7 @@ func (d *BasicDirectory) RemoveChild(ctx context.Context, name string) error { // The name actually existed so we should update the estimated size. d.removeFromEstimatedSize(link.Name, link.Cid) + d.totalLinks-- return d.node.RemoveNodeLink(name) // GetNodeLink didn't return ErrLinkNotFound so this won't fail with that @@ -321,22 +494,18 @@ func (d *BasicDirectory) GetCidBuilder() cid.Builder { } // switchToSharding returns a HAMT implementation of this directory. -func (d *BasicDirectory) switchToSharding(ctx context.Context) (*HAMTDirectory, error) { - hamtDir := new(HAMTDirectory) - hamtDir.dserv = d.dserv - - shard, err := hamt.NewShard(d.dserv, DefaultShardWidth) +func (d *BasicDirectory) switchToSharding(ctx context.Context, opts ...DirectoryOption) (*HAMTDirectory, error) { + hamtDir, err := NewHAMTDirectory(d.dserv, 0, opts...) if err != nil { return nil, err } - shard.SetCidBuilder(d.node.CidBuilder()) - hamtDir.shard = shard for _, lnk := range d.node.Links() { err = hamtDir.shard.SetLink(ctx, lnk.Name, lnk) if err != nil { return nil, err } + hamtDir.totalLinks++ } return hamtDir, nil @@ -344,7 +513,19 @@ func (d *BasicDirectory) switchToSharding(ctx context.Context) (*HAMTDirectory, // SetCidBuilder implements the `Directory` interface. func (d *HAMTDirectory) SetCidBuilder(builder cid.Builder) { - d.shard.SetCidBuilder(builder) + d.cidBuilder = builder + if d.shard != nil { + d.shard.SetCidBuilder(builder) + } +} + +func (d *HAMTDirectory) setMaxLinks(maxLinks int) { + d.maxLinks = maxLinks +} + +func (d *HAMTDirectory) setStats(mode os.FileMode, mtime time.Time) { + d.mode = mode + d.mtime = mtime } // AddChild implements the `Directory` interface. @@ -358,6 +539,9 @@ func (d *HAMTDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) d.removeFromSizeChange(oldChild.Name, oldChild.Cid) } d.addToSizeChange(name, nd.Cid()) + if oldChild == nil { + d.totalLinks++ + } return nil } @@ -396,6 +580,7 @@ func (d *HAMTDirectory) RemoveChild(ctx context.Context, name string) error { if oldChild != nil { d.removeFromSizeChange(oldChild.Name, oldChild.Cid) + d.totalLinks-- } return nil @@ -412,9 +597,10 @@ func (d *HAMTDirectory) GetCidBuilder() cid.Builder { } // switchToBasic returns a BasicDirectory implementation of this directory. -func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, error) { - basicDir := newEmptyBasicDirectory(d.dserv) - basicDir.SetCidBuilder(d.GetCidBuilder()) +func (d *HAMTDirectory) switchToBasic(ctx context.Context, opts ...DirectoryOption) (*BasicDirectory, error) { + // needsToSwichToBasicDir checks d.maxLinks is appropiate. No check is + // performed here. + basicDir := NewBasicDirectory(d.dserv, opts...) err := d.ForEachLink(ctx, func(lnk *ipld.Link) error { err := basicDir.addLinkChild(ctx, lnk.Name, lnk) @@ -472,14 +658,32 @@ func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid()) } - if d.sizeChange+operationSizeChange >= 0 { - // We won't have reduced the HAMT net size. - return false, nil + // We must switch if size and maxlinks are below threshold + canSwitchSize := false + // Directory size reduced, perhaps below limit. + if d.sizeChange+operationSizeChange < 0 { + canSwitchSize, err = d.sizeBelowThreshold(ctx, operationSizeChange) + if err != nil { + return false, err + } } - // We have reduced the directory size, check if went below the - // HAMTShardingSize threshold to trigger a switch. - return d.sizeBelowThreshold(ctx, operationSizeChange) + canSwitchMaxLinks := true + if d.maxLinks > 0 { + total := d.totalLinks + if nodeToAdd != nil && entryToRemove == nil { + total++ + } else if nodeToAdd == nil && entryToRemove != nil { + total-- + } + if total > d.maxLinks { + // prevent switching as we would end up with too many links + canSwitchMaxLinks = false + } + } + + return canSwitchSize && canSwitchMaxLinks, nil + } // Evaluate directory size and a future sizeChange and check if it will be below @@ -535,7 +739,7 @@ func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int) // to switch from BasicDirectory to HAMTDirectory and backwards based on // size. type DynamicDirectory struct { - Directory + directoryWithOptions } var _ Directory = (*DynamicDirectory)(nil) @@ -543,7 +747,7 @@ var _ Directory = (*DynamicDirectory)(nil) // AddChild implements the `Directory` interface. We check when adding new entries // if we should switch to HAMTDirectory according to global option(s). func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { - hamtDir, ok := d.Directory.(*HAMTDirectory) + hamtDir, ok := d.directoryWithOptions.(*HAMTDirectory) if ok { // We evaluate a switch in the HAMTDirectory case even for an AddChild // as it may overwrite an existing entry and end up actually reducing @@ -554,7 +758,7 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No } if switchToBasic { - basicDir, err := hamtDir.switchToBasic(ctx) + basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(hamtDir.maxLinks), WithCidBuilder(hamtDir.GetCidBuilder())) if err != nil { return err } @@ -562,15 +766,15 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No if err != nil { return err } - d.Directory = basicDir + d.directoryWithOptions = basicDir return nil } - return d.Directory.AddChild(ctx, name, nd) + return d.directoryWithOptions.AddChild(ctx, name, nd) } // BasicDirectory - basicDir := d.Directory.(*BasicDirectory) + basicDir := d.directoryWithOptions.(*BasicDirectory) switchToHAMT, err := basicDir.needsToSwitchToHAMTDir(name, nd) if err != nil { return err @@ -578,7 +782,16 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No if !switchToHAMT { return basicDir.AddChild(ctx, name, nd) } - hamtDir, err = basicDir.switchToSharding(ctx) + + maxLinks := DefaultShardWidth + // Verify that our maxLinks is usuable for ShardWidth (power of 2, multiple of 8) + if validShardWidth(basicDir.maxLinks) { + maxLinks = basicDir.maxLinks + } else if basicDir.maxLinks > 0 { + log.Warning("using DefaultShardWidth for HAMT directories. MaxLinks cannot be used as it is not a power of 2 and multiple of 8.") + } + + hamtDir, err = basicDir.switchToSharding(ctx, WithMaxLinks(maxLinks), WithCidBuilder(basicDir.GetCidBuilder())) if err != nil { return err } @@ -586,7 +799,7 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No if err != nil { return err } - d.Directory = hamtDir + d.directoryWithOptions = hamtDir return nil } @@ -594,9 +807,9 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No // a HAMTDirectory that might need to be downgraded to a BasicDirectory. The // upgrade path is in AddChild. func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { - hamtDir, ok := d.Directory.(*HAMTDirectory) + hamtDir, ok := d.directoryWithOptions.(*HAMTDirectory) if !ok { - return d.Directory.RemoveChild(ctx, name) + return d.directoryWithOptions.RemoveChild(ctx, name) } switchToBasic, err := hamtDir.needsToSwitchToBasicDir(ctx, name, nil) @@ -608,7 +821,8 @@ func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { return hamtDir.RemoveChild(ctx, name) } - basicDir, err := hamtDir.switchToBasic(ctx) + // We have not removed the element that violates MaxLinks, so we have to +1 the limit. We -1 below. + basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(hamtDir.maxLinks+1), WithCidBuilder(hamtDir.GetCidBuilder())) if err != nil { return err } @@ -616,6 +830,14 @@ func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { if err != nil { return err } - d.Directory = basicDir + + basicDir.setMaxLinks(hamtDir.maxLinks - 1) + d.directoryWithOptions = basicDir return nil } + +// validShardWidth verifies that the given number is positive, a power of 2 +// and a multiple of 8. +func validShardWidth(n int) bool { + return n > 0 && (n&(n-1)) == 0 && n%8 == 0 +} diff --git a/ipld/unixfs/io/directory_test.go b/ipld/unixfs/io/directory_test.go index f69ad0b27..a89ce4865 100644 --- a/ipld/unixfs/io/directory_test.go +++ b/ipld/unixfs/io/directory_test.go @@ -17,7 +17,6 @@ import ( mdag "github.com/ipfs/boxo/ipld/merkledag" mdtest "github.com/ipfs/boxo/ipld/merkledag/test" ft "github.com/ipfs/boxo/ipld/unixfs" - "github.com/ipfs/boxo/ipld/unixfs/hamt" "github.com/ipfs/boxo/ipld/unixfs/internal" "github.com/ipfs/boxo/ipld/unixfs/private/linksize" blocks "github.com/ipfs/go-block-format" @@ -26,6 +25,7 @@ import ( dssync "github.com/ipfs/go-datastore/sync" ipld "github.com/ipfs/go-ipld-format" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestEmptyNode(t *testing.T) { @@ -117,7 +117,7 @@ func TestDuplicateAddDir(t *testing.T) { func TestBasicDirectory_estimatedSize(t *testing.T) { ds := mdtest.Mock() - basicDir := newEmptyBasicDirectory(ds) + basicDir := NewBasicDirectory(ds) testDirectorySizeEstimation(t, basicDir, ds, func(dir Directory) int { return dir.(*BasicDirectory).estimatedSize @@ -126,7 +126,7 @@ func TestBasicDirectory_estimatedSize(t *testing.T) { func TestHAMTDirectory_sizeChange(t *testing.T) { ds := mdtest.Mock() - hamtDir, err := newEmptyHAMTDirectory(ds, DefaultShardWidth) + hamtDir, err := NewHAMTDirectory(ds, 0, WithMaxLinks(DefaultShardWidth)) assert.NoError(t, err) testDirectorySizeEstimation(t, hamtDir, ds, func(dir Directory) int { @@ -205,13 +205,13 @@ func mockLinkSizeFunc(fixedSize int) func(linkName string, linkCid cid.Cid) int } func checkBasicDirectory(t *testing.T, dir Directory, errorMessage string) { - if _, ok := dir.(*DynamicDirectory).Directory.(*BasicDirectory); !ok { + if _, ok := dir.(*DynamicDirectory).directoryWithOptions.(*BasicDirectory); !ok { t.Fatal(errorMessage) } } func checkHAMTDirectory(t *testing.T, dir Directory, errorMessage string) { - if _, ok := dir.(*DynamicDirectory).Directory.(*HAMTDirectory); !ok { + if _, ok := dir.(*DynamicDirectory).directoryWithOptions.(*HAMTDirectory); !ok { t.Fatal(errorMessage) } } @@ -228,7 +228,7 @@ func TestProductionLinkSize(t *testing.T) { assert.Equal(t, 48, productionLinkSize(link.Name, link.Cid)) ds := mdtest.Mock() - basicDir := newEmptyBasicDirectory(ds) + basicDir := NewBasicDirectory(ds) assert.NoError(t, err) for i := 0; i < 10; i++ { basicDir.AddChild(context.Background(), strconv.FormatUint(uint64(i), 10), ft.EmptyFileNode()) @@ -297,8 +297,8 @@ func TestIntegrityOfDirectorySwitch(t *testing.T) { err := ds.Add(ctx, child) assert.NoError(t, err) - basicDir := newEmptyBasicDirectory(ds) - hamtDir, err := newEmptyHAMTDirectory(ds, DefaultShardWidth) + basicDir := NewBasicDirectory(ds) + hamtDir, err := NewHAMTDirectory(ds, 0, WithMaxLinks(DefaultShardWidth)) assert.NoError(t, err) for i := 0; i < 1000; i++ { basicDir.AddChild(ctx, strconv.FormatUint(uint64(i), 10), child) @@ -307,9 +307,9 @@ func TestIntegrityOfDirectorySwitch(t *testing.T) { compareDirectoryEntries(t, basicDir, hamtDir) hamtDirFromSwitch, err := basicDir.switchToSharding(ctx) - assert.NoError(t, err) + require.NoError(t, err) basicDirFromSwitch, err := hamtDir.switchToBasic(ctx) - assert.NoError(t, err) + require.NoError(t, err) compareDirectoryEntries(t, basicDir, basicDirFromSwitch) compareDirectoryEntries(t, hamtDir, hamtDirFromSwitch) } @@ -391,7 +391,7 @@ func TestHAMTEnumerationWhenComputingSize(t *testing.T) { // the HAMTShardingSize threshold. nodesToFetch += thresholdToWidthRatio - hamtDir, err := newHAMTDirectoryFromNode(dsrv, completeHAMTRoot) + hamtDir, err := NewHAMTDirectoryFromNode(dsrv, completeHAMTRoot) assert.NoError(t, err) countGetsDS.resetCounter() @@ -527,27 +527,102 @@ func TestDirBuilder(t *testing.T) { } } -func newHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirectory, error) { - shard, err := hamt.NewHamtFromDag(dserv, node) - if err != nil { - return nil, err +func TestBasicDirectoryWithMaxLinks(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + dir := NewBasicDirectory(ds, WithMaxLinks(2)) + + child1 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child1)) + + err := dir.AddChild(ctx, "entry1", child1) + require.NoError(t, err) + + child2 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child2)) + + err = dir.AddChild(ctx, "entry2", child2) + require.NoError(t, err) + + child3 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child3)) + + err = dir.AddChild(ctx, "entry3", child3) + require.Error(t, err) +} + +// TestHAMTDirectoryWithMaxLinks tests that no HAMT shard as more than MaxLinks. +func TestHAMTDirectoryWithMaxLinks(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + dir, err := NewHAMTDirectory(ds, 0, WithMaxLinks(8)) + require.NoError(t, err) + + // Ensure we have at least 2 levels of HAMT by adding many nodes + for i := 0; i < 300; i++ { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + err := dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + dirnd, err := dir.GetNode() + require.NoError(t, err) + + require.Equal(t, 8, len(dirnd.Links()), "Node should not have more than 8 links") + + for _, l := range dirnd.Links() { + childNd, err := ds.Get(ctx, l.Cid) + if err != nil { + t.Errorf("Failed to get node: %s", err) + continue + } + assert.Equal(t, 8, len(childNd.Links()), "Node does not have 8 links") } - return &HAMTDirectory{ - dserv: dserv, - shard: shard, - }, nil } -func newEmptyHAMTDirectory(dserv ipld.DAGService, shardWidth int) (*HAMTDirectory, error) { - shard, err := hamt.NewShard(dserv, shardWidth) - if err != nil { - return nil, err +func TestDynamicDirectoryWithMaxLinks(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + dir := NewDirectory(ds, WithMaxLinks(8)) + + for i := 0; i < 8; i++ { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + err := dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + checkBasicDirectory(t, dir, "directory should be basic because it has less children than MaxLinks") } - return &HAMTDirectory{ - dserv: dserv, - shard: shard, - }, nil + for i := 8; i < 58; i++ { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + err := dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + checkHAMTDirectory(t, dir, "directory should be sharded because more children than MaxLinks") + } + + // Check that the directory root node has 8 links + dirnd, err := dir.GetNode() + require.NoError(t, err) + assert.Equal(t, 8, len(dirnd.Links()), "HAMT Directory root node should have 8 links") + + // Continue the code by removing 50 elements while checking that the underlying directory is a HAMT directory. + for i := 57; i >= 9; i-- { + err := dir.RemoveChild(ctx, fmt.Sprintf("entry%d", i)) + require.NoError(t, err) + checkHAMTDirectory(t, dir, "directory should still be sharded while more than 8 children") + } + + // Continue removing elements until the directory is a basic directory again + for i := 8; i >= 0; i-- { + err := dir.RemoveChild(ctx, fmt.Sprintf("entry%d", i)) + require.NoError(t, err) + checkBasicDirectory(t, dir, "directory should be basic when less than 8 children") + } } // countGetsDS is a DAG service that keeps track of the number of From b980e96654c173b6a849c082d223a320c03ac552 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Thu, 3 Apr 2025 13:14:40 +0200 Subject: [PATCH 02/13] ipld/unixfs/directory: address review from @gammazero --- ipld/unixfs/io/directory.go | 38 +++++++++++++++++--------------- ipld/unixfs/io/directory_test.go | 28 +++++++++++++++++++++++ 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/ipld/unixfs/io/directory.go b/ipld/unixfs/io/directory.go index e3ec3b093..66e20e940 100644 --- a/ipld/unixfs/io/directory.go +++ b/ipld/unixfs/io/directory.go @@ -221,13 +221,13 @@ func NewBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *Bas // NewHAMTDirectory creates an empty HAMT directory with the given options. func NewHAMTDirectory(dserv ipld.DAGService, sizeChange int, opts ...DirectoryOption) (*HAMTDirectory, error) { - dir := new(HAMTDirectory) - dir.dserv = dserv - dir.sizeChange = sizeChange - dir.maxLinks = 0 + dir := HAMTDirectory{ + dserv: dserv, + sizeChange: sizeChange, + } for _, opt := range opts { - opt(dir) + opt(&dir) } shardWidth := DefaultShardWidth @@ -246,15 +246,15 @@ func NewHAMTDirectory(dserv ipld.DAGService, sizeChange int, opts ...DirectoryOp shard.SetCidBuilder(dir.cidBuilder) dir.shard = shard - return dir, nil + return &dir, nil } // NewHAMTDirectoryFromNode creates a HAMT directory from the given node, // which must correspond to an existing HAMT. func NewHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirectory, error) { - dir := new(HAMTDirectory) - dir.dserv = dserv - dir.sizeChange = 0 + dir := HAMTDirectory{ + dserv: dserv, + } shard, err := hamt.NewHamtFromDag(dserv, node) if err != nil { @@ -263,7 +263,7 @@ func NewHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirec dir.shard = shard dir.totalLinks = len(node.Links()) - return dir, nil + return &dir, nil } // NewDirectory returns a Directory implemented by DynamicDirectory containing @@ -392,10 +392,11 @@ func (d *BasicDirectory) addLinkChild(ctx context.Context, name string, link *ip // Remove old link and account for size change (if it existed; ignore // `ErrNotExist` otherwise). err := d.RemoveChild(ctx, name) - if err != nil && err != os.ErrNotExist { - return err - } - if err == nil { // existed + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + return err + } + } else { // existed d.totalLinks-- } @@ -671,9 +672,10 @@ func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string canSwitchMaxLinks := true if d.maxLinks > 0 { total := d.totalLinks - if nodeToAdd != nil && entryToRemove == nil { + if nodeToAdd != nil { total++ - } else if nodeToAdd == nil && entryToRemove != nil { + } + if entryToRemove != nil { total-- } if total > d.maxLinks { @@ -788,7 +790,7 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No if validShardWidth(basicDir.maxLinks) { maxLinks = basicDir.maxLinks } else if basicDir.maxLinks > 0 { - log.Warning("using DefaultShardWidth for HAMT directories. MaxLinks cannot be used as it is not a power of 2 and multiple of 8.") + log.Warn("using DefaultShardWidth for HAMT directories. MaxLinks cannot be used as it is not a power of 2 and multiple of 8.") } hamtDir, err = basicDir.switchToSharding(ctx, WithMaxLinks(maxLinks), WithCidBuilder(basicDir.GetCidBuilder())) @@ -839,5 +841,5 @@ func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { // validShardWidth verifies that the given number is positive, a power of 2 // and a multiple of 8. func validShardWidth(n int) bool { - return n > 0 && (n&(n-1)) == 0 && n%8 == 0 + return n > 0 && (n&(n-1)) == 0 && n&7 == 0 } diff --git a/ipld/unixfs/io/directory_test.go b/ipld/unixfs/io/directory_test.go index a89ce4865..866de794f 100644 --- a/ipld/unixfs/io/directory_test.go +++ b/ipld/unixfs/io/directory_test.go @@ -625,6 +625,34 @@ func TestDynamicDirectoryWithMaxLinks(t *testing.T) { } } +// add a test that tests that validShardWidth(n) only returns true with positive numbers that are powers of 8 and multiples of 8. +func TestValidShardWidth(t *testing.T) { + testCases := []struct { + width int + expect bool + }{ + {0, false}, + {-1, false}, + {1, false}, + {2, false}, + {4, false}, + {8, true}, + {16, true}, + {32, true}, + {64, true}, + {512, true}, + {1024, true}, + {4096, true}, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("Width%d", tc.width), func(t *testing.T) { + result := validShardWidth(tc.width) + assert.Equal(t, tc.expect, result, "Expected %v for width %d", tc.expect, tc.width) + }) + } +} + // countGetsDS is a DAG service that keeps track of the number of // unique CIDs fetched. type countGetsDS struct { From 6e34f0b114e34ff358dcc1a827a7b8d5c4e6a2e9 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Fri, 4 Apr 2025 17:45:01 +0200 Subject: [PATCH 03/13] unixfs/directory: Add specific MaxHAMTFanout() option, separate from MaxLinks --- ipld/unixfs/io/directory.go | 138 +++++++++++++++++++------------ ipld/unixfs/io/directory_test.go | 8 +- 2 files changed, 89 insertions(+), 57 deletions(-) diff --git a/ipld/unixfs/io/directory.go b/ipld/unixfs/io/directory.go index 66e20e940..baff71763 100644 --- a/ipld/unixfs/io/directory.go +++ b/ipld/unixfs/io/directory.go @@ -79,33 +79,52 @@ type directoryWithOptions interface { Directory // setMaxLinks sets the max width of the Directory. setMaxLinks(n int) + // used for hamt directories only + setHAMTMaxFanout(n int) setStats(os.FileMode, time.Time) } type DirectoryOption func(directoryWithOptions) -// WithMaxLinks stablishes the max number of links allowed for a directory: +// WithMaxLinks stablishes the max number of links allowed for a Basic +// directory or a Dynamic directory with an underlying Basic directory: // // - On Dynamic directories using a BasicDirectory, it can trigger conversion -// to HAMT when set and exceeded. The subsequent HAMT nodes will use MaxLinks -// as ShardWidth. Conversion can happen too based on HAMTShardingSize. +// to HAMT when set and exceeded. The subsequent HAMT nodes will use +// MaxHAMTFanout as ShardWidth when set, or DefaultShardWidth +// otherwise. Conversion can be triggered too based on HAMTShardingSize. // // - On Dynamic directories using a HAMTDirectory, it can trigger conversion -// to BasicDirectory when the number of links is below MaxLinks (and +// to BasicDirectory when the number of directory entries is below MaxLinks (and // HAMTShardingSize allows). // -// - On Basic directories, it causes an error when adding more than MaxLinks -// children. +// - On pure Basic directories, it causes an error when adding more than +// MaxLinks children. +func WithMaxLinks(n int) DirectoryOption { + return func(d directoryWithOptions) { + d.setMaxLinks(n) + } +} + +// WithMaxHAMTFanout stablishes the maximum fanout factor (number of links) for +// a HAMT directory or a Dynamic directory with an underlying HAMT directory: +// +// - On Dynamic directories, it specifies the HAMT fanout when a HAMT +// used. When unset, DefaultShardWidth applies. // -// - On HAMT directories, it sets the ShardWidth, otherwise DefaultShardWidth +// - On pure HAMT directories, it sets the ShardWidth, otherwise DefaultShardWidth // is used. // -// HAMT directories require this value to be a power of 2 and a multiple of 8. -// If it is not the case, it will not be used and DefaultHAMTWidth be used -// instead. -func WithMaxLinks(n int) DirectoryOption { +// HAMT directories require this value to be a power of 2 and a multiple of +// 8. If it is not the case, it will not be used and DefaultHAMTWidth will be +// used instead. +func WithMaxHAMTFanout(n int) DirectoryOption { + if !validShardWidth(n) { + log.Warnf("Invalid HAMTMaxFanout: %d. Using default (%d)", n, DefaultShardWidth) + n = DefaultShardWidth + } return func(d directoryWithOptions) { - d.setMaxLinks(n) + d.setHAMTMaxFanout(n) } } @@ -158,10 +177,11 @@ type BasicDirectory struct { // opts // maxNumberOfLinks. If set, can trigger conversion to HAMT directory. - maxLinks int - cidBuilder cid.Builder - mode os.FileMode - mtime time.Time + maxLinks int + maxHAMTFanout int + cidBuilder cid.Builder + mode os.FileMode + mtime time.Time } // HAMTDirectory is the HAMT implementation of `Directory`. @@ -171,10 +191,11 @@ type HAMTDirectory struct { dserv ipld.DAGService // opts - maxLinks int - cidBuilder cid.Builder - mode os.FileMode - mtime time.Time + maxLinks int + maxHAMTFanout int + cidBuilder cid.Builder + mode os.FileMode + mtime time.Time // Track the changes in size by the AddChild and RemoveChild calls // for the HAMTShardingSize option. @@ -184,8 +205,10 @@ type HAMTDirectory struct { // NewBasicDirectory creates an empty basic directory with the given options. func NewBasicDirectory(dserv ipld.DAGService, opts ...DirectoryOption) *BasicDirectory { - basicDir := new(BasicDirectory) - basicDir.dserv = dserv + basicDir := &BasicDirectory{ + dserv: dserv, + maxHAMTFanout: DefaultShardWidth, + } for _, o := range opts { o(basicDir) @@ -221,24 +244,17 @@ func NewBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *Bas // NewHAMTDirectory creates an empty HAMT directory with the given options. func NewHAMTDirectory(dserv ipld.DAGService, sizeChange int, opts ...DirectoryOption) (*HAMTDirectory, error) { - dir := HAMTDirectory{ - dserv: dserv, - sizeChange: sizeChange, + dir := &HAMTDirectory{ + dserv: dserv, + sizeChange: sizeChange, + maxHAMTFanout: DefaultShardWidth, } for _, opt := range opts { - opt(&dir) + opt(dir) } - shardWidth := DefaultShardWidth - if dir.maxLinks > 0 { - if !validShardWidth(dir.maxLinks) { - return nil, errors.New("MaxLinks must be a power of 2 and multiple of 8") - } - shardWidth = dir.maxLinks - } - - shard, err := hamt.NewShard(dir.dserv, shardWidth) + shard, err := hamt.NewShard(dir.dserv, dir.maxHAMTFanout) if err != nil { return nil, err } @@ -246,13 +262,13 @@ func NewHAMTDirectory(dserv ipld.DAGService, sizeChange int, opts ...DirectoryOp shard.SetCidBuilder(dir.cidBuilder) dir.shard = shard - return &dir, nil + return dir, nil } // NewHAMTDirectoryFromNode creates a HAMT directory from the given node, // which must correspond to an existing HAMT. func NewHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirectory, error) { - dir := HAMTDirectory{ + dir := &HAMTDirectory{ dserv: dserv, } @@ -263,12 +279,12 @@ func NewHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirec dir.shard = shard dir.totalLinks = len(node.Links()) - return &dir, nil + return dir, nil } // NewDirectory returns a Directory implemented by DynamicDirectory containing // a BasicDirectory that automatically converts to a from a HAMTDirectory -// based on HAMTShardingSize and MaxLinks (when set). +// based on HAMTShardingSize, MaxLinks and MaxHAMTFanout (when set). func NewDirectory(dserv ipld.DAGService, opts ...DirectoryOption) Directory { return &DynamicDirectory{NewBasicDirectory(dserv, opts...)} } @@ -307,6 +323,10 @@ func (d *BasicDirectory) setMaxLinks(n int) { d.maxLinks = n } +func (d *BasicDirectory) setHAMTMaxFanout(n int) { + d.maxHAMTFanout = n +} + func (d *BasicDirectory) setStats(mode os.FileMode, mtime time.Time) { d.mode = mode d.mtime = mtime @@ -377,9 +397,9 @@ func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node switchShardingSize := d.estimatedSize+operationSizeChange >= HAMTShardingSize switchMaxLinks := false - // We should switch if we have reached maxLinks and a new link is being - // added and maxLinks is valid for shardWidth. - if nodeToAdd != nil && entryToRemove == nil && validShardWidth(d.maxLinks) && + // We should switch if maxLinks is set, we have reached it and a new link is being + // added. + if nodeToAdd != nil && entryToRemove == nil && d.maxLinks > 0 && (d.totalLinks+1) > d.maxLinks { switchMaxLinks = true } @@ -520,8 +540,12 @@ func (d *HAMTDirectory) SetCidBuilder(builder cid.Builder) { } } -func (d *HAMTDirectory) setMaxLinks(maxLinks int) { - d.maxLinks = maxLinks +func (d *HAMTDirectory) setMaxLinks(n int) { + d.maxLinks = n +} + +func (d *HAMTDirectory) setHAMTMaxFanout(n int) { + d.maxHAMTFanout = n } func (d *HAMTDirectory) setStats(mode os.FileMode, mtime time.Time) { @@ -599,7 +623,7 @@ func (d *HAMTDirectory) GetCidBuilder() cid.Builder { // switchToBasic returns a BasicDirectory implementation of this directory. func (d *HAMTDirectory) switchToBasic(ctx context.Context, opts ...DirectoryOption) (*BasicDirectory, error) { - // needsToSwichToBasicDir checks d.maxLinks is appropiate. No check is + // needsoSwichToBasicDir checks d.maxLinks is appropiate. No check is // performed here. basicDir := NewBasicDirectory(d.dserv, opts...) @@ -760,7 +784,7 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No } if switchToBasic { - basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(hamtDir.maxLinks), WithCidBuilder(hamtDir.GetCidBuilder())) + basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(hamtDir.maxLinks), WithMaxHAMTFanout(hamtDir.maxHAMTFanout), WithCidBuilder(hamtDir.GetCidBuilder())) if err != nil { return err } @@ -785,15 +809,13 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No return basicDir.AddChild(ctx, name, nd) } - maxLinks := DefaultShardWidth + hamtFanout := DefaultShardWidth // Verify that our maxLinks is usuable for ShardWidth (power of 2, multiple of 8) - if validShardWidth(basicDir.maxLinks) { - maxLinks = basicDir.maxLinks - } else if basicDir.maxLinks > 0 { - log.Warn("using DefaultShardWidth for HAMT directories. MaxLinks cannot be used as it is not a power of 2 and multiple of 8.") + if validShardWidth(basicDir.maxHAMTFanout) { + hamtFanout = basicDir.maxHAMTFanout } - hamtDir, err = basicDir.switchToSharding(ctx, WithMaxLinks(maxLinks), WithCidBuilder(basicDir.GetCidBuilder())) + hamtDir, err = basicDir.switchToSharding(ctx, WithMaxHAMTFanout(hamtFanout), WithMaxLinks(basicDir.maxLinks), WithCidBuilder(basicDir.GetCidBuilder())) if err != nil { return err } @@ -823,17 +845,25 @@ func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { return hamtDir.RemoveChild(ctx, name) } + maxLinks := hamtDir.maxLinks // We have not removed the element that violates MaxLinks, so we have to +1 the limit. We -1 below. - basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(hamtDir.maxLinks+1), WithCidBuilder(hamtDir.GetCidBuilder())) + if maxLinks > 0 { + maxLinks++ + } + + basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(maxLinks), WithCidBuilder(hamtDir.GetCidBuilder())) if err != nil { return err } + err = basicDir.RemoveChild(ctx, name) if err != nil { return err } - basicDir.setMaxLinks(hamtDir.maxLinks - 1) + if maxLinks > 0 { + basicDir.maxLinks-- + } d.directoryWithOptions = basicDir return nil } diff --git a/ipld/unixfs/io/directory_test.go b/ipld/unixfs/io/directory_test.go index 866de794f..f5301b8c8 100644 --- a/ipld/unixfs/io/directory_test.go +++ b/ipld/unixfs/io/directory_test.go @@ -205,12 +205,14 @@ func mockLinkSizeFunc(fixedSize int) func(linkName string, linkCid cid.Cid) int } func checkBasicDirectory(t *testing.T, dir Directory, errorMessage string) { + t.Helper() if _, ok := dir.(*DynamicDirectory).directoryWithOptions.(*BasicDirectory); !ok { t.Fatal(errorMessage) } } func checkHAMTDirectory(t *testing.T, dir Directory, errorMessage string) { + t.Helper() if _, ok := dir.(*DynamicDirectory).directoryWithOptions.(*HAMTDirectory); !ok { t.Fatal(errorMessage) } @@ -557,7 +559,7 @@ func TestHAMTDirectoryWithMaxLinks(t *testing.T) { ds := mdtest.Mock() ctx := context.Background() - dir, err := NewHAMTDirectory(ds, 0, WithMaxLinks(8)) + dir, err := NewHAMTDirectory(ds, 0, WithMaxHAMTFanout(8)) require.NoError(t, err) // Ensure we have at least 2 levels of HAMT by adding many nodes @@ -587,7 +589,7 @@ func TestDynamicDirectoryWithMaxLinks(t *testing.T) { ds := mdtest.Mock() ctx := context.Background() - dir := NewDirectory(ds, WithMaxLinks(8)) + dir := NewDirectory(ds, WithMaxLinks(8), WithMaxHAMTFanout(16)) for i := 0; i < 8; i++ { child := ft.EmptyDirNode() @@ -608,7 +610,7 @@ func TestDynamicDirectoryWithMaxLinks(t *testing.T) { // Check that the directory root node has 8 links dirnd, err := dir.GetNode() require.NoError(t, err) - assert.Equal(t, 8, len(dirnd.Links()), "HAMT Directory root node should have 8 links") + assert.Equal(t, 16, len(dirnd.Links()), "HAMT Directory root node should have 16 links") // Continue the code by removing 50 elements while checking that the underlying directory is a HAMT directory. for i := 57; i >= 9; i-- { From cbf788ed263d826914cb351c3098d2f81823ad16 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Fri, 4 Apr 2025 18:49:06 +0200 Subject: [PATCH 04/13] unixfs/directory: Let 0 be default for HAMTFanout without warning. --- ipld/unixfs/io/directory.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ipld/unixfs/io/directory.go b/ipld/unixfs/io/directory.go index baff71763..7858bd9ef 100644 --- a/ipld/unixfs/io/directory.go +++ b/ipld/unixfs/io/directory.go @@ -119,10 +119,13 @@ func WithMaxLinks(n int) DirectoryOption { // 8. If it is not the case, it will not be used and DefaultHAMTWidth will be // used instead. func WithMaxHAMTFanout(n int) DirectoryOption { - if !validShardWidth(n) { + if n > 0 && !validShardWidth(n) { log.Warnf("Invalid HAMTMaxFanout: %d. Using default (%d)", n, DefaultShardWidth) n = DefaultShardWidth } + if n == 0 { + n = DefaultShardWidth + } return func(d directoryWithOptions) { d.setHAMTMaxFanout(n) } From 8472709379480017050fceddccdc32ff4caf7791 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Fri, 4 Apr 2025 20:00:54 +0200 Subject: [PATCH 05/13] mfs: support MaxLinks and MaxHAMTFanout Directory metadata like mode or mtime is serialized and added to the dag-pb protobuf, so it is persisted with it and reloaded every time a unixfs folder is loaded in into an MFS directory or cached as a child inside one. With MaxLinks and MaxHAMTWidth we don't have this luxury. The idea is that, if we have an MFS root, it is wrapping an unixfs dynamic directory, and at the same time it is wrapping some protobuf (either basic or hamt). Now, when we use it to operate, add subfolders, flush it, sync it etc, we must still remember the max-links configuration and the only way we can do that is to set it to what the parent has (while leaving Stat alone). This should work when we are adding things to MFS and everything expects the same options. When dealing with a long-lived MFS system and we import existing DAGs into it, this is likely going to cause undesired effects (I'm unsure about the current behavour but it may trigger HAMT conversions where before things were left alone etc.). In any case, previous behavour is kept when the options are zero'ed, which happens unless they are explicitally set (so we should be safe wrt Kubo MFS DAG). Implementation required public setters and getters in unixfs/io.Directory. When adding files, for example it is a normal operation to create a new MFS root. Currently this requires creating an empty dag-pb node configured with a Cid builder etc. and then using it to initialize a new root. The new NewEmptyRoot() method exposes the unixfs directory options at the MFS level. It simplifies making new roots and it lets us pass the unixfs options that we wish, including setting MaxWidth or MaxHAMTFanout. --- ipld/unixfs/io/directory.go | 182 ++++++++++++++++++++++++++---------- mfs/dir.go | 74 +++++++++++++-- mfs/ops.go | 31 +++--- mfs/root.go | 27 ++++++ 4 files changed, 245 insertions(+), 69 deletions(-) diff --git a/ipld/unixfs/io/directory.go b/ipld/unixfs/io/directory.go index 7858bd9ef..1b2a7730f 100644 --- a/ipld/unixfs/io/directory.go +++ b/ipld/unixfs/io/directory.go @@ -71,20 +71,28 @@ type Directory interface { // GetCidBuilder returns the CID Builder used. GetCidBuilder() cid.Builder - // setMaxLinks sets the max width of the Directory. - setMaxLinks(n int) -} + // GetMaxLinks returns the configured value for MaxLinks. + GetMaxLinks() int -type directoryWithOptions interface { - Directory - // setMaxLinks sets the max width of the Directory. - setMaxLinks(n int) - // used for hamt directories only - setHAMTMaxFanout(n int) - setStats(os.FileMode, time.Time) + // SetMaxLinks sets the number of links for the directory. Used when converting + // between Basic and HAMT. + SetMaxLinks(n int) + + // GetMaxHAMTFanout returns the configured value for MaxHAMTFanout. + GetMaxHAMTFanout() int + + // SetMaxHAMTFanout sets the max width of shards when using a HAMT. + // It must be a power of 2 and multiple of 8. Used when converting + // between Basic and HAMT. + SetMaxHAMTFanout(n int) + + // SetStat sets the stat information for the directory. Used when + // converting between Basic and HAMT. + SetStat(os.FileMode, time.Time) } -type DirectoryOption func(directoryWithOptions) +// A DirectoryOption can be used to initialize directories. +type DirectoryOption func(d Directory) // WithMaxLinks stablishes the max number of links allowed for a Basic // directory or a Dynamic directory with an underlying Basic directory: @@ -101,8 +109,8 @@ type DirectoryOption func(directoryWithOptions) // - On pure Basic directories, it causes an error when adding more than // MaxLinks children. func WithMaxLinks(n int) DirectoryOption { - return func(d directoryWithOptions) { - d.setMaxLinks(n) + return func(d Directory) { + d.SetMaxLinks(n) } } @@ -119,29 +127,22 @@ func WithMaxLinks(n int) DirectoryOption { // 8. If it is not the case, it will not be used and DefaultHAMTWidth will be // used instead. func WithMaxHAMTFanout(n int) DirectoryOption { - if n > 0 && !validShardWidth(n) { - log.Warnf("Invalid HAMTMaxFanout: %d. Using default (%d)", n, DefaultShardWidth) - n = DefaultShardWidth - } - if n == 0 { - n = DefaultShardWidth - } - return func(d directoryWithOptions) { - d.setHAMTMaxFanout(n) + return func(d Directory) { + d.SetMaxHAMTFanout(n) } } // WithCidBuilder sets the CidBuilder for new Directories. func WithCidBuilder(cb cid.Builder) DirectoryOption { - return func(d directoryWithOptions) { + return func(d Directory) { d.SetCidBuilder(cb) } } // WithStat can be used to set the empty directory node permissions. func WithStat(mode os.FileMode, mtime time.Time) DirectoryOption { - return func(d directoryWithOptions) { - d.setStats(mode, mtime) + return func(d Directory) { + d.SetStat(mode, mtime) } } @@ -159,9 +160,9 @@ func init() { } var ( - _ directoryWithOptions = (*DynamicDirectory)(nil) - _ directoryWithOptions = (*BasicDirectory)(nil) - _ directoryWithOptions = (*HAMTDirectory)(nil) + _ Directory = (*DynamicDirectory)(nil) + _ Directory = (*BasicDirectory)(nil) + _ Directory = (*HAMTDirectory)(nil) ) // BasicDirectory is the basic implementation of `Directory`. All the entries @@ -257,6 +258,7 @@ func NewHAMTDirectory(dserv ipld.DAGService, sizeChange int, opts ...DirectoryOp opt(dir) } + // FIXME: do shards not support mtime and mode? shard, err := hamt.NewShard(dir.dserv, dir.maxHAMTFanout) if err != nil { return nil, err @@ -322,17 +324,45 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err return nil, ErrNotADir } -func (d *BasicDirectory) setMaxLinks(n int) { +// GetMaxLinks returns the configured MaxLinks. +func (d *BasicDirectory) GetMaxLinks() int { + return d.maxLinks +} + +// SetMaxLinks sets the maximum number of links for the Directory, but has no +// side effects until new children are added (in which case the new limit +// applies). +func (d *BasicDirectory) SetMaxLinks(n int) { d.maxLinks = n } -func (d *BasicDirectory) setHAMTMaxFanout(n int) { +// GetMaxLinks returns the configured HAMTFanout. +func (d *BasicDirectory) GetMaxHAMTFanout() int { + return d.maxHAMTFanout +} + +// SetMAXHAMTFanout has no relevance for BasicDirectories. +func (d *BasicDirectory) SetMaxHAMTFanout(n int) { + if n > 0 && !validShardWidth(n) { + log.Warnf("Invalid HAMTMaxFanout: %d. Using default (%d)", n, DefaultShardWidth) + n = DefaultShardWidth + } + if n == 0 { + n = DefaultShardWidth + } + d.maxHAMTFanout = n } -func (d *BasicDirectory) setStats(mode os.FileMode, mtime time.Time) { - d.mode = mode - d.mtime = mtime +// SetStat has no effect and only exists to support dynamic directories. Use +// WithStat when creating a new directory instead. +func (d *BasicDirectory) SetStat(mode os.FileMode, mtime time.Time) { + if mode > 0 { + d.mode = mode + } + if !mtime.IsZero() { + d.mtime = mtime + } } func (d *BasicDirectory) computeEstimatedSizeAndTotalLinks() { @@ -543,17 +573,44 @@ func (d *HAMTDirectory) SetCidBuilder(builder cid.Builder) { } } -func (d *HAMTDirectory) setMaxLinks(n int) { +// GetMaxLinks returns the maxLinks value from a HAMTDirectory. +func (d *HAMTDirectory) GetMaxLinks() int { + return d.maxLinks +} + +// SetMaxLinks has no effect and only exists to support Dynamic directories. +func (d *HAMTDirectory) SetMaxLinks(n int) { d.maxLinks = n } -func (d *HAMTDirectory) setHAMTMaxFanout(n int) { +// GetMaxHAMTFanout returns the maxHAMTFanout value from a HAMTDirectory. +func (d *HAMTDirectory) GetMaxHAMTFanout() int { + return d.maxHAMTFanout +} + +// SetMaxHAMTFanout has no effect and only exists to support Dynamic +// directories. Max fanout can be set during creation using +// WithMaxHAMTFanout(). +func (d *HAMTDirectory) SetMaxHAMTFanout(n int) { + if n > 0 && !validShardWidth(n) { + log.Warnf("Invalid HAMTMaxFanout: %d. Using default (%d)", n, DefaultShardWidth) + n = DefaultShardWidth + } + if n == 0 { + n = DefaultShardWidth + } + d.maxHAMTFanout = n } -func (d *HAMTDirectory) setStats(mode os.FileMode, mtime time.Time) { - d.mode = mode - d.mtime = mtime +// SetStat has no effect and only exists to support Dynamic directories. +func (d *HAMTDirectory) SetStat(mode os.FileMode, mtime time.Time) { + if mode > 0 { + d.mode = mode + } + if !mtime.IsZero() { + d.mtime = mtime + } } // AddChild implements the `Directory` interface. @@ -768,15 +825,39 @@ func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int) // to switch from BasicDirectory to HAMTDirectory and backwards based on // size. type DynamicDirectory struct { - directoryWithOptions + Directory } var _ Directory = (*DynamicDirectory)(nil) +// SetMaxLinks sets the maximum number of links for the underlying Basic +// directory when used. This operation does not produce any side effects, but +// the new value may trigger Basic-to-HAMT conversions when adding new +// children to Basic directories, or HAMT-to-Basic conversion when operating +// with HAMT directories. +func (d *DynamicDirectory) SetMaxLinks(n int) { + d.Directory.SetMaxLinks(n) +} + +// SetMaxHAMTFanout sets the maximum shard width for HAMT directories. This +// operation does not produce any side effect and only takes effect on a +// conversion from Basic to HAMT directory. +func (d *DynamicDirectory) SetMaxHAMTFanout(n int) { + d.Directory.SetMaxHAMTFanout(n) +} + +// SetStat sets stats information. This operation does not produce any side +// effects. It is taken into account when converting from HAMT to basic +// directory. Mode or mtime can be set independently by using zero for mtime +// or mode respectively. +func (d *DynamicDirectory) SetStat(mode os.FileMode, mtime time.Time) { + d.Directory.SetStat(mode, mtime) +} + // AddChild implements the `Directory` interface. We check when adding new entries // if we should switch to HAMTDirectory according to global option(s). func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { - hamtDir, ok := d.directoryWithOptions.(*HAMTDirectory) + hamtDir, ok := d.Directory.(*HAMTDirectory) if ok { // We evaluate a switch in the HAMTDirectory case even for an AddChild // as it may overwrite an existing entry and end up actually reducing @@ -787,7 +868,12 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No } if switchToBasic { - basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(hamtDir.maxLinks), WithMaxHAMTFanout(hamtDir.maxHAMTFanout), WithCidBuilder(hamtDir.GetCidBuilder())) + basicDir, err := hamtDir.switchToBasic(ctx, + WithMaxLinks(hamtDir.maxLinks), + WithMaxHAMTFanout(hamtDir.maxHAMTFanout), + WithCidBuilder(hamtDir.GetCidBuilder()), + WithStat(hamtDir.mode, hamtDir.mtime), + ) if err != nil { return err } @@ -795,15 +881,15 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No if err != nil { return err } - d.directoryWithOptions = basicDir + d.Directory = basicDir return nil } - return d.directoryWithOptions.AddChild(ctx, name, nd) + return d.Directory.AddChild(ctx, name, nd) } // BasicDirectory - basicDir := d.directoryWithOptions.(*BasicDirectory) + basicDir := d.Directory.(*BasicDirectory) switchToHAMT, err := basicDir.needsToSwitchToHAMTDir(name, nd) if err != nil { return err @@ -826,7 +912,7 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No if err != nil { return err } - d.directoryWithOptions = hamtDir + d.Directory = hamtDir return nil } @@ -834,9 +920,9 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No // a HAMTDirectory that might need to be downgraded to a BasicDirectory. The // upgrade path is in AddChild. func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { - hamtDir, ok := d.directoryWithOptions.(*HAMTDirectory) + hamtDir, ok := d.Directory.(*HAMTDirectory) if !ok { - return d.directoryWithOptions.RemoveChild(ctx, name) + return d.Directory.RemoveChild(ctx, name) } switchToBasic, err := hamtDir.needsToSwitchToBasicDir(ctx, name, nil) @@ -867,7 +953,7 @@ func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { if maxLinks > 0 { basicDir.maxLinks-- } - d.directoryWithOptions = basicDir + d.Directory = basicDir return nil } diff --git a/mfs/dir.go b/mfs/dir.go index 506d9f7ee..e658ce71b 100644 --- a/mfs/dir.go +++ b/mfs/dir.go @@ -64,6 +64,39 @@ func NewDirectory(ctx context.Context, name string, node ipld.Node, parent paren }, nil } +// NewEmptyDirectory creates an empty MFS directory with the given options. +// The directory is added to the DAGService. To create a new MFS +// root use NewEmptyRootFolder instead. +func NewEmptyDirectory(ctx context.Context, name string, parent parent, dserv ipld.DAGService, opts MkdirOpts) (*Directory, error) { + db := uio.NewDirectory(dserv, + uio.WithMaxLinks(opts.MaxLinks), + uio.WithMaxHAMTFanout(opts.MaxHAMTFanout), + uio.WithStat(opts.Mode, opts.ModTime), + uio.WithCidBuilder(opts.CidBuilder), + ) + + nd, err := db.GetNode() + if err != nil { + return nil, err + } + + err = dserv.Add(ctx, nd) + if err != nil { + return nil, err + } + + return &Directory{ + inode: inode{ + name: name, + parent: parent, + dagService: dserv, + }, + ctx: ctx, + unixfsDir: db, + entriesCache: make(map[string]FSNode), + }, nil +} + // GetCidBuilder gets the CID builder of the root node func (d *Directory) GetCidBuilder() cid.Builder { return d.unixfsDir.GetCidBuilder() @@ -145,6 +178,12 @@ func (d *Directory) cacheNode(name string, nd ipld.Node) (FSNode, error) { return nil, err } + // these options are not persisted so they need to be + // inherited from the parent. + ndir.unixfsDir.SetMaxLinks(d.unixfsDir.GetMaxLinks()) + ndir.unixfsDir.SetMaxHAMTFanout(d.unixfsDir.GetMaxHAMTFanout()) + ndir.unixfsDir.SetCidBuilder(d.unixfsDir.GetCidBuilder()) + d.entriesCache[name] = ndir return ndir, nil case ft.TFile, ft.TRaw, ft.TSymlink: @@ -271,7 +310,11 @@ func (d *Directory) ForEachEntry(ctx context.Context, f func(NodeListing) error) } func (d *Directory) Mkdir(name string) (*Directory, error) { - return d.MkdirWithOpts(name, MkdirOpts{}) + return d.MkdirWithOpts(name, MkdirOpts{ + MaxLinks: d.unixfsDir.GetMaxLinks(), + MaxHAMTFanout: d.unixfsDir.GetMaxHAMTFanout(), + CidBuilder: d.unixfsDir.GetCidBuilder(), + }) } func (d *Directory) MkdirWithOpts(name string, opts MkdirOpts) (*Directory, error) { @@ -290,20 +333,17 @@ func (d *Directory) MkdirWithOpts(name string, opts MkdirOpts) (*Directory, erro } } - ndir := ft.EmptyDirNodeWithStat(opts.Mode, opts.ModTime) - ndir.SetCidBuilder(d.GetCidBuilder()) - - err = d.dagService.Add(d.ctx, ndir) + dirobj, err := NewEmptyDirectory(d.ctx, name, d, d.dagService, opts) if err != nil { return nil, err } - err = d.unixfsDir.AddChild(d.ctx, name, ndir) + ndir, err := dirobj.GetNode() if err != nil { return nil, err } - dirobj, err := NewDirectory(d.ctx, name, ndir, d, d.dagService) + err = d.unixfsDir.AddChild(d.ctx, name, ndir) if err != nil { return nil, err } @@ -426,7 +466,13 @@ func (d *Directory) SetMode(mode os.FileMode) error { return err } - return d.setNodeData(data, nd.Links()) + err = d.setNodeData(data, nd.Links()) + if err != nil { + return err + } + + d.unixfsDir.SetStat(mode, time.Time{}) + return nil } func (d *Directory) SetModTime(ts time.Time) error { @@ -446,7 +492,12 @@ func (d *Directory) SetModTime(ts time.Time) error { return err } - return d.setNodeData(data, nd.Links()) + err = d.setNodeData(data, nd.Links()) + if err != nil { + return err + } + d.unixfsDir.SetStat(0, ts) + return nil } func (d *Directory) setNodeData(data []byte, links []*ipld.Link) error { @@ -469,6 +520,11 @@ func (d *Directory) setNodeData(data []byte, links []*ipld.Link) error { if err != nil { return err } + + // We need to carry our desired settings. + db.SetMaxLinks(d.unixfsDir.GetMaxLinks()) + db.SetMaxHAMTFanout(d.unixfsDir.GetMaxHAMTFanout()) + db.SetCidBuilder(d.unixfsDir.GetCidBuilder()) d.unixfsDir = db return nil diff --git a/mfs/ops.go b/mfs/ops.go index 09dbab00f..f026e7cbe 100644 --- a/mfs/ops.go +++ b/mfs/ops.go @@ -120,11 +120,13 @@ func PutNode(r *Root, path string, nd ipld.Node) error { // MkdirOpts is used by Mkdir type MkdirOpts struct { - Mkparents bool - Flush bool - CidBuilder cid.Builder - Mode os.FileMode - ModTime time.Time + Mkparents bool + Flush bool + CidBuilder cid.Builder + Mode os.FileMode + ModTime time.Time + MaxLinks int + MaxHAMTFanout int } // Mkdir creates a directory at 'path' under the directory 'd', creating @@ -152,16 +154,24 @@ func Mkdir(r *Root, pth string, opts MkdirOpts) error { } cur := r.GetDirectory() + + // opts to make the parents leave MkParents and Flush as false. + parentsOpts := MkdirOpts{ + CidBuilder: opts.CidBuilder, + Mode: opts.Mode, + ModTime: opts.ModTime, + MaxLinks: opts.MaxLinks, + MaxHAMTFanout: opts.MaxHAMTFanout, + } + for i, d := range parts[:len(parts)-1] { fsn, err := cur.Child(d) if err == os.ErrNotExist && opts.Mkparents { - mkd, err := cur.Mkdir(d) + + mkd, err := cur.MkdirWithOpts(d, parentsOpts) if err != nil { return err } - if opts.CidBuilder != nil { - mkd.SetCidBuilder(opts.CidBuilder) - } fsn = mkd } else if err != nil { return err @@ -180,9 +190,6 @@ func Mkdir(r *Root, pth string, opts MkdirOpts) error { return err } } - if opts.CidBuilder != nil { - final.SetCidBuilder(opts.CidBuilder) - } if opts.Flush { err := final.Flush() diff --git a/mfs/root.go b/mfs/root.go index c8d307b2d..4835f94d2 100644 --- a/mfs/root.go +++ b/mfs/root.go @@ -136,6 +136,33 @@ func NewRoot(parent context.Context, ds ipld.DAGService, node *dag.ProtoNode, pf return root, nil } +// NewEmptyRoot creates an empty Root directory with the given directory +// options. A replublisher is created if PubFunc is not nil. +func NewEmptyRoot(parent context.Context, ds ipld.DAGService, pf PubFunc, opts MkdirOpts) (*Root, error) { + root := new(Root) + + dir, err := NewEmptyDirectory(parent, "", root, ds, opts) + if err != nil { + return nil, err + } + + // Rather than "dir.GetNode()" because it is cheaper and we have no + // risks. + nd, err := dir.unixfsDir.GetNode() + if err != nil { + return nil, err + } + + var repub *Republisher + if pf != nil { + repub = NewRepublisher(pf, repubQuick, repubLong, nd.Cid()) + } + + root.repub = repub + root.dir = dir + return root, nil +} + // GetDirectory returns the root directory. func (kr *Root) GetDirectory() *Directory { return kr.dir From a6c773db44b1546788f654fc1e9522b3d70a11ce Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Mon, 7 Apr 2025 15:54:21 +0200 Subject: [PATCH 06/13] unixfs/io: fix tests --- ipld/unixfs/io/directory_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ipld/unixfs/io/directory_test.go b/ipld/unixfs/io/directory_test.go index f5301b8c8..ec4c1b14e 100644 --- a/ipld/unixfs/io/directory_test.go +++ b/ipld/unixfs/io/directory_test.go @@ -206,14 +206,14 @@ func mockLinkSizeFunc(fixedSize int) func(linkName string, linkCid cid.Cid) int func checkBasicDirectory(t *testing.T, dir Directory, errorMessage string) { t.Helper() - if _, ok := dir.(*DynamicDirectory).directoryWithOptions.(*BasicDirectory); !ok { + if _, ok := dir.(*DynamicDirectory).Directory.(*BasicDirectory); !ok { t.Fatal(errorMessage) } } func checkHAMTDirectory(t *testing.T, dir Directory, errorMessage string) { t.Helper() - if _, ok := dir.(*DynamicDirectory).directoryWithOptions.(*HAMTDirectory); !ok { + if _, ok := dir.(*DynamicDirectory).Directory.(*HAMTDirectory); !ok { t.Fatal(errorMessage) } } From 14a0dd57728b355b5e2f66b67103eb41adb5142f Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Mon, 7 Apr 2025 16:43:23 +0200 Subject: [PATCH 07/13] unixfs/io/director: fix empty directory with mtime --- ipld/unixfs/io/directory.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipld/unixfs/io/directory.go b/ipld/unixfs/io/directory.go index 1b2a7730f..e8dc5fb1e 100644 --- a/ipld/unixfs/io/directory.go +++ b/ipld/unixfs/io/directory.go @@ -219,7 +219,7 @@ func NewBasicDirectory(dserv ipld.DAGService, opts ...DirectoryOption) *BasicDir } var node *mdag.ProtoNode - if basicDir.mode > 0 { + if basicDir.mode > 0 || !basicDir.mtime.IsZero() { node = format.EmptyDirNodeWithStat(basicDir.mode, basicDir.mtime) } else { node = format.EmptyDirNode() From cba0024bf83005a0b40fb8cf74a3da1bae4d0f7b Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Mon, 7 Apr 2025 17:51:30 +0200 Subject: [PATCH 08/13] mkdir: keep previous behaviour regarding mode and mtime --- mfs/ops.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/mfs/ops.go b/mfs/ops.go index f026e7cbe..1772cc274 100644 --- a/mfs/ops.go +++ b/mfs/ops.go @@ -158,8 +158,6 @@ func Mkdir(r *Root, pth string, opts MkdirOpts) error { // opts to make the parents leave MkParents and Flush as false. parentsOpts := MkdirOpts{ CidBuilder: opts.CidBuilder, - Mode: opts.Mode, - ModTime: opts.ModTime, MaxLinks: opts.MaxLinks, MaxHAMTFanout: opts.MaxHAMTFanout, } From ae74e18196eb0000f6f3efc83757d1de93941d56 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Mon, 7 Apr 2025 23:29:24 +0200 Subject: [PATCH 09/13] unixfs/directory: NewDirectory: Bubble SetCidBuilder errors --- ipld/unixfs/io/directory.go | 24 +++++++++---- ipld/unixfs/io/directory_test.go | 58 +++++++++++++++++++++----------- mfs/dir.go | 6 +++- 3 files changed, 61 insertions(+), 27 deletions(-) diff --git a/ipld/unixfs/io/directory.go b/ipld/unixfs/io/directory.go index e8dc5fb1e..d8b589256 100644 --- a/ipld/unixfs/io/directory.go +++ b/ipld/unixfs/io/directory.go @@ -208,7 +208,7 @@ type HAMTDirectory struct { } // NewBasicDirectory creates an empty basic directory with the given options. -func NewBasicDirectory(dserv ipld.DAGService, opts ...DirectoryOption) *BasicDirectory { +func NewBasicDirectory(dserv ipld.DAGService, opts ...DirectoryOption) (*BasicDirectory, error) { basicDir := &BasicDirectory{ dserv: dserv, maxHAMTFanout: DefaultShardWidth, @@ -225,12 +225,15 @@ func NewBasicDirectory(dserv ipld.DAGService, opts ...DirectoryOption) *BasicDir node = format.EmptyDirNode() } basicDir.node = node - basicDir.node.SetCidBuilder(basicDir.cidBuilder) + err := basicDir.node.SetCidBuilder(basicDir.cidBuilder) + if err != nil { + return nil, err + } // Scan node links (if any) to restore estimated size. basicDir.computeEstimatedSizeAndTotalLinks() - return basicDir + return basicDir, nil } // NewBasicDirectoryFromNode creates a basic directory wrapping the given @@ -290,8 +293,12 @@ func NewHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirec // NewDirectory returns a Directory implemented by DynamicDirectory containing // a BasicDirectory that automatically converts to a from a HAMTDirectory // based on HAMTShardingSize, MaxLinks and MaxHAMTFanout (when set). -func NewDirectory(dserv ipld.DAGService, opts ...DirectoryOption) Directory { - return &DynamicDirectory{NewBasicDirectory(dserv, opts...)} +func NewDirectory(dserv ipld.DAGService, opts ...DirectoryOption) (Directory, error) { + bd, err := NewBasicDirectory(dserv, opts...) + if err != nil { + return nil, err + } + return &DynamicDirectory{bd}, nil } // ErrNotADir implies that the given node was not a unixfs directory @@ -685,9 +692,12 @@ func (d *HAMTDirectory) GetCidBuilder() cid.Builder { func (d *HAMTDirectory) switchToBasic(ctx context.Context, opts ...DirectoryOption) (*BasicDirectory, error) { // needsoSwichToBasicDir checks d.maxLinks is appropiate. No check is // performed here. - basicDir := NewBasicDirectory(d.dserv, opts...) + basicDir, err := NewBasicDirectory(d.dserv, opts...) + if err != nil { + return nil, err + } - err := d.ForEachLink(ctx, func(lnk *ipld.Link) error { + err = d.ForEachLink(ctx, func(lnk *ipld.Link) error { err := basicDir.addLinkChild(ctx, lnk.Name, lnk) if err != nil { return err diff --git a/ipld/unixfs/io/directory_test.go b/ipld/unixfs/io/directory_test.go index ec4c1b14e..a2d97083e 100644 --- a/ipld/unixfs/io/directory_test.go +++ b/ipld/unixfs/io/directory_test.go @@ -37,7 +37,10 @@ func TestEmptyNode(t *testing.T) { func TestDirectoryGrowth(t *testing.T) { ds := mdtest.Mock() - dir := NewDirectory(ds) + dir, err := NewDirectory(ds) + if err != nil { + t.Fatal(err) + } ctx := context.Background() d := ft.EmptyDirNode() @@ -52,7 +55,7 @@ func TestDirectoryGrowth(t *testing.T) { } } - _, err := dir.GetNode() + _, err = dir.GetNode() if err != nil { t.Fatal(err) } @@ -91,11 +94,14 @@ func TestDirectoryGrowth(t *testing.T) { func TestDuplicateAddDir(t *testing.T) { ds := mdtest.Mock() - dir := NewDirectory(ds) + dir, err := NewDirectory(ds) + if err != nil { + t.Fatal(err) + } ctx := context.Background() nd := ft.EmptyDirNode() - err := dir.AddChild(ctx, "test", nd) + err = dir.AddChild(ctx, "test", nd) if err != nil { t.Fatal(err) } @@ -117,7 +123,10 @@ func TestDuplicateAddDir(t *testing.T) { func TestBasicDirectory_estimatedSize(t *testing.T) { ds := mdtest.Mock() - basicDir := NewBasicDirectory(ds) + basicDir, err := NewBasicDirectory(ds) + if err != nil { + t.Fatal(err) + } testDirectorySizeEstimation(t, basicDir, ds, func(dir Directory) int { return dir.(*BasicDirectory).estimatedSize @@ -230,7 +239,7 @@ func TestProductionLinkSize(t *testing.T) { assert.Equal(t, 48, productionLinkSize(link.Name, link.Cid)) ds := mdtest.Mock() - basicDir := NewBasicDirectory(ds) + basicDir, err := NewBasicDirectory(ds) assert.NoError(t, err) for i := 0; i < 10; i++ { basicDir.AddChild(context.Background(), strconv.FormatUint(uint64(i), 10), ft.EmptyFileNode()) @@ -253,12 +262,14 @@ func TestDynamicDirectorySwitch(t *testing.T) { defer func() { linksize.LinkSizeFunction = productionLinkSize }() ds := mdtest.Mock() - dir := NewDirectory(ds) + dir, err := NewDirectory(ds) + require.NoError(t, err) + checkBasicDirectory(t, dir, "new dir is not BasicDirectory") ctx := context.Background() child := ft.EmptyDirNode() - err := ds.Add(ctx, child) + err = ds.Add(ctx, child) assert.NoError(t, err) err = dir.AddChild(ctx, "1", child) @@ -291,15 +302,19 @@ func TestDynamicDirectorySwitch(t *testing.T) { func TestIntegrityOfDirectorySwitch(t *testing.T) { ds := mdtest.Mock() - dir := NewDirectory(ds) + dir, err := NewDirectory(ds) + if err != nil { + t.Fatal(err) + } checkBasicDirectory(t, dir, "new dir is not BasicDirectory") ctx := context.Background() child := ft.EmptyDirNode() - err := ds.Add(ctx, child) + err = ds.Add(ctx, child) assert.NoError(t, err) - basicDir := NewBasicDirectory(ds) + basicDir, err := NewBasicDirectory(ds) + assert.NoError(t, err) hamtDir, err := NewHAMTDirectory(ds, 0, WithMaxLinks(DefaultShardWidth)) assert.NoError(t, err) for i := 0; i < 1000; i++ { @@ -446,14 +461,15 @@ func sortLinksByName(links []*ipld.Link) { func TestDirBuilder(t *testing.T) { ds := mdtest.Mock() - dir := NewDirectory(ds) - ctx := context.Background() - - child := ft.EmptyDirNode() - err := ds.Add(ctx, child) + dir, err := NewDirectory(ds) if err != nil { t.Fatal(err) } + ctx := context.Background() + + child := ft.EmptyDirNode() + err = ds.Add(ctx, child) + require.NoError(t, err) count := 5000 @@ -533,12 +549,13 @@ func TestBasicDirectoryWithMaxLinks(t *testing.T) { ds := mdtest.Mock() ctx := context.Background() - dir := NewBasicDirectory(ds, WithMaxLinks(2)) + dir, err := NewBasicDirectory(ds, WithMaxLinks(2)) + require.NoError(t, err) child1 := ft.EmptyDirNode() require.NoError(t, ds.Add(ctx, child1)) - err := dir.AddChild(ctx, "entry1", child1) + err = dir.AddChild(ctx, "entry1", child1) require.NoError(t, err) child2 := ft.EmptyDirNode() @@ -589,7 +606,10 @@ func TestDynamicDirectoryWithMaxLinks(t *testing.T) { ds := mdtest.Mock() ctx := context.Background() - dir := NewDirectory(ds, WithMaxLinks(8), WithMaxHAMTFanout(16)) + dir, err := NewDirectory(ds, WithMaxLinks(8), WithMaxHAMTFanout(16)) + if err != nil { + t.Fatal(err) + } for i := 0; i < 8; i++ { child := ft.EmptyDirNode() diff --git a/mfs/dir.go b/mfs/dir.go index e658ce71b..065bbdc5b 100644 --- a/mfs/dir.go +++ b/mfs/dir.go @@ -68,13 +68,17 @@ func NewDirectory(ctx context.Context, name string, node ipld.Node, parent paren // The directory is added to the DAGService. To create a new MFS // root use NewEmptyRootFolder instead. func NewEmptyDirectory(ctx context.Context, name string, parent parent, dserv ipld.DAGService, opts MkdirOpts) (*Directory, error) { - db := uio.NewDirectory(dserv, + db, err := uio.NewDirectory(dserv, uio.WithMaxLinks(opts.MaxLinks), uio.WithMaxHAMTFanout(opts.MaxHAMTFanout), uio.WithStat(opts.Mode, opts.ModTime), uio.WithCidBuilder(opts.CidBuilder), ) + if err != nil { + return nil, err + } + nd, err := db.GetNode() if err != nil { return nil, err From 74a8b639bb509c2b5f1ccaa73b3f580dfbd083d7 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Mon, 7 Apr 2025 23:42:48 +0200 Subject: [PATCH 10/13] mfs: attempt to keep previous behaviour in full --- mfs/dir.go | 7 ++++--- mfs/ops.go | 6 +++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/mfs/dir.go b/mfs/dir.go index 065bbdc5b..700f689be 100644 --- a/mfs/dir.go +++ b/mfs/dir.go @@ -186,7 +186,6 @@ func (d *Directory) cacheNode(name string, nd ipld.Node) (FSNode, error) { // inherited from the parent. ndir.unixfsDir.SetMaxLinks(d.unixfsDir.GetMaxLinks()) ndir.unixfsDir.SetMaxHAMTFanout(d.unixfsDir.GetMaxHAMTFanout()) - ndir.unixfsDir.SetCidBuilder(d.unixfsDir.GetCidBuilder()) d.entriesCache[name] = ndir return ndir, nil @@ -317,7 +316,6 @@ func (d *Directory) Mkdir(name string) (*Directory, error) { return d.MkdirWithOpts(name, MkdirOpts{ MaxLinks: d.unixfsDir.GetMaxLinks(), MaxHAMTFanout: d.unixfsDir.GetMaxHAMTFanout(), - CidBuilder: d.unixfsDir.GetCidBuilder(), }) } @@ -337,6 +335,10 @@ func (d *Directory) MkdirWithOpts(name string, opts MkdirOpts) (*Directory, erro } } + // hector: no idea why this option is overriden it must be to keep + // backwards compatibility. CidBuilder from the options is manually + // set in `Mkdir` (ops.go). + opts.CidBuilder = d.GetCidBuilder() dirobj, err := NewEmptyDirectory(d.ctx, name, d, d.dagService, opts) if err != nil { return nil, err @@ -528,7 +530,6 @@ func (d *Directory) setNodeData(data []byte, links []*ipld.Link) error { // We need to carry our desired settings. db.SetMaxLinks(d.unixfsDir.GetMaxLinks()) db.SetMaxHAMTFanout(d.unixfsDir.GetMaxHAMTFanout()) - db.SetCidBuilder(d.unixfsDir.GetCidBuilder()) d.unixfsDir = db return nil diff --git a/mfs/ops.go b/mfs/ops.go index 1772cc274..05c2e4332 100644 --- a/mfs/ops.go +++ b/mfs/ops.go @@ -157,7 +157,6 @@ func Mkdir(r *Root, pth string, opts MkdirOpts) error { // opts to make the parents leave MkParents and Flush as false. parentsOpts := MkdirOpts{ - CidBuilder: opts.CidBuilder, MaxLinks: opts.MaxLinks, MaxHAMTFanout: opts.MaxHAMTFanout, } @@ -170,6 +169,11 @@ func Mkdir(r *Root, pth string, opts MkdirOpts) error { if err != nil { return err } + // MkdirWithOps uses cur.GetCidBuilder regardless of + // the option. So we must set it manually. + if opts.CidBuilder != nil { + mkd.SetCidBuilder(opts.CidBuilder) + } fsn = mkd } else if err != nil { return err From f96f144d0b807662967b534803427ebdcaad9099 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Tue, 8 Apr 2025 00:18:41 +0200 Subject: [PATCH 11/13] mfs: more backwards compat --- mfs/dir.go | 6 +++--- mfs/ops.go | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/mfs/dir.go b/mfs/dir.go index 700f689be..55d7474a2 100644 --- a/mfs/dir.go +++ b/mfs/dir.go @@ -335,9 +335,9 @@ func (d *Directory) MkdirWithOpts(name string, opts MkdirOpts) (*Directory, erro } } - // hector: no idea why this option is overriden it must be to keep - // backwards compatibility. CidBuilder from the options is manually - // set in `Mkdir` (ops.go). + // hector: no idea why this option is overriden, but it must be to + // keep backwards compatibility. CidBuilder from the options is + // manually set in `Mkdir` (ops.go) though. opts.CidBuilder = d.GetCidBuilder() dirobj, err := NewEmptyDirectory(d.ctx, name, d, d.dagService, opts) if err != nil { diff --git a/mfs/ops.go b/mfs/ops.go index 05c2e4332..b6345fc3b 100644 --- a/mfs/ops.go +++ b/mfs/ops.go @@ -193,6 +193,12 @@ func Mkdir(r *Root, pth string, opts MkdirOpts) error { } } + // Again, MkdirWithOpts ignores opts.CidBuilder so must be applied + // here. + if opts.CidBuilder != nil { + final.SetCidBuilder(opts.CidBuilder) + } + if opts.Flush { err := final.Flush() if err != nil { From d8ce74d6b5bf1ad0cd346c5db093ac2bc3b303cc Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Tue, 8 Apr 2025 00:54:02 +0200 Subject: [PATCH 12/13] Update changelog --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b99838bbf..609894cfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,9 +18,9 @@ The following emojis are used to highlight certain changes: - `ipld/unixfs/io/directory`: We have made some changes to unixfs directory tooling: - We have exposed creator methods for new `BasicDirectory` and `HAMTDirectory`, that complement the existing `NewDirectory()` which creates dynamic directories. - - We have added `WithCidBuilder(...)` and `WithMaxLinks(...)` as options to these new methods so that empty directories can be initilized as wished from the get-go. - - `WithMaxLinks(...)` is a new option that allows to set a limit to the number of children that a directory DAG node can have. For details on what exactly it does for each of the directory types, please check the documentation. - - `WithStats(...)` is a new option that allows to set the Stat information for the new directory node. + - We have added `WithCidBuilder(...)`, `WithMaxLinks(...)`, `WithMaxHAMTFanout(...)` and `WithStat(...)` as options to these new methods so that empty directories can be initilized as wished from the get-go. + - `WithMaxLinks(...)` and `WithMaxHAMTFanout(...)` are new options that allow to set a limit to the number of children that a directory DAG node can have. For details on what they exactly do for each of the directory type, please check the documentation. +- `mfs` supports as well the new `MaxLinks` and `MaxHAMTFanout` options. They have been made part of the `MkdirOptions` object and the methods `NewEmptyDirectory()` and `NewEmptyRoot()` have been added to facilitate the initialization of MFS objects. ### Changed From bcdb9ae3e42be8ef775fa39c76dd833765c8bff4 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Apr 2025 20:24:13 +0200 Subject: [PATCH 13/13] chore: typo --- mfs/root.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mfs/root.go b/mfs/root.go index 4835f94d2..7a41f55e8 100644 --- a/mfs/root.go +++ b/mfs/root.go @@ -137,7 +137,7 @@ func NewRoot(parent context.Context, ds ipld.DAGService, node *dag.ProtoNode, pf } // NewEmptyRoot creates an empty Root directory with the given directory -// options. A replublisher is created if PubFunc is not nil. +// options. A republisher is created if PubFunc is not nil. func NewEmptyRoot(parent context.Context, ds ipld.DAGService, pf PubFunc, opts MkdirOpts) (*Root, error) { root := new(Root)