diff --git a/CHANGELOG.md b/CHANGELOG.md index 64993ffc7..c9509bdc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,14 @@ The following emojis are used to highlight certain changes: ### Added -- `ipld/unixfs`: `DagModifier` now allows specifying file DAG Width (`MaxLinks`) [#898](https://github.com/ipfs/boxo/pull/898) +- Control over UnixFS DAG Width + - We have made some changes to allow setting custom max width of UnixFS DAGs. This enables users to produce DAGs that follow current and future community conventions (see the [related discussion](https://discuss.ipfs.tech/t/should-we-profile-cids/18507)). + - `ipld/unixfs`: `DagModifier` now allows specifying file DAG Width (`MaxLinks`) [#898](https://github.com/ipfs/boxo/pull/898) + - `ipld/unixfs/io/directory`: We have made some changes to unixfs directory tooling [#906](https://github.com/ipfs/boxo/pull/906) + - We have exposed creator methods for new `BasicDirectory` and `HAMTDirectory`, that complement the existing `NewDirectory()` which creates dynamic directories. + - We have added `WithCidBuilder(...)`, `WithMaxLinks(...)`, `WithMaxHAMTFanout(...)` and `WithStat(...)` as options to these new methods so that empty directories can be initilized as wished from the get-go. + - `WithMaxLinks(...)` and `WithMaxHAMTFanout(...)` are new options that allow to set a limit to the number of children that a directory DAG node can have. For details on what they exactly do for each of the directory type, please check the documentation. + - `mfs` supports as well the new `MaxLinks` and `MaxHAMTFanout` options. They have been made part of the `MkdirOptions` object and the methods `NewEmptyDirectory()` and `NewEmptyRoot()` have been added to facilitate the initialization of MFS objects. [#906](https://github.com/ipfs/boxo/pull/906) - `provider`: added support for walking partial DAGs in offline mode [#905](https://github.com/ipfs/boxo/pull/905) - a `KeyChanFunc` that traverses DAGs from a given root (`NewDAGProvider`). - a `KeyChanFunc` that buffers all the CIDs in memory from another `KeyChanFunc` (`NewBufferedProvider`). diff --git a/ipld/unixfs/io/directory.go b/ipld/unixfs/io/directory.go index edac04b22..d8b589256 100644 --- a/ipld/unixfs/io/directory.go +++ b/ipld/unixfs/io/directory.go @@ -4,6 +4,7 @@ import ( "context" "errors" "os" + "time" "github.com/alecthomas/units" mdag "github.com/ipfs/boxo/ipld/merkledag" @@ -69,6 +70,80 @@ type Directory interface { // GetCidBuilder returns the CID Builder used. GetCidBuilder() cid.Builder + + // GetMaxLinks returns the configured value for MaxLinks. + GetMaxLinks() int + + // SetMaxLinks sets the number of links for the directory. Used when converting + // between Basic and HAMT. + SetMaxLinks(n int) + + // GetMaxHAMTFanout returns the configured value for MaxHAMTFanout. + GetMaxHAMTFanout() int + + // SetMaxHAMTFanout sets the max width of shards when using a HAMT. + // It must be a power of 2 and multiple of 8. Used when converting + // between Basic and HAMT. + SetMaxHAMTFanout(n int) + + // SetStat sets the stat information for the directory. Used when + // converting between Basic and HAMT. + SetStat(os.FileMode, time.Time) +} + +// A DirectoryOption can be used to initialize directories. +type DirectoryOption func(d Directory) + +// WithMaxLinks stablishes the max number of links allowed for a Basic +// directory or a Dynamic directory with an underlying Basic directory: +// +// - On Dynamic directories using a BasicDirectory, it can trigger conversion +// to HAMT when set and exceeded. The subsequent HAMT nodes will use +// MaxHAMTFanout as ShardWidth when set, or DefaultShardWidth +// otherwise. Conversion can be triggered too based on HAMTShardingSize. +// +// - On Dynamic directories using a HAMTDirectory, it can trigger conversion +// to BasicDirectory when the number of directory entries is below MaxLinks (and +// HAMTShardingSize allows). +// +// - On pure Basic directories, it causes an error when adding more than +// MaxLinks children. +func WithMaxLinks(n int) DirectoryOption { + return func(d Directory) { + d.SetMaxLinks(n) + } +} + +// WithMaxHAMTFanout stablishes the maximum fanout factor (number of links) for +// a HAMT directory or a Dynamic directory with an underlying HAMT directory: +// +// - On Dynamic directories, it specifies the HAMT fanout when a HAMT +// used. When unset, DefaultShardWidth applies. +// +// - On pure HAMT directories, it sets the ShardWidth, otherwise DefaultShardWidth +// is used. +// +// HAMT directories require this value to be a power of 2 and a multiple of +// 8. If it is not the case, it will not be used and DefaultHAMTWidth will be +// used instead. +func WithMaxHAMTFanout(n int) DirectoryOption { + return func(d Directory) { + d.SetMaxHAMTFanout(n) + } +} + +// WithCidBuilder sets the CidBuilder for new Directories. +func WithCidBuilder(cb cid.Builder) DirectoryOption { + return func(d Directory) { + d.SetCidBuilder(cb) + } +} + +// WithStat can be used to set the empty directory node permissions. +func WithStat(mode os.FileMode, mtime time.Time) DirectoryOption { + return func(d Directory) { + d.SetStat(mode, mtime) + } } // TODO: Evaluate removing `dserv` from this layer and providing it in MFS. @@ -84,6 +159,12 @@ func init() { linksize.LinkSizeFunction = productionLinkSize } +var ( + _ Directory = (*DynamicDirectory)(nil) + _ Directory = (*BasicDirectory)(nil) + _ Directory = (*HAMTDirectory)(nil) +) + // BasicDirectory is the basic implementation of `Directory`. All the entries // are stored in a single node. type BasicDirectory struct { @@ -96,6 +177,15 @@ type BasicDirectory struct { // (We maintain this value up to date even if the HAMTShardingSize is off // since potentially the option could be activated on the fly.) estimatedSize int + totalLinks int + + // opts + // maxNumberOfLinks. If set, can trigger conversion to HAMT directory. + maxLinks int + maxHAMTFanout int + cidBuilder cid.Builder + mode os.FileMode + mtime time.Time } // HAMTDirectory is the HAMT implementation of `Directory`. @@ -104,30 +194,111 @@ type HAMTDirectory struct { shard *hamt.Shard dserv ipld.DAGService + // opts + maxLinks int + maxHAMTFanout int + cidBuilder cid.Builder + mode os.FileMode + mtime time.Time + // Track the changes in size by the AddChild and RemoveChild calls // for the HAMTShardingSize option. sizeChange int + totalLinks int } -func newEmptyBasicDirectory(dserv ipld.DAGService) *BasicDirectory { - return newBasicDirectoryFromNode(dserv, format.EmptyDirNode()) +// NewBasicDirectory creates an empty basic directory with the given options. +func NewBasicDirectory(dserv ipld.DAGService, opts ...DirectoryOption) (*BasicDirectory, error) { + basicDir := &BasicDirectory{ + dserv: dserv, + maxHAMTFanout: DefaultShardWidth, + } + + for _, o := range opts { + o(basicDir) + } + + var node *mdag.ProtoNode + if basicDir.mode > 0 || !basicDir.mtime.IsZero() { + node = format.EmptyDirNodeWithStat(basicDir.mode, basicDir.mtime) + } else { + node = format.EmptyDirNode() + } + basicDir.node = node + err := basicDir.node.SetCidBuilder(basicDir.cidBuilder) + if err != nil { + return nil, err + } + + // Scan node links (if any) to restore estimated size. + basicDir.computeEstimatedSizeAndTotalLinks() + + return basicDir, nil } -func newBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *BasicDirectory { +// NewBasicDirectoryFromNode creates a basic directory wrapping the given +// ProtoNode. +func NewBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *BasicDirectory { basicDir := new(BasicDirectory) basicDir.node = node basicDir.dserv = dserv // Scan node links (if any) to restore estimated size. - basicDir.computeEstimatedSize() + basicDir.computeEstimatedSizeAndTotalLinks() return basicDir } -// NewDirectory returns a Directory implemented by DynamicDirectory -// containing a BasicDirectory that can be converted to a HAMTDirectory. -func NewDirectory(dserv ipld.DAGService) Directory { - return &DynamicDirectory{newEmptyBasicDirectory(dserv)} +// NewHAMTDirectory creates an empty HAMT directory with the given options. +func NewHAMTDirectory(dserv ipld.DAGService, sizeChange int, opts ...DirectoryOption) (*HAMTDirectory, error) { + dir := &HAMTDirectory{ + dserv: dserv, + sizeChange: sizeChange, + maxHAMTFanout: DefaultShardWidth, + } + + for _, opt := range opts { + opt(dir) + } + + // FIXME: do shards not support mtime and mode? + shard, err := hamt.NewShard(dir.dserv, dir.maxHAMTFanout) + if err != nil { + return nil, err + } + + shard.SetCidBuilder(dir.cidBuilder) + dir.shard = shard + + return dir, nil +} + +// NewHAMTDirectoryFromNode creates a HAMT directory from the given node, +// which must correspond to an existing HAMT. +func NewHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirectory, error) { + dir := &HAMTDirectory{ + dserv: dserv, + } + + shard, err := hamt.NewHamtFromDag(dserv, node) + if err != nil { + return nil, err + } + dir.shard = shard + dir.totalLinks = len(node.Links()) + + return dir, nil +} + +// NewDirectory returns a Directory implemented by DynamicDirectory containing +// a BasicDirectory that automatically converts to a from a HAMTDirectory +// based on HAMTShardingSize, MaxLinks and MaxHAMTFanout (when set). +func NewDirectory(dserv ipld.DAGService, opts ...DirectoryOption) (Directory, error) { + bd, err := NewBasicDirectory(dserv, opts...) + if err != nil { + return nil, err + } + return &DynamicDirectory{bd}, nil } // ErrNotADir implies that the given node was not a unixfs directory @@ -148,23 +319,65 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err switch fsNode.Type() { case format.TDirectory: - return &DynamicDirectory{newBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil + return &DynamicDirectory{NewBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil case format.THAMTShard: - shard, err := hamt.NewHamtFromDag(dserv, node) + hamtDir, err := NewHAMTDirectoryFromNode(dserv, node) if err != nil { return nil, err } - return &DynamicDirectory{&HAMTDirectory{shard, dserv, 0}}, nil + return &DynamicDirectory{hamtDir}, nil } return nil, ErrNotADir } -func (d *BasicDirectory) computeEstimatedSize() { +// GetMaxLinks returns the configured MaxLinks. +func (d *BasicDirectory) GetMaxLinks() int { + return d.maxLinks +} + +// SetMaxLinks sets the maximum number of links for the Directory, but has no +// side effects until new children are added (in which case the new limit +// applies). +func (d *BasicDirectory) SetMaxLinks(n int) { + d.maxLinks = n +} + +// GetMaxLinks returns the configured HAMTFanout. +func (d *BasicDirectory) GetMaxHAMTFanout() int { + return d.maxHAMTFanout +} + +// SetMAXHAMTFanout has no relevance for BasicDirectories. +func (d *BasicDirectory) SetMaxHAMTFanout(n int) { + if n > 0 && !validShardWidth(n) { + log.Warnf("Invalid HAMTMaxFanout: %d. Using default (%d)", n, DefaultShardWidth) + n = DefaultShardWidth + } + if n == 0 { + n = DefaultShardWidth + } + + d.maxHAMTFanout = n +} + +// SetStat has no effect and only exists to support dynamic directories. Use +// WithStat when creating a new directory instead. +func (d *BasicDirectory) SetStat(mode os.FileMode, mtime time.Time) { + if mode > 0 { + d.mode = mode + } + if !mtime.IsZero() { + d.mtime = mtime + } +} + +func (d *BasicDirectory) computeEstimatedSizeAndTotalLinks() { d.estimatedSize = 0 // err is just breaking the iteration and we always return nil _ = d.ForEachLink(context.TODO(), func(l *ipld.Link) error { d.addToEstimatedSize(l.Name, l.Cid) + d.totalLinks++ return nil }) // ForEachLink will never fail traversing the BasicDirectory @@ -181,13 +394,16 @@ func (d *BasicDirectory) removeFromEstimatedSize(name string, linkCid cid.Cid) { // Something has gone very wrong. Log an error and recompute the // size from scratch. log.Error("BasicDirectory's estimatedSize went below 0") - d.computeEstimatedSize() + d.computeEstimatedSizeAndTotalLinks() } } // SetCidBuilder implements the `Directory` interface. func (d *BasicDirectory) SetCidBuilder(builder cid.Builder) { - d.node.SetCidBuilder(builder) + d.cidBuilder = builder + if d.node != nil { + d.node.SetCidBuilder(builder) + } } // AddChild implements the `Directory` interface. It adds (or replaces) @@ -219,7 +435,15 @@ func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid()) } - return d.estimatedSize+operationSizeChange >= HAMTShardingSize, nil + switchShardingSize := d.estimatedSize+operationSizeChange >= HAMTShardingSize + switchMaxLinks := false + // We should switch if maxLinks is set, we have reached it and a new link is being + // added. + if nodeToAdd != nil && entryToRemove == nil && d.maxLinks > 0 && + (d.totalLinks+1) > d.maxLinks { + switchMaxLinks = true + } + return switchShardingSize || switchMaxLinks, nil } // addLinkChild adds the link as an entry to this directory under the given @@ -228,8 +452,16 @@ func (d *BasicDirectory) addLinkChild(ctx context.Context, name string, link *ip // Remove old link and account for size change (if it existed; ignore // `ErrNotExist` otherwise). err := d.RemoveChild(ctx, name) - if err != nil && err != os.ErrNotExist { - return err + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + return err + } + } else { // existed + d.totalLinks-- + } + + if d.maxLinks > 0 && d.totalLinks+1 > d.maxLinks { + return errors.New("BasicDirectory: cannot add child: maxLinks reached") } err = d.node.AddRawLink(name, link) @@ -237,6 +469,7 @@ func (d *BasicDirectory) addLinkChild(ctx context.Context, name string, link *ip return err } d.addToEstimatedSize(name, link.Cid) + d.totalLinks++ return nil } @@ -304,6 +537,7 @@ func (d *BasicDirectory) RemoveChild(ctx context.Context, name string) error { // The name actually existed so we should update the estimated size. d.removeFromEstimatedSize(link.Name, link.Cid) + d.totalLinks-- return d.node.RemoveNodeLink(name) // GetNodeLink didn't return ErrLinkNotFound so this won't fail with that @@ -321,22 +555,18 @@ func (d *BasicDirectory) GetCidBuilder() cid.Builder { } // switchToSharding returns a HAMT implementation of this directory. -func (d *BasicDirectory) switchToSharding(ctx context.Context) (*HAMTDirectory, error) { - hamtDir := new(HAMTDirectory) - hamtDir.dserv = d.dserv - - shard, err := hamt.NewShard(d.dserv, DefaultShardWidth) +func (d *BasicDirectory) switchToSharding(ctx context.Context, opts ...DirectoryOption) (*HAMTDirectory, error) { + hamtDir, err := NewHAMTDirectory(d.dserv, 0, opts...) if err != nil { return nil, err } - shard.SetCidBuilder(d.node.CidBuilder()) - hamtDir.shard = shard for _, lnk := range d.node.Links() { err = hamtDir.shard.SetLink(ctx, lnk.Name, lnk) if err != nil { return nil, err } + hamtDir.totalLinks++ } return hamtDir, nil @@ -344,7 +574,50 @@ func (d *BasicDirectory) switchToSharding(ctx context.Context) (*HAMTDirectory, // SetCidBuilder implements the `Directory` interface. func (d *HAMTDirectory) SetCidBuilder(builder cid.Builder) { - d.shard.SetCidBuilder(builder) + d.cidBuilder = builder + if d.shard != nil { + d.shard.SetCidBuilder(builder) + } +} + +// GetMaxLinks returns the maxLinks value from a HAMTDirectory. +func (d *HAMTDirectory) GetMaxLinks() int { + return d.maxLinks +} + +// SetMaxLinks has no effect and only exists to support Dynamic directories. +func (d *HAMTDirectory) SetMaxLinks(n int) { + d.maxLinks = n +} + +// GetMaxHAMTFanout returns the maxHAMTFanout value from a HAMTDirectory. +func (d *HAMTDirectory) GetMaxHAMTFanout() int { + return d.maxHAMTFanout +} + +// SetMaxHAMTFanout has no effect and only exists to support Dynamic +// directories. Max fanout can be set during creation using +// WithMaxHAMTFanout(). +func (d *HAMTDirectory) SetMaxHAMTFanout(n int) { + if n > 0 && !validShardWidth(n) { + log.Warnf("Invalid HAMTMaxFanout: %d. Using default (%d)", n, DefaultShardWidth) + n = DefaultShardWidth + } + if n == 0 { + n = DefaultShardWidth + } + + d.maxHAMTFanout = n +} + +// SetStat has no effect and only exists to support Dynamic directories. +func (d *HAMTDirectory) SetStat(mode os.FileMode, mtime time.Time) { + if mode > 0 { + d.mode = mode + } + if !mtime.IsZero() { + d.mtime = mtime + } } // AddChild implements the `Directory` interface. @@ -358,6 +631,9 @@ func (d *HAMTDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) d.removeFromSizeChange(oldChild.Name, oldChild.Cid) } d.addToSizeChange(name, nd.Cid()) + if oldChild == nil { + d.totalLinks++ + } return nil } @@ -396,6 +672,7 @@ func (d *HAMTDirectory) RemoveChild(ctx context.Context, name string) error { if oldChild != nil { d.removeFromSizeChange(oldChild.Name, oldChild.Cid) + d.totalLinks-- } return nil @@ -412,11 +689,15 @@ func (d *HAMTDirectory) GetCidBuilder() cid.Builder { } // switchToBasic returns a BasicDirectory implementation of this directory. -func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, error) { - basicDir := newEmptyBasicDirectory(d.dserv) - basicDir.SetCidBuilder(d.GetCidBuilder()) +func (d *HAMTDirectory) switchToBasic(ctx context.Context, opts ...DirectoryOption) (*BasicDirectory, error) { + // needsoSwichToBasicDir checks d.maxLinks is appropiate. No check is + // performed here. + basicDir, err := NewBasicDirectory(d.dserv, opts...) + if err != nil { + return nil, err + } - err := d.ForEachLink(ctx, func(lnk *ipld.Link) error { + err = d.ForEachLink(ctx, func(lnk *ipld.Link) error { err := basicDir.addLinkChild(ctx, lnk.Name, lnk) if err != nil { return err @@ -472,14 +753,33 @@ func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid()) } - if d.sizeChange+operationSizeChange >= 0 { - // We won't have reduced the HAMT net size. - return false, nil + // We must switch if size and maxlinks are below threshold + canSwitchSize := false + // Directory size reduced, perhaps below limit. + if d.sizeChange+operationSizeChange < 0 { + canSwitchSize, err = d.sizeBelowThreshold(ctx, operationSizeChange) + if err != nil { + return false, err + } + } + + canSwitchMaxLinks := true + if d.maxLinks > 0 { + total := d.totalLinks + if nodeToAdd != nil { + total++ + } + if entryToRemove != nil { + total-- + } + if total > d.maxLinks { + // prevent switching as we would end up with too many links + canSwitchMaxLinks = false + } } - // We have reduced the directory size, check if went below the - // HAMTShardingSize threshold to trigger a switch. - return d.sizeBelowThreshold(ctx, operationSizeChange) + return canSwitchSize && canSwitchMaxLinks, nil + } // Evaluate directory size and a future sizeChange and check if it will be below @@ -540,6 +840,30 @@ type DynamicDirectory struct { var _ Directory = (*DynamicDirectory)(nil) +// SetMaxLinks sets the maximum number of links for the underlying Basic +// directory when used. This operation does not produce any side effects, but +// the new value may trigger Basic-to-HAMT conversions when adding new +// children to Basic directories, or HAMT-to-Basic conversion when operating +// with HAMT directories. +func (d *DynamicDirectory) SetMaxLinks(n int) { + d.Directory.SetMaxLinks(n) +} + +// SetMaxHAMTFanout sets the maximum shard width for HAMT directories. This +// operation does not produce any side effect and only takes effect on a +// conversion from Basic to HAMT directory. +func (d *DynamicDirectory) SetMaxHAMTFanout(n int) { + d.Directory.SetMaxHAMTFanout(n) +} + +// SetStat sets stats information. This operation does not produce any side +// effects. It is taken into account when converting from HAMT to basic +// directory. Mode or mtime can be set independently by using zero for mtime +// or mode respectively. +func (d *DynamicDirectory) SetStat(mode os.FileMode, mtime time.Time) { + d.Directory.SetStat(mode, mtime) +} + // AddChild implements the `Directory` interface. We check when adding new entries // if we should switch to HAMTDirectory according to global option(s). func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { @@ -554,7 +878,12 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No } if switchToBasic { - basicDir, err := hamtDir.switchToBasic(ctx) + basicDir, err := hamtDir.switchToBasic(ctx, + WithMaxLinks(hamtDir.maxLinks), + WithMaxHAMTFanout(hamtDir.maxHAMTFanout), + WithCidBuilder(hamtDir.GetCidBuilder()), + WithStat(hamtDir.mode, hamtDir.mtime), + ) if err != nil { return err } @@ -578,7 +907,14 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No if !switchToHAMT { return basicDir.AddChild(ctx, name, nd) } - hamtDir, err = basicDir.switchToSharding(ctx) + + hamtFanout := DefaultShardWidth + // Verify that our maxLinks is usuable for ShardWidth (power of 2, multiple of 8) + if validShardWidth(basicDir.maxHAMTFanout) { + hamtFanout = basicDir.maxHAMTFanout + } + + hamtDir, err = basicDir.switchToSharding(ctx, WithMaxHAMTFanout(hamtFanout), WithMaxLinks(basicDir.maxLinks), WithCidBuilder(basicDir.GetCidBuilder())) if err != nil { return err } @@ -608,14 +944,31 @@ func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { return hamtDir.RemoveChild(ctx, name) } - basicDir, err := hamtDir.switchToBasic(ctx) + maxLinks := hamtDir.maxLinks + // We have not removed the element that violates MaxLinks, so we have to +1 the limit. We -1 below. + if maxLinks > 0 { + maxLinks++ + } + + basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(maxLinks), WithCidBuilder(hamtDir.GetCidBuilder())) if err != nil { return err } + err = basicDir.RemoveChild(ctx, name) if err != nil { return err } + + if maxLinks > 0 { + basicDir.maxLinks-- + } d.Directory = basicDir return nil } + +// validShardWidth verifies that the given number is positive, a power of 2 +// and a multiple of 8. +func validShardWidth(n int) bool { + return n > 0 && (n&(n-1)) == 0 && n&7 == 0 +} diff --git a/ipld/unixfs/io/directory_test.go b/ipld/unixfs/io/directory_test.go index f69ad0b27..a2d97083e 100644 --- a/ipld/unixfs/io/directory_test.go +++ b/ipld/unixfs/io/directory_test.go @@ -17,7 +17,6 @@ import ( mdag "github.com/ipfs/boxo/ipld/merkledag" mdtest "github.com/ipfs/boxo/ipld/merkledag/test" ft "github.com/ipfs/boxo/ipld/unixfs" - "github.com/ipfs/boxo/ipld/unixfs/hamt" "github.com/ipfs/boxo/ipld/unixfs/internal" "github.com/ipfs/boxo/ipld/unixfs/private/linksize" blocks "github.com/ipfs/go-block-format" @@ -26,6 +25,7 @@ import ( dssync "github.com/ipfs/go-datastore/sync" ipld "github.com/ipfs/go-ipld-format" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestEmptyNode(t *testing.T) { @@ -37,7 +37,10 @@ func TestEmptyNode(t *testing.T) { func TestDirectoryGrowth(t *testing.T) { ds := mdtest.Mock() - dir := NewDirectory(ds) + dir, err := NewDirectory(ds) + if err != nil { + t.Fatal(err) + } ctx := context.Background() d := ft.EmptyDirNode() @@ -52,7 +55,7 @@ func TestDirectoryGrowth(t *testing.T) { } } - _, err := dir.GetNode() + _, err = dir.GetNode() if err != nil { t.Fatal(err) } @@ -91,11 +94,14 @@ func TestDirectoryGrowth(t *testing.T) { func TestDuplicateAddDir(t *testing.T) { ds := mdtest.Mock() - dir := NewDirectory(ds) + dir, err := NewDirectory(ds) + if err != nil { + t.Fatal(err) + } ctx := context.Background() nd := ft.EmptyDirNode() - err := dir.AddChild(ctx, "test", nd) + err = dir.AddChild(ctx, "test", nd) if err != nil { t.Fatal(err) } @@ -117,7 +123,10 @@ func TestDuplicateAddDir(t *testing.T) { func TestBasicDirectory_estimatedSize(t *testing.T) { ds := mdtest.Mock() - basicDir := newEmptyBasicDirectory(ds) + basicDir, err := NewBasicDirectory(ds) + if err != nil { + t.Fatal(err) + } testDirectorySizeEstimation(t, basicDir, ds, func(dir Directory) int { return dir.(*BasicDirectory).estimatedSize @@ -126,7 +135,7 @@ func TestBasicDirectory_estimatedSize(t *testing.T) { func TestHAMTDirectory_sizeChange(t *testing.T) { ds := mdtest.Mock() - hamtDir, err := newEmptyHAMTDirectory(ds, DefaultShardWidth) + hamtDir, err := NewHAMTDirectory(ds, 0, WithMaxLinks(DefaultShardWidth)) assert.NoError(t, err) testDirectorySizeEstimation(t, hamtDir, ds, func(dir Directory) int { @@ -205,12 +214,14 @@ func mockLinkSizeFunc(fixedSize int) func(linkName string, linkCid cid.Cid) int } func checkBasicDirectory(t *testing.T, dir Directory, errorMessage string) { + t.Helper() if _, ok := dir.(*DynamicDirectory).Directory.(*BasicDirectory); !ok { t.Fatal(errorMessage) } } func checkHAMTDirectory(t *testing.T, dir Directory, errorMessage string) { + t.Helper() if _, ok := dir.(*DynamicDirectory).Directory.(*HAMTDirectory); !ok { t.Fatal(errorMessage) } @@ -228,7 +239,7 @@ func TestProductionLinkSize(t *testing.T) { assert.Equal(t, 48, productionLinkSize(link.Name, link.Cid)) ds := mdtest.Mock() - basicDir := newEmptyBasicDirectory(ds) + basicDir, err := NewBasicDirectory(ds) assert.NoError(t, err) for i := 0; i < 10; i++ { basicDir.AddChild(context.Background(), strconv.FormatUint(uint64(i), 10), ft.EmptyFileNode()) @@ -251,12 +262,14 @@ func TestDynamicDirectorySwitch(t *testing.T) { defer func() { linksize.LinkSizeFunction = productionLinkSize }() ds := mdtest.Mock() - dir := NewDirectory(ds) + dir, err := NewDirectory(ds) + require.NoError(t, err) + checkBasicDirectory(t, dir, "new dir is not BasicDirectory") ctx := context.Background() child := ft.EmptyDirNode() - err := ds.Add(ctx, child) + err = ds.Add(ctx, child) assert.NoError(t, err) err = dir.AddChild(ctx, "1", child) @@ -289,16 +302,20 @@ func TestDynamicDirectorySwitch(t *testing.T) { func TestIntegrityOfDirectorySwitch(t *testing.T) { ds := mdtest.Mock() - dir := NewDirectory(ds) + dir, err := NewDirectory(ds) + if err != nil { + t.Fatal(err) + } checkBasicDirectory(t, dir, "new dir is not BasicDirectory") ctx := context.Background() child := ft.EmptyDirNode() - err := ds.Add(ctx, child) + err = ds.Add(ctx, child) assert.NoError(t, err) - basicDir := newEmptyBasicDirectory(ds) - hamtDir, err := newEmptyHAMTDirectory(ds, DefaultShardWidth) + basicDir, err := NewBasicDirectory(ds) + assert.NoError(t, err) + hamtDir, err := NewHAMTDirectory(ds, 0, WithMaxLinks(DefaultShardWidth)) assert.NoError(t, err) for i := 0; i < 1000; i++ { basicDir.AddChild(ctx, strconv.FormatUint(uint64(i), 10), child) @@ -307,9 +324,9 @@ func TestIntegrityOfDirectorySwitch(t *testing.T) { compareDirectoryEntries(t, basicDir, hamtDir) hamtDirFromSwitch, err := basicDir.switchToSharding(ctx) - assert.NoError(t, err) + require.NoError(t, err) basicDirFromSwitch, err := hamtDir.switchToBasic(ctx) - assert.NoError(t, err) + require.NoError(t, err) compareDirectoryEntries(t, basicDir, basicDirFromSwitch) compareDirectoryEntries(t, hamtDir, hamtDirFromSwitch) } @@ -391,7 +408,7 @@ func TestHAMTEnumerationWhenComputingSize(t *testing.T) { // the HAMTShardingSize threshold. nodesToFetch += thresholdToWidthRatio - hamtDir, err := newHAMTDirectoryFromNode(dsrv, completeHAMTRoot) + hamtDir, err := NewHAMTDirectoryFromNode(dsrv, completeHAMTRoot) assert.NoError(t, err) countGetsDS.resetCounter() @@ -444,14 +461,15 @@ func sortLinksByName(links []*ipld.Link) { func TestDirBuilder(t *testing.T) { ds := mdtest.Mock() - dir := NewDirectory(ds) - ctx := context.Background() - - child := ft.EmptyDirNode() - err := ds.Add(ctx, child) + dir, err := NewDirectory(ds) if err != nil { t.Fatal(err) } + ctx := context.Background() + + child := ft.EmptyDirNode() + err = ds.Add(ctx, child) + require.NoError(t, err) count := 5000 @@ -527,27 +545,134 @@ func TestDirBuilder(t *testing.T) { } } -func newHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirectory, error) { - shard, err := hamt.NewHamtFromDag(dserv, node) - if err != nil { - return nil, err +func TestBasicDirectoryWithMaxLinks(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + dir, err := NewBasicDirectory(ds, WithMaxLinks(2)) + require.NoError(t, err) + + child1 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child1)) + + err = dir.AddChild(ctx, "entry1", child1) + require.NoError(t, err) + + child2 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child2)) + + err = dir.AddChild(ctx, "entry2", child2) + require.NoError(t, err) + + child3 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child3)) + + err = dir.AddChild(ctx, "entry3", child3) + require.Error(t, err) +} + +// TestHAMTDirectoryWithMaxLinks tests that no HAMT shard as more than MaxLinks. +func TestHAMTDirectoryWithMaxLinks(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + dir, err := NewHAMTDirectory(ds, 0, WithMaxHAMTFanout(8)) + require.NoError(t, err) + + // Ensure we have at least 2 levels of HAMT by adding many nodes + for i := 0; i < 300; i++ { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + err := dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + dirnd, err := dir.GetNode() + require.NoError(t, err) + + require.Equal(t, 8, len(dirnd.Links()), "Node should not have more than 8 links") + + for _, l := range dirnd.Links() { + childNd, err := ds.Get(ctx, l.Cid) + if err != nil { + t.Errorf("Failed to get node: %s", err) + continue + } + assert.Equal(t, 8, len(childNd.Links()), "Node does not have 8 links") } - return &HAMTDirectory{ - dserv: dserv, - shard: shard, - }, nil } -func newEmptyHAMTDirectory(dserv ipld.DAGService, shardWidth int) (*HAMTDirectory, error) { - shard, err := hamt.NewShard(dserv, shardWidth) +func TestDynamicDirectoryWithMaxLinks(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + dir, err := NewDirectory(ds, WithMaxLinks(8), WithMaxHAMTFanout(16)) if err != nil { - return nil, err + t.Fatal(err) + } + + for i := 0; i < 8; i++ { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + err := dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + checkBasicDirectory(t, dir, "directory should be basic because it has less children than MaxLinks") + } + + for i := 8; i < 58; i++ { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + err := dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + checkHAMTDirectory(t, dir, "directory should be sharded because more children than MaxLinks") } - return &HAMTDirectory{ - dserv: dserv, - shard: shard, - }, nil + // Check that the directory root node has 8 links + dirnd, err := dir.GetNode() + require.NoError(t, err) + assert.Equal(t, 16, len(dirnd.Links()), "HAMT Directory root node should have 16 links") + + // Continue the code by removing 50 elements while checking that the underlying directory is a HAMT directory. + for i := 57; i >= 9; i-- { + err := dir.RemoveChild(ctx, fmt.Sprintf("entry%d", i)) + require.NoError(t, err) + checkHAMTDirectory(t, dir, "directory should still be sharded while more than 8 children") + } + + // Continue removing elements until the directory is a basic directory again + for i := 8; i >= 0; i-- { + err := dir.RemoveChild(ctx, fmt.Sprintf("entry%d", i)) + require.NoError(t, err) + checkBasicDirectory(t, dir, "directory should be basic when less than 8 children") + } +} + +// add a test that tests that validShardWidth(n) only returns true with positive numbers that are powers of 8 and multiples of 8. +func TestValidShardWidth(t *testing.T) { + testCases := []struct { + width int + expect bool + }{ + {0, false}, + {-1, false}, + {1, false}, + {2, false}, + {4, false}, + {8, true}, + {16, true}, + {32, true}, + {64, true}, + {512, true}, + {1024, true}, + {4096, true}, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("Width%d", tc.width), func(t *testing.T) { + result := validShardWidth(tc.width) + assert.Equal(t, tc.expect, result, "Expected %v for width %d", tc.expect, tc.width) + }) + } } // countGetsDS is a DAG service that keeps track of the number of diff --git a/mfs/dir.go b/mfs/dir.go index 506d9f7ee..55d7474a2 100644 --- a/mfs/dir.go +++ b/mfs/dir.go @@ -64,6 +64,43 @@ func NewDirectory(ctx context.Context, name string, node ipld.Node, parent paren }, nil } +// NewEmptyDirectory creates an empty MFS directory with the given options. +// The directory is added to the DAGService. To create a new MFS +// root use NewEmptyRootFolder instead. +func NewEmptyDirectory(ctx context.Context, name string, parent parent, dserv ipld.DAGService, opts MkdirOpts) (*Directory, error) { + db, err := uio.NewDirectory(dserv, + uio.WithMaxLinks(opts.MaxLinks), + uio.WithMaxHAMTFanout(opts.MaxHAMTFanout), + uio.WithStat(opts.Mode, opts.ModTime), + uio.WithCidBuilder(opts.CidBuilder), + ) + + if err != nil { + return nil, err + } + + nd, err := db.GetNode() + if err != nil { + return nil, err + } + + err = dserv.Add(ctx, nd) + if err != nil { + return nil, err + } + + return &Directory{ + inode: inode{ + name: name, + parent: parent, + dagService: dserv, + }, + ctx: ctx, + unixfsDir: db, + entriesCache: make(map[string]FSNode), + }, nil +} + // GetCidBuilder gets the CID builder of the root node func (d *Directory) GetCidBuilder() cid.Builder { return d.unixfsDir.GetCidBuilder() @@ -145,6 +182,11 @@ func (d *Directory) cacheNode(name string, nd ipld.Node) (FSNode, error) { return nil, err } + // these options are not persisted so they need to be + // inherited from the parent. + ndir.unixfsDir.SetMaxLinks(d.unixfsDir.GetMaxLinks()) + ndir.unixfsDir.SetMaxHAMTFanout(d.unixfsDir.GetMaxHAMTFanout()) + d.entriesCache[name] = ndir return ndir, nil case ft.TFile, ft.TRaw, ft.TSymlink: @@ -271,7 +313,10 @@ func (d *Directory) ForEachEntry(ctx context.Context, f func(NodeListing) error) } func (d *Directory) Mkdir(name string) (*Directory, error) { - return d.MkdirWithOpts(name, MkdirOpts{}) + return d.MkdirWithOpts(name, MkdirOpts{ + MaxLinks: d.unixfsDir.GetMaxLinks(), + MaxHAMTFanout: d.unixfsDir.GetMaxHAMTFanout(), + }) } func (d *Directory) MkdirWithOpts(name string, opts MkdirOpts) (*Directory, error) { @@ -290,20 +335,21 @@ func (d *Directory) MkdirWithOpts(name string, opts MkdirOpts) (*Directory, erro } } - ndir := ft.EmptyDirNodeWithStat(opts.Mode, opts.ModTime) - ndir.SetCidBuilder(d.GetCidBuilder()) - - err = d.dagService.Add(d.ctx, ndir) + // hector: no idea why this option is overriden, but it must be to + // keep backwards compatibility. CidBuilder from the options is + // manually set in `Mkdir` (ops.go) though. + opts.CidBuilder = d.GetCidBuilder() + dirobj, err := NewEmptyDirectory(d.ctx, name, d, d.dagService, opts) if err != nil { return nil, err } - err = d.unixfsDir.AddChild(d.ctx, name, ndir) + ndir, err := dirobj.GetNode() if err != nil { return nil, err } - dirobj, err := NewDirectory(d.ctx, name, ndir, d, d.dagService) + err = d.unixfsDir.AddChild(d.ctx, name, ndir) if err != nil { return nil, err } @@ -426,7 +472,13 @@ func (d *Directory) SetMode(mode os.FileMode) error { return err } - return d.setNodeData(data, nd.Links()) + err = d.setNodeData(data, nd.Links()) + if err != nil { + return err + } + + d.unixfsDir.SetStat(mode, time.Time{}) + return nil } func (d *Directory) SetModTime(ts time.Time) error { @@ -446,7 +498,12 @@ func (d *Directory) SetModTime(ts time.Time) error { return err } - return d.setNodeData(data, nd.Links()) + err = d.setNodeData(data, nd.Links()) + if err != nil { + return err + } + d.unixfsDir.SetStat(0, ts) + return nil } func (d *Directory) setNodeData(data []byte, links []*ipld.Link) error { @@ -469,6 +526,10 @@ func (d *Directory) setNodeData(data []byte, links []*ipld.Link) error { if err != nil { return err } + + // We need to carry our desired settings. + db.SetMaxLinks(d.unixfsDir.GetMaxLinks()) + db.SetMaxHAMTFanout(d.unixfsDir.GetMaxHAMTFanout()) d.unixfsDir = db return nil diff --git a/mfs/ops.go b/mfs/ops.go index 09dbab00f..b6345fc3b 100644 --- a/mfs/ops.go +++ b/mfs/ops.go @@ -120,11 +120,13 @@ func PutNode(r *Root, path string, nd ipld.Node) error { // MkdirOpts is used by Mkdir type MkdirOpts struct { - Mkparents bool - Flush bool - CidBuilder cid.Builder - Mode os.FileMode - ModTime time.Time + Mkparents bool + Flush bool + CidBuilder cid.Builder + Mode os.FileMode + ModTime time.Time + MaxLinks int + MaxHAMTFanout int } // Mkdir creates a directory at 'path' under the directory 'd', creating @@ -152,13 +154,23 @@ func Mkdir(r *Root, pth string, opts MkdirOpts) error { } cur := r.GetDirectory() + + // opts to make the parents leave MkParents and Flush as false. + parentsOpts := MkdirOpts{ + MaxLinks: opts.MaxLinks, + MaxHAMTFanout: opts.MaxHAMTFanout, + } + for i, d := range parts[:len(parts)-1] { fsn, err := cur.Child(d) if err == os.ErrNotExist && opts.Mkparents { - mkd, err := cur.Mkdir(d) + + mkd, err := cur.MkdirWithOpts(d, parentsOpts) if err != nil { return err } + // MkdirWithOps uses cur.GetCidBuilder regardless of + // the option. So we must set it manually. if opts.CidBuilder != nil { mkd.SetCidBuilder(opts.CidBuilder) } @@ -180,6 +192,9 @@ func Mkdir(r *Root, pth string, opts MkdirOpts) error { return err } } + + // Again, MkdirWithOpts ignores opts.CidBuilder so must be applied + // here. if opts.CidBuilder != nil { final.SetCidBuilder(opts.CidBuilder) } diff --git a/mfs/root.go b/mfs/root.go index c8d307b2d..7a41f55e8 100644 --- a/mfs/root.go +++ b/mfs/root.go @@ -136,6 +136,33 @@ func NewRoot(parent context.Context, ds ipld.DAGService, node *dag.ProtoNode, pf return root, nil } +// NewEmptyRoot creates an empty Root directory with the given directory +// options. A republisher is created if PubFunc is not nil. +func NewEmptyRoot(parent context.Context, ds ipld.DAGService, pf PubFunc, opts MkdirOpts) (*Root, error) { + root := new(Root) + + dir, err := NewEmptyDirectory(parent, "", root, ds, opts) + if err != nil { + return nil, err + } + + // Rather than "dir.GetNode()" because it is cheaper and we have no + // risks. + nd, err := dir.unixfsDir.GetNode() + if err != nil { + return nil, err + } + + var repub *Republisher + if pf != nil { + repub = NewRepublisher(pf, repubQuick, repubLong, nd.Cid()) + } + + root.repub = repub + root.dir = dir + return root, nil +} + // GetDirectory returns the root directory. func (kr *Root) GetDirectory() *Directory { return kr.dir