From b015c71173be2dfb8465efa2f4d45b9a80d97ae1 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Tue, 1 Apr 2025 08:58:31 +0200 Subject: [PATCH 01/19] Support MaxLinks and MaxHAMTFanout This introduces support for setting DAG-shaping options that control the maximum number of links that a DAG node can have. It is supported in two ways: first, via `Import` configuration options; second, via `--max-links` and `--max-hamt-fanout` options to `ipfs add`. The resulting DAGs will respect MaxLinks (for files and basic directories) and MaxHAMTFanout for HAMT directories. When options are unset, the previous Kubo defaults will apply. --- config/import.go | 14 +-- config/profile.go | 4 +- core/commands/add.go | 58 +++++++++---- core/coreapi/unixfs.go | 10 +++ core/coreiface/options/unixfs.go | 45 ++++++++-- core/coreunix/add.go | 113 ++++++++++++++----------- docs/changelogs/v0.35.md | 9 ++ docs/config.md | 35 ++++++++ docs/examples/kubo-as-a-library/go.mod | 2 +- docs/examples/kubo-as-a-library/go.sum | 4 +- fuse/readonly/ipfs_test.go | 12 ++- go.mod | 2 +- go.sum | 4 +- test/dependencies/go.mod | 6 +- test/dependencies/go.sum | 12 ++- 15 files changed, 238 insertions(+), 92 deletions(-) diff --git a/config/import.go b/config/import.go index 6ea4d060fc2..3499e26ba34 100644 --- a/config/import.go +++ b/config/import.go @@ -19,10 +19,12 @@ const ( // Import configures the default options for ingesting data. This affects commands // that ingest data, such as 'ipfs add', 'ipfs dag put, 'ipfs block put', 'ipfs files write'. 
type Import struct { - CidVersion OptionalInteger - UnixFSRawLeaves Flag - UnixFSChunker OptionalString - HashFunction OptionalString - BatchMaxNodes OptionalInteger - BatchMaxSize OptionalInteger + CidVersion OptionalInteger + UnixFSRawLeaves Flag + UnixFSChunker OptionalString + HashFunction OptionalString + UnixFSDAGMaxLinks OptionalInteger + UnixFSHAMTDirectoryMaxFanout OptionalInteger + BatchMaxNodes OptionalInteger + BatchMaxSize OptionalInteger } diff --git a/config/profile.go b/config/profile.go index 69aaf66dcb6..8ec679518cc 100644 --- a/config/profile.go +++ b/config/profile.go @@ -277,13 +277,15 @@ fetching may be degraded. }, }, "test-cid-v1": { - Description: `Makes UnixFS import produce modern CIDv1 with raw leaves, sha2-256 and 1 MiB chunks.`, + Description: `Makes UnixFS import produce modern CIDv1 with raw leaves, sha2-256 and 1 MiB chunks and 1024 links at most.`, Transform: func(c *Config) error { c.Import.CidVersion = *NewOptionalInteger(1) c.Import.UnixFSRawLeaves = True c.Import.UnixFSChunker = *NewOptionalString("size-1048576") c.Import.HashFunction = *NewOptionalString("sha2-256") + c.Import.UnixFSDAGMaxLinks = *NewOptionalInteger(1024) + c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256) return nil }, }, diff --git a/core/commands/add.go b/core/commands/add.go index a1642f127a3..2f237b762c4 100644 --- a/core/commands/add.go +++ b/core/commands/add.go @@ -37,23 +37,25 @@ type AddEvent struct { } const ( - quietOptionName = "quiet" - quieterOptionName = "quieter" - silentOptionName = "silent" - progressOptionName = "progress" - trickleOptionName = "trickle" - wrapOptionName = "wrap-with-directory" - onlyHashOptionName = "only-hash" - chunkerOptionName = "chunker" - pinOptionName = "pin" - rawLeavesOptionName = "raw-leaves" - noCopyOptionName = "nocopy" - fstoreCacheOptionName = "fscache" - cidVersionOptionName = "cid-version" - hashOptionName = "hash" - inlineOptionName = "inline" - inlineLimitOptionName = "inline-limit" - 
toFilesOptionName = "to-files" + quietOptionName = "quiet" + quieterOptionName = "quieter" + silentOptionName = "silent" + progressOptionName = "progress" + trickleOptionName = "trickle" + wrapOptionName = "wrap-with-directory" + onlyHashOptionName = "only-hash" + chunkerOptionName = "chunker" + pinOptionName = "pin" + rawLeavesOptionName = "raw-leaves" + maxLinksOptionName = "max-links" + maxHAMTFanoutOptionName = "max-hamt-fanout" + noCopyOptionName = "nocopy" + fstoreCacheOptionName = "fscache" + cidVersionOptionName = "cid-version" + hashOptionName = "hash" + inlineOptionName = "inline" + inlineLimitOptionName = "inline-limit" + toFilesOptionName = "to-files" preserveModeOptionName = "preserve-mode" preserveMtimeOptionName = "preserve-mtime" @@ -168,6 +170,8 @@ See 'dag export' and 'dag import' for more information. cmds.BoolOption(wrapOptionName, "w", "Wrap files with a directory object."), cmds.StringOption(chunkerOptionName, "s", "Chunking algorithm, size-[bytes], rabin-[min]-[avg]-[max] or buzhash"), cmds.BoolOption(rawLeavesOptionName, "Use raw blocks for leaf nodes."), + cmds.IntOption(maxLinksOptionName, "Limit the maximum number of links in UnixFS file and basic directory nodes to this value."), + cmds.IntOption(maxHAMTFanoutOptionName, "Limit the maximum number of links of a UnixFS HAMT directory node to this (power of 2, multiple of 8)."), cmds.BoolOption(noCopyOptionName, "Add the file using filestore. Implies raw-leaves. (experimental)"), cmds.BoolOption(fstoreCacheOptionName, "Check the filestore for pre-existing blocks. (experimental)"), cmds.IntOption(cidVersionOptionName, "CID version. Defaults to 0 unless an option that depends on CIDv1 is passed. Passing version 1 will cause the raw-leaves option to default to true."), @@ -222,6 +226,8 @@ See 'dag export' and 'dag import' for more information. 
chunker, _ := req.Options[chunkerOptionName].(string) dopin, _ := req.Options[pinOptionName].(bool) rawblks, rbset := req.Options[rawLeavesOptionName].(bool) + maxLinks, maxLinksSet := req.Options[maxLinksOptionName].(int) + maxHAMTFanout, maxHAMTFanoutSet := req.Options[maxHAMTFanoutOptionName].(int) nocopy, _ := req.Options[noCopyOptionName].(bool) fscache, _ := req.Options[fstoreCacheOptionName].(bool) cidVer, cidVerSet := req.Options[cidVersionOptionName].(int) @@ -253,6 +259,16 @@ See 'dag export' and 'dag import' for more information. rawblks = cfg.Import.UnixFSRawLeaves.WithDefault(config.DefaultUnixFSRawLeaves) } + if !maxLinksSet && !cfg.Import.UnixFSDAGMaxLinks.IsDefault() { + maxLinksSet = true + maxLinks = int(cfg.Import.UnixFSDAGMaxLinks.WithDefault(0)) + } + + if !maxHAMTFanoutSet && !cfg.Import.UnixFSHAMTDirectoryMaxFanout.IsDefault() { + maxHAMTFanoutSet = true + maxHAMTFanout = int(cfg.Import.UnixFSHAMTDirectoryMaxFanout.WithDefault(0)) + } + // Storing optional mode or mtime (UnixFS 1.5) requires root block // to always be 'dag-pb' and not 'raw'. Below adjusts raw-leaves setting, if possible. if preserveMode || preserveMtime || mode != 0 || mtime != 0 { @@ -329,6 +345,14 @@ See 'dag export' and 'dag import' for more information. 
opts = append(opts, options.Unixfs.RawLeaves(rawblks)) } + if maxLinksSet { + opts = append(opts, options.Unixfs.MaxLinks(maxLinks)) + } + + if maxHAMTFanoutSet { + opts = append(opts, options.Unixfs.MaxHAMTFanout(maxHAMTFanout)) + } + if trickle { opts = append(opts, options.Unixfs.Layout(options.TrickleLayout)) } diff --git a/core/coreapi/unixfs.go b/core/coreapi/unixfs.go index 3a74d3046c9..397c2914517 100644 --- a/core/coreapi/unixfs.go +++ b/core/coreapi/unixfs.go @@ -50,6 +50,10 @@ func (api *UnixfsAPI) Add(ctx context.Context, files files.Node, opts ...options attribute.Int("inlinelimit", settings.InlineLimit), attribute.Bool("rawleaves", settings.RawLeaves), attribute.Bool("rawleavesset", settings.RawLeavesSet), + attribute.Int("maxlinks", settings.MaxLinks), + attribute.Bool("maxlinksset", settings.MaxLinksSet), + attribute.Int("maxhamtfanout", settings.MaxHAMTFanout), + attribute.Bool("maxhamtfanoutset", settings.MaxHAMTFanoutSet), attribute.Int("layout", int(settings.Layout)), attribute.Bool("pin", settings.Pin), attribute.Bool("onlyhash", settings.OnlyHash), @@ -132,6 +136,12 @@ func (api *UnixfsAPI) Add(ctx context.Context, files files.Node, opts ...options fileAdder.Pin = settings.Pin && !settings.OnlyHash fileAdder.Silent = settings.Silent fileAdder.RawLeaves = settings.RawLeaves + if settings.MaxLinksSet { + fileAdder.MaxLinks = settings.MaxLinks + } + if settings.MaxHAMTFanoutSet { + fileAdder.MaxHAMTFanout = settings.MaxHAMTFanout + } fileAdder.NoCopy = settings.NoCopy fileAdder.CidBuilder = prefix fileAdder.PreserveMode = settings.PreserveMode diff --git a/core/coreiface/options/unixfs.go b/core/coreiface/options/unixfs.go index c837ec1b2db..9d01610ca29 100644 --- a/core/coreiface/options/unixfs.go +++ b/core/coreiface/options/unixfs.go @@ -7,6 +7,8 @@ import ( "time" dag "github.com/ipfs/boxo/ipld/merkledag" + "github.com/ipfs/boxo/ipld/unixfs/importer/helpers" + "github.com/ipfs/boxo/ipld/unixfs/io" cid "github.com/ipfs/go-cid" mh 
"github.com/multiformats/go-multihash" ) @@ -22,10 +24,14 @@ type UnixfsAddSettings struct { CidVersion int MhType uint64 - Inline bool - InlineLimit int - RawLeaves bool - RawLeavesSet bool + Inline bool + InlineLimit int + RawLeaves bool + RawLeavesSet bool + MaxLinks int + MaxLinksSet bool + MaxHAMTFanout int + MaxHAMTFanoutSet bool Chunker string Layout Layout @@ -60,10 +66,14 @@ func UnixfsAddOptions(opts ...UnixfsAddOption) (*UnixfsAddSettings, cid.Prefix, CidVersion: -1, MhType: mh.SHA2_256, - Inline: false, - InlineLimit: 32, - RawLeaves: false, - RawLeavesSet: false, + Inline: false, + InlineLimit: 32, + RawLeaves: false, + RawLeavesSet: false, + MaxLinks: helpers.DefaultLinksPerBlock, + MaxLinksSet: false, + MaxHAMTFanout: io.DefaultShardWidth, + MaxHAMTFanoutSet: false, Chunker: "size-262144", Layout: BalancedLayout, @@ -190,6 +200,25 @@ func (unixfsOpts) RawLeaves(enable bool) UnixfsAddOption { } } +// MaxLinks specifies the maximum width of the UnixFS DAG. It affects files +// and basic folders. +func (unixfsOpts) MaxLinks(n int) UnixfsAddOption { + return func(settings *UnixfsAddSettings) error { + settings.MaxLinks = n + settings.MaxLinksSet = true + return nil + } +} + +// MaxHAMTFanout specifies the maximum width of the HAMT directory shards. 
+func (unixfsOpts) MaxHAMTFanout(n int) UnixfsAddOption { + return func(settings *UnixfsAddSettings) error { + settings.MaxHAMTFanout = n + settings.MaxHAMTFanoutSet = true + return nil + } +} + // Inline tells the adder to inline small blocks into CIDs func (unixfsOpts) Inline(enable bool) UnixfsAddOption { return func(settings *UnixfsAddSettings) error { diff --git a/core/coreunix/add.go b/core/coreunix/add.go index 5f7cbb61065..ba1c164dea0 100644 --- a/core/coreunix/add.go +++ b/core/coreunix/add.go @@ -51,38 +51,42 @@ func NewAdder(ctx context.Context, p pin.Pinner, bs bstore.GCLocker, ds ipld.DAG bufferedDS := ipld.NewBufferedDAG(ctx, ds) return &Adder{ - ctx: ctx, - pinning: p, - gcLocker: bs, - dagService: ds, - bufferedDS: bufferedDS, - Progress: false, - Pin: true, - Trickle: false, - Chunker: "", + ctx: ctx, + pinning: p, + gcLocker: bs, + dagService: ds, + bufferedDS: bufferedDS, + Progress: false, + Pin: true, + Trickle: false, + MaxLinks: 0, // let boxo/ipld/unix defaults take place + MaxHAMTFanout: 0, // let boxo/ipld/unix defaults take place + Chunker: "", }, nil } // Adder holds the switches passed to the `add` command. 
type Adder struct { - ctx context.Context - pinning pin.Pinner - gcLocker bstore.GCLocker - dagService ipld.DAGService - bufferedDS *ipld.BufferedDAG - Out chan<- interface{} - Progress bool - Pin bool - Trickle bool - RawLeaves bool - Silent bool - NoCopy bool - Chunker string - mroot *mfs.Root - unlocker bstore.Unlocker - tempRoot cid.Cid - CidBuilder cid.Builder - liveNodes uint64 + ctx context.Context + pinning pin.Pinner + gcLocker bstore.GCLocker + dagService ipld.DAGService + bufferedDS *ipld.BufferedDAG + Out chan<- interface{} + Progress bool + Pin bool + Trickle bool + RawLeaves bool + MaxLinks int + MaxHAMTFanout int + Silent bool + NoCopy bool + Chunker string + mroot *mfs.Root + unlocker bstore.Unlocker + tempRoot cid.Cid + CidBuilder cid.Builder + liveNodes uint64 PreserveMode bool PreserveMtime bool @@ -94,12 +98,13 @@ func (adder *Adder) mfsRoot() (*mfs.Root, error) { if adder.mroot != nil { return adder.mroot, nil } - rnode := unixfs.EmptyDirNode() - err := rnode.SetCidBuilder(adder.CidBuilder) - if err != nil { - return nil, err - } - mr, err := mfs.NewRoot(adder.ctx, adder.dagService, rnode, nil) + + // Note, this adds it to DAGService already. + mr, err := mfs.NewEmptyRoot(adder.ctx, adder.dagService, nil, mfs.MkdirOpts{ + CidBuilder: adder.CidBuilder, + MaxLinks: adder.MaxLinks, + MaxHAMTFanout: adder.MaxHAMTFanout, + }) if err != nil { return nil, err } @@ -119,10 +124,15 @@ func (adder *Adder) add(reader io.Reader) (ipld.Node, error) { return nil, err } + maxLinks := ihelper.DefaultLinksPerBlock + if adder.MaxLinks > 0 { + maxLinks = adder.MaxLinks + } + params := ihelper.DagBuilderParams{ Dagserv: adder.bufferedDS, RawLeaves: adder.RawLeaves, - Maxlinks: ihelper.DefaultLinksPerBlock, + Maxlinks: maxLinks, NoCopy: adder.NoCopy, CidBuilder: adder.CidBuilder, FileMode: adder.FileMode, @@ -252,12 +262,15 @@ func (adder *Adder) addNode(node ipld.Node, path string) error { if err != nil { return err } + dir := gopath.Dir(path) if dir != "." 
{ opts := mfs.MkdirOpts{ - Mkparents: true, - Flush: false, - CidBuilder: adder.CidBuilder, + Mkparents: true, + Flush: false, + CidBuilder: adder.CidBuilder, + MaxLinks: adder.MaxLinks, + MaxHAMTFanout: adder.MaxHAMTFanout, } if err := mfs.Mkdir(mr, dir, opts); err != nil { return err @@ -460,12 +473,14 @@ func (adder *Adder) addDir(ctx context.Context, path string, dir files.Directory // if we need to store mode or modification time then create a new root which includes that data if toplevel && (adder.FileMode != 0 || !adder.FileMtime.IsZero()) { - nd := unixfs.EmptyDirNodeWithStat(adder.FileMode, adder.FileMtime) - err := nd.SetCidBuilder(adder.CidBuilder) - if err != nil { - return err - } - mr, err := mfs.NewRoot(ctx, adder.dagService, nd, nil) + mr, err := mfs.NewEmptyRoot(ctx, adder.dagService, nil, + mfs.MkdirOpts{ + CidBuilder: adder.CidBuilder, + MaxLinks: adder.MaxLinks, + MaxHAMTFanout: adder.MaxHAMTFanout, + ModTime: adder.FileMtime, + Mode: adder.FileMode, + }) if err != nil { return err } @@ -478,11 +493,13 @@ func (adder *Adder) addDir(ctx context.Context, path string, dir files.Directory return err } err = mfs.Mkdir(mr, path, mfs.MkdirOpts{ - Mkparents: true, - Flush: false, - CidBuilder: adder.CidBuilder, - Mode: adder.FileMode, - ModTime: adder.FileMtime, + Mkparents: true, + Flush: false, + CidBuilder: adder.CidBuilder, + Mode: adder.FileMode, + ModTime: adder.FileMtime, + MaxLinks: adder.MaxLinks, + MaxHAMTFanout: adder.MaxHAMTFanout, }) if err != nil { return err diff --git a/docs/changelogs/v0.35.md b/docs/changelogs/v0.35.md index d1432a1f41c..6a56f47fab9 100644 --- a/docs/changelogs/v0.35.md +++ b/docs/changelogs/v0.35.md @@ -25,6 +25,15 @@ routing system as providers of content. See the [documentation](https://github.com/ipfs/kubo/blob/master/docs/config.md#routingignoreproviders) for for information. 
+##### New DAG-shaping options when adding + +We now allow controlling the default maximum number of links when adding content and building DAGs. There are two new `ipfs add` options: + + - `--max-links`: controls the maximum number of children that a file or a directory can have. Directories will be converted to HAMT-based directories when they have more than the given number of children. + - `--max-hamt-fanout`: controls the maximum number of children that HAMT internal nodes can have. + +Both options can be set permanently using the [corresponding `Import` settings](https://github.com/ipfs/kubo/blob/master/docs/config.md#importmaxlinks). + #### 📦️ Important dependency updates ### 📝 Changelog diff --git a/docs/config.md b/docs/config.md index 6056510b8a3..9fe7f5a12b6 100644 --- a/docs/config.md +++ b/docs/config.md @@ -184,6 +184,8 @@ config file at runtime. - [`Import.HashFunction`](#importhashfunction) - [`Import.BatchMaxNodes`](#importbatchmaxnodes) - [`Import.BatchMaxSize`](#importbatchmaxsize) + - [`Import.MaxLinks`](#importmaxlinks) + - [`Import.MaxHAMTFanout`](#importmaxhamtfanout) - [`Version`](#version) - [`Version.AgentSuffix`](#versionagentsuffix) - [`Version.SwarmCheckEnabled`](#versionswarmcheckenabled) @@ -2547,6 +2549,39 @@ Default: `20971520` (20MiB) Type: `optionalInteger` +### `Import.MaxLinks` + +The maximum number of links that a node that is part of a UnixFS DAG can have +when building the DAG while importing. + +This setting controls both the fanout in files as well as the fanout for +basic, non-HAMT folders. When unset (0), the default for files is `174`, while the +default for folders is dynamic. A size-estimation function chooses when to +convert the folders to HAMT-based directories and the number of links can vary depending +on their size. + +This setting will cause basic directories to be converted to HAMTs when they +exceed the maximum number of children. This happens transparently during the +add process. 
The fanout of HAMT nodes is controlled by `MaxHAMTFanout`. + +Default: `0` + +Type: `optionalInteger` + +### `Import.MaxHAMTFanout` + +The maximum number of children that a node that is part of a UnixFS HAMT directory +(aka sharded directory) can have. + +HAMT directories can have unlimited children and are used when basic directories +become too big or reach `MaxLinks`. A HAMT is a structure made of UnixFS +nodes that store the list of elements in the folder. This option controls the +maximum number of children that the HAMT nodes can have. + +Default: `256` + +Type: `optionalInteger` + ## `Version` Options to configure agent version announced to the swarm, and leveraging diff --git a/docs/examples/kubo-as-a-library/go.mod b/docs/examples/kubo-as-a-library/go.mod index 38b3e64e9a1..64bd24f4f64 100644 --- a/docs/examples/kubo-as-a-library/go.mod +++ b/docs/examples/kubo-as-a-library/go.mod @@ -7,7 +7,7 @@ go 1.24 replace github.com/ipfs/kubo => ./../../.. require ( - github.com/ipfs/boxo v0.29.1 + github.com/ipfs/boxo v0.29.2-0.20250407221841-f8115e4506c3 github.com/ipfs/kubo v0.0.0-00010101000000-000000000000 github.com/libp2p/go-libp2p v0.41.1 github.com/multiformats/go-multiaddr v0.15.0 diff --git a/docs/examples/kubo-as-a-library/go.sum b/docs/examples/kubo-as-a-library/go.sum index f8fc2a88567..542e33fb7ab 100644 --- a/docs/examples/kubo-as-a-library/go.sum +++ b/docs/examples/kubo-as-a-library/go.sum @@ -298,8 +298,8 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/boxo v0.29.1 h1:z61ZT4YDfTHLjXTsu/+3wvJ8aJlExthDSOCpx6Nh8xc= -github.com/ipfs/boxo v0.29.1/go.mod h1:MkDJStXiJS9U99cbAijHdcmwNfVn5DKYBmQCOgjY2NU= +github.com/ipfs/boxo 
v0.29.2-0.20250407221841-f8115e4506c3 h1:j8yvRgkHJMtaszUNxTUYLnKPaUVrfomBBhDdDNzKxnY= +github.com/ipfs/boxo v0.29.2-0.20250407221841-f8115e4506c3/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ= diff --git a/fuse/readonly/ipfs_test.go b/fuse/readonly/ipfs_test.go index 6d667843cdf..e7dfbcb2abd 100644 --- a/fuse/readonly/ipfs_test.go +++ b/fuse/readonly/ipfs_test.go @@ -150,7 +150,10 @@ func TestIpfsStressRead(t *testing.T) { // Now make a bunch of dirs for i := 0; i < ndiriter; i++ { - db := uio.NewDirectory(nd.DAG) + db, err := uio.NewDirectory(nd.DAG) + if err != nil { + t.Fatal(err) + } for j := 0; j < 1+rand.Intn(10); j++ { name := fmt.Sprintf("child%d", j) @@ -245,8 +248,11 @@ func TestIpfsBasicDirRead(t *testing.T) { fi, data := randObj(t, nd, 10000) // Make a directory and put that file in it - db := uio.NewDirectory(nd.DAG) - err := db.AddChild(nd.Context(), "actual", fi) + db, err := uio.NewDirectory(nd.DAG) + if err != nil { + t.Fatal(err) + } + err = db.AddChild(nd.Context(), "actual", fi) if err != nil { t.Fatal(err) } diff --git a/go.mod b/go.mod index 1abbe78123d..7aeca9bf288 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/hashicorp/go-version v1.7.0 github.com/ipfs-shipyard/nopfs v0.0.14 github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 - github.com/ipfs/boxo v0.29.1 + github.com/ipfs/boxo v0.29.2-0.20250407221841-f8115e4506c3 github.com/ipfs/go-block-format v0.2.0 github.com/ipfs/go-cid v0.5.0 github.com/ipfs/go-cidutil v0.1.0 diff --git a/go.sum b/go.sum index 91d34dbbdb0..0d0b69c5ec3 100644 --- a/go.sum +++ b/go.sum @@ -362,8 +362,8 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod 
h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/boxo v0.29.1 h1:z61ZT4YDfTHLjXTsu/+3wvJ8aJlExthDSOCpx6Nh8xc= -github.com/ipfs/boxo v0.29.1/go.mod h1:MkDJStXiJS9U99cbAijHdcmwNfVn5DKYBmQCOgjY2NU= +github.com/ipfs/boxo v0.29.2-0.20250407221841-f8115e4506c3 h1:j8yvRgkHJMtaszUNxTUYLnKPaUVrfomBBhDdDNzKxnY= +github.com/ipfs/boxo v0.29.2-0.20250407221841-f8115e4506c3/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ= diff --git a/test/dependencies/go.mod b/test/dependencies/go.mod index 3f6a1e39bcd..37e1a512602 100644 --- a/test/dependencies/go.mod +++ b/test/dependencies/go.mod @@ -33,6 +33,7 @@ require ( github.com/Masterminds/semver/v3 v3.2.1 // indirect github.com/OpenPeeDeeP/depguard/v2 v2.2.0 // indirect github.com/alecthomas/go-check-sumtype v0.1.4 // indirect + github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b // indirect github.com/alexkohler/nakedret/v2 v2.0.4 // indirect github.com/alexkohler/prealloc v1.0.0 // indirect github.com/alingse/asasalint v0.0.11 // indirect @@ -57,6 +58,7 @@ require ( github.com/chavacava/garif v0.1.0 // indirect github.com/ckaznocha/intrange v0.1.2 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect + github.com/crackcomm/go-gitignore v0.0.0-20241020182519-7843d2ba8fdf // indirect github.com/curioswitch/go-reassign v0.2.0 // indirect github.com/daixiang0/gci v0.13.4 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect @@ -116,7 +118,8 @@ require ( github.com/huin/goupnp v1.3.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect 
github.com/ipfs/bbloom v0.0.4 // indirect - github.com/ipfs/boxo v0.29.1 // indirect + github.com/ipfs/boxo v0.29.2-0.20250407221841-f8115e4506c3 // indirect + github.com/ipfs/go-bitfield v1.1.0 // indirect github.com/ipfs/go-block-format v0.2.0 // indirect github.com/ipfs/go-cid v0.5.0 // indirect github.com/ipfs/go-datastore v0.8.2 // indirect @@ -273,6 +276,7 @@ require ( github.com/urfave/cli v1.22.16 // indirect github.com/uudashr/gocognit v1.1.3 // indirect github.com/whyrusleeping/base32 v0.0.0-20170828182744-c30ac30633cc // indirect + github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f // indirect github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect github.com/wlynxg/anet v0.0.5 // indirect github.com/xen0n/gosmopolitan v1.2.2 // indirect diff --git a/test/dependencies/go.sum b/test/dependencies/go.sum index 757ab2b7c52..d6f8a9552e1 100644 --- a/test/dependencies/go.sum +++ b/test/dependencies/go.sum @@ -43,6 +43,8 @@ github.com/alecthomas/go-check-sumtype v0.1.4 h1:WCvlB3l5Vq5dZQTFmodqL2g68uHiSww github.com/alecthomas/go-check-sumtype v0.1.4/go.mod h1:WyYPfhfkdhyrdaligV6svFopZV8Lqdzn5pyVBaV6jhQ= github.com/alecthomas/repr v0.2.0 h1:HAzS41CIzNW5syS8Mf9UwXhNH1J9aix/BvDRf1Ml2Yk= github.com/alecthomas/repr v0.2.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= +github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0= +github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= github.com/alexkohler/nakedret/v2 v2.0.4 h1:yZuKmjqGi0pSmjGpOC016LtPJysIL0WEUiaXW5SUnNg= github.com/alexkohler/nakedret/v2 v2.0.4/go.mod h1:bF5i0zF2Wo2o4X4USt9ntUWve6JbFv02Ff4vlkmS/VU= github.com/alexkohler/prealloc v1.0.0 h1:Hbq0/3fJPQhNkN0dR95AVrr6R7tou91y0uHG5pOcUuw= @@ -105,6 +107,8 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:ma github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod 
h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/crackcomm/go-gitignore v0.0.0-20241020182519-7843d2ba8fdf h1:dwGgBWn84wUS1pVikGiruW+x5XM4amhjaZO20vCjay4= +github.com/crackcomm/go-gitignore v0.0.0-20241020182519-7843d2ba8fdf/go.mod h1:p1d6YEZWvFzEh4KLyvBcVSnrfNDDvK2zfK/4x2v/4pE= github.com/cskr/pubsub v1.0.2 h1:vlOzMhl6PFn60gRlTQQsIfVwaPB/B/8MziK8FhEPt/0= github.com/cskr/pubsub v1.0.2/go.mod h1:/8MzYXk/NJAz782G8RPkFzXTZVu63VotefPnR9TIRis= github.com/curioswitch/go-reassign v0.2.0 h1:G9UZyOcpk/d7Gd6mqYgd8XYWFMw/znxwGDUstnC9DIo= @@ -294,8 +298,10 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/boxo v0.29.1 h1:z61ZT4YDfTHLjXTsu/+3wvJ8aJlExthDSOCpx6Nh8xc= -github.com/ipfs/boxo v0.29.1/go.mod h1:MkDJStXiJS9U99cbAijHdcmwNfVn5DKYBmQCOgjY2NU= +github.com/ipfs/boxo v0.29.2-0.20250407221841-f8115e4506c3 h1:j8yvRgkHJMtaszUNxTUYLnKPaUVrfomBBhDdDNzKxnY= +github.com/ipfs/boxo v0.29.2-0.20250407221841-f8115e4506c3/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI= +github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= +github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs= github.com/ipfs/go-block-format v0.2.0/go.mod h1:+jpL11nFx5A/SPpsoBn6Bzkra/zaArfSmsknbPMYgzM= github.com/ipfs/go-cid v0.5.0 h1:goEKKhaGm0ul11IHA7I6p1GmKz8kEYniqFopaB5Otwg= @@ -767,6 +773,8 @@ github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 
h1:GDDkbFiaK8jsSD github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/whyrusleeping/base32 v0.0.0-20170828182744-c30ac30633cc h1:BCPnHtcboadS0DvysUuJXZ4lWVv5Bh5i7+tbIyi+ck4= github.com/whyrusleeping/base32 v0.0.0-20170828182744-c30ac30633cc/go.mod h1:r45hJU7yEoA81k6MWNhpMj/kms0n14dkzkxYHoB96UM= +github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f h1:jQa4QT2UP9WYv2nzyawpKMOCl+Z/jW7djv2/J50lj9E= +github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f/go.mod h1:p9UJB6dDgdPgMJZs7UjUOdulKyRr9fqkS+6JKAInPy8= github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 h1:EKhdznlJHPMoKr0XTrX+IlJs1LH3lyx2nfr1dOlZ79k= github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1/go.mod h1:8UvriyWtv5Q5EOgjHaSseUEdkQfvwFv1I/In/O2M9gc= github.com/wlynxg/anet v0.0.3/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA= From 946a57ada52bf13649698bec796b70e4655dc0cb Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Tue, 8 Apr 2025 17:42:35 +0200 Subject: [PATCH 02/19] Add MaxDirectoryLinks option --- config/import.go | 15 +++++++- config/profile.go | 6 ++- core/commands/add.go | 64 +++++++++++++++++++------------- core/coreapi/unixfs.go | 14 +++++-- core/coreiface/options/unixfs.go | 56 +++++++++++++++++----------- core/coreunix/add.go | 54 ++++++++++++++------------- docs/changelogs/v0.35.md | 3 +- docs/config.md | 30 +++++++++++---- 8 files changed, 154 insertions(+), 88 deletions(-) diff --git a/config/import.go b/config/import.go index 3499e26ba34..cf0b41f50a8 100644 --- a/config/import.go +++ b/config/import.go @@ -1,5 +1,10 @@ package config +import ( + "github.com/ipfs/boxo/ipld/unixfs/importer/helpers" + "github.com/ipfs/boxo/ipld/unixfs/io" +) + const ( DefaultCidVersion = 0 DefaultUnixFSRawLeaves = false @@ -14,6 +19,13 @@ const ( // write-batch. The total size of the batch is limited by // BatchMaxnodes and BatchMaxSize. 
DefaultBatchMaxSize = 100 << 20 // 20MiB + +) + +var ( + DefaultUnixFSHAMTDirectoryMaxFanout = io.DefaultShardWidth + DefaultUnixFSFileMaxLinks = helpers.DefaultLinksPerBlock + DefaultUnixFSDirectoryMaxLinks = 0 ) // Import configures the default options for ingesting data. This affects commands @@ -23,7 +35,8 @@ type Import struct { UnixFSRawLeaves Flag UnixFSChunker OptionalString HashFunction OptionalString - UnixFSDAGMaxLinks OptionalInteger + UnixFSDirectoryMaxLinks OptionalInteger + UnixFSFileMaxLinks OptionalInteger UnixFSHAMTDirectoryMaxFanout OptionalInteger BatchMaxNodes OptionalInteger BatchMaxSize OptionalInteger diff --git a/config/profile.go b/config/profile.go index 8ec679518cc..c5eeb74a460 100644 --- a/config/profile.go +++ b/config/profile.go @@ -273,6 +273,9 @@ fetching may be degraded. c.Import.UnixFSRawLeaves = False c.Import.UnixFSChunker = *NewOptionalString("size-262144") c.Import.HashFunction = *NewOptionalString("sha2-256") + c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(174) + c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0) + c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256) return nil }, }, @@ -284,7 +287,8 @@ fetching may be degraded. 
c.Import.UnixFSRawLeaves = True c.Import.UnixFSChunker = *NewOptionalString("size-1048576") c.Import.HashFunction = *NewOptionalString("sha2-256") - c.Import.UnixFSDAGMaxLinks = *NewOptionalInteger(1024) + c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(1024) + c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0) c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256) return nil }, diff --git a/core/commands/add.go b/core/commands/add.go index 2f237b762c4..988bd6bb48c 100644 --- a/core/commands/add.go +++ b/core/commands/add.go @@ -37,25 +37,26 @@ type AddEvent struct { } const ( - quietOptionName = "quiet" - quieterOptionName = "quieter" - silentOptionName = "silent" - progressOptionName = "progress" - trickleOptionName = "trickle" - wrapOptionName = "wrap-with-directory" - onlyHashOptionName = "only-hash" - chunkerOptionName = "chunker" - pinOptionName = "pin" - rawLeavesOptionName = "raw-leaves" - maxLinksOptionName = "max-links" - maxHAMTFanoutOptionName = "max-hamt-fanout" - noCopyOptionName = "nocopy" - fstoreCacheOptionName = "fscache" - cidVersionOptionName = "cid-version" - hashOptionName = "hash" - inlineOptionName = "inline" - inlineLimitOptionName = "inline-limit" - toFilesOptionName = "to-files" + quietOptionName = "quiet" + quieterOptionName = "quieter" + silentOptionName = "silent" + progressOptionName = "progress" + trickleOptionName = "trickle" + wrapOptionName = "wrap-with-directory" + onlyHashOptionName = "only-hash" + chunkerOptionName = "chunker" + pinOptionName = "pin" + rawLeavesOptionName = "raw-leaves" + maxFileLinksOptionName = "max-file-links" + maxDirectoryLinksOptionName = "max-directory-links" + maxHAMTFanoutOptionName = "max-hamt-fanout" + noCopyOptionName = "nocopy" + fstoreCacheOptionName = "fscache" + cidVersionOptionName = "cid-version" + hashOptionName = "hash" + inlineOptionName = "inline" + inlineLimitOptionName = "inline-limit" + toFilesOptionName = "to-files" preserveModeOptionName = "preserve-mode" 
preserveMtimeOptionName = "preserve-mtime" @@ -170,7 +171,8 @@ See 'dag export' and 'dag import' for more information. cmds.BoolOption(wrapOptionName, "w", "Wrap files with a directory object."), cmds.StringOption(chunkerOptionName, "s", "Chunking algorithm, size-[bytes], rabin-[min]-[avg]-[max] or buzhash"), cmds.BoolOption(rawLeavesOptionName, "Use raw blocks for leaf nodes."), - cmds.IntOption(maxLinksOptionName, "Limit the maximum number of links in UnixFS file and basic directory nodes to this value."), + cmds.IntOption(maxFileLinksOptionName, "Limit the maximum number of links in UnixFS file nodes to this value."), + cmds.IntOption(maxDirectoryLinksOptionName, "Limit the maximum number of links in UnixFS basic directory nodes to this value."), cmds.IntOption(maxHAMTFanoutOptionName, "Limit the maximum number of links of a UnixFS HAMT directory node to this (power of 2, multiple of 8)."), cmds.BoolOption(noCopyOptionName, "Add the file using filestore. Implies raw-leaves. (experimental)"), cmds.BoolOption(fstoreCacheOptionName, "Check the filestore for pre-existing blocks. (experimental)"), @@ -226,7 +228,8 @@ See 'dag export' and 'dag import' for more information. chunker, _ := req.Options[chunkerOptionName].(string) dopin, _ := req.Options[pinOptionName].(bool) rawblks, rbset := req.Options[rawLeavesOptionName].(bool) - maxLinks, maxLinksSet := req.Options[maxLinksOptionName].(int) + maxFileLinks, maxFileLinksSet := req.Options[maxFileLinksOptionName].(int) + maxDirectoryLinks, maxDirectoryLinksSet := req.Options[maxDirectoryLinksOptionName].(int) maxHAMTFanout, maxHAMTFanoutSet := req.Options[maxHAMTFanoutOptionName].(int) nocopy, _ := req.Options[noCopyOptionName].(bool) fscache, _ := req.Options[fstoreCacheOptionName].(bool) @@ -259,9 +262,14 @@ See 'dag export' and 'dag import' for more information. 
rawblks = cfg.Import.UnixFSRawLeaves.WithDefault(config.DefaultUnixFSRawLeaves) } - if !maxLinksSet && !cfg.Import.UnixFSDAGMaxLinks.IsDefault() { - maxLinksSet = true - maxLinks = int(cfg.Import.UnixFSDAGMaxLinks.WithDefault(0)) + if !maxFileLinksSet && !cfg.Import.UnixFSFileMaxLinks.IsDefault() { + maxFileLinksSet = true + maxFileLinks = int(cfg.Import.UnixFSFileMaxLinks.WithDefault(int64(config.DefaultUnixFSFileMaxLinks))) + } + + if !maxDirectoryLinksSet && !cfg.Import.UnixFSDirectoryMaxLinks.IsDefault() { + maxDirectoryLinksSet = true + maxDirectoryLinks = int(cfg.Import.UnixFSDirectoryMaxLinks.WithDefault(int64(config.DefaultUnixFSDirectoryMaxLinks))) } if !maxHAMTFanoutSet && !cfg.Import.UnixFSHAMTDirectoryMaxFanout.IsDefault() { @@ -345,8 +353,12 @@ See 'dag export' and 'dag import' for more information. opts = append(opts, options.Unixfs.RawLeaves(rawblks)) } - if maxLinksSet { - opts = append(opts, options.Unixfs.MaxLinks(maxLinks)) + if maxFileLinksSet { + opts = append(opts, options.Unixfs.MaxFileLinks(maxFileLinks)) + } + + if maxDirectoryLinksSet { + opts = append(opts, options.Unixfs.MaxDirectoryLinks(maxDirectoryLinks)) } if maxHAMTFanoutSet { diff --git a/core/coreapi/unixfs.go b/core/coreapi/unixfs.go index 397c2914517..eece797a5fa 100644 --- a/core/coreapi/unixfs.go +++ b/core/coreapi/unixfs.go @@ -50,8 +50,10 @@ func (api *UnixfsAPI) Add(ctx context.Context, files files.Node, opts ...options attribute.Int("inlinelimit", settings.InlineLimit), attribute.Bool("rawleaves", settings.RawLeaves), attribute.Bool("rawleavesset", settings.RawLeavesSet), - attribute.Int("maxlinks", settings.MaxLinks), - attribute.Bool("maxlinksset", settings.MaxLinksSet), + attribute.Int("maxfilelinks", settings.MaxFileLinks), + attribute.Bool("maxfilelinksset", settings.MaxFileLinksSet), + attribute.Int("maxdirectorylinks", settings.MaxDirectoryLinks), + attribute.Bool("maxdirectorylinksset", settings.MaxDirectoryLinksSet), attribute.Int("maxhamtfanout", 
settings.MaxHAMTFanout), attribute.Bool("maxhamtfanoutset", settings.MaxHAMTFanoutSet), attribute.Int("layout", int(settings.Layout)), @@ -136,9 +138,13 @@ func (api *UnixfsAPI) Add(ctx context.Context, files files.Node, opts ...options fileAdder.Pin = settings.Pin && !settings.OnlyHash fileAdder.Silent = settings.Silent fileAdder.RawLeaves = settings.RawLeaves - if settings.MaxLinksSet { - fileAdder.MaxLinks = settings.MaxLinks + if settings.MaxFileLinksSet { + fileAdder.MaxLinks = settings.MaxFileLinks } + if settings.MaxDirectoryLinksSet { + fileAdder.MaxDirectoryLinks = settings.MaxDirectoryLinks + } + if settings.MaxHAMTFanoutSet { fileAdder.MaxHAMTFanout = settings.MaxHAMTFanout } diff --git a/core/coreiface/options/unixfs.go b/core/coreiface/options/unixfs.go index 9d01610ca29..20f18d1e04c 100644 --- a/core/coreiface/options/unixfs.go +++ b/core/coreiface/options/unixfs.go @@ -24,14 +24,16 @@ type UnixfsAddSettings struct { CidVersion int MhType uint64 - Inline bool - InlineLimit int - RawLeaves bool - RawLeavesSet bool - MaxLinks int - MaxLinksSet bool - MaxHAMTFanout int - MaxHAMTFanoutSet bool + Inline bool + InlineLimit int + RawLeaves bool + RawLeavesSet bool + MaxFileLinks int + MaxFileLinksSet bool + MaxDirectoryLinks int + MaxDirectoryLinksSet bool + MaxHAMTFanout int + MaxHAMTFanoutSet bool Chunker string Layout Layout @@ -66,14 +68,16 @@ func UnixfsAddOptions(opts ...UnixfsAddOption) (*UnixfsAddSettings, cid.Prefix, CidVersion: -1, MhType: mh.SHA2_256, - Inline: false, - InlineLimit: 32, - RawLeaves: false, - RawLeavesSet: false, - MaxLinks: helpers.DefaultLinksPerBlock, - MaxLinksSet: false, - MaxHAMTFanout: io.DefaultShardWidth, - MaxHAMTFanoutSet: false, + Inline: false, + InlineLimit: 32, + RawLeaves: false, + RawLeavesSet: false, + MaxFileLinks: helpers.DefaultLinksPerBlock, + MaxFileLinksSet: false, + MaxDirectoryLinks: 0, + MaxDirectoryLinksSet: false, + MaxHAMTFanout: io.DefaultShardWidth, + MaxHAMTFanoutSet: false, Chunker: "size-262144", 
Layout: BalancedLayout, @@ -200,12 +204,22 @@ func (unixfsOpts) RawLeaves(enable bool) UnixfsAddOption { } } -// MaxLinks specifies the maximum width of the UnixFS DAG. It affects files -// and basic folders. -func (unixfsOpts) MaxLinks(n int) UnixfsAddOption { +// MaxFileLinks specifies the maximum number of children for UnixFS file +// nodes. +func (unixfsOpts) MaxFileLinks(n int) UnixfsAddOption { return func(settings *UnixfsAddSettings) error { - settings.MaxLinks = n - settings.MaxLinksSet = true + settings.MaxFileLinks = n + settings.MaxFileLinksSet = true + return nil + } +} + +// MaxDirectoryLinks specifies the maximum number of children for UnixFS basic +// directory nodes. +func (unixfsOpts) MaxDirectoryLinks(n int) UnixfsAddOption { + return func(settings *UnixfsAddSettings) error { + settings.MaxDirectoryLinks = n + settings.MaxDirectoryLinksSet = true return nil } } diff --git a/core/coreunix/add.go b/core/coreunix/add.go index ba1c164dea0..d9dc555d276 100644 --- a/core/coreunix/add.go +++ b/core/coreunix/add.go @@ -19,6 +19,7 @@ import ( "github.com/ipfs/boxo/ipld/unixfs/importer/balanced" ihelper "github.com/ipfs/boxo/ipld/unixfs/importer/helpers" "github.com/ipfs/boxo/ipld/unixfs/importer/trickle" + uio "github.com/ipfs/boxo/ipld/unixfs/io" "github.com/ipfs/boxo/mfs" "github.com/ipfs/boxo/path" pin "github.com/ipfs/boxo/pinning/pinner" @@ -59,34 +60,35 @@ func NewAdder(ctx context.Context, p pin.Pinner, bs bstore.GCLocker, ds ipld.DAG Progress: false, Pin: true, Trickle: false, - MaxLinks: 0, // let boxo/ipld/unix defaults take place - MaxHAMTFanout: 0, // let boxo/ipld/unix defaults take place + MaxLinks: ihelper.DefaultLinksPerBlock, + MaxHAMTFanout: uio.DefaultShardWidth, Chunker: "", }, nil } // Adder holds the switches passed to the `add` command. 
type Adder struct { - ctx context.Context - pinning pin.Pinner - gcLocker bstore.GCLocker - dagService ipld.DAGService - bufferedDS *ipld.BufferedDAG - Out chan<- interface{} - Progress bool - Pin bool - Trickle bool - RawLeaves bool - MaxLinks int - MaxHAMTFanout int - Silent bool - NoCopy bool - Chunker string - mroot *mfs.Root - unlocker bstore.Unlocker - tempRoot cid.Cid - CidBuilder cid.Builder - liveNodes uint64 + ctx context.Context + pinning pin.Pinner + gcLocker bstore.GCLocker + dagService ipld.DAGService + bufferedDS *ipld.BufferedDAG + Out chan<- interface{} + Progress bool + Pin bool + Trickle bool + RawLeaves bool + MaxLinks int + MaxDirectoryLinks int + MaxHAMTFanout int + Silent bool + NoCopy bool + Chunker string + mroot *mfs.Root + unlocker bstore.Unlocker + tempRoot cid.Cid + CidBuilder cid.Builder + liveNodes uint64 PreserveMode bool PreserveMtime bool @@ -102,7 +104,7 @@ func (adder *Adder) mfsRoot() (*mfs.Root, error) { // Note, this adds it to DAGService already. mr, err := mfs.NewEmptyRoot(adder.ctx, adder.dagService, nil, mfs.MkdirOpts{ CidBuilder: adder.CidBuilder, - MaxLinks: adder.MaxLinks, + MaxLinks: adder.MaxDirectoryLinks, MaxHAMTFanout: adder.MaxHAMTFanout, }) if err != nil { @@ -269,7 +271,7 @@ func (adder *Adder) addNode(node ipld.Node, path string) error { Mkparents: true, Flush: false, CidBuilder: adder.CidBuilder, - MaxLinks: adder.MaxLinks, + MaxLinks: adder.MaxDirectoryLinks, MaxHAMTFanout: adder.MaxHAMTFanout, } if err := mfs.Mkdir(mr, dir, opts); err != nil { @@ -476,7 +478,7 @@ func (adder *Adder) addDir(ctx context.Context, path string, dir files.Directory mr, err := mfs.NewEmptyRoot(ctx, adder.dagService, nil, mfs.MkdirOpts{ CidBuilder: adder.CidBuilder, - MaxLinks: adder.MaxLinks, + MaxLinks: adder.MaxDirectoryLinks, MaxHAMTFanout: adder.MaxHAMTFanout, ModTime: adder.FileMtime, Mode: adder.FileMode, @@ -498,7 +500,7 @@ func (adder *Adder) addDir(ctx context.Context, path string, dir files.Directory CidBuilder: 
adder.CidBuilder, Mode: adder.FileMode, ModTime: adder.FileMtime, - MaxLinks: adder.MaxLinks, + MaxLinks: adder.MaxDirectoryLinks, MaxHAMTFanout: adder.MaxHAMTFanout, }) if err != nil { diff --git a/docs/changelogs/v0.35.md b/docs/changelogs/v0.35.md index 6a56f47fab9..a16dbaf9dfd 100644 --- a/docs/changelogs/v0.35.md +++ b/docs/changelogs/v0.35.md @@ -29,7 +29,8 @@ for for information. We now allow controlling the default maximum number of links when adding content and building DAGs. There are two new `ipfs add` options: - - `--max-links`: controls the maximum number of children that a file or a directory can have. Directories will be converted to HAMT-based directories when they have more than the given numbers of children. + - `--max-file-links`: controls the maximum number of children that a file node (when files are chunked, nodes have links to all the chunks). + - `--max-directory-links`: controls the maximum number of children that a directory can have. Directories will be converted to HAMT-based directories when they have more than the given numbers of children. - `--max-hamt-fanout`: controls the maximum number of children that HAMT internal nodes can have. Both options can be set permanently using the [corresponding `Import` settings](https://github.com/ipfs/kubo/blob/master/docs/config.md#importmaxlinks). diff --git a/docs/config.md b/docs/config.md index 9fe7f5a12b6..b20aeadacd1 100644 --- a/docs/config.md +++ b/docs/config.md @@ -184,7 +184,8 @@ config file at runtime. 
- [`Import.HashFunction`](#importhashfunction) - [`Import.BatchMaxNodes`](#importbatchmaxnodes) - [`Import.BatchMaxSize`](#importbatchmaxsize) - - [`Import.MaxLinks`](#importmaxlinks) + - [`Import.MaxFileLinks`](#importmaxfilelinks) + - [`Import.MaxDirectoryLinks`](#importmaxDirectorylinks) - [`Import.MaxHAMTFanout`](#importmaxhamtfanout) - [`Version`](#version) - [`Version.AgentSuffix`](#versionagentsuffix) @@ -2549,16 +2550,28 @@ Default: `20971520` (20MiB) Type: `optionalInteger` -### `Import.MaxLinks` +### `Import.MaxFileLinks` -The maximum number of links that a node part of a UnixFS DAG can have +The maximum number of links that a node part of a UnixFS File can have when building the DAG while importing. -This setting controls both the fanout in files as well as the fanout for -basic, non-HAMT folder. When unset (0), the default for files is `174`, while the -default for folders is dynamic. A size-estimation function chooses when to -convert the folders to HAMT-based directories and amount of links can vary depending -on their size. +This setting controls the fanout in files that are chunked into several +blocks and grouped as a Unixfs (dag-pb) DAG. + +Default: `174` + +Type: `optionalInteger` + +### `Import.MaxDirectoryLinks` + +The maximum number of links that a node part of a UnixFS basic directory can +have when building the DAG while importing. + +This setting controls the fanout for basic, non-HAMT folder nodes. It +sets a limit after which directories are converted to a HAMT-based structure. + +When unset (0), no limit exists for children. Directories will be converted to +HAMTs based on their estimated size only. This setting will cause basic directories to be converted to HAMTs when they exceed the maximum number of children.
This happens transparently during the @@ -2568,6 +2581,7 @@ Default: `0` Type: `optionalInteger` + ### `Import.MaxHAMTFanout` The maximum number of children that a node part of a Unixfs HAMT directory From 7239332136920807cb8c6c9fe370a921ccc2bb13 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Tue, 8 Apr 2025 17:44:49 +0200 Subject: [PATCH 03/19] add: cosmetics --- config/import.go | 6 +++--- core/commands/add.go | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/config/import.go b/config/import.go index cf0b41f50a8..662cf900720 100644 --- a/config/import.go +++ b/config/import.go @@ -23,9 +23,9 @@ const ( ) var ( - DefaultUnixFSHAMTDirectoryMaxFanout = io.DefaultShardWidth - DefaultUnixFSFileMaxLinks = helpers.DefaultLinksPerBlock - DefaultUnixFSDirectoryMaxLinks = 0 + DefaultUnixFSHAMTDirectoryMaxFanout = int64(io.DefaultShardWidth) + DefaultUnixFSFileMaxLinks = int64(helpers.DefaultLinksPerBlock) + DefaultUnixFSDirectoryMaxLinks = int64(0) ) // Import configures the default options for ingesting data. This affects commands diff --git a/core/commands/add.go b/core/commands/add.go index 988bd6bb48c..20ccbf3da37 100644 --- a/core/commands/add.go +++ b/core/commands/add.go @@ -264,17 +264,17 @@ See 'dag export' and 'dag import' for more information. 
if !maxFileLinksSet && !cfg.Import.UnixFSFileMaxLinks.IsDefault() { maxFileLinksSet = true - maxFileLinks = int(cfg.Import.UnixFSFileMaxLinks.WithDefault(int64(config.DefaultUnixFSFileMaxLinks))) + maxFileLinks = int(cfg.Import.UnixFSFileMaxLinks.WithDefault(config.DefaultUnixFSFileMaxLinks)) } if !maxDirectoryLinksSet && !cfg.Import.UnixFSDirectoryMaxLinks.IsDefault() { maxDirectoryLinksSet = true - maxDirectoryLinks = int(cfg.Import.UnixFSDirectoryMaxLinks.WithDefault(int64(config.DefaultUnixFSDirectoryMaxLinks))) + maxDirectoryLinks = int(cfg.Import.UnixFSDirectoryMaxLinks.WithDefault(config.DefaultUnixFSDirectoryMaxLinks)) } if !maxHAMTFanoutSet && !cfg.Import.UnixFSHAMTDirectoryMaxFanout.IsDefault() { maxHAMTFanoutSet = true - maxHAMTFanout = int(cfg.Import.UnixFSHAMTDirectoryMaxFanout.WithDefault(0)) + maxHAMTFanout = int(cfg.Import.UnixFSHAMTDirectoryMaxFanout.WithDefault(config.DefaultUnixFSHAMTDirectoryMaxFanout)) } // Storing optional mode or mtime (UnixFS 1.5) requires root block From 4b91d17861d0b1d0c4cc8d6b857227cb17cf3560 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Fri, 11 Apr 2025 20:31:33 +0200 Subject: [PATCH 04/19] docs: correct Import field names --- config/import.go | 4 ++-- config/profile.go | 21 +++++++++++++++++++-- core/commands/add.go | 2 +- docs/changelogs/v0.35.md | 14 ++++++++++++-- docs/config.md | 17 ++++++++--------- 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/config/import.go b/config/import.go index 662cf900720..8e0b2d8ca3a 100644 --- a/config/import.go +++ b/config/import.go @@ -23,9 +23,9 @@ const ( ) var ( - DefaultUnixFSHAMTDirectoryMaxFanout = int64(io.DefaultShardWidth) DefaultUnixFSFileMaxLinks = int64(helpers.DefaultLinksPerBlock) DefaultUnixFSDirectoryMaxLinks = int64(0) + DefaultUnixFSHAMTDirectoryMaxFanout = int64(io.DefaultShardWidth) ) // Import configures the default options for ingesting data. 
This affects commands @@ -35,8 +35,8 @@ type Import struct { UnixFSRawLeaves Flag UnixFSChunker OptionalString HashFunction OptionalString - UnixFSDirectoryMaxLinks OptionalInteger UnixFSFileMaxLinks OptionalInteger + UnixFSDirectoryMaxLinks OptionalInteger UnixFSHAMTDirectoryMaxFanout OptionalInteger BatchMaxNodes OptionalInteger BatchMaxSize OptionalInteger diff --git a/config/profile.go b/config/profile.go index c5eeb74a460..c3c058d2ae1 100644 --- a/config/profile.go +++ b/config/profile.go @@ -276,11 +276,27 @@ fetching may be degraded. c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(174) c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0) c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256) + // TODO: move `Internal.UnixFSShardingSizeThreshold` to Import and set here return nil }, }, - "test-cid-v1": { - Description: `Makes UnixFS import produce modern CIDv1 with raw leaves, sha2-256 and 1 MiB chunks and 1024 links at most.`, + "legacy-cid-v1": { + Description: `Makes UnixFS import produce legacy CIDv1 with the same DAG width as in legacy CIDv0.`, + + Transform: func(c *Config) error { + c.Import.CidVersion = *NewOptionalInteger(1) + c.Import.UnixFSRawLeaves = True + c.Import.UnixFSChunker = *NewOptionalString("size-1048576") + c.Import.HashFunction = *NewOptionalString("sha2-256") + c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(174) + c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0) + c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256) + // TODO: move `Internal.UnixFSShardingSizeThreshold` to Import and set here + return nil + }, + }, + "test-cid-v1-2025q2": { + Description: `Makes UnixFS import produce modern CIDv1 with raw leaves, sha2-256 and 1 MiB chunks and wider file DAGs (1024 links per level).`, Transform: func(c *Config) error { c.Import.CidVersion = *NewOptionalInteger(1) @@ -290,6 +306,7 @@ fetching may be degraded. 
c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(1024) c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0) c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256) + // TODO: move `Internal.UnixFSShardingSizeThreshold` to Import and set here return nil }, }, diff --git a/core/commands/add.go b/core/commands/add.go index 20ccbf3da37..85d5471584d 100644 --- a/core/commands/add.go +++ b/core/commands/add.go @@ -172,7 +172,7 @@ See 'dag export' and 'dag import' for more information. cmds.StringOption(chunkerOptionName, "s", "Chunking algorithm, size-[bytes], rabin-[min]-[avg]-[max] or buzhash"), cmds.BoolOption(rawLeavesOptionName, "Use raw blocks for leaf nodes."), cmds.IntOption(maxFileLinksOptionName, "Limit the maximum number of links in UnixFS file nodes to this value."), - cmds.IntOption(maxDirectoryLinksOptionName, "Limit the maximum number of links in UnixFS basic directory nodes to this value."), + cmds.IntOption(maxDirectoryLinksOptionName, "Limit the maximum number of links in UnixFS basic directory nodes to this value. WARNING: use with caution, Import.UnixFSHAMTThreshold is a safer alternative."), cmds.IntOption(maxHAMTFanoutOptionName, "Limit the maximum number of links of a UnixFS HAMT directory node to this (power of 2, multiple of 8)."), cmds.BoolOption(noCopyOptionName, "Add the file using filestore. Implies raw-leaves. (experimental)"), cmds.BoolOption(fstoreCacheOptionName, "Check the filestore for pre-existing blocks. (experimental)"), diff --git a/docs/changelogs/v0.35.md b/docs/changelogs/v0.35.md index a16dbaf9dfd..76d8e4299be 100644 --- a/docs/changelogs/v0.35.md +++ b/docs/changelogs/v0.35.md @@ -27,13 +27,23 @@ for for information. ##### New DAG-shaping options when adding -We now allow controlling the default maximum number of links when adding content and building DAGs. There are two new `ipfs add` options: +We now allow controlling the default maximum number of links when adding content and building DAGs. 
+ +There are also three new `ipfs add` options that allow overiding the default only for specific import operation: - `--max-file-links`: controls the maximum number of children that a file node (when files are chunked, nodes have links to all the chunks). - `--max-directory-links`: controls the maximum number of children that a directory can have. Directories will be converted to HAMT-based directories when they have more than the given numbers of children. - `--max-hamt-fanout`: controls the maximum number of children that HAMT internal nodes can have. -Both options can be set permanently using the [corresponding `Import` settings](https://github.com/ipfs/kubo/blob/master/docs/config.md#importmaxlinks). +Default options can be set permanently using the [corresponding `Import` settings](https://github.com/ipfs/kubo/blob/master/docs/config.md#import): +- [`Import.UnixFSFileMaxLinks`](#TODO) +- [`Import.UnixFSirectoryMaxLinks`](#TODO) +- [`Import.UnixFSHAMTFanout`](#TODO) +- [`Import.UnixFSThreshold`](#TODO-move-from-Internal) + +Convenience profiles were updated: +- Renamed `test-cid-v1` to `legacy-cid-v1`: this profile hardcodes the current defaults. useful for users who don't want implicit defaults to change in future Kubo releases +- Added `test-cid-v1-2025q2` with modern defaults (max file DAG width raised from 174 to 1024), these are candidates for new defaults in future Kubo release #### 📦️ Important dependency updates diff --git a/docs/config.md b/docs/config.md index b20aeadacd1..a269b53b857 100644 --- a/docs/config.md +++ b/docs/config.md @@ -119,7 +119,7 @@ config file at runtime. 
- [`Routing.Type`](#routingtype) - [`Routing.AcceleratedDHTClient`](#routingaccelerateddhtclient) - [`Routing.LoopbackAddressesOnLanDHT`](#routingloopbackaddressesonlandht) - - [`Routing.IgnoreProviders`](#routingignoreproviders) + - [`Routing.IgnoreProviders`](#routingignoreproviders) - [`Routing.Routers`](#routingrouters) - [`Routing.Routers: Type`](#routingrouters-type) - [`Routing.Routers: Parameters`](#routingrouters-parameters) @@ -184,9 +184,9 @@ config file at runtime. - [`Import.HashFunction`](#importhashfunction) - [`Import.BatchMaxNodes`](#importbatchmaxnodes) - [`Import.BatchMaxSize`](#importbatchmaxsize) - - [`Import.MaxFileLinks`](#importmaxfilelinks) - - [`Import.MaxDirectoryLinks`](#importmaxDirectorylinks) - - [`Import.MaxHAMTFanout`](#importmaxhamtfanout) + - [`Import.UnixFSFileMaxLinks`](#importunixfsfilemaxlinks) + - [`Import.UnixFSDirectoryMaxLinks`](#importunixfsdirectorymaxlinks) + - [`Import.UnixFSHAMTDirectoryMaxFanout`](#importunixfshamtdirectorymaxfanout) - [`Version`](#version) - [`Version.AgentSuffix`](#versionagentsuffix) - [`Version.SwarmCheckEnabled`](#versionswarmcheckenabled) @@ -2550,7 +2550,7 @@ Default: `20971520` (20MiB) Type: `optionalInteger` -### `Import.MaxFileLinks` +### `Import.UnixFSFileMaxLinks` The maximum number of links that a node part of a UnixFS File can have when building the DAG while importing. @@ -2562,7 +2562,7 @@ Default: `174` Type: `optionalInteger` -### `Import.MaxDirectoryLinks` +### `Import.UnixFSDirectoryMaxLinks` The maximum number of links that a node part of a UnixFS basic directory can have when building the DAG while importing. @@ -2577,12 +2577,11 @@ This setting will cause basic directories to be converted to HAMTs when they exceed the maximum number of children. This happens transparently during the add process. The fanout of HAMT nodes is controlled by `MaxHAMTFanout`. 
-Default: `0` +Default: `0` (no limit, because [`Internal.UnixFSShardingSizeThreshold`](https://github.com/ipfs/kubo/blob/master/docs/config.md#internalunixfsshardingsizethreshold) triggers the switch to HAMT if a directory grows too big) Type: `optionalInteger` - -### `Import.MaxHAMTFanout` +### `Import.UnixFSHAMTDirectoryMaxFanout` The maximum number of children that a node part of a Unixfs HAMT directory (aka sharded directory) can have. From d31b5a824f00c4227cb5c32c7c0045559c01d866 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 12 Apr 2025 00:05:47 +0200 Subject: [PATCH 05/19] refactor: Import.UnixFSHAMTDirectorySizeThreshold moved `Internal.UnixFSShardingSizeThreshold` to `Import.UnixFSHAMTDirectorySizeThreshold` and included it in profiles --- config/import.go | 21 +++++++------ config/internal.go | 2 +- config/profile.go | 17 +++++------ core/node/groups.go | 12 ++++++-- docs/changelogs/v0.35.md | 45 +++++++++++++++++++--------- docs/config.md | 30 ++++++++++++------- test/sharness/t0032-mount-sharded.sh | 2 +- test/sharness/t0250-files-api.sh | 4 +-- test/sharness/t0260-sharding.sh | 4 +-- 9 files changed, 86 insertions(+), 51 deletions(-) diff --git a/config/import.go b/config/import.go index 8e0b2d8ca3a..21bf232c1c0 100644 --- a/config/import.go +++ b/config/import.go @@ -11,6 +11,8 @@ const ( DefaultUnixFSChunker = "size-262144" DefaultHashFunction = "sha2-256" + DefaultUnixFSHAMTDirectorySizeThreshold = "256KiB" // https://github.com/ipfs/boxo/blob/6c5a07602aed248acc86598f30ab61923a54a83e/ipld/unixfs/io/directory.go#L26 + // DefaultBatchMaxNodes controls the maximum number of nodes in a // write-batch. The total size of the batch is limited by // BatchMaxnodes and BatchMaxSize. @@ -31,13 +33,14 @@ var ( // Import configures the default options for ingesting data. This affects commands // that ingest data, such as 'ipfs add', 'ipfs dag put, 'ipfs block put', 'ipfs files write'. 
type Import struct { - CidVersion OptionalInteger - UnixFSRawLeaves Flag - UnixFSChunker OptionalString - HashFunction OptionalString - UnixFSFileMaxLinks OptionalInteger - UnixFSDirectoryMaxLinks OptionalInteger - UnixFSHAMTDirectoryMaxFanout OptionalInteger - BatchMaxNodes OptionalInteger - BatchMaxSize OptionalInteger + CidVersion OptionalInteger + UnixFSRawLeaves Flag + UnixFSChunker OptionalString + HashFunction OptionalString + UnixFSFileMaxLinks OptionalInteger + UnixFSDirectoryMaxLinks OptionalInteger + UnixFSHAMTDirectoryMaxFanout OptionalInteger + UnixFSHAMTDirectorySizeThreshold OptionalString + BatchMaxNodes OptionalInteger + BatchMaxSize OptionalInteger } diff --git a/config/internal.go b/config/internal.go index f43746534a3..4e9c7fba8bf 100644 --- a/config/internal.go +++ b/config/internal.go @@ -3,7 +3,7 @@ package config type Internal struct { // All marked as omitempty since we are expecting to make changes to all subcomponents of Internal Bitswap *InternalBitswap `json:",omitempty"` - UnixFSShardingSizeThreshold *OptionalString `json:",omitempty"` + UnixFSShardingSizeThreshold *OptionalString `json:",omitempty"` // moved to Import.UnixFSHAMTDirectorySizeThreshold Libp2pForceReachability *OptionalString `json:",omitempty"` BackupBootstrapInterval *OptionalDuration `json:",omitempty"` } diff --git a/config/profile.go b/config/profile.go index c3c058d2ae1..2cd6da53299 100644 --- a/config/profile.go +++ b/config/profile.go @@ -266,8 +266,7 @@ fetching may be degraded. }, }, "legacy-cid-v0": { - Description: `Makes UnixFS import produce legacy CIDv0 with no raw leaves, sha2-256 and 256 KiB chunks.`, - + Description: `Makes UnixFS import produce legacy CIDv0 with no raw leaves, sha2-256 and 256 KiB chunks. This is likely the least optimal preset, use only if legacy behavior is required.`, Transform: func(c *Config) error { c.Import.CidVersion = *NewOptionalInteger(0) c.Import.UnixFSRawLeaves = False @@ -276,13 +275,12 @@ fetching may be degraded. 
c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(174) c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0) c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256) - // TODO: move `Internal.UnixFSShardingSizeThreshold` to Import and set here + c.Import.UnixFSHAMTDirectorySizeThreshold = *NewOptionalString("256KiB") return nil }, }, "legacy-cid-v1": { - Description: `Makes UnixFS import produce legacy CIDv1 with the same DAG width as in legacy CIDv0.`, - + Description: `Makes UnixFS import produce legacy CIDv1 with the same suboptimal settings as legacy-cid-v0, but with CIDv1 and raw leaves. Use only if legacy behavior is required.`, Transform: func(c *Config) error { c.Import.CidVersion = *NewOptionalInteger(1) c.Import.UnixFSRawLeaves = True @@ -291,22 +289,21 @@ fetching may be degraded. c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(174) c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0) c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256) - // TODO: move `Internal.UnixFSShardingSizeThreshold` to Import and set here + c.Import.UnixFSHAMTDirectorySizeThreshold = *NewOptionalString("256KiB") return nil }, }, "test-cid-v1-2025q2": { Description: `Makes UnixFS import produce modern CIDv1 with raw leaves, sha2-256 and 1 MiB chunks and wider file DAGs (1024 links per level).`, - Transform: func(c *Config) error { c.Import.CidVersion = *NewOptionalInteger(1) c.Import.UnixFSRawLeaves = True - c.Import.UnixFSChunker = *NewOptionalString("size-1048576") + c.Import.UnixFSChunker = *NewOptionalString("size-1048576") // 1MiB c.Import.HashFunction = *NewOptionalString("sha2-256") c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(1024) - c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0) + c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0) // no limit here, use size-based Import.UnixFSHAMTDirectorySizeThreshold instead c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256) - // TODO: move 
`Internal.UnixFSShardingSizeThreshold` to Import and set here + c.Import.UnixFSHAMTDirectorySizeThreshold = *NewOptionalString("1MiB") // 1MiB return nil }, }, diff --git a/core/node/groups.go b/core/node/groups.go index e8f9739c36e..28f075946b0 100644 --- a/core/node/groups.go +++ b/core/node/groups.go @@ -409,18 +409,26 @@ func IPFS(ctx context.Context, bcfg *BuildCfg) fx.Option { } // Auto-sharding settings - shardSizeString := cfg.Internal.UnixFSShardingSizeThreshold.WithDefault("256kiB") + shardSizeString := cfg.Import.UnixFSHAMTDirectorySizeThreshold.WithDefault(config.DefaultUnixFSHAMTDirectorySizeThreshold) shardSizeInt, err := humanize.ParseBytes(shardSizeString) if err != nil { return fx.Error(err) } + // TODO: avoid overriding this globally, see if we can extend Directory interface like Get/SetMaxLinks from https://github.com/ipfs/boxo/pull/906 uio.HAMTShardingSize = int(shardSizeInt) // Migrate users of deprecated Experimental.ShardingEnabled flag if cfg.Experimental.ShardingEnabled { logger.Fatal("The `Experimental.ShardingEnabled` field is no longer used, please remove it from the config.\n" + "go-ipfs now automatically shards when directory block is bigger than `" + shardSizeString + "`.\n" + - "If you need to restore the old behavior (sharding everything) set `Internal.UnixFSShardingSizeThreshold` to `1B`.\n") + "If you need to restore the old behavior (sharding everything) set `Import.UnixFSHAMTDirectorySizeThreshold` to `1B`.\n") + } + if !cfg.Internal.UnixFSShardingSizeThreshold.IsDefault() { + msg := "The `Internal.UnixFSShardingSizeThreshold` field was renamed to `Import.UnixFSHAMTDirectorySizeThreshold`. 
Please update your config.\n" + if !cfg.Import.UnixFSHAMTDirectorySizeThreshold.IsDefault() { + logger.Fatal(msg) // conflicting values, hard fail + } + logger.Error(msg) } return fx.Options( diff --git a/docs/changelogs/v0.35.md b/docs/changelogs/v0.35.md index 76d8e4299be..6a82b549889 100644 --- a/docs/changelogs/v0.35.md +++ b/docs/changelogs/v0.35.md @@ -10,6 +10,10 @@ This release was brought to you by the [Shipyard](http://ipshipyard.com/) team. - [Overview](#overview) - [🔦 Highlights](#-highlights) + - [Enhanced DAG-Shaping Controls for `ipfs add`](#enhanced-dag-shaping-controls-for-ipfs-add) + - [New `ipfs add` Options](#new-ipfs-add-options) + - [Persistent `Import.*` Configuration](#persistent-import-configuration) + - [Updated Configuration Profiles](#updated-configuration-profiles) - [📦️ Important dependency updates](#-important-dependency-updates) - [📝 Changelog](#-changelog) - [👨‍👩‍👧‍👦 Contributors](#-contributors) @@ -25,25 +29,38 @@ routing system as providers of content. See the [documentation](https://github.com/ipfs/kubo/blob/master/docs/config.md#routingignoreproviders) for for information. -##### New DAG-shaping options when adding +#### Enhanced DAG-Shaping Controls for `ipfs add` -We now allow controlling the default maximum number of links when adding content and building DAGs. +This release advances CIDv1 support by introducing fine-grained control over UnixFS DAG shaping during data ingestion with the `ipfs add` command. Kubo now allows users to customize the maximum number of links per UnixFS block/chunk. -There are also three new `ipfs add` options that allow overiding the default only for specific import operation: +##### New `ipfs add` Options - - `--max-file-links`: controls the maximum number of children that a file node (when files are chunked, nodes have links to all the chunks). - - `--max-directory-links`: controls the maximum number of children that a directory can have.
Directories will be converted to HAMT-based directories when they have more than the given numbers of children. - - `--max-hamt-fanout`: controls the maximum number of children that HAMT internal nodes can have. +Three new options allow you to override default settings for specific import operations: -Default options can be set permanently using the [corresponding `Import` settings](https://github.com/ipfs/kubo/blob/master/docs/config.md#import): -- [`Import.UnixFSFileMaxLinks`](#TODO) -- [`Import.UnixFSirectoryMaxLinks`](#TODO) -- [`Import.UnixFSHAMTFanout`](#TODO) -- [`Import.UnixFSThreshold`](#TODO-move-from-Internal) +- `--max-file-links`: Sets the maximum number of child links for a single file chunk. +- `--max-directory-links`: Defines the maximum number of child entries in a "basic" (single-chunk) directory. + - Note: Directories exceeding this limit or the `Import.UnixFSHAMTDirectorySizeThreshold` are converted to HAMT-based (sharded across multiple blocks) structures. +- `--max-hamt-fanout`: Specifies the maximum number of child nodes for HAMT internal structures. -Convenience profiles were updated: -- Renamed `test-cid-v1` to `legacy-cid-v1`: this profile hardcodes the current defaults. 
useful for users who don't want implicit defaults to change in future Kubo releases -- Added `test-cid-v1-2025q2` with modern defaults (max file DAG width raised from 174 to 1024), these are candidates for new defaults in future Kubo release +##### Persistent `Import.*` Configuration + +You can set default values for these options using the following configuration settings: +- [`Import.UnixFSFileMaxLinks`](https://github.com/ipfs/kubo/blob/master/docs/config.md#importunixfsfilemaxlinks) +- [`Import.UnixFSDirectoryMaxLinks`](https://github.com/ipfs/kubo/blob/master/docs/config.md#importunixfsdirectorymaxlinks) +- [`Import.UnixFSHAMTDirectoryMaxFanout`](https://github.com/ipfs/kubo/blob/master/docs/config.md#importunixfshamtdirectorymaxfanout) +- [`Import.UnixFSHAMTDirectorySizeThreshold`](https://github.com/ipfs/kubo/blob/master/docs/config.md#importunixfshamtdirectorysizethreshold) + +##### Updated Configuration Profiles + +The release updated configuration [profiles](https://github.com/ipfs/kubo/blob/master/docs/config.md#profile) to incorporate these new `Import.*` settings: +- Renamed Profile: `test-cid-v1` is now `legacy-cid-v1`. This profile locks in current defaults, ensuring stability for users who prefer unchanged behavior in future releases. +- New Profile: `test-cid-v1-2025q2` adopts modern defaults, increasing the maximum file DAG width from 174 to 1024 and raising the HAMT directory sharding threshold from 256KiB to 1MiB, aligning with 1MiB file chunks. + - Benefits: Larger blocks (256KiB to 1MiB) enable up to 4x faster Amino DHT announcements and lookups. + - Status: This profile is a candidate for future default settings. + - Feedback: Try it out and share your thoughts at [discuss.ipfs.tech/t/should-we-profile-cids](https://discuss.ipfs.tech/t/should-we-profile-cids/18507) or [ipfs/specs#499](https://github.com/ipfs/specs/pull/499). + +> [!TIP] +> Apply the modern CIDv1 test profile with `ipfs config profile apply test-cid-v1-2025q2`. 
#### 📦️ Important dependency updates diff --git a/docs/config.md b/docs/config.md index a269b53b857..e38344e5d20 100644 --- a/docs/config.md +++ b/docs/config.md @@ -187,6 +187,7 @@ config file at runtime. - [`Import.UnixFSFileMaxLinks`](#importunixfsfilemaxlinks) - [`Import.UnixFSDirectoryMaxLinks`](#importunixfsdirectorymaxlinks) - [`Import.UnixFSHAMTDirectoryMaxFanout`](#importunixfshamtdirectorymaxfanout) + - [`Import.UnixFSHAMTDirectorySizeThreshold`](#importunixfshamtdirectorysizethreshold) - [`Version`](#version) - [`Version.AgentSuffix`](#versionagentsuffix) - [`Version.SwarmCheckEnabled`](#versionswarmcheckenabled) @@ -1194,15 +1195,7 @@ Type: `optionalDuration` (`null` means default which is 1s) ### `Internal.UnixFSShardingSizeThreshold` -The sharding threshold used internally to decide whether a UnixFS directory should be sharded or not. -This value is not strictly related to the size of the UnixFS directory block and any increases in -the threshold should come with being careful that block sizes stay under 2MiB in order for them to be -reliably transferable through the networking stack (IPFS peers on the public swarm tend to ignore requests for blocks bigger than 2MiB). - -Decreasing this value to 1B is functionally equivalent to the previous experimental sharding option to -shard all directories. - -Type: `optionalBytes` (`null` means default which is 256KiB) +**MOVED:** see [`Import.UnixFSHAMTDirectorySizeThreshold`](#importunixfshamtdirectorysizethreshold) ## `Ipns` @@ -2577,7 +2570,7 @@ This setting will cause basic directories to be converted to HAMTs when they exceed the maximum number of children. This happens transparently during the add process. The fanout of HAMT nodes is controlled by `MaxHAMTFanout`. 
-Default: `0` (no limit, because [`Internal.UnixFSShardingSizeThreshold`](https://github.com/ipfs/kubo/blob/master/docs/config.md#internalunixfsshardingsizethreshold) triggers the switch to HAMT if a directory grows too big) +Default: `0` (no limit, because [`Import.UnixFSHAMTDirectorySizeThreshold`](#importunixfshamtdirectorysizethreshold) controls when to switch to HAMT sharding when a directory grows too big) Type: `optionalInteger` @@ -2595,6 +2588,23 @@ Default: `256` Type: `optionalInteger` +### `Import.UnixFSHAMTDirectorySizeThreshold` + +The sharding threshold to decide whether a basic UnixFS directory +should be sharded (converted into HAMT Directory) or not. + +This value is not strictly related to the size of the UnixFS directory block +and any increases in the threshold should come with being careful that block +sizes stay under 2MiB in order for them to be reliably transferable through the +networking stack. At the time of writing this, IPFS peers on the public swarm +tend to ignore requests for blocks bigger than 2MiB. + +Setting to `1B` is functionally equivalent to always using HAMT (useful in testing). 
+ +Default: `256KiB` (may change, inspect `DefaultUnixFSHAMTDirectorySizeThreshold` to confirm) + +Type: `optionalBytes` + ## `Version` Options to configure agent version announced to the swarm, and leveraging diff --git a/test/sharness/t0032-mount-sharded.sh b/test/sharness/t0032-mount-sharded.sh index 10ba421a225..7a3e518585b 100755 --- a/test/sharness/t0032-mount-sharded.sh +++ b/test/sharness/t0032-mount-sharded.sh @@ -16,7 +16,7 @@ fi test_init_ipfs test_expect_success 'force sharding' ' - ipfs config --json Internal.UnixFSShardingSizeThreshold "\"1B\"" + ipfs config --json Import.UnixFSHAMTDirectorySizeThreshold "\"1B\"" ' test_launch_ipfs_daemon diff --git a/test/sharness/t0250-files-api.sh b/test/sharness/t0250-files-api.sh index 9c01a5bcf17..63dacf7d407 100755 --- a/test/sharness/t0250-files-api.sh +++ b/test/sharness/t0250-files-api.sh @@ -849,7 +849,7 @@ tests_for_files_api "with-daemon" test_kill_ipfs_daemon test_expect_success "enable sharding in config" ' - ipfs config --json Internal.UnixFSShardingSizeThreshold "\"1B\"" + ipfs config --json Import.UnixFSHAMTDirectorySizeThreshold "\"1B\"" ' test_launch_ipfs_daemon_without_network @@ -880,7 +880,7 @@ test_expect_success "set up automatic sharding/unsharding data" ' ' test_expect_success "reset automatic sharding" ' - ipfs config --json Internal.UnixFSShardingSizeThreshold null + ipfs config --json Import.UnixFSHAMTDirectorySizeThreshold null ' test_launch_ipfs_daemon_without_network diff --git a/test/sharness/t0260-sharding.sh b/test/sharness/t0260-sharding.sh index 85e4a7ca708..7b0094fd4ea 100755 --- a/test/sharness/t0260-sharding.sh +++ b/test/sharness/t0260-sharding.sh @@ -34,7 +34,7 @@ test_init_ipfs UNSHARDED="QmavrTrQG4VhoJmantURAYuw3bowq3E2WcvP36NRQDAC1N" test_expect_success "force sharding off" ' -ipfs config --json Internal.UnixFSShardingSizeThreshold "\"1G\"" +ipfs config --json Import.UnixFSHAMTDirectorySizeThreshold "\"1G\"" ' test_add_dir "$UNSHARDED" @@ -46,7 +46,7 @@ test_add_dir 
"$UNSHARDED" test_kill_ipfs_daemon test_expect_success "force sharding on" ' - ipfs config --json Internal.UnixFSShardingSizeThreshold "\"1B\"" + ipfs config --json Import.UnixFSHAMTDirectorySizeThreshold "\"1B\"" ' SHARDED="QmSCJD1KYLhVVHqBK3YyXuoEqHt7vggyJhzoFYbT8v1XYL" From 9cd4dab05b8d88e7378ed77ce146a13207c5ae48 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 12 Apr 2025 00:34:56 +0200 Subject: [PATCH 06/19] test: legacy-cid-v1 --- test/cli/add_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cli/add_test.go b/test/cli/add_test.go index ae652989ab5..b1dd8cace16 100644 --- a/test/cli/add_test.go +++ b/test/cli/add_test.go @@ -106,9 +106,9 @@ func TestAdd(t *testing.T) { require.Equal(t, shortStringCidV0, cidStr) }) - t.Run("ipfs init --profile=test-cid-v1 produces modern CIDv1", func(t *testing.T) { + t.Run("ipfs init --profile=legacy-cid-v1 produces modern CIDv1", func(t *testing.T) { t.Parallel() - node := harness.NewT(t).NewNode().Init("--profile=test-cid-v1") + node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v1") node.StartDaemon() defer node.StopDaemon() From c5b64cf66232d6ca6055150edbc752f0b931657f Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 12 Apr 2025 00:36:00 +0200 Subject: [PATCH 07/19] docs: test-cid-v1-2025-v35 placeholder name based on year and kubo release --- config/profile.go | 4 ++-- docs/changelogs/v0.35.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/profile.go b/config/profile.go index 2cd6da53299..f05a865ef04 100644 --- a/config/profile.go +++ b/config/profile.go @@ -280,7 +280,7 @@ fetching may be degraded. }, }, "legacy-cid-v1": { - Description: `Makes UnixFS import produce legacy CIDv1 with the same suboptimal settings as legacy-cid-v0, but with CIDv1 and raw leaves. 
Use only if legacy behavior is required.`, + Description: `Makes UnixFS import produce legacy CIDv1 with the same suboptimal settings as legacy-cid-v0, but with 1MiB file chunk, CIDv1 and raw leaves. Use only if legacy behavior is required.`, Transform: func(c *Config) error { c.Import.CidVersion = *NewOptionalInteger(1) c.Import.UnixFSRawLeaves = True @@ -293,7 +293,7 @@ fetching may be degraded. return nil }, }, - "test-cid-v1-2025q2": { + "test-cid-v1-2025-v35": { Description: `Makes UnixFS import produce modern CIDv1 with raw leaves, sha2-256 and 1 MiB chunks and wider file DAGs (1024 links per level).`, Transform: func(c *Config) error { c.Import.CidVersion = *NewOptionalInteger(1) diff --git a/docs/changelogs/v0.35.md b/docs/changelogs/v0.35.md index 33c548f41f1..4541e37c49b 100644 --- a/docs/changelogs/v0.35.md +++ b/docs/changelogs/v0.35.md @@ -71,13 +71,13 @@ You can set default values for these options using the following configuration s The release updated configuration [profiles](https://github.com/ipfs/kubo/blob/master/docs/config.md#profile) to incorporate these new `Import.*` settings: - Renamed Profile: `test-cid-v1` is now `legacy-cid-v1`. This profile locks in current defaults, ensuring stability for users who prefer unchanged behavior in future releases. -- New Profile: `test-cid-v1-2025q2` adopts modern defaults, increasing the maximum file DAG width from 174 to 1024 and raising the HAMT directory sharding threshold from 256KiB to 1MiB, aligning with 1MiB file chunks. +- New Profile: `test-cid-v1-2025-v35` adopts modern defaults, increasing the maximum file DAG width from 174 to 1024 and raising the HAMT directory sharding threshold from 256KiB to 1MiB, aligning with 1MiB file chunks. - Benefits: Larger blocks (256KiB to 1MiB) enable up to 4x faster Amino DHT announcements and lookups. - Status: This profile is a candidate for future default settings. 
- Feedback: Try it out and share your thoughts at [discuss.ipfs.tech/t/should-we-profile-cids](https://discuss.ipfs.tech/t/should-we-profile-cids/18507) or [ipfs/specs#499](https://github.com/ipfs/specs/pull/499). > [!TIP] -> Apply the modern CIDv1 test profile with `ipfs config profile apply test-cid-v1-2025q2`. +> Apply the modern CIDv1 test profile with `ipfs config profile apply test-cid-v1-2025-v35`. #### 📦️ Important dependency updates From c402af5ec3b165d9d5430b4b2ce5332afa71173b Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Mon, 14 Apr 2025 22:13:33 +0200 Subject: [PATCH 08/19] test: legacy-cid-v1, UnixFSChunker, UnixFSFileMaxLinks basic smoke test confirming UnixFSFileMaxLinks sets to 'ipfs add' defaults and informs the DAG shape --- test/cli/add_test.go | 45 ++++++++++++++++++++++++-- test/cli/harness/ipfs.go | 23 +++++++++++++ test/cli/harness/pbinspect.go | 54 +++++++++++++++++++++++++++++++ test/cli/harness/random_reader.go | 46 ++++++++++++++++++++++++++ 4 files changed, 166 insertions(+), 2 deletions(-) create mode 100644 test/cli/harness/pbinspect.go create mode 100644 test/cli/harness/random_reader.go diff --git a/test/cli/add_test.go b/test/cli/add_test.go index b1dd8cace16..228b4a215c1 100644 --- a/test/cli/add_test.go +++ b/test/cli/add_test.go @@ -5,6 +5,7 @@ import ( "github.com/ipfs/kubo/config" "github.com/ipfs/kubo/test/cli/harness" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -106,13 +107,53 @@ func TestAdd(t *testing.T) { require.Equal(t, shortStringCidV0, cidStr) }) - t.Run("ipfs init --profile=legacy-cid-v1 produces modern CIDv1", func(t *testing.T) { + t.Run("ipfs init --profile=legacy-cid-v0 applies UnixFSChunker=size-262144 and UnixFSFileMaxLinks=174", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v0") + node.StartDaemon() + defer node.StopDaemon() + + // Add 44544KiB file: + // 174 * 256KiB should fit in single DAG layer + cidStr := 
node.IPFSAddFromSeed("44544KiB", "v0-seed") + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 174, len(root.Links)) + + // add 256KiB (one more block), it should force rebalancing DAG and moving most to second layer + cidStr = node.IPFSAddFromSeed("44800KiB", "v0-seed") + root, err = node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 2, len(root.Links)) + }) + + t.Run("ipfs init --profile=legacy-cid-v1 produces CIDv1 with raw leaves", func(t *testing.T) { t.Parallel() node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v1") node.StartDaemon() defer node.StopDaemon() cidStr := node.IPFSAddStr(shortString) - require.Equal(t, shortStringCidV1, cidStr) + require.Equal(t, shortStringCidV1, cidStr) // raw leaf + }) + + t.Run("ipfs init --profile=legacy-cid-v1 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks=174", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v1") + node.StartDaemon() + defer node.StopDaemon() + + // Add 174MiB file: + // 174 * 1MiB should fit in single layer + cidStr := node.IPFSAddFromSeed("174MiB", "v1-seed") + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 174, len(root.Links)) + + // add +1MiB (one more block), it should force rebalancing DAG and moving most to second layer + cidStr = node.IPFSAddFromSeed("175MiB", "v1-seed") + root, err = node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 2, len(root.Links)) }) } diff --git a/test/cli/harness/ipfs.go b/test/cli/harness/ipfs.go index 8537e2aa25d..574d2a2ed36 100644 --- a/test/cli/harness/ipfs.go +++ b/test/cli/harness/ipfs.go @@ -76,6 +76,17 @@ func (n *Node) IPFSAddStr(content string, args ...string) string { return n.IPFSAdd(strings.NewReader(content), args...) } +// IPFSAddDeterministic produces a CID of a file of a certain size, filled with deterministically generated bytes based on some seed. 
+// This ensures deterministic CID on the other end, that can be used in tests. +func (n *Node) IPFSAddFromSeed(size string, seed string, args ...string) string { + log.Debugf("node %d adding %s of deterministic pseudo-random data with seed %q and args: %v", n.ID, size, seed, args) + reader, err := createRandomReader(size, seed) + if err != nil { + panic(err) + } + return n.IPFSAdd(reader, args...) +} + func (n *Node) IPFSAdd(content io.Reader, args ...string) string { log.Debugf("node %d adding with args: %v", n.ID, args) fullArgs := []string{"add", "-q"} @@ -108,3 +119,15 @@ func (n *Node) IPFSDagImport(content io.Reader, cid string, args ...string) erro }) return res.Err } + +/* +func (n *Node) IPFSDagExport(cid string, car *os.File) error { + log.Debugf("node %d dag export of %s to %q with args: %v", n.ID, cid, car.Name()) + res := n.Runner.MustRun(RunRequest{ + Path: n.IPFSBin, + Args: []string{"dag", "export", cid}, + CmdOpts: []CmdOpt{RunWithStdout(car)}, + }) + return res.Err +} +*/ diff --git a/test/cli/harness/pbinspect.go b/test/cli/harness/pbinspect.go new file mode 100644 index 00000000000..6abddb61f10 --- /dev/null +++ b/test/cli/harness/pbinspect.go @@ -0,0 +1,54 @@ +package harness + +import ( + "bytes" + "encoding/json" +) + +// InspectPBNode uses dag-json output of 'ipfs dag get' to inspect +// "Logical Format" of DAG-PB as defined in +// https://web.archive.org/web/20250403194752/https://ipld.io/specs/codecs/dag-pb/spec/#logical-format +// (mainly used for inspecting Links without depending on any libraries) +func (n *Node) InspectPBNode(cid string) (PBNode, error) { + log.Debugf("node %d dag get %s as dag-json", n.ID, cid) + + var root PBNode + var dagJsonOutput bytes.Buffer + res := n.Runner.MustRun(RunRequest{ + Path: n.IPFSBin, + Args: []string{"dag", "get", "--output-codec=dag-json", cid}, + CmdOpts: []CmdOpt{RunWithStdout(&dagJsonOutput)}, + }) + if res.Err != nil { + return root, res.Err + } + + err := json.Unmarshal(dagJsonOutput.Bytes(), 
&root) + if err != nil { + return root, err + } + return root, nil + +} + +// Structs below match the dag-json output of 'ipfs dag get' for a DAG-PB node. +type PBHash struct { + Slash string `json:"/"` +} + +type PBLink struct { + Hash PBHash `json:"Hash"` + Name string `json:"Name"` + Tsize int `json:"Tsize"` +} + +type PBData struct { + Slash struct { + Bytes string `json:"bytes"` + } `json:"/"` +} + +type PBNode struct { + Data PBData `json:"Data"` + Links []PBLink `json:"Links"` +} diff --git a/test/cli/harness/random_reader.go b/test/cli/harness/random_reader.go new file mode 100644 index 00000000000..ed8d9fcd0f1 --- /dev/null +++ b/test/cli/harness/random_reader.go @@ -0,0 +1,46 @@ +package harness + +import ( + "crypto/sha256" + "io" + + "github.com/dustin/go-humanize" + "golang.org/x/crypto/chacha20" +) + +type randomReader struct { + cipher *chacha20.Cipher + remaining int64 +} + +func (r *randomReader) Read(p []byte) (int, error) { + if r.remaining <= 0 { + return 0, io.EOF + } + n := int64(len(p)) + if n > r.remaining { + n = r.remaining + } + // Generate random bytes directly into the provided buffer + r.cipher.XORKeyStream(p[:n], make([]byte, n)) + r.remaining -= n + return int(n), nil +} + +// createRandomReader produces specified number of pseudo-random bytes +// from a seed. 
+func createRandomReader(sizeStr string, seed string) (io.Reader, error) { + size, err := humanize.ParseBytes(sizeStr) + if err != nil { + return nil, err + } + // Hash the seed string to a 32-byte key for ChaCha20 + key := sha256.Sum256([]byte(seed)) + // Use ChaCha20 for deterministic random bytes + var nonce [chacha20.NonceSize]byte // Zero nonce for simplicity + cipher, err := chacha20.NewUnauthenticatedCipher(key[:chacha20.KeySize], nonce[:]) + if err != nil { + return nil, err + } + return &randomReader{cipher: cipher, remaining: int64(size)}, nil +} From 59b670dd15bd988f6c0edc5867e4253c4ea09ff8 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Mon, 14 Apr 2025 23:03:08 +0200 Subject: [PATCH 09/19] test: --profile=test-cid-v1-2025-v35 and UnixFSFileMaxLinks --- test/cli/add_test.go | 99 ++++++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 26 deletions(-) diff --git a/test/cli/add_test.go b/test/cli/add_test.go index 228b4a215c1..330a5715e20 100644 --- a/test/cli/add_test.go +++ b/test/cli/add_test.go @@ -107,24 +107,33 @@ func TestAdd(t *testing.T) { require.Equal(t, shortStringCidV0, cidStr) }) - t.Run("ipfs init --profile=legacy-cid-v0 applies UnixFSChunker=size-262144 and UnixFSFileMaxLinks=174", func(t *testing.T) { + t.Run("ipfs init --profile=legacy-cid-v0 applies UnixFSChunker=size-262144 and UnixFSFileMaxLinks", func(t *testing.T) { t.Parallel() node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v0") node.StartDaemon() defer node.StopDaemon() + seed := "v0-seed" + + t.Run("under UnixFSFileMaxLinks=174", func(t *testing.T) { + // Add 44544KiB file: + // 174 * 256KiB should fit in single DAG layer + cidStr := node.IPFSAddFromSeed("44544KiB", seed) + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 174, len(root.Links)) + // expect same CID every time + require.Equal(t, "QmUbBALi174SnogsUzLpYbD4xPiBSFANF4iztWCsHbMKh2", cidStr) + }) - // Add 44544KiB file: - // 174 * 256KiB should fit 
in single DAG layer - cidStr := node.IPFSAddFromSeed("44544KiB", "v0-seed") - root, err := node.InspectPBNode(cidStr) - assert.NoError(t, err) - require.Equal(t, 174, len(root.Links)) - - // add 256KiB (one more block), it should force rebalancing DAG and moving most to second layer - cidStr = node.IPFSAddFromSeed("44800KiB", "v0-seed") - root, err = node.InspectPBNode(cidStr) - assert.NoError(t, err) - require.Equal(t, 2, len(root.Links)) + t.Run("above UnixFSFileMaxLinks=174", func(t *testing.T) { + // add 256KiB (one more block), it should force rebalancing DAG and moving most to second layer + cidStr := node.IPFSAddFromSeed("44800KiB", seed) + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 2, len(root.Links)) + // expect same CID every time + require.Equal(t, "QmepeWtdmS1hHXx1oZXsPUv6bMrfRRKfZcoPPU4eEfjnbf", cidStr) + }) }) t.Run("ipfs init --profile=legacy-cid-v1 produces CIDv1 with raw leaves", func(t *testing.T) { @@ -137,23 +146,61 @@ func TestAdd(t *testing.T) { require.Equal(t, shortStringCidV1, cidStr) // raw leaf }) - t.Run("ipfs init --profile=legacy-cid-v1 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks=174", func(t *testing.T) { + t.Run("ipfs init --profile=legacy-cid-v1 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks", func(t *testing.T) { t.Parallel() node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v1") node.StartDaemon() defer node.StopDaemon() + seed := "v1-seed" + + t.Run("under UnixFSFileMaxLinks=174", func(t *testing.T) { + // Add 174MiB file: + // 174 * 1MiB should fit in single layer + cidStr := node.IPFSAddFromSeed("174MiB", seed) + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 174, len(root.Links)) + // expect same CID every time + require.Equal(t, "bafybeigwduxcf2aawppv3isnfeshnimkyplvw3hthxjhr2bdeje4tdaicu", cidStr) + }) + + t.Run("above UnixFSFileMaxLinks=174", func(t *testing.T) { + // add +1MiB (one more block), it should force 
rebalancing DAG and moving most to second layer + cidStr := node.IPFSAddFromSeed("175MiB", seed) + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 2, len(root.Links)) + // expect same CID every time + require.Equal(t, "bafybeidhd7lo2n2v7lta5yamob3xwhbxcczmmtmhquwhjesi35jntf7mpu", cidStr) + }) + }) + + t.Run("ipfs init --profile=test-cid-v1-2025-v35 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init("--profile=test-cid-v1-2025-v35") + node.StartDaemon() + defer node.StopDaemon() + seed := "v1-seed-1024" + + t.Run("under UnixFSFileMaxLinks=1024", func(t *testing.T) { + // Add 174MiB file: + // 1024 * 1MiB should fit in single layer + cidStr := node.IPFSAddFromSeed("1024MiB", seed) + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 1024, len(root.Links)) + // expect same CID every time + require.Equal(t, "bafybeiej5w63ir64oxgkr5htqmlerh5k2rqflurn2howimexrlkae64xru", cidStr) + }) - // Add 174MiB file: - // 174 * 1MiB should fit in single layer - cidStr := node.IPFSAddFromSeed("174MiB", "v1-seed") - root, err := node.InspectPBNode(cidStr) - assert.NoError(t, err) - require.Equal(t, 174, len(root.Links)) - - // add +1MiB (one more block), it should force rebalancing DAG and moving most to second layer - cidStr = node.IPFSAddFromSeed("175MiB", "v1-seed") - root, err = node.InspectPBNode(cidStr) - assert.NoError(t, err) - require.Equal(t, 2, len(root.Links)) + t.Run("above UnixFSFileMaxLinks=1024", func(t *testing.T) { + // add +1MiB (one more block), it should force rebalancing DAG and moving most to second layer + cidStr := node.IPFSAddFromSeed("1025MiB", seed) + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 2, len(root.Links)) + // expect same CID every time + require.Equal(t, "bafybeieilp2qx24pe76hxrxe6bpef5meuxto3kj5dd6mhb5kplfeglskdm", cidStr) + }) }) } From 
6f3dfb5e3709f3af71df6656a231e974ed7c90aa Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Mon, 14 Apr 2025 23:30:13 +0200 Subject: [PATCH 10/19] refactor: RandomReader --- test/cli/harness/ipfs.go | 2 +- test/cli/{harness => testutils}/random_reader.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename test/cli/{harness => testutils}/random_reader.go (92%) diff --git a/test/cli/harness/ipfs.go b/test/cli/harness/ipfs.go index 574d2a2ed36..88f839c0794 100644 --- a/test/cli/harness/ipfs.go +++ b/test/cli/harness/ipfs.go @@ -80,7 +80,7 @@ func (n *Node) IPFSAddStr(content string, args ...string) string { // This ensures deterministic CID on the other end, that can be used in tests. func (n *Node) IPFSAddFromSeed(size string, seed string, args ...string) string { log.Debugf("node %d adding %s of deterministic pseudo-random data with seed %q and args: %v", n.ID, size, seed, args) - reader, err := createRandomReader(size, seed) + reader, err := RandomReader(size, seed) if err != nil { panic(err) } diff --git a/test/cli/harness/random_reader.go b/test/cli/testutils/random_reader.go similarity index 92% rename from test/cli/harness/random_reader.go rename to test/cli/testutils/random_reader.go index ed8d9fcd0f1..82ccc67800e 100644 --- a/test/cli/harness/random_reader.go +++ b/test/cli/testutils/random_reader.go @@ -1,4 +1,4 @@ -package harness +package testutils import ( "crypto/sha256" @@ -29,7 +29,7 @@ func (r *randomReader) Read(p []byte) (int, error) { // createRandomReader produces specified number of pseudo-random bytes // from a seed. 
-func createRandomReader(sizeStr string, seed string) (io.Reader, error) { +func RandomReader(sizeStr string, seed string) (io.Reader, error) { size, err := humanize.ParseBytes(sizeStr) if err != nil { return nil, err From cf3d09fed3bf8c8f235c69951845bc4cda7f42c8 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Apr 2025 15:27:13 +0200 Subject: [PATCH 11/19] test: UnixFSHAMTDirectorySizeThreshold --- test/cli/add_test.go | 252 ++++++++++++++++-- test/cli/harness/ipfs.go | 4 +- ...ndom_reader.go => random_deterministic.go} | 2 +- test/cli/testutils/random_files.go | 27 +- 4 files changed, 254 insertions(+), 31 deletions(-) rename test/cli/testutils/{random_reader.go => random_deterministic.go} (92%) diff --git a/test/cli/add_test.go b/test/cli/add_test.go index 330a5715e20..7febe2b54b1 100644 --- a/test/cli/add_test.go +++ b/test/cli/add_test.go @@ -1,10 +1,16 @@ package cli import ( + "io" + "os" + "path/filepath" + "strings" "testing" + "github.com/dustin/go-humanize" "github.com/ipfs/kubo/config" "github.com/ipfs/kubo/test/cli/harness" + "github.com/ipfs/kubo/test/cli/testutils" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -20,6 +26,11 @@ func TestAdd(t *testing.T) { shortStringCidV1Sha512 = "bafkrgqbqt3gerhas23vuzrapkdeqf4vu2dwxp3srdj6hvg6nhsug2tgyn6mj3u23yx7utftq3i2ckw2fwdh5qmhid5qf3t35yvkc5e5ottlw6" ) + const ( + cidV0Length = 34 // cidv0 sha2-256 + cidV1Length = 36 // cidv1 sha2-256 + ) + t.Run("produced cid version: implicit default (CIDv0)", func(t *testing.T) { t.Parallel() node := harness.NewT(t).NewNode().Init().StartDaemon() @@ -109,15 +120,17 @@ func TestAdd(t *testing.T) { t.Run("ipfs init --profile=legacy-cid-v0 applies UnixFSChunker=size-262144 and UnixFSFileMaxLinks", func(t *testing.T) { t.Parallel() - node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v0") - node.StartDaemon() - defer node.StopDaemon() seed := "v0-seed" + profile := "--profile=legacy-cid-v0" t.Run("under 
UnixFSFileMaxLinks=174", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() // Add 44544KiB file: // 174 * 256KiB should fit in single DAG layer - cidStr := node.IPFSAddFromSeed("44544KiB", seed) + cidStr := node.IPFSAddDeterministic("44544KiB", seed) root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.Equal(t, 174, len(root.Links)) @@ -126,8 +139,12 @@ func TestAdd(t *testing.T) { }) t.Run("above UnixFSFileMaxLinks=174", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() // add 256KiB (one more block), it should force rebalancing DAG and moving most to second layer - cidStr := node.IPFSAddFromSeed("44800KiB", seed) + cidStr := node.IPFSAddDeterministic("44800KiB", seed) root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.Equal(t, 2, len(root.Links)) @@ -136,6 +153,52 @@ func TestAdd(t *testing.T) { }) }) + t.Run("ipfs init --profile=legacy-cid-v0 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) { + t.Parallel() + seed := "hamt-legacy-cid-v0" + profile := "--profile=legacy-cid-v0" + + t.Run("under UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() + + randDir, err := os.MkdirTemp(node.Dir, seed) + require.NoError(t, err) + + // Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total + err = createDirectoryForHAMT(randDir, cidV0Length, "255KiB", seed) + require.NoError(t, err) + cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() + + // Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout=256 (indicating regular "basic" directory" + root, err := node.InspectPBNode(cidStr) + 
assert.NoError(t, err) + require.Equal(t, 903, len(root.Links)) + }) + + t.Run("above UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() + + randDir, err := os.MkdirTemp(node.Dir, seed) + require.NoError(t, err) + + // Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total + err = createDirectoryForHAMT(randDir, cidV0Length, "257KiB", seed) + require.NoError(t, err) + cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() + + // Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout=256 + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 252, len(root.Links)) + }) + }) + t.Run("ipfs init --profile=legacy-cid-v1 produces CIDv1 with raw leaves", func(t *testing.T) { t.Parallel() node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v1") @@ -146,17 +209,19 @@ func TestAdd(t *testing.T) { require.Equal(t, shortStringCidV1, cidStr) // raw leaf }) - t.Run("ipfs init --profile=legacy-cid-v1 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks", func(t *testing.T) { + t.Run("ipfs init --profile=legacy-cid-v1 applies UnixFSChunker=size-1048576", func(t *testing.T) { t.Parallel() - node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v1") - node.StartDaemon() - defer node.StopDaemon() seed := "v1-seed" + profile := "--profile=legacy-cid-v1" t.Run("under UnixFSFileMaxLinks=174", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() // Add 174MiB file: // 174 * 1MiB should fit in single layer - cidStr := node.IPFSAddFromSeed("174MiB", seed) + cidStr := node.IPFSAddDeterministic("174MiB", seed) root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.Equal(t, 174, len(root.Links)) @@ -165,8 +230,12 @@ 
func TestAdd(t *testing.T) { }) t.Run("above UnixFSFileMaxLinks=174", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() // add +1MiB (one more block), it should force rebalancing DAG and moving most to second layer - cidStr := node.IPFSAddFromSeed("175MiB", seed) + cidStr := node.IPFSAddDeterministic("175MiB", seed) root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.Equal(t, 2, len(root.Links)) @@ -175,17 +244,65 @@ func TestAdd(t *testing.T) { }) }) - t.Run("ipfs init --profile=test-cid-v1-2025-v35 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks", func(t *testing.T) { + t.Run("ipfs init --profile=legacy-cid-v1 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) { + t.Parallel() + seed := "hamt-legacy-cid-v1" + profile := "--profile=legacy-cid-v1" + + t.Run("under UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() + + randDir, err := os.MkdirTemp(node.Dir, seed) + require.NoError(t, err) + + // Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total + err = createDirectoryForHAMT(randDir, cidV1Length, "255KiB", seed) + require.NoError(t, err) + cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() + + // Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout=256 (indicating regular "basic" directory" + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 897, len(root.Links)) + }) + + t.Run("above UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() + + randDir, err := os.MkdirTemp(node.Dir, seed) + require.NoError(t, err) + + // 
Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total + err = createDirectoryForHAMT(randDir, cidV1Length, "257KiB", seed) + require.NoError(t, err) + cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() + + // Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout=256 + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 245, len(root.Links)) + }) + }) + + t.Run("ipfs init --profile=test-cid-v1-2025-v35 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks=1024", func(t *testing.T) { t.Parallel() - node := harness.NewT(t).NewNode().Init("--profile=test-cid-v1-2025-v35") - node.StartDaemon() - defer node.StopDaemon() seed := "v1-seed-1024" + profile := "--profile=test-cid-v1-2025-v35" t.Run("under UnixFSFileMaxLinks=1024", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() // Add 174MiB file: // 1024 * 1MiB should fit in single layer - cidStr := node.IPFSAddFromSeed("1024MiB", seed) + cidStr := node.IPFSAddDeterministic("1024MiB", seed) root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.Equal(t, 1024, len(root.Links)) @@ -194,8 +311,12 @@ func TestAdd(t *testing.T) { }) t.Run("above UnixFSFileMaxLinks=1024", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() // add +1MiB (one more block), it should force rebalancing DAG and moving most to second layer - cidStr := node.IPFSAddFromSeed("1025MiB", seed) + cidStr := node.IPFSAddDeterministic("1025MiB", seed) root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.Equal(t, 2, len(root.Links)) @@ -203,4 +324,101 @@ func TestAdd(t *testing.T) { require.Equal(t, "bafybeieilp2qx24pe76hxrxe6bpef5meuxto3kj5dd6mhb5kplfeglskdm", cidStr) }) }) + + t.Run("ipfs init 
--profile=test-cid-v1-2025-v35 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) { + t.Parallel() + seed := "hamt-legacy-cid-v1" + profile := "--profile=test-cid-v1-2025-v35" + + t.Run("under UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() + + randDir, err := os.MkdirTemp(node.Dir, seed) + require.NoError(t, err) + + // Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total + err = createDirectoryForHAMT(randDir, cidV1Length, "1023KiB", seed) + require.NoError(t, err) + cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() + + // Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout=256 (indicating regular "basic" directory" + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 3599, len(root.Links)) + }) + + t.Run("above UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init(profile) + node.StartDaemon() + defer node.StopDaemon() + + randDir, err := os.MkdirTemp(node.Dir, seed) + require.NoError(t, err) + + // Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total + err = createDirectoryForHAMT(randDir, cidV1Length, "1025KiB", seed) + require.NoError(t, err) + cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() + + // Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout=256 + root, err := node.InspectPBNode(cidStr) + assert.NoError(t, err) + require.Equal(t, 256, len(root.Links)) + }) + }) + +} + +// createDirectoryForHAMT aims to create enough files with long names for the directory block to be close to the UnixFSHAMTDirectorySizeThreshold. 
+// The calculation is based on boxo's HAMTShardingSize and sizeBelowThreshold which calculates ballpark size of the block +// by adding length of link names and the binary cid length. +// See https://github.com/ipfs/boxo/blob/6c5a07602aed248acc86598f30ab61923a54a83e/ipld/unixfs/io/directory.go#L491 +func createDirectoryForHAMT(dirPath string, cidLength int, unixfsNodeSizeTarget, seed string) error { + hamtThreshold, err := humanize.ParseBytes(unixfsNodeSizeTarget) + if err != nil { + return err + } + + // Calculate how many files with long filenames are needed to hit UnixFSHAMTDirectorySizeThreshold + nameLen := 255 // max that works across windows/macos/linux + alphabetLen := len(testutils.AlphabetEasy) + numFiles := int(hamtThreshold) / (nameLen + cidLength) + + // Deterministic pseudo-random bytes for static CID + drand, err := testutils.DeterministicRandomReader(unixfsNodeSizeTarget, seed) + if err != nil { + return err + } + + // Create necessary files in a single, flat directory + for i := 0; i < numFiles; i++ { + buf := make([]byte, nameLen) + _, err := io.ReadFull(drand, buf) + if err != nil { + return err + } + + // Convert deterministic pseudo-random bytes to ASCII + var sb strings.Builder + + for _, b := range buf { + // Map byte to printable ASCII range (33-126) + char := testutils.AlphabetEasy[int(b)%alphabetLen] + sb.WriteRune(char) + } + filename := sb.String()[:nameLen] + filePath := filepath.Join(dirPath, filename) + + // Create empty file + f, err := os.Create(filePath) + if err != nil { + return err + } + f.Close() + } + return nil } diff --git a/test/cli/harness/ipfs.go b/test/cli/harness/ipfs.go index 88f839c0794..0842d362727 100644 --- a/test/cli/harness/ipfs.go +++ b/test/cli/harness/ipfs.go @@ -78,9 +78,9 @@ func (n *Node) IPFSAddStr(content string, args ...string) string { // IPFSAddDeterministic produces a CID of a file of a certain size, filled with deterministically generated bytes based on some seed. 
// This ensures deterministic CID on the other end, that can be used in tests. -func (n *Node) IPFSAddFromSeed(size string, seed string, args ...string) string { +func (n *Node) IPFSAddDeterministic(size string, seed string, args ...string) string { log.Debugf("node %d adding %s of deterministic pseudo-random data with seed %q and args: %v", n.ID, size, seed, args) - reader, err := RandomReader(size, seed) + reader, err := DeterministicRandomReader(size, seed) if err != nil { panic(err) } diff --git a/test/cli/testutils/random_reader.go b/test/cli/testutils/random_deterministic.go similarity index 92% rename from test/cli/testutils/random_reader.go rename to test/cli/testutils/random_deterministic.go index 82ccc67800e..e55404168f6 100644 --- a/test/cli/testutils/random_reader.go +++ b/test/cli/testutils/random_deterministic.go @@ -29,7 +29,7 @@ func (r *randomReader) Read(p []byte) (int, error) { // createRandomReader produces specified number of pseudo-random bytes // from a seed. -func RandomReader(sizeStr string, seed string) (io.Reader, error) { +func DeterministicRandomReader(sizeStr string, seed string) (io.Reader, error) { size, err := humanize.ParseBytes(sizeStr) if err != nil { return nil, err diff --git a/test/cli/testutils/random_files.go b/test/cli/testutils/random_files.go index c7dca10d6de..7991cad8309 100644 --- a/test/cli/testutils/random_files.go +++ b/test/cli/testutils/random_files.go @@ -24,20 +24,22 @@ type RandFiles struct { FanoutFiles int // how many files per dir FanoutDirs int // how many dirs per dir - RandomSize bool // randomize file sizes - RandomFanout bool // randomize fanout numbers + RandomSize bool // randomize file sizes + RandomNameSize bool // randomize filename lengths + RandomFanout bool // randomize fanout numbers } func NewRandFiles() *RandFiles { return &RandFiles{ - Rand: rand.New(rand.NewSource(time.Now().UnixNano())), - FileSize: 4096, - FilenameSize: 16, - Alphabet: AlphabetEasy, - FanoutDepth: 2, - FanoutDirs: 5, - 
FanoutFiles: 10, - RandomSize: true, + Rand: rand.New(rand.NewSource(time.Now().UnixNano())), + FileSize: 4096, + FilenameSize: 16, + Alphabet: AlphabetEasy, + FanoutDepth: 2, + FanoutDirs: 5, + FanoutFiles: 10, + RandomSize: true, + RandomNameSize: true, } } @@ -83,7 +85,10 @@ func (r *RandFiles) WriteRandomFile(root string) error { filesize = r.Rand.Int63n(filesize) + 1 } - n := rand.Intn(r.FilenameSize-4) + 4 + n := r.FilenameSize + if r.RandomNameSize { + n = rand.Intn(r.FilenameSize-4) + 4 + } name := r.RandomFilename(n) filepath := path.Join(root, name) f, err := os.Create(filepath) From 318b8e769db1d70490c25982493a7f003f86a4a6 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Apr 2025 15:46:32 +0200 Subject: [PATCH 12/19] test: Import.UnixFSFileMaxLinks CLI override --- test/cli/add_test.go | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/test/cli/add_test.go b/test/cli/add_test.go index 7febe2b54b1..dc3533445b2 100644 --- a/test/cli/add_test.go +++ b/test/cli/add_test.go @@ -108,6 +108,33 @@ func TestAdd(t *testing.T) { require.Equal(t, shortStringCidV1NoRawLeaves, cidStr) }) + t.Run("produced unixfs max file links: command flag --max-file-links overrides configuration in Import.UnixFSFileMaxLinks", func(t *testing.T) { + t.Parallel() + + // + // UnixFSChunker=size-262144 (256KiB) + // Import.UnixFSFileMaxLinks=174 + node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v0") // legacy-cid-v0 for determinism across all params + node.UpdateConfig(func(cfg *config.Config) { + cfg.Import.UnixFSChunker = *config.NewOptionalString("size-262144") // 256 KiB chunks + cfg.Import.UnixFSFileMaxLinks = *config.NewOptionalInteger(174) // max 174 per level + }) + node.StartDaemon() + defer node.StopDaemon() + + // Add 256MiB file: + // 1024 * 256KiB should fit in single layer + seed := shortString + cidStr := node.IPFSAddDeterministic("262144KiB", seed, "--max-file-links", "1024") + root, err := node.InspectPBNode(cidStr) +
assert.NoError(t, err) + + // Expect 1024 links due to cli parameter raising link limit from 174 to 1024 + require.Equal(t, 1024, len(root.Links)) + // expect same CID every time + require.Equal(t, "QmbBftNHWmjSWKLC49dMVrfnY8pjrJYntiAXirFJ7oJrNk", cidStr) + }) + t.Run("ipfs init --profile=legacy-cid-v0 sets config that produces legacy CIDv0", func(t *testing.T) { t.Parallel() node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v0") From 7d176222011fe456c431ff4393831954014285c9 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Apr 2025 17:52:35 +0200 Subject: [PATCH 13/19] =?UTF-8?q?refactor:=20legacy-cid-v1=20=E2=86=92=20t?= =?UTF-8?q?est-cid-v1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit keeping old name to minimize noise, just document pre-existing defaults --- config/profile.go | 2 +- docs/changelogs/v0.35.md | 2 +- test/cli/add_test.go | 18 +++++++++--------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/config/profile.go b/config/profile.go index f05a865ef04..0e11d9487fd 100644 --- a/config/profile.go +++ b/config/profile.go @@ -279,7 +279,7 @@ fetching may be degraded. return nil }, }, - "legacy-cid-v1": { + "test-cid-v1": { Description: `Makes UnixFS import produce legacy CIDv1 with the same suboptimal settings as legacy-cid-v0, but with 1MiB file chunk, CIDv1 and raw leaves. 
Use only if legacy behavior is required.`, Transform: func(c *Config) error { c.Import.CidVersion = *NewOptionalInteger(1) diff --git a/docs/changelogs/v0.35.md b/docs/changelogs/v0.35.md index 4541e37c49b..28c14bc9c04 100644 --- a/docs/changelogs/v0.35.md +++ b/docs/changelogs/v0.35.md @@ -70,7 +70,7 @@ You can set default values for these options using the following configuration s ##### Updated Configuration Profiles The release updated configuration [profiles](https://github.com/ipfs/kubo/blob/master/docs/config.md#profile) to incorporate these new `Import.*` settings: -- Renamed Profile: `test-cid-v1` is now `legacy-cid-v1`. This profile locks in current defaults, ensuring stability for users who prefer unchanged behavior in future releases. +- Updated Profile: `test-cid-v1` now includes current defaults as explicit `Import.UnixFSFileMaxLinks=174`, `Import.UnixFSDirectoryMaxLinks=0`, `Import.UnixFSHAMTDirectoryMaxFanout=256` and `Import.UnixFSHAMTDirectorySizeThreshold=256KiB` - New Profile: `test-cid-v1-2025-v35` adopts modern defaults, increasing the maximum file DAG width from 174 to 1024 and raising the HAMT directory sharding threshold from 256KiB to 1MiB, aligning with 1MiB file chunks. - Benefits: Larger blocks (256KiB to 1MiB) enable up to 4x faster Amino DHT announcements and lookups. - Status: This profile is a candidate for future default settings. 
diff --git a/test/cli/add_test.go b/test/cli/add_test.go index dc3533445b2..3e23a306201 100644 --- a/test/cli/add_test.go +++ b/test/cli/add_test.go @@ -226,9 +226,9 @@ func TestAdd(t *testing.T) { }) }) - t.Run("ipfs init --profile=legacy-cid-v1 produces CIDv1 with raw leaves", func(t *testing.T) { + t.Run("ipfs init --profile=test-cid-v1 produces CIDv1 with raw leaves", func(t *testing.T) { t.Parallel() - node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v1") + node := harness.NewT(t).NewNode().Init("--profile=test-cid-v1") node.StartDaemon() defer node.StopDaemon() @@ -236,10 +236,10 @@ func TestAdd(t *testing.T) { require.Equal(t, shortStringCidV1, cidStr) // raw leaf }) - t.Run("ipfs init --profile=legacy-cid-v1 applies UnixFSChunker=size-1048576", func(t *testing.T) { + t.Run("ipfs init --profile=test-cid-v1 applies UnixFSChunker=size-1048576", func(t *testing.T) { t.Parallel() seed := "v1-seed" - profile := "--profile=legacy-cid-v1" + profile := "--profile=test-cid-v1" t.Run("under UnixFSFileMaxLinks=174", func(t *testing.T) { t.Parallel() @@ -271,10 +271,10 @@ func TestAdd(t *testing.T) { }) }) - t.Run("ipfs init --profile=legacy-cid-v1 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) { + t.Run("ipfs init --profile=test-cid-v1 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) { t.Parallel() - seed := "hamt-legacy-cid-v1" - profile := "--profile=legacy-cid-v1" + seed := "hamt-cid-v1" + profile := "--profile=test-cid-v1" t.Run("under UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) { t.Parallel() @@ -313,7 +313,7 @@ func TestAdd(t *testing.T) { // Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout=256 root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) - require.Equal(t, 245, len(root.Links)) + require.Equal(t, 252, len(root.Links)) }) }) @@ -354,7 +354,7 @@ func TestAdd(t *testing.T) 
{ t.Run("ipfs init --profile=test-cid-v1-2025-v35 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) { t.Parallel() - seed := "hamt-legacy-cid-v1" + seed := "hamt-cid-v1" profile := "--profile=test-cid-v1-2025-v35" t.Run("under UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) { From b08bc4d14ff5ef32f496cdd772356b67c0c4c7ab Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Apr 2025 18:19:39 +0200 Subject: [PATCH 14/19] refactor: test-cid-v1-wide with UnixFSHAMTDirectoryMaxFanout=1024 lets make the fanout match the max links from files and rename profile to `-wide` this will make it easier to discuss in https://github.com/ipfs/specs/pull/499 --- config/profile.go | 8 ++++---- docs/changelogs/v0.35.md | 6 ++---- docs/config.md | 28 +++++++++++++++++++++++++--- test/cli/add_test.go | 22 +++++++++++----------- 4 files changed, 42 insertions(+), 22 deletions(-) diff --git a/config/profile.go b/config/profile.go index 0e11d9487fd..a26d74f99a5 100644 --- a/config/profile.go +++ b/config/profile.go @@ -280,7 +280,7 @@ fetching may be degraded. }, }, "test-cid-v1": { - Description: `Makes UnixFS import produce legacy CIDv1 with the same suboptimal settings as legacy-cid-v0, but with 1MiB file chunk, CIDv1 and raw leaves. Use only if legacy behavior is required.`, + Description: `Makes UnixFS import produce CIDv1 with raw leaves, sha2-256 and 1 MiB chunks (max 174 links per file, 256 per HAMT node, switch dir to HAMT above 256KiB).`, Transform: func(c *Config) error { c.Import.CidVersion = *NewOptionalInteger(1) c.Import.UnixFSRawLeaves = True @@ -293,8 +293,8 @@ fetching may be degraded. 
return nil }, }, - "test-cid-v1-2025-v35": { - Description: `Makes UnixFS import produce modern CIDv1 with raw leaves, sha2-256 and 1 MiB chunks and wider file DAGs (1024 links per level).`, + "test-cid-v1-wide": { + Description: `Makes UnixFS import produce CIDv1 with raw leaves, sha2-256 and 1MiB chunks and wider file DAGs (max 1024 links per every node type, switch dir to HAMT above 1MiB).`, Transform: func(c *Config) error { c.Import.CidVersion = *NewOptionalInteger(1) c.Import.UnixFSRawLeaves = True @@ -302,7 +302,7 @@ fetching may be degraded. c.Import.HashFunction = *NewOptionalString("sha2-256") c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(1024) c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0) // no limit here, use size-based Import.UnixFSHAMTDirectorySizeThreshold instead - c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256) + c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(1024) c.Import.UnixFSHAMTDirectorySizeThreshold = *NewOptionalString("1MiB") // 1MiB return nil }, diff --git a/docs/changelogs/v0.35.md b/docs/changelogs/v0.35.md index 28c14bc9c04..bd6cefe3f57 100644 --- a/docs/changelogs/v0.35.md +++ b/docs/changelogs/v0.35.md @@ -71,13 +71,11 @@ You can set default values for these options using the following configuration s The release updated configuration [profiles](https://github.com/ipfs/kubo/blob/master/docs/config.md#profile) to incorporate these new `Import.*` settings: - Updated Profile: `test-cid-v1` now includes current defaults as explicit `Import.UnixFSFileMaxLinks=174`, `Import.UnixFSDirectoryMaxLinks=0`, `Import.UnixFSHAMTDirectoryMaxFanout=256` and `Import.UnixFSHAMTDirectorySizeThreshold=256KiB` -- New Profile: `test-cid-v1-2025-v35` adopts modern defaults, increasing the maximum file DAG width from 174 to 1024 and raising the HAMT directory sharding threshold from 256KiB to 1MiB, aligning with 1MiB file chunks. 
- - Benefits: Larger blocks (256KiB to 1MiB) enable up to 4x faster Amino DHT announcements and lookups. - - Status: This profile is a candidate for future default settings. +- New Profile: `test-cid-v1-wide` adopts experimental directory DAG-shaping defaults, increasing the maximum file DAG width from 174 to 1024, HAMT fanout from 256 to 1024, and raising the HAMT directory sharding threshold from 256KiB to 1MiB, aligning with 1MiB file chunks. - Feedback: Try it out and share your thoughts at [discuss.ipfs.tech/t/should-we-profile-cids](https://discuss.ipfs.tech/t/should-we-profile-cids/18507) or [ipfs/specs#499](https://github.com/ipfs/specs/pull/499). > [!TIP] -> Apply the modern CIDv1 test profile with `ipfs config profile apply test-cid-v1-2025-v35`. +> Apply the modern CIDv1 test profile with `ipfs config profile apply test-cid-v1-wide`. #### 📦️ Important dependency updates diff --git a/docs/config.md b/docs/config.md index 7b83e670966..56b88ab7200 100644 --- a/docs/config.md +++ b/docs/config.md @@ -2800,16 +2800,38 @@ Disables [Reprovider](#reprovider) system (and announcing to Amino DHT). Makes UnixFS import (`ipfs add`) produce legacy CIDv0 with no raw leaves, sha2-256 and 256 KiB chunks. +See [`config/profile.go`](https://github.com/ipfs/kubo/blob/master/config/profile.go) for exact [`Import.*`](#import) settings. + > [!NOTE] > This profile is provided for legacy users and should not be used for new projects. ### `test-cid-v1` profile -Makes UnixFS import (`ipfs add`) produce modern CIDv1 with raw leaves, sha2-256 and 1 MiB chunks. +Makes UnixFS import (`ipfs add`) produce modern CIDv1 with raw leaves, sha2-256 +and 1 MiB chunks (max 174 links per file, 256 per HAMT node, switch dir to HAMT +above 256KiB). + +See [`config/profile.go`](https://github.com/ipfs/kubo/blob/master/config/profile.go) for exact [`Import.*`](#import) settings. > [!NOTE] -> This profile will become the new implicit default, provided for testing purposes. -> Follow [kubo#4143](https://github.com/ipfs/kubo/issues/4143) for more details. +> [`Import.*`](#import) settings applied by this profile MAY change in a future release.
Provided for testing purposes. +> +> Follow [kubo#4143](https://github.com/ipfs/kubo/issues/4143) for more details, +> and provide feedback in [discuss.ipfs.tech/t/should-we-profile-cids](https://discuss.ipfs.tech/t/should-we-profile-cids/18507) or [ipfs/specs#499](https://github.com/ipfs/specs/pull/499). + +### `test-cid-v1-wide` profile + +Makes UnixFS import (`ipfs add`) produce modern CIDv1 with raw leaves, sha2-256 +and 1 MiB chunks and wider file DAGs (max 1024 links per every node type, +switch dir to HAMT above 1MiB). + +See [`config/profile.go`](https://github.com/ipfs/kubo/blob/master/config/profile.go) for exact [`Import.*`](#import) settings. + +> [!NOTE] +> [`Import.*`](#import) settings applied by this profile MAY change in a future release. Provided for testing purposes. +> +> Follow [kubo#4143](https://github.com/ipfs/kubo/issues/4143) for more details, +> and provide feedback in [discuss.ipfs.tech/t/should-we-profile-cids](https://discuss.ipfs.tech/t/should-we-profile-cids/18507) or [ipfs/specs#499](https://github.com/ipfs/specs/pull/499). ## Types diff --git a/test/cli/add_test.go b/test/cli/add_test.go index 3e23a306201..775a6063baa 100644 --- a/test/cli/add_test.go +++ b/test/cli/add_test.go @@ -199,7 +199,7 @@ func TestAdd(t *testing.T) { require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() - // Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout=256 (indicating regular "basic" directory" + // Confirm the number of links is more than UnixFSHAMTDirectorySizeThreshold (indicating regular "basic" directory" root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.Equal(t, 903, len(root.Links)) @@ -219,7 +219,7 @@ func TestAdd(t *testing.T) { require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() - // Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout=256 + // Confirm this time, the number of links is less than UnixFSHAMTDirectorySizeThreshold root, err := node.InspectPBNode(cidStr) assert.NoError(t, 
err) require.Equal(t, 252, len(root.Links)) @@ -290,7 +290,7 @@ func TestAdd(t *testing.T) { require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() - // Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout=256 (indicating regular "basic" directory" + // Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout (indicating regular "basic" directory" root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.Equal(t, 897, len(root.Links)) @@ -310,17 +310,17 @@ func TestAdd(t *testing.T) { require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() - // Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout=256 + // Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.Equal(t, 252, len(root.Links)) }) }) - t.Run("ipfs init --profile=test-cid-v1-2025-v35 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks=1024", func(t *testing.T) { + t.Run("ipfs init --profile=test-cid-v1-wide applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks=1024", func(t *testing.T) { t.Parallel() seed := "v1-seed-1024" - profile := "--profile=test-cid-v1-2025-v35" + profile := "--profile=test-cid-v1-wide" t.Run("under UnixFSFileMaxLinks=1024", func(t *testing.T) { t.Parallel() @@ -352,10 +352,10 @@ func TestAdd(t *testing.T) { }) }) - t.Run("ipfs init --profile=test-cid-v1-2025-v35 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) { + t.Run("ipfs init --profile=test-cid-v1-wide applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) { t.Parallel() seed := "hamt-cid-v1" - profile := "--profile=test-cid-v1-2025-v35" + profile := "--profile=test-cid-v1-wide" t.Run("under UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) { t.Parallel() @@ -371,7 +371,7 @@ 
func TestAdd(t *testing.T) { require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() - // Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout=256 (indicating regular "basic" directory" + // Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout (indicating regular "basic" directory" root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.Equal(t, 3599, len(root.Links)) @@ -391,10 +391,10 @@ func TestAdd(t *testing.T) { require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() - // Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout=256 + // Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) - require.Equal(t, 256, len(root.Links)) + require.Equal(t, 992, len(root.Links)) }) }) From 3e46c946147e7529ade9e832a56af34ff25456df Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Apr 2025 19:58:50 +0200 Subject: [PATCH 15/19] fix: apply both hamt settings for consistency threshold exists only as a global flag, so it was already set, but we were missing fanout. 
this applies both, for global consistency --- core/node/groups.go | 10 ++++++---- docs/config.md | 12 ++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/core/node/groups.go b/core/node/groups.go index 28f075946b0..68f1449c086 100644 --- a/core/node/groups.go +++ b/core/node/groups.go @@ -409,18 +409,20 @@ func IPFS(ctx context.Context, bcfg *BuildCfg) fx.Option { } // Auto-sharding settings - shardSizeString := cfg.Import.UnixFSHAMTDirectorySizeThreshold.WithDefault(config.DefaultUnixFSHAMTDirectorySizeThreshold) - shardSizeInt, err := humanize.ParseBytes(shardSizeString) + shardingThresholdString := cfg.Import.UnixFSHAMTDirectorySizeThreshold.WithDefault(config.DefaultUnixFSHAMTDirectorySizeThreshold) + shardSingThresholdInt, err := humanize.ParseBytes(shardingThresholdString) if err != nil { return fx.Error(err) } + shardMaxFanout := cfg.Import.UnixFSHAMTDirectoryMaxFanout.WithDefault(config.DefaultUnixFSHAMTDirectoryMaxFanout) // TODO: avoid overriding this globally, see if we can extend Directory interface like Get/SetMaxLinks from https://github.com/ipfs/boxo/pull/906 - uio.HAMTShardingSize = int(shardSizeInt) + uio.HAMTShardingSize = int(shardSingThresholdInt) + uio.DefaultShardWidth = int(shardMaxFanout) // Migrate users of deprecated Experimental.ShardingEnabled flag if cfg.Experimental.ShardingEnabled { logger.Fatal("The `Experimental.ShardingEnabled` field is no longer used, please remove it from the config.\n" + - "go-ipfs now automatically shards when directory block is bigger than `" + shardSizeString + "`.\n" + + "go-ipfs now automatically shards when directory block is bigger than `" + shardingThresholdString + "`.\n" + "If you need to restore the old behavior (sharding everything) set `Import.UnixFSHAMTDirectorySizeThreshold` to `1B`.\n") } if !cfg.Internal.UnixFSShardingSizeThreshold.IsDefault() { diff --git a/docs/config.md b/docs/config.md index 56b88ab7200..1ec5677db28 100644 --- a/docs/config.md +++ b/docs/config.md @@ 
-2583,6 +2583,8 @@ This setting will cause basic directories to be converted to HAMTs when they exceed the maximum number of children. This happens transparently during the add process. The fanout of HAMT nodes is controlled by `MaxHAMTFanout`. +Commands affected: `ipfs add` + Default: `0` (no limit, because [`Import.UnixFSHAMTDirectorySizeThreshold`](#importunixfshamtdirectorysizethreshold) triggers controls when to switch to HAMT sharding when a directory grows too big) Type: `optionalInteger` @@ -2597,6 +2599,10 @@ become too big or reach `MaxLinks`. A HAMT is an structure made of unixfs nodes that store the list of elements in the folder. This option controls the maximum number of children that the HAMT nodes can have. +Needs to be a power of two (shard entry size) and multiple of 8 (bitfield size). + +Commands affected: `ipfs add`, `ipfs daemon` (globally overrides [`boxo/ipld/unixfs/io.DefaultShardWidth`](https://github.com/ipfs/boxo/blob/6c5a07602aed248acc86598f30ab61923a54a83e/ipld/unixfs/io/directory.go#L30C5-L30C22)) + Default: `256` Type: `optionalInteger` @@ -2612,8 +2618,14 @@ sizes stay under 2MiB in order for them to be reliably transferable through the networking stack. At the time of writing this, IPFS peers on the public swarm tend to ignore requests for blocks bigger than 2MiB. +Uses implementation from `boxo/ipld/unixfs/io/directory`, where the size is not +the *exact* block size of the encoded directory but just the estimated size +based on the byte length of DAG-PB link names and CIDs. + Setting to `1B` is functionally equivalent to always using HAMT (useful in testing). 
+Commands affected: `ipfs add`, `ipfs daemon` (globally overrides [`boxo/ipld/unixfs/io.HAMTShardingSize`](https://github.com/ipfs/boxo/blob/6c5a07602aed248acc86598f30ab61923a54a83e/ipld/unixfs/io/directory.go#L26)) + Default: `256KiB` (may change, inspect `DefaultUnixFSHAMTDirectorySizeThreshold` to confirm) Type: `optionalBytes` From bb464951e9b1d1a1bc00d52f73ae20873c7d6d9e Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Apr 2025 20:12:31 +0200 Subject: [PATCH 16/19] docs: ipfs add --help --- core/commands/add.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/core/commands/add.go b/core/commands/add.go index 85d5471584d..f800e4f42d0 100644 --- a/core/commands/add.go +++ b/core/commands/add.go @@ -146,6 +146,9 @@ new flags may be added in the future. It is not guaranteed for the implicit defaults of 'ipfs add' to remain the same in future Kubo releases, or for other IPFS software to use the same import parameters as Kubo. +Use Import.* configuration options to override global implicit defaults: +https://github.com/ipfs/kubo/blob/master/docs/config.md#import + If you need to back up or transport content-addressed data using a non-IPFS medium, CID can be preserved with CAR files. See 'dag export' and 'dag import' for more information. @@ -169,15 +172,15 @@ See 'dag export' and 'dag import' for more information. 
cmds.BoolOption(trickleOptionName, "t", "Use trickle-dag format for dag generation."), cmds.BoolOption(onlyHashOptionName, "n", "Only chunk and hash - do not write to disk."), cmds.BoolOption(wrapOptionName, "w", "Wrap files with a directory object."), - cmds.StringOption(chunkerOptionName, "s", "Chunking algorithm, size-[bytes], rabin-[min]-[avg]-[max] or buzhash"), - cmds.BoolOption(rawLeavesOptionName, "Use raw blocks for leaf nodes."), - cmds.IntOption(maxFileLinksOptionName, "Limit the maximum number of links in UnixFS file nodes to this value."), - cmds.IntOption(maxDirectoryLinksOptionName, "Limit the maximum number of links in UnixFS basic directory nodes to this value. WARNING: use with caution, Import.UnixFSHAMTThreshold is a safer alternative."), - cmds.IntOption(maxHAMTFanoutOptionName, "Limit the maximum number of links of a UnixFS HAMT directory node to this (power of 2, multiple of 8)."), + cmds.StringOption(chunkerOptionName, "s", "Chunking algorithm, size-[bytes], rabin-[min]-[avg]-[max] or buzhash. Default: Import.UnixFSChunker"), + cmds.BoolOption(rawLeavesOptionName, "Use raw blocks for leaf nodes. Default: Import.UnixFSRawLeaves"), + cmds.IntOption(maxFileLinksOptionName, "Limit the maximum number of links in UnixFS file nodes to this value. (experimental) Default: Import.UnixFSFileMaxLinks"), + cmds.IntOption(maxDirectoryLinksOptionName, "Limit the maximum number of links in UnixFS basic directory nodes to this value. Default: Import.UnixFSDirectoryMaxLinks. WARNING: experimental, Import.UnixFSHAMTThreshold is a safer alternative."), + cmds.IntOption(maxHAMTFanoutOptionName, "Limit the maximum number of links of a UnixFS HAMT directory node to this (power of 2, multiple of 8). Default: Import.UnixFSHAMTDirectoryMaxFanout WARNING: experimental, see Import.UnixFSHAMTDirectorySizeThreshold as well."), cmds.BoolOption(noCopyOptionName, "Add the file using filestore. Implies raw-leaves. 
(experimental)"), cmds.BoolOption(fstoreCacheOptionName, "Check the filestore for pre-existing blocks. (experimental)"), - cmds.IntOption(cidVersionOptionName, "CID version. Defaults to 0 unless an option that depends on CIDv1 is passed. Passing version 1 will cause the raw-leaves option to default to true."), - cmds.StringOption(hashOptionName, "Hash function to use. Implies CIDv1 if not sha2-256. (experimental)"), + cmds.IntOption(cidVersionOptionName, "CID version. Defaults to 0 unless an option that depends on CIDv1 is passed. Passing version 1 will cause the raw-leaves option to default to true. Default: Import.CidVersion"), + cmds.StringOption(hashOptionName, "Hash function to use. Implies CIDv1 if not sha2-256. Default: Import.HashFunction"), cmds.BoolOption(inlineOptionName, "Inline small blocks into CIDs. (experimental)"), cmds.IntOption(inlineLimitOptionName, "Maximum block size to inline. (experimental)").WithDefault(32), cmds.BoolOption(pinOptionName, "Pin locally to protect added files from garbage collection.").WithDefault(true), From 61e592aad7a1a1f6381bbda148ee10dc2d67dac9 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Apr 2025 21:13:23 +0200 Subject: [PATCH 17/19] chore: latest boxo@main --- docs/examples/kubo-as-a-library/go.mod | 2 +- docs/examples/kubo-as-a-library/go.sum | 4 ++-- go.mod | 2 +- go.sum | 4 ++-- test/dependencies/go.mod | 2 +- test/dependencies/go.sum | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/examples/kubo-as-a-library/go.mod b/docs/examples/kubo-as-a-library/go.mod index 8db4922e5ca..7adc7c80a81 100644 --- a/docs/examples/kubo-as-a-library/go.mod +++ b/docs/examples/kubo-as-a-library/go.mod @@ -7,7 +7,7 @@ go 1.24 replace github.com/ipfs/kubo => ./../../.. 
require ( - github.com/ipfs/boxo v0.29.2-0.20250411173252-654578d290f8 + github.com/ipfs/boxo v0.29.2-0.20250415191135-dc60fe747c37 github.com/ipfs/kubo v0.0.0-00010101000000-000000000000 github.com/libp2p/go-libp2p v0.41.1 github.com/multiformats/go-multiaddr v0.15.0 diff --git a/docs/examples/kubo-as-a-library/go.sum b/docs/examples/kubo-as-a-library/go.sum index 15676531f0e..5e2f686a0fb 100644 --- a/docs/examples/kubo-as-a-library/go.sum +++ b/docs/examples/kubo-as-a-library/go.sum @@ -298,8 +298,8 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/boxo v0.29.2-0.20250411173252-654578d290f8 h1:Vxe2X6RO2Xf+YoFZbLOlOW3ErpmVrTCckyIJJJArc+c= -github.com/ipfs/boxo v0.29.2-0.20250411173252-654578d290f8/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI= +github.com/ipfs/boxo v0.29.2-0.20250415191135-dc60fe747c37 h1:q3a+2FIbWzZbx/yUqpuG4jLVSa6GvxtRfx9TU5GLiN0= +github.com/ipfs/boxo v0.29.2-0.20250415191135-dc60fe747c37/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ= diff --git a/go.mod b/go.mod index ce31ab9888b..68e8c7a3f00 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/hashicorp/go-version v1.7.0 github.com/ipfs-shipyard/nopfs v0.0.14 github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 - github.com/ipfs/boxo v0.29.2-0.20250411173252-654578d290f8 + github.com/ipfs/boxo v0.29.2-0.20250415191135-dc60fe747c37 github.com/ipfs/go-block-format v0.2.0 github.com/ipfs/go-cid v0.5.0 
github.com/ipfs/go-cidutil v0.1.0 diff --git a/go.sum b/go.sum index f1de7b142d9..cf9c33444e2 100644 --- a/go.sum +++ b/go.sum @@ -362,8 +362,8 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/boxo v0.29.2-0.20250411173252-654578d290f8 h1:Vxe2X6RO2Xf+YoFZbLOlOW3ErpmVrTCckyIJJJArc+c= -github.com/ipfs/boxo v0.29.2-0.20250411173252-654578d290f8/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI= +github.com/ipfs/boxo v0.29.2-0.20250415191135-dc60fe747c37 h1:q3a+2FIbWzZbx/yUqpuG4jLVSa6GvxtRfx9TU5GLiN0= +github.com/ipfs/boxo v0.29.2-0.20250415191135-dc60fe747c37/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ= diff --git a/test/dependencies/go.mod b/test/dependencies/go.mod index a6e1dc2417e..fef5063001b 100644 --- a/test/dependencies/go.mod +++ b/test/dependencies/go.mod @@ -118,7 +118,7 @@ require ( github.com/huin/goupnp v1.3.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/ipfs/bbloom v0.0.4 // indirect - github.com/ipfs/boxo v0.29.2-0.20250411173252-654578d290f8 // indirect + github.com/ipfs/boxo v0.29.2-0.20250415191135-dc60fe747c37 // indirect github.com/ipfs/go-bitfield v1.1.0 // indirect github.com/ipfs/go-block-format v0.2.0 // indirect github.com/ipfs/go-cid v0.5.0 // indirect diff --git a/test/dependencies/go.sum b/test/dependencies/go.sum index 0796eff6c1c..a0be368440a 100644 --- a/test/dependencies/go.sum +++ b/test/dependencies/go.sum @@ -298,8 +298,8 @@ 
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/boxo v0.29.2-0.20250411173252-654578d290f8 h1:Vxe2X6RO2Xf+YoFZbLOlOW3ErpmVrTCckyIJJJArc+c= -github.com/ipfs/boxo v0.29.2-0.20250411173252-654578d290f8/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI= +github.com/ipfs/boxo v0.29.2-0.20250415191135-dc60fe747c37 h1:q3a+2FIbWzZbx/yUqpuG4jLVSa6GvxtRfx9TU5GLiN0= +github.com/ipfs/boxo v0.29.2-0.20250415191135-dc60fe747c37/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs= From 104ede9b6b3b43fd674014f3c3c47e252603777f Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Apr 2025 21:48:04 +0200 Subject: [PATCH 18/19] docs: changelog --- docs/changelogs/v0.35.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/changelogs/v0.35.md b/docs/changelogs/v0.35.md index bd6cefe3f57..bc3a857f8b7 100644 --- a/docs/changelogs/v0.35.md +++ b/docs/changelogs/v0.35.md @@ -48,7 +48,11 @@ The WebUI, accessible at http://127.0.0.1:5001/webui/, now includes support for #### Enhanced DAG-Shaping Controls for `ipfs add` -This release advances CIDv1 support by introducing fine-grained control over UnixFS DAG shaping during data ingestion with the `ipfs add` command. Kubo now allows users to customize the maximum number of links in per UnixFS block/chunk. 
+This release advances CIDv1 support by introducing fine-grained control over UnixFS DAG shaping during data ingestion with the `ipfs add` command. + +Wider DAG trees (more links per node, higher fanout, larger thresholds) are beneficial for large files and directories with many files, reducing tree depth and lookup latency in high-latency networks, but they increase node size, straining memory and CPU on resource-constrained devices. Narrower trees (lower link count, lower fanout, smaller thresholds) are preferable for smaller directories, frequent updates, or low-power clients, minimizing overhead and ensuring compatibility, though they may increase traversal steps for very large datasets. + +Kubo now allows users to act on these tradeoffs and customize the width of the DAG created by the `ipfs add` command. ##### New `ipfs add` Options @@ -69,13 +73,13 @@ You can set default values for these options using the following configuration s ##### Updated Configuration Profiles -The release updated configuration [profiles](https://github.com/ipfs/kubo/blob/master/docs/config.md#profile) to incorporate these new `Import.*` settings: +The release updated configuration [profiles](https://github.com/ipfs/kubo/blob/master/docs/config.md#profiles) to incorporate these new `Import.*` settings: - Updated Profile: `test-cid-v1` now includes current defaults as explicit `Import.UnixFSFileMaxLinks=174`, `Import.UnixFSDirectoryMaxLinks=0`, `Import.UnixFSHAMTDirectoryMaxFanout=256` and `Import.UnixFSHAMTDirectorySizeThreshold=256KiB` - New Profile: `test-cid-v1-wide` adopts experimental directory DAG-shaping defaults, increasing the maximum file DAG width from 174 to 1024, HAMT fanout from 256 to 1024, and raising the HAMT directory sharding threshold from 256KiB to 1MiB, aligning with 1MiB file chunks.
- Feedback: Try it out and share your thoughts at [discuss.ipfs.tech/t/should-we-profile-cids](https://discuss.ipfs.tech/t/should-we-profile-cids/18507) or [ipfs/specs#499](https://github.com/ipfs/specs/pull/499). > [!TIP] -> Apply the modern CIDv1 test profile with `ipfs config profile apply test-cid-v1-wide`. +> Apply one of CIDv1 test [profiles](https://github.com/ipfs/kubo/blob/master/docs/config.md#profiles) with `ipfs config profile apply test-cid-v1[-wide]`. #### 📦️ Important dependency updates From 70d159c85ecded1907de9e0d3f7f5871f8582268 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Apr 2025 22:29:19 +0200 Subject: [PATCH 19/19] fix: migrate old config key if cfg.Internal.UnixFSShardingSizeThreshold is set and cfg.Import.UnixFSHAMTDirectorySizeThreshold is not, assign old value to new field and print ERROR urging user to migrate this way we wont break any existing CI etc --- core/node/groups.go | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/core/node/groups.go b/core/node/groups.go index 68f1449c086..4a471f17038 100644 --- a/core/node/groups.go +++ b/core/node/groups.go @@ -408,22 +408,9 @@ func IPFS(ctx context.Context, bcfg *BuildCfg) fx.Option { return fx.Error(err) } - // Auto-sharding settings - shardingThresholdString := cfg.Import.UnixFSHAMTDirectorySizeThreshold.WithDefault(config.DefaultUnixFSHAMTDirectorySizeThreshold) - shardSingThresholdInt, err := humanize.ParseBytes(shardingThresholdString) - if err != nil { - return fx.Error(err) - } - shardMaxFanout := cfg.Import.UnixFSHAMTDirectoryMaxFanout.WithDefault(config.DefaultUnixFSHAMTDirectoryMaxFanout) - // TODO: avoid overriding this globally, see if we can extend Directory interface like Get/SetMaxLinks from https://github.com/ipfs/boxo/pull/906 - uio.HAMTShardingSize = int(shardSingThresholdInt) - uio.DefaultShardWidth = int(shardMaxFanout) - // Migrate users of deprecated Experimental.ShardingEnabled flag if 
cfg.Experimental.ShardingEnabled { - logger.Fatal("The `Experimental.ShardingEnabled` field is no longer used, please remove it from the config.\n" + - "go-ipfs now automatically shards when directory block is bigger than `" + shardingThresholdString + "`.\n" + - "If you need to restore the old behavior (sharding everything) set `Import.UnixFSHAMTDirectorySizeThreshold` to `1B`.\n") + logger.Fatal("The `Experimental.ShardingEnabled` field is no longer used, please remove it from the config. Use Import.UnixFSHAMTDirectorySizeThreshold instead.") } if !cfg.Internal.UnixFSShardingSizeThreshold.IsDefault() { msg := "The `Internal.UnixFSShardingSizeThreshold` field was renamed to `Import.UnixFSHAMTDirectorySizeThreshold`. Please update your config.\n" @@ -431,8 +418,20 @@ func IPFS(ctx context.Context, bcfg *BuildCfg) fx.Option { logger.Fatal(msg) // conflicting values, hard fail } logger.Error(msg) + cfg.Import.UnixFSHAMTDirectorySizeThreshold = *cfg.Internal.UnixFSShardingSizeThreshold } + // Auto-sharding settings + shardingThresholdString := cfg.Import.UnixFSHAMTDirectorySizeThreshold.WithDefault(config.DefaultUnixFSHAMTDirectorySizeThreshold) + shardSingThresholdInt, err := humanize.ParseBytes(shardingThresholdString) + if err != nil { + return fx.Error(err) + } + shardMaxFanout := cfg.Import.UnixFSHAMTDirectoryMaxFanout.WithDefault(config.DefaultUnixFSHAMTDirectoryMaxFanout) + // TODO: avoid overriding this globally, see if we can extend Directory interface like Get/SetMaxLinks from https://github.com/ipfs/boxo/pull/906 + uio.HAMTShardingSize = int(shardSingThresholdInt) + uio.DefaultShardWidth = int(shardMaxFanout) + return fx.Options( bcfgOpts,