diff --git a/CHANGELOG.md b/CHANGELOG.md
index b12ac141f..d5f2fcbc5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ The following emojis are used to highlight certain changes:
 
 ### Changed
 
+- 🛠 `chunker`, `ipld/unixfs/importer/helpers`: block size limits raised from 1MiB to 2MiB to match the [bitswap spec](https://specs.ipfs.tech/bitswap-protocol/#block-sizes). Max chunker size is `2MiB - 256 bytes` to leave room for protobuf framing when `--raw-leaves=false`. IPIP-499 profiles use lower chunk sizes (256KiB and 1MiB) and are not affected.
 - 🛠 `chunker`: `DefaultBlockSize` changed from `const` to `var` to allow runtime configuration via global profiles. [#1088](https://github.com/ipfs/boxo/pull/1088), [IPIP-499](https://github.com/ipfs/specs/pull/499)
 - `gateway`: 🛠 ✨ [IPIP-523](https://github.com/ipfs/specs/pull/523) `?format=` URL query parameter now takes precedence over `Accept` HTTP header, ensuring deterministic HTTP cache behavior and allowing browsers to use `?format=` even when they send `Accept` headers with specific content types. [#1074](https://github.com/ipfs/boxo/pull/1074)
 - `gateway`: 🛠 ✨ [IPIP-524](https://github.com/ipfs/specs/pull/524) codec conversions (e.g., dag-pb to dag-json, dag-json to dag-cbor) are no longer performed by default. Requesting a format that differs from the block's codec now returns HTTP 406 Not Acceptable with a hint to fetch raw blocks (`?format=raw`) and convert client-side. Set `Config.AllowCodecConversion` to `true` to restore the old behavior. [#1077](https://github.com/ipfs/boxo/pull/1077)
diff --git a/bitswap/message/message_test.go b/bitswap/message/message_test.go
index 82cecf1ee..c58eb22f7 100644
--- a/bitswap/message/message_test.go
+++ b/bitswap/message/message_test.go
@@ -7,9 +7,12 @@ import (
 
 	"github.com/ipfs/boxo/bitswap/client/wantlist"
 	pb "github.com/ipfs/boxo/bitswap/message/pb"
+	chunker "github.com/ipfs/boxo/chunker"
 	blocks "github.com/ipfs/go-block-format"
 	cid "github.com/ipfs/go-cid"
 	"github.com/ipfs/go-test/random"
+	"github.com/libp2p/go-libp2p/core/network"
+	mh "github.com/multiformats/go-multihash"
 	"google.golang.org/protobuf/proto"
 )
 
@@ -302,3 +305,60 @@ func TestEntrySize(t *testing.T) {
 		t.Fatal("entry size calculation incorrect", e.Size(), proto.Size(epb))
 	}
 }
+
+// TestBlockSizeLimitFitsInLibp2pMessage verifies that a single block at the
+// bitswap spec limit (chunker.BlockSizeLimit, 2MiB) fits within the libp2p
+// message size limit (network.MessageSizeMax, 4MiB) when serialized as a
+// bitswap message. This guards the invariant that the spec block size can
+// always be transferred over bitswap-over-libp2p. The test uses CIDv1 with
+// raw codec and SHA2-256 multihash, matching the unixfs-v1-2025 profile.
+func TestBlockSizeLimitFitsInLibp2pMessage(t *testing.T) {
+	t.Parallel()
+
+	// create a CIDv1 + raw + SHA2-256 block at exactly BlockSizeLimit
+	data := make([]byte, chunker.BlockSizeLimit) // zero-filled payload; the assertions below only look at sizes
+	prefix := cid.Prefix{
+		Version:  1,
+		Codec:    cid.Raw,
+		MhType:   mh.SHA2_256,
+		MhLength: -1,
+	}
+	c, err := prefix.Sum(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+	blk, err := blocks.NewBlockWithCid(data, c)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// build a bitswap message with the block
+	msg := New(true)
+	msg.AddBlock(blk)
+
+	// serialize and check size fits in libp2p message limit
+	buf := new(bytes.Buffer)
+	if err := msg.ToNetV1(buf); err != nil {
+		t.Fatal(err)
+	}
+	wireSize := buf.Len()
+	if wireSize > network.MessageSizeMax {
+		t.Fatalf("serialized message (%d bytes) exceeds network.MessageSizeMax (%d bytes)",
+			wireSize, network.MessageSizeMax)
+	}
+	t.Logf("block=%d wire=%d limit=%d headroom=%d bytes",
+		chunker.BlockSizeLimit, wireSize, network.MessageSizeMax, network.MessageSizeMax-wireSize)
+
+	// round-trip: verify FromNet can read it back (uses MessageSizeMax as reader limit)
+	m2, _, err := FromNet(bytes.NewReader(buf.Bytes()))
+	if err != nil {
+		t.Fatalf("FromNet failed: %v", err)
+	}
+	received := m2.Blocks()
+	if len(received) != 1 {
+		t.Fatalf("expected 1 block, got %d", len(received))
+	}
+	if len(received[0].RawData()) != chunker.BlockSizeLimit {
+		t.Fatalf("expected block of %d bytes, got %d", chunker.BlockSizeLimit, len(received[0].RawData()))
+	}
+}
diff --git a/chunker/parse.go b/chunker/parse.go
index 0ac17d88e..afdfa29a3 100644
--- a/chunker/parse.go
+++ b/chunker/parse.go
@@ -15,10 +15,22 @@ import (
 var DefaultBlockSize int64 = 1024 * 256
 
 const (
-	// ChunkSizeLimit is the maximum allowed chunk size.
-	// No leaf block should contain more than 1MiB of payload data (wrapping overhead aside).
-	// See discussion at https://github.com/ipfs/go-ipfs-chunker/pull/21#discussion_r369124879
-	ChunkSizeLimit int = 1048576
+	// BlockSizeLimit is the maximum block size defined by the bitswap spec.
+	// https://specs.ipfs.tech/bitswap-protocol/#block-sizes
+	BlockSizeLimit int = 2 * 1024 * 1024 // 2MiB
+
+	// ChunkOverheadBudget is reserved for protobuf/UnixFS framing overhead
+	// when chunks are wrapped in non-raw leaves (--raw-leaves=false).
+	ChunkOverheadBudget int = 256
+
+	// ChunkSizeLimit is the maximum chunk size accepted by the chunker.
+	// It is set below BlockSizeLimit to leave room for framing overhead
+	// so that serialized blocks stay within the 2MiB wire limit.
+	//
+	// In practice this limit only matters for custom chunker sizes.
+	// The CID-deterministic profiles defined in IPIP-499 use max 1MiB
+	// chunks, well within this limit.
+	ChunkSizeLimit int = BlockSizeLimit - ChunkOverheadBudget // 2097152 - 256 = 2096896 bytes
 )
 
 var (
diff --git a/chunker/parse_test.go b/chunker/parse_test.go
index 108b96729..360f46aea 100644
--- a/chunker/parse_test.go
+++ b/chunker/parse_test.go
@@ -10,6 +10,28 @@ const (
 	testTwoThirdsOfChunkLimit = 2 * (float32(ChunkSizeLimit) / float32(3))
 )
 
+func TestBlockSizeConstants(t *testing.T) {
+	t.Parallel()
+
+	if ChunkOverheadBudget <= 0 {
+		t.Fatal("ChunkOverheadBudget must be positive")
+	}
+	if ChunkSizeLimit <= 0 {
+		t.Fatal("ChunkSizeLimit must be positive")
+	}
+	if BlockSizeLimit <= 0 {
+		t.Fatal("BlockSizeLimit must be positive")
+	}
+	if ChunkSizeLimit+ChunkOverheadBudget != BlockSizeLimit {
+		t.Fatalf("ChunkSizeLimit (%d) + ChunkOverheadBudget (%d) != BlockSizeLimit (%d)",
+			ChunkSizeLimit, ChunkOverheadBudget, BlockSizeLimit)
+	}
+	if ChunkSizeLimit >= BlockSizeLimit { // follows from the positive-budget and sum checks above; asserted explicitly
+		t.Fatalf("ChunkSizeLimit (%d) must be less than BlockSizeLimit (%d)",
+			ChunkSizeLimit, BlockSizeLimit)
+	}
+}
+
 func TestParseRabin(t *testing.T) {
 	t.Parallel()
 
diff --git a/ipld/unixfs/importer/helpers/helpers.go b/ipld/unixfs/importer/helpers/helpers.go
index 828d9aaeb..6508ad90d 100644
--- a/ipld/unixfs/importer/helpers/helpers.go
+++ b/ipld/unixfs/importer/helpers/helpers.go
@@ -2,10 +2,14 @@ package helpers
 
 import (
 	"errors"
+
+	chunker "github.com/ipfs/boxo/chunker"
 )
 
 // BlockSizeLimit specifies the maximum size an imported block can have.
-var BlockSizeLimit = 1048576 // 1 MB
+// Defaults to chunker.BlockSizeLimit (2MiB), the bitswap spec maximum.
+// https://specs.ipfs.tech/bitswap-protocol/#block-sizes
+var BlockSizeLimit = chunker.BlockSizeLimit
 
 // rough estimates on expected sizes
 var (