Skip to content

Commit f14f6a8

Browse files
authored
feat(store/fscache): Fragment cache filenames for long URLs (#17)
- Introduce adaptive filename logic to split long base64-encoded URLs into directory fragments. - Retain reversibility and compatibility with previous base64 filenames. Closes #16
2 parents a880a8c + d0fabd0 commit f14f6a8

File tree

6 files changed

+231
-33
lines changed

6 files changed

+231
-33
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module github.com/bartventer/httpcache
22

3-
go 1.24
3+
go 1.25

store/acceptance/acceptance.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package acceptance
1818
import (
1919
"bytes"
2020
"slices"
21+
"strings"
2122
"testing"
2223

2324
"github.com/bartventer/httpcache/internal/testutil"
@@ -126,7 +127,11 @@ func testKeys(t *testing.T, factory FactoryFunc) {
126127
if !ok {
127128
t.Skip("Cache implementation does not support key listing")
128129
}
129-
keys := []string{"foo", "bar", "baz"}
130+
keys := []string{
131+
"foo",
132+
"bar",
133+
"baz" + strings.Repeat("x", 255), // ensure long key handling
134+
}
130135
for _, key := range keys {
131136
value := []byte("value for " + key)
132137
testutil.RequireNoError(t, cache.Set(key, value), "Set failed for key "+key)

store/fscache/filenamer.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Copyright (c) 2025 Bart Venter <bartventer@proton.me>
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package fscache
16+
17+
import (
18+
"encoding/base64"
19+
"path/filepath"
20+
"strings"
21+
)
22+
23+
// The two directions of the cache key <-> file name mapping.
type (
24+
// fileNamer produces the file name under which the entry for key is stored.
fileNamer interface{ FileName(key string) string }
25+
// fileNameKeyer recovers the key from a file name, returning an error
// when the name cannot be converted back.
fileNameKeyer interface {
26+
KeyFromFileName(name string) (string, error)
27+
}
28+
)
29+
30+
// Function adapters that let plain functions be used where the fileNamer and
// fileNameKeyer interfaces are expected.
type (
31+
// fileNamerFunc is a function with the shape of fileNamer.FileName.
fileNamerFunc func(key string) string
32+
// fileNameKeyerFunc is a function with the shape of
// fileNameKeyer.KeyFromFileName.
fileNameKeyerFunc func(name string) (string, error)
33+
)
34+
35+
// FileName calls f(key) and returns its result.
func (f fileNamerFunc) FileName(key string) string { return f(key) }
36+
// KeyFromFileName calls f(name) and returns its result and error unchanged.
func (f fileNameKeyerFunc) KeyFromFileName(name string) (string, error) { return f(name) }
37+
38+
// fragmentSize is the maximum length, in characters of the base64-encoded key,
// of each path component (directory level or final file name) produced when a
// long encoded key is fragmented.
39+
// 48 is chosen so that 5 fragments fit within 240 chars, well under common filesystem limits.
40+
const fragmentSize = 48
41+
42+
// fragmentingFileNamer returns a fileNamer backed by fragmentFileName, which
// fragments long keys into directory structures.
43+
// This helps avoid filesystem limits on filename lengths.
44+
func fragmentingFileNamer() fileNamer {
45+
return fileNamerFunc(fragmentFileName)
46+
}
47+
48+
func fragmentFileName(key string) string {
49+
encoded := base64.RawURLEncoding.EncodeToString([]byte(key))
50+
if len(encoded) <= 255 { // Common filesystem filename limit
51+
return encoded
52+
}
53+
54+
// Fragment the encoded string
55+
var parts []string
56+
for i := 0; i < len(encoded); i += fragmentSize {
57+
end := min(i+fragmentSize, len(encoded))
58+
parts = append(parts, encoded[i:end])
59+
}
60+
return filepath.Join(parts...)
61+
}
62+
63+
// fragmentingFileNameKeyer returns a fileNameKeyer backed by
// fragmentedFileNameToKey, the inverse of the naming done by fragmentFileName.
func fragmentingFileNameKeyer() fileNameKeyer {
64+
return fileNameKeyerFunc(fragmentedFileNameToKey)
65+
}
66+
67+
// filepathSeparatorReplacer deletes every OS path separator from a string,
// turning a fragmented path back into one contiguous base64 string.
var filepathSeparatorReplacer = strings.NewReplacer(
	string(filepath.Separator),
	"",
)

// fragmentedFileNameToKey recovers the key from a name produced by
// fragmentFileName. name may be a plain base64 file name or a fragmented
// relative path whose components are separated by the OS path separator.
//
// It returns the decoded key, or the error reported by the base64 decoder
// (for example a base64.CorruptInputError) when the name, with separators
// removed, is not valid unpadded URL-safe base64.
func fragmentedFileNameToKey(name string) (string, error) {
	// Replace is a no-op when name holds no separator, so fragmented and plain
	// names share a single decode path.
	decoded, err := base64.RawURLEncoding.DecodeString(filepathSeparatorReplacer.Replace(name))
	if err != nil {
		return "", err
	}
	return string(decoded), nil
}

store/fscache/filenamer_test.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// Copyright (c) 2025 Bart Venter <bartventer@proton.me>
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package fscache
16+
17+
import (
18+
"encoding/base64"
19+
"fmt"
20+
"path/filepath"
21+
"strings"
22+
"testing"
23+
24+
"github.com/bartventer/httpcache/internal/testutil"
25+
)
26+
27+
// Example_fragmentFileName_short shows that a short key is mapped to a single
// base64 file name with no directory fragments.
func Example_fragmentFileName_short() {
28+
url := "https://short.url/test"
29+
path := fragmentFileName(url)
30+
fmt.Println("Fragmented path:", path)
31+
// Output:
32+
// Fragmented path: aHR0cHM6Ly9zaG9ydC51cmwvdGVzdA
33+
}
34+
35+
func Example_fragmentFileName_long() {
36+
url := "https://example.com/" + strings.Repeat("a", 255)
37+
path := fragmentFileName(url)
38+
fmt.Println("Fragmented path:", path)
39+
// Output:
40+
// Fragmented path: aHR0cHM6Ly9leGFtcGxlLmNvbS9hYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWE
41+
}
42+
43+
// Test_fragmentFileName_fragmentedFileNameToKey is a table-driven round-trip
// test: each URL is turned into a file name with fragmentFileName, converted
// back with fragmentedFileNameToKey, and compared with the original. A case
// may supply an extra assertion that inspects the encoded and decoded values.
func Test_fragmentFileName_fragmentedFileNameToKey(t *testing.T) {
44+
cases := []struct {
45+
name string
46+
url string
47+
assertion func(tt *testing.T, encoded, decoded string)
48+
}{
49+
{
50+
name: "Empty string",
51+
url: "",
52+
},
53+
{
54+
name: "Short ASCII URL",
55+
url: "https://example.com/test?foo=bar",
56+
},
57+
{
58+
name: "Long ASCII URL",
59+
url: "https://example.com/" + strings.Repeat("a", 1000),
60+
// Every path component of the fragmented name must respect fragmentSize.
assertion: func(tt *testing.T, encoded string, _ string) {
61+
for frag := range strings.SplitSeq(encoded, string(filepath.Separator)) {
62+
testutil.AssertTrue(
63+
tt,
64+
len(frag) <= fragmentSize,
65+
"Fragment too long: got %d, want <= %d",
66+
len(frag),
67+
fragmentSize,
68+
)
69+
}
70+
},
71+
},
72+
{
73+
name: "Unicode URL",
74+
url: "https://例子.测试?emoji=🚀",
75+
},
76+
{
77+
name: "URL with separators",
78+
url: "https://foo/bar/baz?x=y/z",
79+
},
80+
}
81+
82+
for _, tc := range cases {
83+
t.Run(tc.name, func(t *testing.T) {
84+
encoded := fragmentFileName(tc.url)
85+
// Round trip: decoding the generated name must yield the original URL.
86+
decoded, err := fragmentedFileNameToKey(encoded)
87+
testutil.RequireNoError(t, err)
88+
testutil.AssertEqual(t, tc.url, decoded, "Roundtrip failed")
89+
if tc.assertion != nil {
90+
tc.assertion(t, encoded, decoded)
91+
}
92+
})
93+
}
94+
}
95+
96+
// Test_fragmentedFileNameToKey_InvalidBase64 feeds plain and slash-separated
// names that are not valid base64 to fragmentedFileNameToKey and requires, via
// testutil.RequireErrorAs, that the returned error is a
// base64.CorruptInputError.
func Test_fragmentedFileNameToKey_InvalidBase64(t *testing.T) {
97+
invalidPaths := []string{
98+
"!!!notbase64",
99+
"this/is/not/valid/base64/===",
100+
"foo/bar/baz",
101+
}
102+
for _, path := range invalidPaths {
103+
t.Run(path, func(t *testing.T) {
104+
_, err := fragmentedFileNameToKey(path)
105+
var cie base64.CorruptInputError
106+
testutil.RequireErrorAs(t, err, &cie)
107+
})
108+
}
109+
}

store/fscache/fscache.go

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,17 @@ import (
4949
"cmp"
5050
"context"
5151
"crypto/rand"
52-
"encoding/base64"
52+
"io/fs"
53+
"strings"
54+
5355
"errors"
5456
"fmt"
5557
"io"
56-
"io/fs"
58+
5759
"net/url"
5860
"os"
5961
"path/filepath"
6062
"slices"
61-
"strings"
6263
"time"
6364

6465
"github.com/bartventer/httpcache/store"
@@ -110,7 +111,7 @@ type fsCache struct {
110111
// internal components
111112

112113
fn fileNamer // generates file names from keys
113-
fnk fileNameKeyer // extracts keys from file names
114+
fnk fileNameKeyer // recovers keys from file names
114115
dw dirWalker // used for directory walking
115116
}
116117

@@ -251,39 +252,22 @@ func (c *fsCache) initialize(appname string) error {
251252
if err != nil {
252253
return fmt.Errorf("fscache: could not open cache directory %q: %w", c.base, err)
253254
}
254-
c.fn = fileNamerFunc(safeFileName)
255-
c.fnk = fileNameKeyerFunc(keyFromFileName)
255+
c.fn = fragmentingFileNamer()
256+
c.fnk = fragmentingFileNameKeyer()
256257
c.dw = dirWalkerFunc(filepath.WalkDir)
257258
c.timeout = cmp.Or(c.timeout, defaultTimeout)
258259

259260
return nil
260261
}
261262

262-
type (
263-
fileNamer interface{ FileName(key string) string }
264-
fileNameKeyer interface{ KeyFromFileName(name string) string }
265-
dirWalker interface {
266-
WalkDir(root string, fn fs.WalkDirFunc) error
267-
}
268-
)
263+
type dirWalker interface {
264+
WalkDir(root string, fn fs.WalkDirFunc) error
265+
}
269266

270-
type (
271-
fileNamerFunc func(key string) string
272-
fileNameKeyerFunc func(name string) string
273-
dirWalkerFunc func(root string, fn fs.WalkDirFunc) error
274-
)
267+
type dirWalkerFunc func(root string, fn fs.WalkDirFunc) error
275268

276-
func (f fileNamerFunc) FileName(key string) string { return f(key) }
277-
func (f fileNameKeyerFunc) KeyFromFileName(name string) string { return f(name) }
278269
func (f dirWalkerFunc) WalkDir(root string, fn fs.WalkDirFunc) error { return f(root, fn) }
279270

280-
func safeFileName(key string) string { return base64.RawURLEncoding.EncodeToString([]byte(key)) }
281-
282-
func keyFromFileName(name string) string {
283-
data, _ := base64.RawURLEncoding.DecodeString(name)
284-
return string(data)
285-
}
286-
287271
var _ driver.Conn = (*fsCache)(nil)
288272
var _ expapi.KeyLister = (*fsCache)(nil)
289273

@@ -366,7 +350,11 @@ func (c *fsCache) set(key string, entry []byte) error {
366350
return err
367351
}
368352
}
369-
f, err := c.root.Create(c.fn.FileName(key))
353+
name := c.fn.FileName(key)
354+
if err := c.root.MkdirAll(filepath.Dir(name), 0o755); err != nil {
355+
return err
356+
}
357+
f, err := c.root.Create(name)
370358
if err != nil {
371359
return err
372360
}
@@ -448,7 +436,13 @@ func (c *fsCache) keys(prefix string) ([]string, error) {
448436
if d.IsDir() {
449437
return nil
450438
}
451-
if key := c.fnk.KeyFromFileName(filepath.Base(path)); strings.HasPrefix(key, prefix) {
439+
key, err := c.fnk.KeyFromFileName(
440+
strings.TrimPrefix(path, dirname+string(os.PathSeparator)),
441+
)
442+
if err != nil {
443+
return err
444+
}
445+
if strings.HasPrefix(key, prefix) {
452446
keys = append(keys, key)
453447
}
454448
return nil

store/fscache/fscache_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ func Test_fsCache_KeysError(t *testing.T) {
7171
cache.fn = fileNamerFunc(func(key string) string {
7272
return key
7373
})
74-
cache.fnk = fileNameKeyerFunc(func(name string) string {
75-
return name
74+
cache.fnk = fileNameKeyerFunc(func(name string) (string, error) {
75+
return name, nil
7676
})
7777
cache.dw = dirWalkerFunc(func(root string, fn fs.WalkDirFunc) error {
7878
return testutil.ErrSample

0 commit comments

Comments
 (0)