diff --git a/.gitignore b/.gitignore index 958c2ab..6014599 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,44 @@ +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Code coverage profiles and other test artifacts +*.out +coverage.* +*.coverprofile +profile.cov + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Go workspace file +go.work +go.work.sum + +# env file +.env + +# Editor/IDE +# .idea/ +# .vscode/ + + Editing/ debugging/ manifests/ +unusual/ norm/ *.exe *.json output/ +/extracted/ +bin/ \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2bcfc81 --- /dev/null +++ b/Makefile @@ -0,0 +1,56 @@ +.PHONY: help build test bench clean install fmt lint check + +# Default target - show help +.DEFAULT_GOAL := help + +# Show available targets +help: + @echo "evrFileTools - EVR package/manifest tool" + @echo "" + @echo "Usage: make [target]" + @echo "" + @echo "Targets:" + @echo " build Build the CLI tool to bin/evrtools" + @echo " test Run all tests" + @echo " bench Run benchmarks" + @echo " bench-compare Run benchmarks with multiple iterations" + @echo " clean Remove build artifacts" + @echo " install Install CLI tool via go install" + @echo " fmt Format code" + @echo " lint Run go vet" + @echo " check Run fmt, lint, and test" + +# Build the CLI tool +build: + go build -o bin/evrtools ./cmd/evrtools + +# Run all tests +test: + go test -v ./pkg/... + +# Run benchmarks +bench: + go test -bench=. -benchmem -benchtime=1s ./pkg/... + +# Run benchmarks with comparison +bench-compare: + go test -bench=. -benchmem -count=5 ./pkg/... + +# Clean build artifacts +clean: + rm -rf bin/ + +# Install the CLI tool +install: + go install ./cmd/evrtools + +# Format code +fmt: + go fmt ./... + +# Lint code +lint: + go vet ./... + +# Check for common issues +check: fmt lint test \ No newline at end of file diff --git a/README.md b/README.md index b760519..9e421d3 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,155 @@ -thank you Exhibitmark for doing the hard work and making [carnation](https://github.com/Exhibitmark/carnation), saved me a lot of headache reversing the manifest format :) +# evrFileTools -tool i ~~not so~~ quickly threw together to modify any file(s) in an EVR manifest/package combo. -Barely in a working state, please cut me some slack while i clean this up +A Go library and CLI tool for working with EVR (Echo VR) package and manifest files. -extracting files example: +> Thanks to [Exhibitmark](https://github.com/Exhibitmark) for [carnation](https://github.com/Exhibitmark/carnation) which helped with reversing the manifest format! 
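+
+Echo VR stores each package as a ZSTD-compressed manifest plus one or more package parts under its `_data` directory. A rough sketch of that on-disk layout is below (a sketch only: the package name is taken from the usage example further down, and the part count of 4 is purely illustrative — in practice it comes from the manifest header's package count):
+
+```go
+package main
+
+import (
+    "fmt"
+    "path/filepath"
+)
+
+func main() {
+    // Illustrative values; the real ones come from the -data and -package flags.
+    dataDir := "./ready-at-dawn-echo-arena/_data/5932408047/rad15/win10"
+    packageName := "48037dc70b0ecab2"
+    packageCount := 4 // read from the manifest header in practice
+
+    // The compressed manifest and its package parts sit side by side:
+    //   <dataDir>/manifests/<packageName>
+    //   <dataDir>/packages/<packageName>_0 ... <packageName>_N
+    fmt.Println(filepath.Join(dataDir, "manifests", packageName))
+    for i := 0; i < packageCount; i++ {
+        fmt.Println(filepath.Join(dataDir, "packages", fmt.Sprintf("%s_%d", packageName, i)))
+    }
+}
+```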
+ +## Features + +- Extract files from EVR packages +- Build new packages from extracted files +- Read and write EVR manifest files +- ZSTD compression/decompression with optimized context reuse + +## Installation + +```bash +go install github.com/EchoTools/evrFileTools/cmd/evrtools@latest +``` + +Or build from source: + +```bash +git clone https://github.com/EchoTools/evrFileTools.git +cd evrFileTools +make build +``` + +## Usage + +### Extract files from a package + +```bash +evrtools -mode extract \ + -data ./ready-at-dawn-echo-arena/_data/5932408047/rad15/win10 \ + -package 48037dc70b0ecab2 \ + -output ./extracted ``` -evrFileTools -mode extract -packageName 48037dc70b0ecab2 -dataDir ./ready-at-dawn-echo-arena/_data/5932408047/rad15/win10 -outputDir ./output/ + +This extracts all files from the package. Output structure: +- `./output//` + +With `-preserve-groups`, frames are preserved: +- `./output///` + +### Build a package from files + +```bash +evrtools -mode build \ + -input ./files \ + -output ./output \ + -package mypackage ``` -this will extract and write out every file contained in the package to outputFolder. -the names of the subfolders created in outputFolder are the filetype symbols, the files contained within are named with their respective symbols. -If the `-outputPreserveGroups` flag is provided, there will be folders created to seperate each frame. This is currently the directory structure that `-mode build` expects. +Expected input structure: `./input///` + +### CLI Options + +| Flag | Description | +|------|-------------| +| `-mode` | Operation mode: `extract` or `build` | +| `-data` | Path to _data directory containing manifests/packages | +| `-package` | Package name (e.g., `48037dc70b0ecab2`) | +| `-input` | Input directory for build mode | +| `-output` | Output directory | +| `-preserve-groups` | Preserve frame grouping in extract output | +| `-force` | Allow non-empty output directory | + +## Library Usage +```go +package main -replacing files example: +import ( + "log" + "github.com/EchoTools/evrFileTools/pkg/manifest" +) + +func main() { + // Read a manifest + m, err := manifest.ReadFile("/path/to/manifests/packagename") + if err != nil { + log.Fatal(err) + } + + log.Printf("Manifest: %d files in %d packages", m.FileCount(), m.PackageCount()) + + // Open the package files + pkg, err := manifest.OpenPackage(m, "/path/to/packages/packagename") + if err != nil { + log.Fatal(err) + } + defer pkg.Close() + + // Extract all files + if err := pkg.Extract("./output"); err != nil { + log.Fatal(err) + } +} ``` -echoFileTools -mode replace -outputDir ./output/ -packageName 48037dc70b0ecab2 -dataDir ./ready-at-dawn-echo-arena/_data/5932408047/rad15/win10 -inputDir ./input/ + +## Project Structure + +``` +evrFileTools/ +├── cmd/ +│ └── evrtools/ # CLI application +├── pkg/ +│ ├── archive/ # ZSTD archive format +│ │ ├── header.go # Archive header (24 bytes) +│ │ ├── reader.go # Streaming decompression +│ │ └── writer.go # Streaming compression +│ └── manifest/ # EVR manifest/package handling +│ ├── manifest.go # Manifest types and binary encoding +│ ├── package.go # Multi-part package extraction +│ ├── builder.go # Package building from files +│ └── scanner.go # Input directory scanning +├── Makefile +└── go.mod +``` + +## Development + +```bash +# Build +make build + +# Run tests +make test + +# Run benchmarks +make bench + +# Format and lint +make check ``` -Directory structure of inputDir while using `-mode replace` should be `./inputFolder/0/...`, where ... 
is the structure of `-mode extract` *without* the `-outputPreserveGroups` flag. -e.g. if replacing the Echo VR logo DDS, the stucture would be as follows: `./input/0/-4707359568332879775/-3482028914369150717` +## Performance + +The library uses several optimizations: + +- **Direct binary encoding** instead of reflection-based `binary.Read/Write` +- **Pre-allocated buffers** for zero-allocation encoding paths +- **ZSTD context reuse** for ~4x faster decompression with zero allocations +- **Frame index maps** for O(1) file lookups during extraction +- **Directory caching** to minimize syscalls + +Run benchmarks to see current performance: + +```bash +go test -bench=. -benchmem ./pkg/... +``` +## License -if a file with the same filetype symbol & filename symbol exists in the manifest, it will edit the manifest & package file to match, and write out the contents of both to outputDir. +MIT License - see LICENSE file diff --git a/_data b/_data new file mode 120000 index 0000000..62c371d --- /dev/null +++ b/_data @@ -0,0 +1 @@ +/mnt/c/OculusLibrary/Software/ready-at-dawn-echo-arena/_data/5932408047/rad15/win10/ \ No newline at end of file diff --git a/cmd/evrtools/main.go b/cmd/evrtools/main.go new file mode 100644 index 0000000..cea300d --- /dev/null +++ b/cmd/evrtools/main.go @@ -0,0 +1,182 @@ +// Package main provides a command-line tool for working with EVR package files. +package main + +import ( + "flag" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/EchoTools/evrFileTools/pkg/manifest" +) + +var ( + mode string + packageName string + dataDir string + inputDir string + outputDir string + preserveGroups bool + forceOverwrite bool + useDecimalName bool +) + +func init() { + flag.StringVar(&mode, "mode", "", "Operation mode: extract, build") + flag.StringVar(&packageName, "package", "", "Package name (e.g., 48037dc70b0ecab2)") + flag.StringVar(&dataDir, "data", "", "Path to _data directory containing manifests/packages") + flag.StringVar(&inputDir, "input", "", "Input directory for build mode") + flag.StringVar(&outputDir, "output", "", "Output directory") + flag.BoolVar(&preserveGroups, "preserve-groups", false, "Preserve frame grouping in output") + flag.BoolVar(&forceOverwrite, "force", false, "Allow non-empty output directory") + flag.BoolVar(&useDecimalName, "decimal-names", false, "Use decimal format for filenames (default is hex)") +} + +func main() { + flag.Parse() + + if err := run(); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func run() error { + if err := validateFlags(); err != nil { + flag.Usage() + return err + } + + if err := prepareOutputDir(); err != nil { + return err + } + + switch mode { + case "extract": + return runExtract() + case "build": + return runBuild() + default: + return fmt.Errorf("unknown mode: %s", mode) + } +} + +func validateFlags() error { + if mode == "" { + return fmt.Errorf("mode is required") + } + if outputDir == "" { + return fmt.Errorf("output directory is required") + } + + switch mode { + case "extract": + if dataDir == "" || packageName == "" { + return fmt.Errorf("extract mode requires -data and -package") + } + case "build": + if inputDir == "" { + return fmt.Errorf("build mode requires -input") + } + if packageName == "" { + packageName = "package" + } + default: + return fmt.Errorf("mode must be 'extract' or 'build'") + } + + return nil +} + +func prepareOutputDir() error { + if err := os.MkdirAll(outputDir, 0755); err != nil { + return fmt.Errorf("create output directory: %w", err) + } + + if 
!forceOverwrite { + empty, err := isDirEmpty(outputDir) + if err != nil { + return fmt.Errorf("check output directory: %w", err) + } + if !empty { + return fmt.Errorf("output directory is not empty (use -force to override)") + } + } + + return nil +} + +func isDirEmpty(path string) (bool, error) { + f, err := os.Open(path) + if err != nil { + return false, err + } + defer f.Close() + + _, err = f.Readdir(1) + return err == io.EOF, nil +} + +func runExtract() error { + manifestPath := filepath.Join(dataDir, "manifests", packageName) + m, err := manifest.ReadFile(manifestPath) + if err != nil { + return fmt.Errorf("read manifest: %w", err) + } + + fmt.Printf("Manifest loaded: %d files in %d packages\n", m.FileCount(), m.PackageCount()) + + packagePath := filepath.Join(dataDir, "packages", packageName) + pkg, err := manifest.OpenPackage(m, packagePath) + if err != nil { + return fmt.Errorf("open package: %w", err) + } + defer pkg.Close() + + fmt.Println("Extracting files...") + if err := pkg.Extract( + outputDir, + manifest.WithPreserveGroups(preserveGroups), + manifest.WithDecimalNames(useDecimalName), + ); err != nil { + return fmt.Errorf("extract: %w", err) + } + + fmt.Printf("Extraction complete. Files written to %s\n", outputDir) + return nil +} + +func runBuild() error { + fmt.Println("Scanning input directory...") + files, err := manifest.ScanFiles(inputDir) + if err != nil { + return fmt.Errorf("scan files: %w", err) + } + + totalFiles := 0 + for _, group := range files { + totalFiles += len(group) + } + fmt.Printf("Found %d files in %d groups\n", totalFiles, len(files)) + + fmt.Println("Building package...") + builder := manifest.NewBuilder(outputDir, packageName) + m, err := builder.Build(files) + if err != nil { + return fmt.Errorf("build: %w", err) + } + + manifestDir := filepath.Join(outputDir, "manifests") + if err := os.MkdirAll(manifestDir, 0755); err != nil { + return fmt.Errorf("create manifest dir: %w", err) + } + + manifestPath := filepath.Join(manifestDir, packageName) + if err := manifest.WriteFile(manifestPath, m); err != nil { + return fmt.Errorf("write manifest: %w", err) + } + + fmt.Printf("Build complete. 
Output written to %s\n", outputDir) + return nil +} diff --git a/evrManifests/5868485946-EVR.go b/evrManifests/5868485946-EVR.go deleted file mode 100644 index 6b1b7b1..0000000 --- a/evrManifests/5868485946-EVR.go +++ /dev/null @@ -1,168 +0,0 @@ -package evrManifests - -import ( - "bytes" - "encoding/binary" -) - -type manifest_5868485946_EVR struct { - Header struct { - PackageCount uint32 - Unk1 uint32 - Unk2 uint64 - _ [8]byte - FrameContents struct { - SectionSize uint64 - Unk1 uint64 - Unk2 uint64 - ElementSize uint64 - Count uint64 - ElementCount uint64 - } - _ [16]byte - SomeStructure struct { - SectionSize uint64 - Unk1 uint64 - Unk2 uint64 - ElementSize uint64 - Count uint64 - ElementCount uint64 - } - _ [16]byte - Frames struct { - SectionSize uint64 - Unk1 uint64 - Unk2 uint64 - ElementSize uint64 - Count uint64 - ElementCount uint64 - } - } - FrameContents []struct { - TypeSymbol int64 - FileSymbol int64 - FileIndex uint32 - DataOffset uint32 - Size uint32 - SomeAlignment uint32 - } - SomeStructure []struct { - TypeSymbol int64 - FileSymbol int64 - Unk1 int64 - Unk2 int64 - Unk3 uint32 - Unk4 uint32 - } - Frames []struct { - CurrentPackageIndex uint32 - CurrentOffset uint32 - CompressedSize uint32 - DecompressedSize uint32 - } -} - -func (m *manifest_5868485946_EVR) evrmFromBytes(b []byte) (EvrManifest, error) { - newManifest := EvrManifest{} - if err := m.unmarshalManifest(b); err != nil { - return newManifest, err - } - - return m.convToEvrm() -} - -func (m *manifest_5868485946_EVR) convToEvrm() (EvrManifest, error) { - newManifest := EvrManifest{ - Header: ManifestHeader{ - PackageCount: m.Header.PackageCount, - Unk1: m.Header.Unk1, - Unk2: m.Header.Unk2, - FrameContents: m.Header.FrameContents, - SomeStructure: m.Header.SomeStructure, - Frames: m.Header.Frames, - }, - FrameContents: make([]FrameContents, len(m.FrameContents)), - SomeStructure: make([]SomeStructure, len(m.SomeStructure)), - Frames: make([]Frame, len(m.Frames)), - } - for k, v := range m.FrameContents { - newManifest.FrameContents[k] = FrameContents{ - T: v.TypeSymbol, - FileSymbol: v.FileSymbol, - FileIndex: v.FileIndex, - DataOffset: v.DataOffset, - Size: v.Size, - SomeAlignment: v.SomeAlignment, - } - } - for k, v := range m.SomeStructure { - // combine Unk3 and Unk4 into one uint64 and place in AssetType - atBytes := (int64(v.Unk3) << 32) | int64(v.Unk4) // autogenerated, i'm scared of this - newManifest.SomeStructure[k] = SomeStructure{ - T: v.TypeSymbol, - FileSymbol: v.FileSymbol, - Unk1: v.Unk1, - Unk2: v.Unk2, - AssetType: atBytes, - } - } - for k, v := range m.Frames { - newManifest.Frames[k] = Frame{ - CurrentPackageIndex: v.CurrentPackageIndex, - CurrentOffset: v.CurrentOffset, - CompressedSize: v.CompressedSize, - DecompressedSize: v.DecompressedSize, - } - } - return newManifest, nil -} - -func (m *manifest_5868485946_EVR) unmarshalManifest(b []byte) error { - currentOffset := binary.Size(m.Header) - buf := bytes.NewReader(b[:currentOffset]) - if err := binary.Read(buf, binary.LittleEndian, &m.Header); err != nil { - return err - } - - m.FrameContents = make([]struct { - TypeSymbol int64 - FileSymbol int64 - FileIndex uint32 - DataOffset uint32 - Size uint32 - SomeAlignment uint32 - }, m.Header.FrameContents.ElementCount) - m.SomeStructure = make([]struct { - TypeSymbol int64 - FileSymbol int64 - Unk1 int64 - Unk2 int64 - Unk3 uint32 - Unk4 uint32 - }, m.Header.SomeStructure.ElementCount) - m.Frames = make([]struct { - CurrentPackageIndex uint32 - CurrentOffset uint32 - CompressedSize uint32 - 
DecompressedSize uint32 - }, m.Header.Frames.ElementCount) - - buf = bytes.NewReader(b[currentOffset : currentOffset+binary.Size(m.FrameContents)]) - if err := binary.Read(buf, binary.LittleEndian, &m.FrameContents); err != nil { - return err - } - currentOffset += binary.Size(m.FrameContents) - - buf = bytes.NewReader(b[currentOffset : currentOffset+binary.Size(m.SomeStructure)]) - if err := binary.Read(buf, binary.LittleEndian, &m.SomeStructure); err != nil { - return err - } - currentOffset += binary.Size(m.SomeStructure) - - buf = bytes.NewReader(b[currentOffset : currentOffset+binary.Size(m.Frames)]) - if err := binary.Read(buf, binary.LittleEndian, &m.Frames); err != nil { - return err - } - - return nil -} diff --git a/evrManifests/5932408047-EVR.go b/evrManifests/5932408047-EVR.go deleted file mode 100644 index 172a3e7..0000000 --- a/evrManifests/5932408047-EVR.go +++ /dev/null @@ -1,304 +0,0 @@ -package evrManifests - -import ( - "bytes" - "encoding/binary" - "fmt" -) - -// manifest structure ripped from Carnation, thank you exhibitmark <3 -type manifest_5932408047_EVR struct { - Header struct { - PackageCount uint32 - Unk1 uint32 // ? - 524288 on latest builds - Unk2 uint64 // ? - 0 on latest builds - FrameContents struct { - SectionSize uint64 // total byte length of entire section - Unk1 uint64 // ? 0 on latest builds - Unk2 uint64 // ? 4294967296 on latest builds - ElementSize uint64 // byte size of single entry - TODO: confirm, only matches up with Frame_contents entry - Count uint64 // number of elements, can differ from ElementCount? - ElementCount uint64 // number of elements - } - _ [16]byte // padding - SomeStructure struct { - SectionSize uint64 // total byte length of entire section - Unk1 uint64 // ? 0 on latest builds - Unk2 uint64 // ? 4294967296 on latest builds - ElementSize uint64 // byte size of single entry - TODO: confirm, only matches up with Frame_contents entry - Count uint64 // number of elements, can differ from ElementCount? - ElementCount uint64 // number of elements - } - _ [16]byte // padding - Frames struct { - SectionSize uint64 // total byte length of entire section - Unk1 uint64 // ? 0 on latest builds - Unk2 uint64 // ? 4294967296 on latest builds - ElementSize uint64 // byte size of single entry - TODO: confirm, only matches up with Frame_contents entry - Count uint64 // number of elements, can differ from ElementCount? - ElementCount uint64 // number of elements - } - } - FrameContents []struct { - TypeSymbol int64 // Probably filetype - FileSymbol int64 // Symbol for file - FileIndex uint32 // Frame[FileIndex] = file containing this entry - DataOffset uint32 // Byte offset for beginning of wanted data in given file - Size uint32 // Size of file - SomeAlignment uint32 // file divisible by this (can this just be set to 1??) - yes - } - SomeStructure []struct { - TypeSymbol int64 // seems to be the same as unk3 (for a few files on quest, at least) - FileSymbol int64 // filename symbol - Unk1 int64 // ? - game still launches when set to 0 - Unk2 int64 // ? - game still launches when set to 0 - Unk3 int64 // ? 
- game still launches when set to 0 - } - Frames []struct { - CurrentPackageIndex uint32 // the package index - CurrentOffset uint32 // the package byte offset - CompressedSize uint32 // compressed size of file - DecompressedSize uint32 // decompressed size of file - } -} - -func (m *manifest_5932408047_EVR) evrmFromBytes(b []byte) (EvrManifest, error) { - newManifest := EvrManifest{} - if err := m.unmarshalManifest(b); err != nil { - return newManifest, err - } - - return m.convToEvrm() -} - -func (m *manifest_5932408047_EVR) bytesFromEvrm(evrm EvrManifest) ([]byte, error) { - if err := m.evrmToOrig(evrm); err != nil { - return nil, err - } - - wbuf := bytes.NewBuffer(nil) - - var data = []any{ - m.Header, - m.FrameContents, - m.SomeStructure, - m.Frames, - } - for _, v := range data { - err := binary.Write(wbuf, binary.LittleEndian, v) - if err != nil { - fmt.Println("binary.Write failed:", err) - } - } - - manifestBytes := wbuf.Bytes() - return manifestBytes, nil // hack -} - -func (m *manifest_5932408047_EVR) convToEvrm() (EvrManifest, error) { - newManifest := EvrManifest{ - Header: ManifestHeader{ - PackageCount: m.Header.PackageCount, - Unk1: m.Header.Unk1, - Unk2: m.Header.Unk2, - FrameContents: m.Header.FrameContents, - SomeStructure: m.Header.SomeStructure, - Frames: m.Header.Frames, - }, - FrameContents: make([]FrameContents, len(m.FrameContents)), - SomeStructure: make([]SomeStructure, len(m.SomeStructure)), - Frames: make([]Frame, len(m.Frames)), - } - for k, v := range m.FrameContents { - newManifest.FrameContents[k] = FrameContents{ - T: v.TypeSymbol, - FileSymbol: v.FileSymbol, - FileIndex: v.FileIndex, - DataOffset: v.DataOffset, - Size: v.Size, - SomeAlignment: v.SomeAlignment, - } - } - for k, v := range m.SomeStructure { - newManifest.SomeStructure[k] = SomeStructure{ - T: v.TypeSymbol, - FileSymbol: v.FileSymbol, - Unk1: v.Unk1, - Unk2: v.Unk2, - AssetType: v.Unk3, - } - } - for k, v := range m.Frames { - newManifest.Frames[k] = Frame{ - CurrentPackageIndex: v.CurrentPackageIndex, - CurrentOffset: v.CurrentOffset, - CompressedSize: v.CompressedSize, - DecompressedSize: v.DecompressedSize, - } - } - return newManifest, nil -} - -func (m *manifest_5932408047_EVR) evrmToOrig(evrm EvrManifest) error { - m.Header = struct { - PackageCount uint32 - Unk1 uint32 - Unk2 uint64 - FrameContents struct { - SectionSize uint64 - Unk1 uint64 - Unk2 uint64 - ElementSize uint64 - Count uint64 - ElementCount uint64 - } - _ [16]byte - SomeStructure struct { - SectionSize uint64 - Unk1 uint64 - Unk2 uint64 - ElementSize uint64 - Count uint64 - ElementCount uint64 - } - _ [16]byte - Frames struct { - SectionSize uint64 - Unk1 uint64 - Unk2 uint64 - ElementSize uint64 - Count uint64 - ElementCount uint64 - } - }{ - PackageCount: evrm.Header.PackageCount, - Unk1: evrm.Header.Unk1, - Unk2: evrm.Header.Unk2, - FrameContents: evrm.Header.FrameContents, - SomeStructure: evrm.Header.SomeStructure, - Frames: evrm.Header.Frames, - } - - m.FrameContents = make([]struct { - TypeSymbol int64 - FileSymbol int64 - FileIndex uint32 - DataOffset uint32 - Size uint32 - SomeAlignment uint32 - }, len(evrm.FrameContents)) - - m.SomeStructure = make([]struct { - TypeSymbol int64 - FileSymbol int64 - Unk1 int64 - Unk2 int64 - Unk3 int64 - }, len(evrm.SomeStructure)) - - m.Frames = make([]struct { - CurrentPackageIndex uint32 - CurrentOffset uint32 - CompressedSize uint32 - DecompressedSize uint32 - }, len(evrm.Frames)) - - for k, v := range evrm.FrameContents { - m.FrameContents[k] = struct { - TypeSymbol int64 
- FileSymbol int64 - FileIndex uint32 - DataOffset uint32 - Size uint32 - SomeAlignment uint32 - }{ - TypeSymbol: v.T, - FileSymbol: v.FileSymbol, - FileIndex: v.FileIndex, - DataOffset: v.DataOffset, - Size: v.Size, - SomeAlignment: v.SomeAlignment, - } - } - - for k, v := range evrm.SomeStructure { - m.SomeStructure[k] = struct { - TypeSymbol int64 - FileSymbol int64 - Unk1 int64 - Unk2 int64 - Unk3 int64 - }{ - TypeSymbol: v.T, - FileSymbol: v.FileSymbol, - Unk1: v.Unk1, - Unk2: v.Unk2, - Unk3: v.AssetType, - } - } - - for k, v := range evrm.Frames { - m.Frames[k] = struct { - CurrentPackageIndex uint32 - CurrentOffset uint32 - CompressedSize uint32 - DecompressedSize uint32 - }{ - CurrentPackageIndex: v.CurrentPackageIndex, - CurrentOffset: v.CurrentOffset, - CompressedSize: v.CompressedSize, - DecompressedSize: v.DecompressedSize, - } - } - - return nil -} - -func (m *manifest_5932408047_EVR) unmarshalManifest(b []byte) error { - currentOffset := binary.Size(m.Header) - buf := bytes.NewReader(b[:currentOffset]) - if err := binary.Read(buf, binary.LittleEndian, &m.Header); err != nil { - return err - } - - m.FrameContents = make([]struct { - TypeSymbol int64 - FileSymbol int64 - FileIndex uint32 - DataOffset uint32 - Size uint32 - SomeAlignment uint32 - }, m.Header.FrameContents.ElementCount) - m.SomeStructure = make([]struct { - TypeSymbol int64 - FileSymbol int64 - Unk1 int64 - Unk2 int64 - Unk3 int64 - }, m.Header.SomeStructure.ElementCount) - m.Frames = make([]struct { - CurrentPackageIndex uint32 - CurrentOffset uint32 - CompressedSize uint32 - DecompressedSize uint32 - }, m.Header.Frames.ElementCount) - - buf = bytes.NewReader(b[currentOffset : currentOffset+binary.Size(m.FrameContents)]) - if err := binary.Read(buf, binary.LittleEndian, &m.FrameContents); err != nil { - return err - } - currentOffset += binary.Size(m.FrameContents) - - buf = bytes.NewReader(b[currentOffset : currentOffset+binary.Size(m.SomeStructure)]) - if err := binary.Read(buf, binary.LittleEndian, &m.SomeStructure); err != nil { - return err - } - currentOffset += binary.Size(m.SomeStructure) - - buf = bytes.NewReader(b[currentOffset : currentOffset+binary.Size(m.Frames)]) - if err := binary.Read(buf, binary.LittleEndian, &m.Frames); err != nil { - return err - } - - return nil -} diff --git a/evrManifests/5932408047-LE2.go b/evrManifests/5932408047-LE2.go deleted file mode 100644 index e4b4397..0000000 --- a/evrManifests/5932408047-LE2.go +++ /dev/null @@ -1,311 +0,0 @@ -package evrManifests - -import ( - "bytes" - "encoding/binary" - "fmt" -) - -type manifest_5932408047_LE2 struct { - Header struct { - PackageCount uint32 - Unk1 uint32 // ? - 524288 on latest builds - Unk2 uint64 // ? - 0 on latest builds - _ [8]byte // padding - FrameContents struct { - SectionSize uint64 // total byte length of entire section - Unk1 uint64 // ? 0 on latest builds - Unk2 uint64 // ? 4294967296 on latest builds - ElementSize uint64 // byte size of single entry - TODO: confirm, only matches up with Frame_contents entry - Count uint64 // number of elements, can differ from ElementCount? - ElementCount uint64 // number of elements - } - _ [16]byte // padding - SomeStructure struct { - SectionSize uint64 // total byte length of entire section - Unk1 uint64 // ? 0 on latest builds - Unk2 uint64 // ? 4294967296 on latest builds - ElementSize uint64 // byte size of single entry - TODO: confirm, only matches up with Frame_contents entry - Count uint64 // number of elements, can differ from ElementCount? 
- ElementCount uint64 // number of elements - } - _ [16]byte // padding - Frames struct { - SectionSize uint64 // total byte length of entire section - Unk1 uint64 // ? 0 on latest builds - Unk2 uint64 // ? 4294967296 on latest builds - ElementSize uint64 // byte size of single entry - TODO: confirm, only matches up with Frame_contents entry - Count uint64 // number of elements, can differ from ElementCount? - ElementCount uint64 // number of elements - } - } - FrameContents []struct { - TypeSymbol int64 // Probably filetype - FileSymbol int64 // Symbol for file - FileIndex uint32 // Frame[FileIndex] = file containing this entry - DataOffset uint32 // Byte offset for beginning of wanted data in given file - Size uint32 // Size of file - SomeAlignment uint32 // file divisible by this (can this just be set to 1??) - yes - } - SomeStructure []struct { - TypeSymbol int64 // seems to be the same as unk3 (for a few files on quest, at least) - FileSymbol int64 // filename symbol - Unk1 int64 // ? - game still launches when set to 0 - Unk2 int64 // ? - game still launches when set to 0 - Unk3 int64 // ? - game still launches when set to 0 - } - Frames []struct { - CurrentPackageIndex uint32 // the package index - CurrentOffset uint32 // the package byte offset - CompressedSize uint32 // compressed size of file - DecompressedSize uint32 // decompressed size of file - } -} - -func (m *manifest_5932408047_LE2) evrmFromBytes(b []byte) (EvrManifest, error) { - newManifest := EvrManifest{} - if err := m.unmarshalManifest(b); err != nil { - return newManifest, err - } - - return m.convToEvrm() -} - -func (m *manifest_5932408047_LE2) bytesFromEvrm(evrm EvrManifest) ([]byte, error) { - if err := m.evrmToOrig(evrm); err != nil { - return nil, err - } - - wbuf := bytes.NewBuffer(nil) - - var data = []any{ - m.Header, - m.FrameContents, - m.SomeStructure, - [8]byte{}, - m.Frames, - } - for _, v := range data { - err := binary.Write(wbuf, binary.LittleEndian, v) - if err != nil { - fmt.Println("binary.Write failed:", err) - } - } - - manifestBytes := wbuf.Bytes() - return manifestBytes[:len(manifestBytes)-8], nil // hack -} - -func (m *manifest_5932408047_LE2) convToEvrm() (EvrManifest, error) { - newManifest := EvrManifest{ - Header: ManifestHeader{ - PackageCount: m.Header.PackageCount, - Unk1: m.Header.Unk1, - Unk2: m.Header.Unk2, - FrameContents: m.Header.FrameContents, - SomeStructure: m.Header.SomeStructure, - Frames: m.Header.Frames, - }, - FrameContents: make([]FrameContents, len(m.FrameContents)), - SomeStructure: make([]SomeStructure, len(m.SomeStructure)), - Frames: make([]Frame, len(m.Frames)), - } - for k, v := range m.FrameContents { - newManifest.FrameContents[k] = FrameContents{ - T: v.TypeSymbol, - FileSymbol: v.FileSymbol, - FileIndex: v.FileIndex, - DataOffset: v.DataOffset, - Size: v.Size, - SomeAlignment: v.SomeAlignment, - } - } - for k, v := range m.SomeStructure { - newManifest.SomeStructure[k] = SomeStructure{ - T: v.TypeSymbol, - FileSymbol: v.FileSymbol, - Unk1: v.Unk1, - Unk2: v.Unk2, - AssetType: v.Unk3, - } - } - for k, v := range m.Frames { - newManifest.Frames[k] = Frame{ - CurrentPackageIndex: v.CurrentPackageIndex, - CurrentOffset: v.CurrentOffset, - CompressedSize: v.CompressedSize, - DecompressedSize: v.DecompressedSize, - } - } - return newManifest, nil -} - -func (m *manifest_5932408047_LE2) evrmToOrig(evrm EvrManifest) error { - m.Header = struct { - PackageCount uint32 - Unk1 uint32 - Unk2 uint64 - _ [8]byte - FrameContents struct { - SectionSize uint64 - Unk1 uint64 - 
Unk2 uint64 - ElementSize uint64 - Count uint64 - ElementCount uint64 - } - _ [16]byte - SomeStructure struct { - SectionSize uint64 - Unk1 uint64 - Unk2 uint64 - ElementSize uint64 - Count uint64 - ElementCount uint64 - } - _ [16]byte - Frames struct { - SectionSize uint64 - Unk1 uint64 - Unk2 uint64 - ElementSize uint64 - Count uint64 - ElementCount uint64 - } - }{ - PackageCount: evrm.Header.PackageCount, - Unk1: evrm.Header.Unk1, - Unk2: evrm.Header.Unk2, - FrameContents: evrm.Header.FrameContents, - SomeStructure: evrm.Header.SomeStructure, - Frames: evrm.Header.Frames, - } - - m.FrameContents = make([]struct { - TypeSymbol int64 - FileSymbol int64 - FileIndex uint32 - DataOffset uint32 - Size uint32 - SomeAlignment uint32 - }, len(evrm.FrameContents)) - - m.SomeStructure = make([]struct { - TypeSymbol int64 - FileSymbol int64 - Unk1 int64 - Unk2 int64 - Unk3 int64 - }, len(evrm.SomeStructure)) - - m.Frames = make([]struct { - CurrentPackageIndex uint32 - CurrentOffset uint32 - CompressedSize uint32 - DecompressedSize uint32 - }, len(evrm.Frames)) - - for k, v := range evrm.FrameContents { - m.FrameContents[k] = struct { - TypeSymbol int64 - FileSymbol int64 - FileIndex uint32 - DataOffset uint32 - Size uint32 - SomeAlignment uint32 - }{ - TypeSymbol: v.T, - FileSymbol: v.FileSymbol, - FileIndex: v.FileIndex, - DataOffset: v.DataOffset, - Size: v.Size, - SomeAlignment: v.SomeAlignment, - } - } - - for k, v := range evrm.SomeStructure { - m.SomeStructure[k] = struct { - TypeSymbol int64 - FileSymbol int64 - Unk1 int64 - Unk2 int64 - Unk3 int64 - }{ - TypeSymbol: v.T, - FileSymbol: v.FileSymbol, - Unk1: v.Unk1, - Unk2: v.Unk2, - Unk3: v.AssetType, - } - } - - for k, v := range evrm.Frames { - m.Frames[k] = struct { - CurrentPackageIndex uint32 - CurrentOffset uint32 - CompressedSize uint32 - DecompressedSize uint32 - }{ - CurrentPackageIndex: v.CurrentPackageIndex, - CurrentOffset: v.CurrentOffset, - CompressedSize: v.CompressedSize, - DecompressedSize: v.DecompressedSize, - } - } - - return nil -} - -func (m *manifest_5932408047_LE2) unmarshalManifest(b []byte) error { - currentOffset := binary.Size(m.Header) - buf := bytes.NewReader(b[:currentOffset]) - if err := binary.Read(buf, binary.LittleEndian, &m.Header); err != nil { - return err - } - fmt.Println("read header") - - m.FrameContents = make([]struct { - TypeSymbol int64 - FileSymbol int64 - FileIndex uint32 - DataOffset uint32 - Size uint32 - SomeAlignment uint32 - }, m.Header.FrameContents.ElementCount) - m.SomeStructure = make([]struct { - TypeSymbol int64 - FileSymbol int64 - Unk1 int64 - Unk2 int64 - Unk3 int64 - }, m.Header.SomeStructure.ElementCount) - m.Frames = make([]struct { - CurrentPackageIndex uint32 - CurrentOffset uint32 - CompressedSize uint32 - DecompressedSize uint32 - }, m.Header.Frames.ElementCount) - - buf = bytes.NewReader(b[currentOffset : currentOffset+binary.Size(m.FrameContents)]) - if err := binary.Read(buf, binary.LittleEndian, &m.FrameContents); err != nil { - return err - } - currentOffset += binary.Size(m.FrameContents) - fmt.Println("read frame contents") - - buf = bytes.NewReader(b[currentOffset : currentOffset+binary.Size(m.SomeStructure)]) - if err := binary.Read(buf, binary.LittleEndian, &m.SomeStructure); err != nil { - return err - } - currentOffset += binary.Size(m.SomeStructure) - currentOffset += 8 // skip over padding - fmt.Println("read someStructure") - - b = append(b, make([]byte, 8)...) 
// hacky way to read end of manifest as Frame - buf = bytes.NewReader(b[currentOffset : currentOffset+binary.Size(m.Frames)]) - if err := binary.Read(buf, binary.LittleEndian, &m.Frames); err != nil { - return err - } - - return nil -} diff --git a/evrManifests/manifest.go b/evrManifests/manifest.go deleted file mode 100644 index e03d129..0000000 --- a/evrManifests/manifest.go +++ /dev/null @@ -1,101 +0,0 @@ -package evrManifests - -import "errors" - -// evrManifest definition -type ManifestHeader struct { - PackageCount uint32 - Unk1 uint32 // ? - 524288 on latest builds - Unk2 uint64 // ? - 0 on latest builds - FrameContents HeaderChunk - _ [16]byte // padding - SomeStructure HeaderChunk - _ [16]byte // padding - Frames HeaderChunk -} - -type HeaderChunk struct { - SectionSize uint64 // total byte length of entire section - Unk1 uint64 // ? 0 on latest builds - Unk2 uint64 // ? 4294967296 on latest builds - ElementSize uint64 // byte size of single entry - TODO: confirm, only matches up with Frame_contents entry - Count uint64 // number of elements, can differ from ElementCount? - ElementCount uint64 // number of elements -} - -type FrameContents struct { // 32 bytes - T int64 // Probably filetype - FileSymbol int64 // Symbol for file - FileIndex uint32 // Frame[FileIndex] = file containing this entry - DataOffset uint32 // Byte offset for beginning of wanted data in given file - Size uint32 // Size of file - SomeAlignment uint32 // file divisible by this (can this just be set to 1??) - yes -} - -type SomeStructure struct { // 40 bytes - T int64 // seems to be the same as AssetType - FileSymbol int64 // filename symbol - Unk1 int64 // ? - game still launches when set to 0 - Unk2 int64 // ? - game still launches when set to 0 - AssetType int64 // ? - game still launches when set to 0 -} - -type Frame struct { // 16 bytes - CurrentPackageIndex uint32 // the package index - CurrentOffset uint32 // the package byte offset - CompressedSize uint32 // compressed size of file - DecompressedSize uint32 // decompressed size of file -} - -type EvrManifest struct { - Header ManifestHeader - FrameContents []FrameContents - SomeStructure []SomeStructure - Frames []Frame -} - -// end evrManifest definition - -// note: i have a sneaking suspicion that there's only one manifest version. 
-// the ones i've looked at so far can either be extracted by 5932408047-LE2 or 5932408047-EVR -// i think i remember being told this but i need to do more research - -// every manifest version will be defined in it's own file -// each file should have functions to convert from evrManifest to it's type, and vice versa -// each file should also have a function to read and write itself to []byte - -// this should take given manifestType and manifest []byte data, and call the appropriate function for that type, and return the result -func MarshalManifest(data []byte, manifestType string) (EvrManifest, error) { - manifest := EvrManifest{} - - // switch based on manifestType - switch manifestType { - case "5932408047-LE2": - m5932408047_LE2 := manifest_5932408047_LE2{} - return m5932408047_LE2.evrmFromBytes(data) - case "5932408047-EVR": - m5932408047_EVR := manifest_5932408047_EVR{} - return m5932408047_EVR.evrmFromBytes(data) - case "5868485946-EVR": - m5868485946_EVR := manifest_5868485946_EVR{} - return m5868485946_EVR.evrmFromBytes(data) - default: - return manifest, errors.New("unimplemented manifest type") - } -} - -func UnmarshalManifest(m EvrManifest, manifestType string) ([]byte, error) { - switch manifestType { - case "5932408047-LE2": - m5932408047_LE2 := manifest_5932408047_LE2{} - return m5932408047_LE2.bytesFromEvrm(m) - case "5932408047-EVR": - m5932408047_EVR := manifest_5932408047_EVR{} - return m5932408047_EVR.bytesFromEvrm(m) - //case "5868485946-EVR": - // m5868485946_EVR := manifest_5868485946_EVR{} - // return m5868485946_EVR.bytesFromEvrm(m) - default: - return nil, errors.New("unimplemented manifest type") - } -} diff --git a/go.mod b/go.mod index 92f93a7..558c3a2 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ -module github.com/goopsie/evrFileTools +module github.com/EchoTools/evrFileTools -go 1.21.5 +go 1.22.0 -require github.com/DataDog/zstd v1.5.5 +require github.com/DataDog/zstd v1.5.7 diff --git a/go.sum b/go.sum index a3240e9..f6b6462 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,2 @@ -github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ= -github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= +github.com/DataDog/zstd v1.5.7 h1:ybO8RBeh29qrxIhCA9E8gKY6xfONU9T6G6aP9DTKfLE= +github.com/DataDog/zstd v1.5.7/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= diff --git a/main.go b/main.go deleted file mode 100644 index e426fe1..0000000 --- a/main.go +++ /dev/null @@ -1,705 +0,0 @@ -package main - -import ( - "bytes" - "encoding/binary" - "encoding/hex" - "encoding/json" - "flag" - "fmt" - "io" - "math" - "os" - "path/filepath" - "sort" - "strconv" - "strings" - "time" - - "github.com/DataDog/zstd" - evrm "github.com/goopsie/evrFileTools/evrManifests" -) - -type CompressedHeader struct { // seems to be the same across every manifest - Magic [4]byte - HeaderSize uint32 - UncompressedSize uint64 - CompressedSize uint64 -} - -type newFile struct { // Build manifest/package from this - TypeSymbol int64 - FileSymbol int64 - ModifiedFilePath string - FileSize uint32 -} - -type fileGroup struct { - currentData bytes.Buffer - decompressedSize uint32 // hack, if this is filled in, skip compressing file in appendChunkToPackages - fileIndex uint32 - fileCount int -} - -const compressionLevel = zstd.BestSpeed - -var ( - mode string - manifestType string - packageName string - dataDir string - inputDir string - outputDir string - outputPreserveGroups bool - help bool - ignoreOutputRestrictions bool -) - -func init() { - 
flag.StringVar(&mode, "mode", "", "must be one of the following: 'extract', 'build', 'replace', 'jsonmanifest'") - flag.StringVar(&manifestType, "manifestType", "5932408047-EVR", "See readme for updated list of manifest types.") - flag.StringVar(&packageName, "packageName", "package", "File name of package, e.g. 48037dc70b0ecab2, 2b47aab238f60515, etc.") - flag.StringVar(&dataDir, "dataDir", "", "Path of directory containing 'manifests' & 'packages' in ready-at-dawn-echo-arena/_data") - flag.StringVar(&inputDir, "inputDir", "", "Path of directory containing modified files (same structure as '-mode extract' output)") - flag.StringVar(&outputDir, "outputDir", "", "Path of directory to place modified package & manifest files") - flag.BoolVar(&outputPreserveGroups, "outputPreserveGroups", false, "If true, preserve groups during '-mode extract', e.g. './output/1.../fileType/fileSymbol' instead of './output/fileType/fileSymbol'") - flag.BoolVar(&ignoreOutputRestrictions, "ignoreOutputRestrictions", false, "Allows non-empty outputDir to be used.") - flag.BoolVar(&help, "help", false, "Print usage") - flag.Parse() - - if help { - flag.Usage() - os.Exit(0) - } - - if mode == "jsonmanifest" && dataDir == "" { - fmt.Println("'-mode jsonmanifest' must be used in conjunction with '-dataDir'") - os.Exit(1) - } - - if help || len(os.Args) == 1 || mode == "" || outputDir == "" { - flag.Usage() - os.Exit(1) - } - - if mode != "extract" && mode != "build" && mode != "replace" && mode != "jsonmanifest" { - fmt.Println("mode must be one of the following: 'extract', 'build', 'replace', 'jsonmanifest'") - flag.Usage() - os.Exit(1) - } - - if mode == "build" && inputDir == "" { - fmt.Println("'-mode build' must be used in conjunction with '-inputDir'") - flag.Usage() - os.Exit(1) - } - - os.MkdirAll(outputDir, 0777) - - isOutputDirEmpty := func() bool { - f, err := os.Open(outputDir) - if err != nil { - return false - } - defer f.Close() - _, err = f.Readdir(1) - return err == io.EOF - }() - - if !isOutputDirEmpty && !ignoreOutputRestrictions { - fmt.Println("Output directory is not empty. 
Use '-ignoreOutputRestrictions' to override this restriction.") - os.Exit(1) - } -} - -func main() { - if mode == "build" { - fmt.Println("Building list of files to package...") - files, err := scanPackageFiles() - if err != nil { - fmt.Printf("failed to scan %s", inputDir) - panic(err) - } - - if err := rebuildPackageManifestCombo(files); err != nil { - fmt.Println(err) - return - } - return - } - - b, err := os.ReadFile(dataDir + "/manifests/" + packageName) - if err != nil { - fmt.Println("Failed to open manifest file, check dataDir path") - return - } - - compHeader := CompressedHeader{} - decompBytes, err := decompressZSTD(b[binary.Size(compHeader):]) - if err != nil { - fmt.Println("Failed to decompress manifest") - fmt.Println(hex.Dump(b[binary.Size(compHeader):][:256])) - fmt.Println(err) - return - } - - buf := bytes.NewReader(b) - err = binary.Read(buf, binary.LittleEndian, &compHeader) - if err != nil { - fmt.Println("failed to marshal manifest into struct") - return - } - - if len(b[binary.Size(compHeader):]) != int(compHeader.CompressedSize) || len(decompBytes) != int(compHeader.UncompressedSize) { - fmt.Println("Manifest header does not match actual size of manifest") - return - } - - manifest, err := evrm.MarshalManifest(decompBytes, manifestType) - if err != nil { - fmt.Println("Error creating manifest: ", err) - panic(err) - } - - if mode == "extract" { - if err := extractFilesFromPackage(manifest); err != nil { - fmt.Println("Error extracting files: ", err) - } - return - } else if mode == "replace" { - files, err := scanPackageFiles() - if err != nil { - fmt.Printf("failed to scan %s", inputDir) - panic(err) - } - - if err := replaceFiles(files, manifest); err != nil { - fmt.Println(err) - return - } - - } else if mode == "jsonmanifest" { - jFile, err := os.OpenFile("manifestdebug.json", os.O_RDWR|os.O_CREATE, 0777) - if err != nil { - return - } - jBytes, _ := json.MarshalIndent(manifest, "", " ") - jFile.Write(jBytes) - jFile.Close() - } -} - -func replaceFiles(fileMap [][]newFile, manifest evrm.EvrManifest) error { - modifiedFrames := make(map[uint32]bool, manifest.Header.Frames.Count) - frameContentsLookupTable := make(map[[128]byte]evrm.FrameContents, manifest.Header.FrameContents.Count) - modifiedFilesLookupTable := make(map[[128]byte]newFile, len(fileMap[0])) - for _, v := range manifest.FrameContents { - buf := [128]byte{} - binary.LittleEndian.PutUint64(buf[0:64], uint64(v.T)) - binary.LittleEndian.PutUint64(buf[64:128], uint64(v.FileSymbol)) - frameContentsLookupTable[buf] = v - } - for _, v := range fileMap[0] { - buf := [128]byte{} - binary.LittleEndian.PutUint64(buf[0:64], uint64(v.TypeSymbol)) - binary.LittleEndian.PutUint64(buf[64:128], uint64(v.FileSymbol)) - modifiedFrames[frameContentsLookupTable[buf].FileIndex] = true - modifiedFilesLookupTable[buf] = v - } - - packages := make(map[uint32]*os.File) - - for i := 0; i < int(manifest.Header.PackageCount); i++ { - pFilePath := fmt.Sprintf("%s/packages/%s_%d", dataDir, packageName, i) - f, err := os.Open(pFilePath) - if err != nil { - fmt.Printf("failed to open package %s\n", pFilePath) - return err - } - packages[uint32(i)] = f - defer f.Close() - } - - newManifest := manifest - newManifest.Frames = make([]evrm.Frame, 0) - newManifest.Header.Frames = evrm.HeaderChunk{SectionSize: 0, Unk1: 0, Unk2: 0, ElementSize: 16, Count: 0, ElementCount: 0} - - logTimer := make(chan bool, 1) - go logTimerFunc(logTimer) - - for i := 0; i < int(manifest.Header.Frames.Count); i++ { - v := manifest.Frames[i] - activeFile := 
packages[v.CurrentPackageIndex] - activeFile.Seek(int64(v.CurrentOffset), 0) - splitFile := make([]byte, v.CompressedSize) - if v.CompressedSize == 0 { - continue - } - _, err := io.ReadAtLeast(activeFile, splitFile, int(v.CompressedSize)) - if err != nil && v.DecompressedSize == 0 { - continue - } else if err != nil { - return err - } - - if !modifiedFrames[uint32(i)] { - // there are a few frames that aren't actually real, one for each package, and one at the end that i don't understand. ...frames.Count is from 1, i from 0 - if len(logTimer) > 0 { - <-logTimer - fmt.Printf("\033[2K\rWriting stock frame %d/%d", i, manifest.Header.Frames.Count-uint64(manifest.Header.PackageCount)-1) - } - appendChunkToPackages(&newManifest, fileGroup{currentData: *bytes.NewBuffer(splitFile), decompressedSize: v.DecompressedSize}) - continue - } - - // there are a few frames that aren't actually real, one for each package, and one at the end that i don't understand. ...frames.Count is from 1, i from 0 - if len(logTimer) > 0 { - <-logTimer - fmt.Printf("\033[2K\rWriting modified frame %d/%d", i, manifest.Header.Frames.Count-uint64(manifest.Header.PackageCount)-1) - } - decompFile, err := decompressZSTD(splitFile) - if err != nil { - return err - } - type fcWrapper struct { // purely to keep index and framecontents entry in sync - index int // original manifest FrameContents[index] - fc evrm.FrameContents - } - - sortedFrameContents := make([]fcWrapper, 0) - - for k, v := range manifest.FrameContents { - if v.FileIndex != uint32(i) { - continue - } - if modifiedFrames[v.FileIndex] { - sortedFrameContents = append(sortedFrameContents, fcWrapper{index: k, fc: v}) - } - } - - // sort fcWrapper by fc.DataOffset - sort.Slice(sortedFrameContents, func(i, j int) bool { - return sortedFrameContents[i].fc.DataOffset < sortedFrameContents[j].fc.DataOffset - }) - - constructedFile := bytes.NewBuffer([]byte{}) - for j := 0; j < len(sortedFrameContents); j++ { - // make sure that we aren't writing original data when we're supposed to be writing modified data - buf := [128]byte{} - binary.LittleEndian.PutUint64(buf[0:64], uint64(sortedFrameContents[j].fc.T)) - binary.LittleEndian.PutUint64(buf[64:128], uint64(sortedFrameContents[j].fc.FileSymbol)) - if modifiedFilesLookupTable[buf].FileSymbol != 0 { - // read file, modify manifest, append data to constructedFile - file, err := os.ReadFile(modifiedFilesLookupTable[buf].ModifiedFilePath) - if err != nil { - return err - } - newManifest.FrameContents[sortedFrameContents[j].index] = evrm.FrameContents{ - T: sortedFrameContents[j].fc.T, - FileSymbol: sortedFrameContents[j].fc.FileSymbol, - FileIndex: sortedFrameContents[j].fc.FileIndex, - DataOffset: uint32(constructedFile.Len()), - Size: uint32(len(file)), - SomeAlignment: sortedFrameContents[j].fc.SomeAlignment, - } - - constructedFile.Write(file) - continue - } - - newManifest.FrameContents[sortedFrameContents[j].index] = evrm.FrameContents{ - T: sortedFrameContents[j].fc.T, - FileSymbol: sortedFrameContents[j].fc.FileSymbol, - FileIndex: sortedFrameContents[j].fc.FileIndex, - DataOffset: uint32(constructedFile.Len()), - Size: sortedFrameContents[j].fc.Size, - SomeAlignment: sortedFrameContents[j].fc.SomeAlignment, - } - constructedFile.Write(decompFile[sortedFrameContents[j].fc.DataOffset : sortedFrameContents[j].fc.DataOffset+sortedFrameContents[j].fc.Size]) - } - - appendChunkToPackages(&newManifest, fileGroup{currentData: *constructedFile}) - } - - // weirddata - - for i := uint32(0); i < newManifest.Header.PackageCount; 
i++ { - packageStats, err := os.Stat(fmt.Sprintf("%s/packages/%s_%d", outputDir, packageName, i)) - if err != nil { - fmt.Println("failed to stat package for weirddata writing") - return err - } - newEntry := evrm.Frame{ - CurrentPackageIndex: i, - CurrentOffset: uint32(packageStats.Size()), - CompressedSize: 0, // TODO: find out what this actually is - DecompressedSize: 0, - } - newManifest.Frames = append(newManifest.Frames, newEntry) - newManifest.Header.Frames = incrementHeaderChunk(newManifest.Header.Frames, 1) - } - - newEntry := evrm.Frame{} // CompressedSize here is a populated field, but i don't know what it's used for - - newManifest.Frames = append(newManifest.Frames, newEntry) - newManifest.Header.Frames = incrementHeaderChunk(newManifest.Header.Frames, 1) - - // write new manifest - err := writeManifest(newManifest) - if err != nil { - return err - } - - fmt.Printf("\nfinished, modified %d files\n", len(modifiedFilesLookupTable)) - - return nil -} - -func decompressZSTD(b []byte) ([]byte, error) { - decomp, err := zstd.Decompress(nil, b) - if err != nil { - return nil, err - } - return decomp, nil -} - -func rebuildPackageManifestCombo(fileMap [][]newFile) error { - totalFileCount := 0 - for _, v := range fileMap { - totalFileCount += len(v) - } - fmt.Printf("Building from %d files\n", totalFileCount) - manifest := evrm.EvrManifest{ - Header: evrm.ManifestHeader{ - PackageCount: 1, - Unk1: 0, - Unk2: 0, - FrameContents: evrm.HeaderChunk{SectionSize: 0, Unk1: 0, Unk2: 0, ElementSize: 32, Count: 0, ElementCount: 0}, - SomeStructure: evrm.HeaderChunk{SectionSize: 0, Unk1: 0, Unk2: 0, ElementSize: 40, Count: 0, ElementCount: 0}, - Frames: evrm.HeaderChunk{SectionSize: 0, Unk1: 0, Unk2: 0, ElementSize: 16, Count: 0, ElementCount: 0}, - }, - FrameContents: make([]evrm.FrameContents, totalFileCount), - SomeStructure: make([]evrm.SomeStructure, totalFileCount), - Frames: []evrm.Frame{}, - } - - currentFileGroup := fileGroup{} - totalFilesWritten := 0 - - logTimer := make(chan bool, 1) - go logTimerFunc(logTimer) - - // preserving chunk grouping, temporary until I can figure out grouping rules/why echo crashes with specific file groupings - for _, files := range fileMap { - if currentFileGroup.currentData.Len() != 0 { - if err := appendChunkToPackages(&manifest, currentFileGroup); err != nil { - return err - } - currentFileGroup.currentData.Reset() - currentFileGroup.fileIndex++ - currentFileGroup.fileCount = 0 - } - for _, file := range files { - toWrite, err := os.ReadFile(file.ModifiedFilePath) - if err != nil { - return err - } - - frameContentsEntry := evrm.FrameContents{ - T: file.TypeSymbol, - FileSymbol: file.FileSymbol, - FileIndex: currentFileGroup.fileIndex, - DataOffset: uint32(currentFileGroup.currentData.Len()), - Size: uint32(len(toWrite)), - SomeAlignment: 1, - } - someStructureEntry := evrm.SomeStructure{ - T: file.TypeSymbol, - FileSymbol: file.FileSymbol, - Unk1: 0, - Unk2: 0, - AssetType: 0, - } - - manifest.FrameContents[totalFilesWritten] = frameContentsEntry - manifest.SomeStructure[totalFilesWritten] = someStructureEntry - manifest.Header.FrameContents = incrementHeaderChunk(manifest.Header.FrameContents, 1) - manifest.Header.SomeStructure = incrementHeaderChunk(manifest.Header.SomeStructure, 1) - - totalFilesWritten++ - currentFileGroup.fileCount++ - currentFileGroup.currentData.Write(toWrite) - } - if len(logTimer) > 0 { - <-logTimer - fmt.Printf("\033[2K\rWrote %d/%d files ", totalFilesWritten, totalFileCount) - } - } - if currentFileGroup.currentData.Len() > 
0 { - if err := appendChunkToPackages(&manifest, currentFileGroup); err != nil { - return err - } - currentFileGroup.currentData.Reset() - currentFileGroup.fileIndex++ - currentFileGroup.fileCount = 0 - } - fmt.Printf("finished writing package data, %d files in %d packages\n", totalFilesWritten, manifest.Header.PackageCount) - - // write weird data - // not necessary from what i can tell but just in case - - for i := uint32(0); i < manifest.Header.PackageCount; i++ { - packageStats, err := os.Stat(fmt.Sprintf("%s/packages/%s_%d", outputDir, packageName, i)) - if err != nil { - fmt.Println("failed to stat package for weirddata writing") - return err - } - newEntry := evrm.Frame{ - CurrentPackageIndex: i, - CurrentOffset: uint32(packageStats.Size()), - CompressedSize: 0, // TODO: find out what this actually is - DecompressedSize: 0, - } - manifest.Frames = append(manifest.Frames, newEntry) - manifest.Header.Frames = incrementHeaderChunk(manifest.Header.Frames, 1) - } - - newEntry := evrm.Frame{} // CompressedSize here is a populated field, but i don't know what it's used for - - manifest.Frames = append(manifest.Frames, newEntry) - manifest.Header.Frames = incrementHeaderChunk(manifest.Header.Frames, 1) - - // write out manifest - fmt.Println("Writing manifest") - if err := writeManifest(manifest); err != nil { - return err - } - return nil -} - -// Takes a fileGroup, appends the data contained into whichever package set is specified. -// Modifies provided manifest to match the appended data. -func appendChunkToPackages(manifest *evrm.EvrManifest, currentFileGroup fileGroup) error { - os.MkdirAll(fmt.Sprintf("%s/packages", outputDir), 0777) - - cEntry := evrm.Frame{} - activePackageNum := uint32(0) - if len(manifest.Frames) > 0 { - cEntry = manifest.Frames[len(manifest.Frames)-1] - activePackageNum = cEntry.CurrentPackageIndex - } - var compFile []byte - var err error - if currentFileGroup.decompressedSize != 0 { - compFile = currentFileGroup.currentData.Bytes() - } else { - compFile, err = zstd.CompressLevel(nil, currentFileGroup.currentData.Bytes(), compressionLevel) - if err != nil { - return err - } - } - - currentPackagePath := fmt.Sprintf("%s/packages/%s_%d", outputDir, packageName, activePackageNum) - - if int(cEntry.CurrentOffset+cEntry.CompressedSize)+len(compFile) > math.MaxInt32 { - activePackageNum++ - manifest.Header.PackageCount = activePackageNum + 1 - currentPackagePath = fmt.Sprintf("%s/packages/%s_%d", outputDir, packageName, activePackageNum) - } - - f, err := os.OpenFile(currentPackagePath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0777) - if err != nil { - return err - } - defer f.Close() - _, err = f.Write(compFile) - if err != nil { - return err - } - - newEntry := evrm.Frame{ - CurrentPackageIndex: activePackageNum, - CurrentOffset: cEntry.CurrentOffset + cEntry.CompressedSize, - CompressedSize: uint32(len(compFile)), - DecompressedSize: uint32(currentFileGroup.currentData.Len()), - } - if newEntry.CurrentOffset+newEntry.CompressedSize > math.MaxInt32 { - newEntry.CurrentOffset = 0 - } - if currentFileGroup.decompressedSize != 0 { - newEntry.DecompressedSize = currentFileGroup.decompressedSize - } - - manifest.Frames = append(manifest.Frames, newEntry) - manifest.Header.Frames = incrementHeaderChunk(manifest.Header.Frames, 1) - - return nil -} - -func scanPackageFiles() ([][]newFile, error) { - // there has to be a better way to do this - filestats, _ := os.ReadDir(inputDir) - files := make([][]newFile, len(filestats)) - err := filepath.Walk(inputDir, func(path string, info 
os.FileInfo, err error) error { - if err != nil { - fmt.Println(err) - return err - } - if info.IsDir() { - return nil - } - newFile := newFile{} - newFile.ModifiedFilePath = path - newFile.FileSize = uint32(info.Size()) - foo := strings.Split(filepath.ToSlash(path), "/") - dir1 := foo[len(foo)-3] - dir2 := foo[len(foo)-2] - dir3 := foo[len(foo)-1] - chunkNum, err := strconv.ParseInt(dir1, 10, 64) - if err != nil { - return err - } - uintTypeSymbol, err := strconv.ParseUint(dir2[2:], 16, 64) - if err != nil { - return err - } - newFile.TypeSymbol = int64(uintTypeSymbol) - uintFileSymbol, err := strconv.ParseUint(dir3[2:], 16, 64) - if err != nil { - return err - } - newFile.FileSymbol = int64(uintFileSymbol) - - files[chunkNum] = append(files[chunkNum], newFile) - return nil - }) - - if err != nil { - return nil, err - } - return files, nil -} - -func extractFilesFromPackage(fullManifest evrm.EvrManifest) error { - packages := make(map[uint32]*os.File) - totalFilesWritten := 0 - - for i := 0; i < int(fullManifest.Header.PackageCount); i++ { - pFilePath := fmt.Sprintf("%s/packages/%s_%d", dataDir, packageName, i) - f, err := os.Open(pFilePath) - if err != nil { - fmt.Printf("failed to open package %s\n", pFilePath) - return err - } - packages[uint32(i)] = f - defer f.Close() - } - - logTimer := make(chan bool, 1) - go logTimerFunc(logTimer) - - for k, v := range fullManifest.Frames { - activeFile := packages[v.CurrentPackageIndex] - activeFile.Seek(int64(v.CurrentOffset), 0) - - splitFile := make([]byte, v.CompressedSize) - if v.CompressedSize == 0 { - continue - } - _, err := io.ReadAtLeast(activeFile, splitFile, int(v.CompressedSize)) - - if err != nil && v.DecompressedSize == 0 { - continue - } else if err != nil { - fmt.Println("failed to read file, check input") - return err - } - - if len(logTimer) > 0 { - <-logTimer - fmt.Printf("\033[2K\rDecompressing and extracting files contained in file index %d, %d/%d", k, totalFilesWritten, fullManifest.Header.FrameContents.Count) - } - decompBytes, err := decompressZSTD(splitFile) - if err != nil { - return err - } - - if len(decompBytes) != int(fullManifest.Frames[k].DecompressedSize) { - return fmt.Errorf("size of decompressed data does not match manifest for file %d, is %d but should be %d", k, len(decompBytes), fullManifest.Frames[k].DecompressedSize) - } - - for _, v2 := range fullManifest.FrameContents { - if v2.FileIndex != uint32(k) { - continue - } - fileName := "0x" + strconv.FormatUint(uint64(v2.FileSymbol), 16) - fileType := "0x" + strconv.FormatUint(uint64(v2.T), 16) - basePath := fmt.Sprintf("%s/%s", outputDir, fileType) - if outputPreserveGroups { - basePath = fmt.Sprintf("%s/%d/%s", outputDir, v2.FileIndex, fileType) - } - os.MkdirAll(basePath, 0777) - file, err := os.OpenFile(fmt.Sprintf("%s/%s", basePath, fileName), os.O_RDWR|os.O_CREATE, 0777) - if err != nil { - fmt.Println(err) - continue - } - - file.Write(decompBytes[v2.DataOffset : v2.DataOffset+v2.Size]) - file.Close() - totalFilesWritten++ - } - } - return nil -} - -func incrementHeaderChunk(chunk evrm.HeaderChunk, amount int) evrm.HeaderChunk { - for i := 0; i < amount; i++ { - chunk.Count++ - chunk.ElementCount++ - chunk.SectionSize += uint64(chunk.ElementSize) - } - return chunk -} - -func writeManifest(manifest evrm.EvrManifest) error { - os.MkdirAll(outputDir+"/manifests/", 0777) - file, err := os.OpenFile(outputDir+"/manifests/"+packageName, os.O_RDWR|os.O_CREATE, 0777) - if err != nil { - return err - } - manifestBytes, err := evrm.UnmarshalManifest(manifest, 
manifestType) - if err != nil { - return err - } - file.Write(compressManifest(manifestBytes)) - file.Close() - return nil -} - -func compressManifest(b []byte) []byte { - zstdBytes, err := zstd.CompressLevel(nil, b, compressionLevel) - if err != nil { - fmt.Println("error compressing manifest") - panic(err) - } - - cHeader := CompressedHeader{ - [4]byte{0x5A, 0x53, 0x54, 0x44}, // Z S T D - uint32(binary.Size(CompressedHeader{})), - uint64(len(b)), - uint64(len(zstdBytes)), - } - - fBuf := bytes.NewBuffer(nil) - binary.Write(fBuf, binary.LittleEndian, cHeader) - fBuf.Write(zstdBytes) - return fBuf.Bytes() -} - -func logTimerFunc(logTimer chan bool) { - for { - time.Sleep(1 * time.Second) - logTimer <- true - } -} diff --git a/pkg/archive/archive_test.go b/pkg/archive/archive_test.go new file mode 100644 index 0000000..3a68de6 --- /dev/null +++ b/pkg/archive/archive_test.go @@ -0,0 +1,119 @@ +package archive + +import ( + "bytes" + "testing" +) + +func TestHeader(t *testing.T) { + t.Run("MarshalUnmarshal", func(t *testing.T) { + original := &Header{ + Magic: Magic, + HeaderLength: 16, + Length: 1024, + CompressedLength: 512, + } + + data, err := original.MarshalBinary() + if err != nil { + t.Fatalf("marshal: %v", err) + } + + decoded := &Header{} + if err := decoded.UnmarshalBinary(data); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + if *decoded != *original { + t.Errorf("mismatch: got %+v, want %+v", decoded, original) + } + }) + + t.Run("InvalidMagic", func(t *testing.T) { + h := &Header{ + Magic: [4]byte{0x00, 0x00, 0x00, 0x00}, + HeaderLength: 16, + Length: 1024, + CompressedLength: 512, + } + if err := h.Validate(); err == nil { + t.Error("expected error for invalid magic") + } + }) + + t.Run("ZeroLength", func(t *testing.T) { + h := &Header{ + Magic: Magic, + HeaderLength: 16, + Length: 0, + CompressedLength: 512, + } + if err := h.Validate(); err == nil { + t.Error("expected error for zero length") + } + }) +} + +func TestReadWrite(t *testing.T) { + original := []byte("Hello, World! 
This is test data for compression.") + + t.Run("EncodeDecodeRoundTrip", func(t *testing.T) { + var buf bytes.Buffer + + ws := &seekableBuffer{Buffer: &buf} + + if err := Encode(ws, original); err != nil { + t.Fatalf("encode: %v", err) + } + + rs := bytes.NewReader(buf.Bytes()) + decoded, err := ReadAll(rs) + if err != nil { + t.Fatalf("decode: %v", err) + } + + if !bytes.Equal(decoded, original) { + t.Errorf("data mismatch: got %q, want %q", decoded, original) + } + }) +} + +type seekableBuffer struct { + *bytes.Buffer + pos int64 +} + +func (s *seekableBuffer) Seek(offset int64, whence int) (int64, error) { + var newPos int64 + switch whence { + case 0: + newPos = offset + case 1: + newPos = s.pos + offset + case 2: + newPos = int64(s.Buffer.Len()) + offset + } + s.pos = newPos + return newPos, nil +} + +func (s *seekableBuffer) Write(p []byte) (n int, err error) { + for int64(s.Buffer.Len()) < s.pos { + s.Buffer.WriteByte(0) + } + if s.pos < int64(s.Buffer.Len()) { + data := s.Buffer.Bytes() + n = copy(data[s.pos:], p) + if n < len(p) { + m, err := s.Buffer.Write(p[n:]) + n += m + if err != nil { + return n, err + } + } + } else { + n, err = s.Buffer.Write(p) + } + s.pos += int64(n) + return n, err +} diff --git a/pkg/archive/benchmark_test.go b/pkg/archive/benchmark_test.go new file mode 100644 index 0000000..b2a082d --- /dev/null +++ b/pkg/archive/benchmark_test.go @@ -0,0 +1,188 @@ +package archive + +import ( + "bytes" + "testing" + + "github.com/DataDog/zstd" +) + +// BenchmarkCompression benchmarks compression with different configurations. +func BenchmarkCompression(b *testing.B) { + data := make([]byte, 256*1024) // 256KB + for i := range data { + data[i] = byte(i % 256) + } + + b.Run("Compress_BestSpeed", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := zstd.CompressLevel(nil, data, zstd.BestSpeed) + if err != nil { + b.Fatal(err) + } + } + }) + + b.Run("Compress_Default", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := zstd.CompressLevel(nil, data, zstd.DefaultCompression) + if err != nil { + b.Fatal(err) + } + } + }) +} + +// BenchmarkDecompression benchmarks decompression with context reuse. +func BenchmarkDecompression(b *testing.B) { + original := make([]byte, 64*1024) // 64KB + for i := range original { + original[i] = byte(i % 256) + } + + compressed, _ := zstd.Compress(nil, original) + + b.Run("WithoutContext", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := zstd.Decompress(nil, compressed) + if err != nil { + b.Fatal(err) + } + } + }) + + b.Run("WithContext", func(b *testing.B) { + ctx := zstd.NewCtx() + dst := make([]byte, len(original)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := ctx.Decompress(dst, compressed) + if err != nil { + b.Fatal(err) + } + } + }) +} + +// BenchmarkHeader benchmarks header operations. 
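The `WithContext` case above is the pattern the new extractor leans on: allocate one `zstd.Ctx` and one scratch buffer, then reuse them for every frame instead of allocating per call. A minimal sketch of that reuse loop, restricted to the DataDog `zstd` calls these benchmarks already exercise (the frame payloads are placeholders):

```go
package main

import (
	"fmt"
	"log"

	"github.com/DataDog/zstd"
)

func main() {
	frames := [][]byte{
		[]byte("frame one payload"),
		[]byte("frame two payload"),
	}

	// One decompression context and one scratch buffer, reused across the
	// whole loop -- the pattern the WithContext benchmark above measures.
	ctx := zstd.NewCtx()
	scratch := make([]byte, 64*1024)

	for i, frame := range frames {
		compressed, err := zstd.CompressLevel(nil, frame, zstd.BestSpeed)
		if err != nil {
			log.Fatal(err)
		}
		out, err := ctx.Decompress(scratch[:len(frame)], compressed)
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("frame %d: %q\n", i, out)
	}
}
```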
+func BenchmarkHeader(b *testing.B) { + header := &Header{ + Magic: Magic, + HeaderLength: 16, + Length: 1024 * 1024, + CompressedLength: 512 * 1024, + } + + b.Run("Marshal", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := header.MarshalBinary() + if err != nil { + b.Fatal(err) + } + } + }) + + b.Run("EncodeTo", func(b *testing.B) { + buf := make([]byte, HeaderSize) + b.ResetTimer() + for i := 0; i < b.N; i++ { + header.EncodeTo(buf) + } + }) + + data, _ := header.MarshalBinary() + + b.Run("Unmarshal", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + h := &Header{} + err := h.UnmarshalBinary(data) + if err != nil { + b.Fatal(err) + } + } + }) + + b.Run("DecodeFrom", func(b *testing.B) { + h := &Header{} + b.ResetTimer() + for i := 0; i < b.N; i++ { + h.DecodeFrom(data) + } + }) +} + +// BenchmarkEncodeDecode benchmarks full encode/decode cycle. +func BenchmarkEncodeDecode(b *testing.B) { + data := make([]byte, 1024*1024) // 1MB + for i := range data { + data[i] = byte(i % 256) + } + + b.Run("Encode", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + var buf bytes.Buffer + ws := &benchSeekableBuffer{Buffer: &buf} + if err := Encode(ws, data); err != nil { + b.Fatal(err) + } + } + }) + + // Pre-encode for decode benchmark + var buf bytes.Buffer + ws := &benchSeekableBuffer{Buffer: &buf} + _ = Encode(ws, data) + encoded := buf.Bytes() + + b.Run("Decode", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + rs := bytes.NewReader(encoded) + _, err := ReadAll(rs) + if err != nil { + b.Fatal(err) + } + } + }) +} + +type benchSeekableBuffer struct { + *bytes.Buffer + pos int64 +} + +func (s *benchSeekableBuffer) Seek(offset int64, whence int) (int64, error) { + switch whence { + case 0: + s.pos = offset + case 1: + s.pos += offset + case 2: + s.pos = int64(s.Buffer.Len()) + offset + } + return s.pos, nil +} + +func (s *benchSeekableBuffer) Write(p []byte) (n int, err error) { + for int64(s.Buffer.Len()) < s.pos { + s.Buffer.WriteByte(0) + } + if s.pos < int64(s.Buffer.Len()) { + data := s.Buffer.Bytes() + n = copy(data[s.pos:], p) + if n < len(p) { + m, _ := s.Buffer.Write(p[n:]) + n += m + } + } else { + n, err = s.Buffer.Write(p) + } + s.pos += int64(n) + return n, err +} diff --git a/pkg/archive/header.go b/pkg/archive/header.go new file mode 100644 index 0000000..721fd82 --- /dev/null +++ b/pkg/archive/header.go @@ -0,0 +1,89 @@ +// Package archive provides types and functions for working with ZSTD compressed archives. +package archive + +import ( + "encoding/binary" + "fmt" +) + +// Magic bytes identifying a ZSTD archive header. +var Magic = [4]byte{0x5a, 0x53, 0x54, 0x44} // "ZSTD" + +// HeaderSize is the fixed binary size of an archive header. +const HeaderSize = 24 // 4 + 4 + 8 + 8 bytes + +// Header represents the header of a compressed archive file. +type Header struct { + Magic [4]byte + HeaderLength uint32 + Length uint64 // Uncompressed size + CompressedLength uint64 // Compressed size +} + +// Size returns the binary size of the header. +func (h *Header) Size() int { + return HeaderSize +} + +// Validate checks the header for validity. 
+func (h *Header) Validate() error { + if h.Magic != Magic { + return fmt.Errorf("invalid magic: expected %x, got %x", Magic, h.Magic) + } + if h.HeaderLength != 16 { + return fmt.Errorf("invalid header length: expected 16, got %d", h.HeaderLength) + } + if h.Length == 0 { + return fmt.Errorf("uncompressed size is zero") + } + if h.CompressedLength == 0 { + return fmt.Errorf("compressed size is zero") + } + return nil +} + +// MarshalBinary encodes the header to binary format. +// Uses direct encoding to avoid allocations. +func (h *Header) MarshalBinary() ([]byte, error) { + buf := make([]byte, HeaderSize) + h.EncodeTo(buf) + return buf, nil +} + +// EncodeTo writes the header to the given buffer. +// The buffer must be at least HeaderSize bytes. +func (h *Header) EncodeTo(buf []byte) { + copy(buf[0:4], h.Magic[:]) + binary.LittleEndian.PutUint32(buf[4:8], h.HeaderLength) + binary.LittleEndian.PutUint64(buf[8:16], h.Length) + binary.LittleEndian.PutUint64(buf[16:24], h.CompressedLength) +} + +// UnmarshalBinary decodes the header from binary format. +// Uses direct decoding to avoid allocations. +func (h *Header) UnmarshalBinary(data []byte) error { + if len(data) < HeaderSize { + return fmt.Errorf("header data too short: need %d, got %d", HeaderSize, len(data)) + } + h.DecodeFrom(data) + return h.Validate() +} + +// DecodeFrom reads the header from the given buffer. +// Does not validate - use UnmarshalBinary for validation. +func (h *Header) DecodeFrom(data []byte) { + copy(h.Magic[:], data[0:4]) + h.HeaderLength = binary.LittleEndian.Uint32(data[4:8]) + h.Length = binary.LittleEndian.Uint64(data[8:16]) + h.CompressedLength = binary.LittleEndian.Uint64(data[16:24]) +} + +// NewHeader creates a new archive header with the given sizes. +func NewHeader(uncompressedSize, compressedSize uint64) *Header { + return &Header{ + Magic: Magic, + HeaderLength: 16, + Length: uncompressedSize, + CompressedLength: compressedSize, + } +} diff --git a/pkg/archive/reader.go b/pkg/archive/reader.go new file mode 100644 index 0000000..663185e --- /dev/null +++ b/pkg/archive/reader.go @@ -0,0 +1,81 @@ +package archive + +import ( + "fmt" + "io" + + "github.com/DataDog/zstd" +) + +const ( + // DefaultCompressionLevel is the default compression level for encoding. + DefaultCompressionLevel = zstd.BestSpeed +) + +// Reader wraps an io.ReadSeeker to provide decompression of archive data. +type Reader struct { + header *Header + zReader io.ReadCloser + headerBuf [HeaderSize]byte // Reusable buffer for header decoding +} + +// NewReader creates a new archive reader from the given source. +// It reads and validates the header, then returns a reader for the decompressed content. +func NewReader(r io.ReadSeeker) (*Reader, error) { + reader := &Reader{ + header: &Header{}, + } + + if _, err := r.Read(reader.headerBuf[:]); err != nil { + return nil, fmt.Errorf("read header: %w", err) + } + + if err := reader.header.UnmarshalBinary(reader.headerBuf[:]); err != nil { + return nil, fmt.Errorf("parse header: %w", err) + } + + reader.zReader = zstd.NewReader(r) + return reader, nil +} + +// Header returns the archive header. +func (r *Reader) Header() *Header { + return r.header +} + +// Read reads decompressed data into p. +func (r *Reader) Read(p []byte) (n int, err error) { + return r.zReader.Read(p) +} + +// Close closes the reader. +func (r *Reader) Close() error { + return r.zReader.Close() +} + +// Length returns the uncompressed data length. 
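For a concrete picture of the 24-byte layout `header.go` defines (4-byte `ZSTD` magic, `uint32` header length of 16, then the uncompressed and compressed lengths, all little-endian), here is a short round-trip sketch using the package's own helpers; the sizes are arbitrary:

```go
package main

import (
	"fmt"
	"log"

	"github.com/EchoTools/evrFileTools/pkg/archive"
)

func main() {
	// 1024 bytes of payload compressed down to 512 bytes (arbitrary numbers).
	h := archive.NewHeader(1024, 512)

	buf, err := h.MarshalBinary()
	if err != nil {
		log.Fatal(err)
	}
	// Magic "ZSTD", then HeaderLength=16 little-endian, then the two sizes.
	fmt.Printf("% x\n", buf) // 5a 53 54 44 10 00 00 00 ...

	var decoded archive.Header
	if err := decoded.UnmarshalBinary(buf); err != nil {
		log.Fatal(err) // UnmarshalBinary also runs Validate
	}
	fmt.Println(decoded.Length, decoded.CompressedLength) // 1024 512
}
```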
+func (r *Reader) Length() int { + return int(r.header.Length) +} + +// CompressedLength returns the compressed data length. +func (r *Reader) CompressedLength() int { + return int(r.header.CompressedLength) +} + +// ReadAll reads the entire decompressed content from an archive. +func ReadAll(r io.ReadSeeker) ([]byte, error) { + reader, err := NewReader(r) + if err != nil { + return nil, err + } + defer reader.Close() + + data := make([]byte, reader.Length()) + _, err = io.ReadFull(reader, data) + if err != nil { + return nil, fmt.Errorf("read content: %w", err) + } + + return data, nil +} diff --git a/pkg/archive/writer.go b/pkg/archive/writer.go new file mode 100644 index 0000000..3f942ad --- /dev/null +++ b/pkg/archive/writer.go @@ -0,0 +1,107 @@ +package archive + +import ( + "fmt" + "io" + + "github.com/DataDog/zstd" +) + +// Writer wraps an io.WriteSeeker to provide compression of archive data. +type Writer struct { + dst io.WriteSeeker + zWriter *zstd.Writer + header *Header + level int + headerBuf [HeaderSize]byte // Reusable buffer for header encoding +} + +// WriterOption configures a Writer. +type WriterOption func(*Writer) + +// WithCompressionLevel sets the compression level for the writer. +func WithCompressionLevel(level int) WriterOption { + return func(w *Writer) { + w.level = level + } +} + +// NewWriter creates a new archive writer that writes to dst. +// The uncompressedSize is the expected size of the uncompressed data. +func NewWriter(dst io.WriteSeeker, uncompressedSize uint64, opts ...WriterOption) (*Writer, error) { + w := &Writer{ + dst: dst, + level: DefaultCompressionLevel, + header: &Header{ + Magic: Magic, + HeaderLength: 16, + Length: uncompressedSize, + CompressedLength: 0, // Will be updated after writing + }, + } + + for _, opt := range opts { + opt(w) + } + + // Write placeholder header using reusable buffer + w.header.EncodeTo(w.headerBuf[:]) + if _, err := dst.Write(w.headerBuf[:]); err != nil { + return nil, fmt.Errorf("write header: %w", err) + } + + w.zWriter = zstd.NewWriterLevel(dst, w.level) + return w, nil +} + +// Write writes compressed data. +func (w *Writer) Write(p []byte) (n int, err error) { + return w.zWriter.Write(p) +} + +// Close finalizes the archive by updating the header with the compressed size. +func (w *Writer) Close() error { + if err := w.zWriter.Close(); err != nil { + return fmt.Errorf("close compressor: %w", err) + } + + // Get current position to determine compressed size + pos, err := w.dst.Seek(0, io.SeekCurrent) + if err != nil { + return fmt.Errorf("get position: %w", err) + } + + // Update header with actual compressed size + w.header.CompressedLength = uint64(pos) - uint64(w.header.Size()) + + // Seek to beginning and rewrite header + if _, err := w.dst.Seek(0, io.SeekStart); err != nil { + return fmt.Errorf("seek to start: %w", err) + } + + w.header.EncodeTo(w.headerBuf[:]) + if _, err := w.dst.Write(w.headerBuf[:]); err != nil { + return fmt.Errorf("write header: %w", err) + } + + // Seek back to end + if _, err := w.dst.Seek(pos, io.SeekStart); err != nil { + return fmt.Errorf("seek to end: %w", err) + } + + return nil +} + +// Encode compresses data and writes it as an archive to dst. +func Encode(dst io.WriteSeeker, data []byte, opts ...WriterOption) error { + w, err := NewWriter(dst, uint64(len(data)), opts...) 
+ if err != nil { + return err + } + + if _, err := w.Write(data); err != nil { + return fmt.Errorf("write data: %w", err) + } + + return w.Close() +} diff --git a/pkg/manifest/benchmark_test.go b/pkg/manifest/benchmark_test.go new file mode 100644 index 0000000..7758f38 --- /dev/null +++ b/pkg/manifest/benchmark_test.go @@ -0,0 +1,192 @@ +package manifest + +import ( + "fmt" + "strconv" + "testing" +) + +// BenchmarkManifest benchmarks manifest operations. +func BenchmarkManifest(b *testing.B) { + // Create a realistic manifest + manifest := &Manifest{ + Header: Header{ + PackageCount: 3, + FrameContents: Section{ + ElementSize: 32, + }, + Metadata: Section{ + ElementSize: 40, + }, + Frames: Section{ + ElementSize: 16, + }, + }, + FrameContents: make([]FrameContent, 10000), + Metadata: make([]FileMetadata, 10000), + Frames: make([]Frame, 500), + } + + // Fill with test data + for i := range manifest.FrameContents { + manifest.FrameContents[i] = FrameContent{ + TypeSymbol: int64(i % 100), + FileSymbol: int64(i), + FrameIndex: uint32(i % 500), + DataOffset: uint32(i * 1024), + Size: 1024, + Alignment: 1, + } + } + + for i := range manifest.Metadata { + manifest.Metadata[i] = FileMetadata{ + TypeSymbol: int64(i % 100), + FileSymbol: int64(i), + } + } + + for i := range manifest.Frames { + manifest.Frames[i] = Frame{ + PackageIndex: uint32(i % 3), + Offset: uint32(i * 65536), + CompressedSize: 32768, + Length: 65536, + } + } + + // Update header sections + manifest.Header.FrameContents.Count = uint64(len(manifest.FrameContents)) + manifest.Header.FrameContents.ElementCount = uint64(len(manifest.FrameContents)) + manifest.Header.FrameContents.Length = uint64(len(manifest.FrameContents)) * 32 + + manifest.Header.Metadata.Count = uint64(len(manifest.Metadata)) + manifest.Header.Metadata.ElementCount = uint64(len(manifest.Metadata)) + manifest.Header.Metadata.Length = uint64(len(manifest.Metadata)) * 40 + + manifest.Header.Frames.Count = uint64(len(manifest.Frames)) + manifest.Header.Frames.ElementCount = uint64(len(manifest.Frames)) + manifest.Header.Frames.Length = uint64(len(manifest.Frames)) * 16 + + b.Run("Marshal", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := manifest.MarshalBinary() + if err != nil { + b.Fatal(err) + } + } + }) + + data, _ := manifest.MarshalBinary() + + b.Run("Unmarshal", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + m := &Manifest{} + err := m.UnmarshalBinary(data) + if err != nil { + b.Fatal(err) + } + } + }) +} + +// BenchmarkLookupStrategies benchmarks different lookup key strategies. +func BenchmarkLookupStrategies(b *testing.B) { + const entries = 10000 + + // Strategy 1: Struct key (recommended) + type symbolKey struct { + typeSymbol int64 + fileSymbol int64 + } + + b.Run("StructKey", func(b *testing.B) { + table := make(map[symbolKey]int, entries) + for i := 0; i < entries; i++ { + table[symbolKey{int64(i), int64(i * 2)}] = i + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + idx := i % entries + _ = table[symbolKey{int64(idx), int64(idx * 2)}] + } + }) + + // Strategy 2: Combined int64 key + b.Run("CombinedInt64Key", func(b *testing.B) { + table := make(map[uint64]int, entries) + for i := 0; i < entries; i++ { + key := uint64(i)<<32 | uint64(i*2)&0xFFFFFFFF + table[key] = i + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + idx := i % entries + key := uint64(idx)<<32 | uint64(idx*2)&0xFFFFFFFF + _ = table[key] + } + }) +} + +// BenchmarkFrameIndex benchmarks frame content lookup strategies. 
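Tying the writer and reader together: `Encode` writes a placeholder header, streams the zstd frame, then seeks back to patch in the final compressed length, and `ReadAll` reverses the process. A small sketch against a temporary file (the payload and file name are placeholders; the level option is optional and defaults to `zstd.BestSpeed`):

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"os"

	"github.com/EchoTools/evrFileTools/pkg/archive"
)

func main() {
	payload := []byte("hello EVR archive")

	f, err := os.CreateTemp("", "demo-*.zst")
	if err != nil {
		log.Fatal(err)
	}
	defer os.Remove(f.Name())
	defer f.Close()

	// Compress and frame the payload.
	if err := archive.Encode(f, payload, archive.WithCompressionLevel(archive.DefaultCompressionLevel)); err != nil {
		log.Fatal(err)
	}

	// Rewind and decode the whole archive back into memory.
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		log.Fatal(err)
	}
	out, err := archive.ReadAll(f)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(bytes.Equal(out, payload)) // true
}
```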
+func BenchmarkFrameIndex(b *testing.B) { + // Simulate 10000 files across 500 frames + frameContents := make([]FrameContent, 10000) + for i := range frameContents { + frameContents[i] = FrameContent{ + TypeSymbol: int64(i % 100), + FileSymbol: int64(i), + FrameIndex: uint32(i % 500), + } + } + + b.Run("LinearScan", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + frameIdx := uint32(i % 500) + count := 0 + for _, fc := range frameContents { + if fc.FrameIndex == frameIdx { + count++ + } + } + } + }) + + b.Run("PrebuiltIndex", func(b *testing.B) { + // Build index once + frameIndex := make(map[uint32][]FrameContent) + for _, fc := range frameContents { + frameIndex[fc.FrameIndex] = append(frameIndex[fc.FrameIndex], fc) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + frameIdx := uint32(i % 500) + _ = frameIndex[frameIdx] + } + }) +} + +// BenchmarkHexFormatting benchmarks hex string formatting strategies. +func BenchmarkHexFormatting(b *testing.B) { + symbols := make([]int64, 1000) + for i := range symbols { + symbols[i] = int64(i * 12345678) + } + + b.Run("Sprintf", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = fmt.Sprintf("%x", symbols[i%len(symbols)]) + } + }) + + b.Run("FormatInt", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = strconv.FormatInt(symbols[i%len(symbols)], 16) + } + }) +} diff --git a/pkg/manifest/builder.go b/pkg/manifest/builder.go new file mode 100644 index 0000000..20c523c --- /dev/null +++ b/pkg/manifest/builder.go @@ -0,0 +1,205 @@ +package manifest + +import ( + "bytes" + "fmt" + "math" + "os" + "path/filepath" + + "github.com/DataDog/zstd" +) + +const ( + // DefaultCompressionLevel is the compression level used for building packages. + DefaultCompressionLevel = zstd.BestSpeed + + // MaxPackageSize is the maximum size of a single package file. + MaxPackageSize = math.MaxInt32 +) + +// Builder constructs packages and manifests from a set of files. +type Builder struct { + outputDir string + packageName string + compressionLevel int +} + +// NewBuilder creates a new package builder. +func NewBuilder(outputDir, packageName string) *Builder { + return &Builder{ + outputDir: outputDir, + packageName: packageName, + compressionLevel: DefaultCompressionLevel, + } +} + +// SetCompressionLevel sets the compression level for the builder. +func (b *Builder) SetCompressionLevel(level int) { + b.compressionLevel = level +} + +// Build creates a package and manifest from the given file groups. 
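The struct-key strategy these benchmarks recommend maps naturally onto a (type symbol, file symbol) index over `FrameContents`. The `symbolKey` and `buildIndex` helpers below are hypothetical illustrations of that strategy, not part of the package API:

```go
package main

import (
	"fmt"

	"github.com/EchoTools/evrFileTools/pkg/manifest"
)

// symbolKey follows the struct-key strategy from BenchmarkLookupStrategies.
type symbolKey struct {
	typeSymbol int64
	fileSymbol int64
}

// buildIndex is a hypothetical helper: one map lookup per file instead of
// scanning FrameContents linearly.
func buildIndex(m *manifest.Manifest) map[symbolKey]manifest.FrameContent {
	idx := make(map[symbolKey]manifest.FrameContent, len(m.FrameContents))
	for _, fc := range m.FrameContents {
		idx[symbolKey{fc.TypeSymbol, fc.FileSymbol}] = fc
	}
	return idx
}

func main() {
	m := &manifest.Manifest{
		FrameContents: []manifest.FrameContent{
			{TypeSymbol: 100, FileSymbol: 200, FrameIndex: 0, Size: 1024},
		},
	}
	fc, ok := buildIndex(m)[symbolKey{100, 200}]
	fmt.Println(ok, fc.Size) // true 1024
}
```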
+func (b *Builder) Build(fileGroups [][]ScannedFile) (*Manifest, error) { + totalFiles := 0 + for _, group := range fileGroups { + totalFiles += len(group) + } + + manifest := &Manifest{ + Header: Header{ + PackageCount: 1, + FrameContents: Section{ + ElementSize: 32, + }, + Metadata: Section{ + ElementSize: 40, + }, + Frames: Section{ + ElementSize: 16, + }, + }, + FrameContents: make([]FrameContent, 0, totalFiles), + Metadata: make([]FileMetadata, 0, totalFiles), + Frames: make([]Frame, 0), + } + + packagesDir := filepath.Join(b.outputDir, "packages") + if err := os.MkdirAll(packagesDir, 0755); err != nil { + return nil, fmt.Errorf("create packages dir: %w", err) + } + + var ( + currentFrame bytes.Buffer + currentOffset uint32 + frameIndex uint32 + ) + + for _, group := range fileGroups { + if len(group) == 0 { + continue + } + + // Write previous frame if not empty + if currentFrame.Len() > 0 { + if err := b.writeFrame(manifest, ¤tFrame, frameIndex); err != nil { + return nil, err + } + frameIndex++ + currentFrame.Reset() + currentOffset = 0 + } + + for _, file := range group { + data, err := os.ReadFile(file.Path) + if err != nil { + return nil, fmt.Errorf("read file %s: %w", file.Path, err) + } + + manifest.FrameContents = append(manifest.FrameContents, FrameContent{ + TypeSymbol: file.TypeSymbol, + FileSymbol: file.FileSymbol, + FrameIndex: frameIndex, + DataOffset: currentOffset, + Size: uint32(len(data)), + Alignment: 1, + }) + + manifest.Metadata = append(manifest.Metadata, FileMetadata{ + TypeSymbol: file.TypeSymbol, + FileSymbol: file.FileSymbol, + }) + + currentFrame.Write(data) + currentOffset += uint32(len(data)) + } + + b.incrementSection(&manifest.Header.FrameContents, len(group)) + b.incrementSection(&manifest.Header.Metadata, len(group)) + } + + // Write final frame + if currentFrame.Len() > 0 { + if err := b.writeFrame(manifest, ¤tFrame, frameIndex); err != nil { + return nil, err + } + } + + // Add package terminator frames + b.addTerminatorFrames(manifest) + + return manifest, nil +} + +func (b *Builder) writeFrame(manifest *Manifest, data *bytes.Buffer, index uint32) error { + compressed, err := zstd.CompressLevel(nil, data.Bytes(), b.compressionLevel) + if err != nil { + return fmt.Errorf("compress frame %d: %w", index, err) + } + + packageIndex := manifest.Header.PackageCount - 1 + packagePath := filepath.Join(b.outputDir, "packages", fmt.Sprintf("%s_%d", b.packageName, packageIndex)) + + // Check if we need a new package file + var offset uint32 + if len(manifest.Frames) > 0 { + lastFrame := manifest.Frames[len(manifest.Frames)-1] + offset = lastFrame.Offset + lastFrame.CompressedSize + } + + maxSize := int64(MaxPackageSize) + if int64(offset) >= maxSize || int64(offset)+int64(len(compressed)) > maxSize { + manifest.Header.PackageCount++ + packageIndex++ + packagePath = filepath.Join(b.outputDir, "packages", fmt.Sprintf("%s_%d", b.packageName, packageIndex)) + offset = 0 + } + + f, err := os.OpenFile(packagePath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0644) + if err != nil { + return fmt.Errorf("open package %d: %w", packageIndex, err) + } + defer f.Close() + + if _, err := f.Write(compressed); err != nil { + return fmt.Errorf("write frame %d: %w", index, err) + } + + manifest.Frames = append(manifest.Frames, Frame{ + PackageIndex: packageIndex, + Offset: offset, + CompressedSize: uint32(len(compressed)), + Length: uint32(data.Len()), + }) + + b.incrementSection(&manifest.Header.Frames, 1) + return nil +} + +func (b *Builder) addTerminatorFrames(manifest *Manifest) { + 
packagesDir := filepath.Join(b.outputDir, "packages") + + for i := uint32(0); i < manifest.Header.PackageCount; i++ { + packagePath := filepath.Join(packagesDir, fmt.Sprintf("%s_%d", b.packageName, i)) + info, err := os.Stat(packagePath) + if err != nil { + continue + } + + manifest.Frames = append(manifest.Frames, Frame{ + PackageIndex: i, + Offset: uint32(info.Size()), + }) + b.incrementSection(&manifest.Header.Frames, 1) + } + + // Final terminator frame + manifest.Frames = append(manifest.Frames, Frame{}) + b.incrementSection(&manifest.Header.Frames, 1) +} + +func (b *Builder) incrementSection(s *Section, count int) { + s.Count += uint64(count) + s.ElementCount += uint64(count) + s.Length += s.ElementSize * uint64(count) +} diff --git a/pkg/manifest/manifest.go b/pkg/manifest/manifest.go new file mode 100644 index 0000000..1f28443 --- /dev/null +++ b/pkg/manifest/manifest.go @@ -0,0 +1,288 @@ +// Package manifest provides types and functions for working with EVR manifest files. +package manifest + +import ( + "encoding/binary" + "fmt" + "os" + + "github.com/EchoTools/evrFileTools/pkg/archive" +) + +// Binary sizes for manifest structures +const ( + HeaderSize = 192 // Fixed header size: + // 4 (PackageCount) + 4 (Unk1) + 8 (Unk2) + // + SectionSize (FrameContents) + 16 bytes padding + // + SectionSize (Metadata) + 16 bytes padding + // + SectionSize (Frames) + SectionSize = 48 // 6 * 8 bytes (Section has 6 uint64 fields) + FrameContentSize = 32 // 8 + 8 + 4 + 4 + 4 + 4 bytes + FileMetadataSize = 40 // 5 * 8 bytes + FrameSize = 16 // 4 * 4 bytes +) + +// Manifest represents a parsed EVR manifest file. +type Manifest struct { + Header Header + FrameContents []FrameContent + Metadata []FileMetadata + Frames []Frame +} + +// Header contains manifest metadata and section information. +type Header struct { + PackageCount uint32 + Unk1 uint32 // Unknown - 524288 on latest builds + Unk2 uint64 // Unknown - 0 on latest builds + FrameContents Section + _ [16]byte // Padding + Metadata Section + _ [16]byte // Padding + Frames Section +} + +// Section describes a section within the manifest. +type Section struct { + Length uint64 // Total byte length of section + Unk1 uint64 // Unknown - 0 on latest builds + Unk2 uint64 // Unknown - 4294967296 on latest builds + ElementSize uint64 // Byte size of single entry + Count uint64 // Number of elements + ElementCount uint64 // Number of elements (can differ from Count) +} + +// FrameContent describes a file within a frame. +type FrameContent struct { + TypeSymbol int64 // File type identifier + FileSymbol int64 // File identifier + FrameIndex uint32 // Index into Frames array + DataOffset uint32 // Byte offset within decompressed frame + Size uint32 // File size in bytes + Alignment uint32 // Alignment (can be set to 1) +} + +// FileMetadata contains additional file metadata. +type FileMetadata struct { + TypeSymbol int64 // File type identifier + FileSymbol int64 // File identifier + Unk1 int64 // Unknown - game launches with 0 + Unk2 int64 // Unknown - game launches with 0 + AssetType int64 // Asset type identifier +} + +// Frame describes a compressed data frame within a package. +type Frame struct { + PackageIndex uint32 // Package file index + Offset uint32 // Byte offset within package + CompressedSize uint32 // Compressed frame size + Length uint32 // Decompressed frame size +} + +// PackageCount returns the number of packages referenced by this manifest. 
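`Builder` plus the scanner defined further down in this diff gives the full build pipeline. A hedged sketch of how they compose: the input layout is the decimal `frame/type/file` structure that `ScanFiles` parses, and writing the manifest under `manifests/` is an assumption mirroring the game's `_data` layout, not something the library enforces:

```go
package main

import (
	"log"
	"os"
	"path/filepath"

	"github.com/EchoTools/evrFileTools/pkg/manifest"
)

func main() {
	inputDir := "./files"   // decimal frame/type/file layout expected by ScanFiles
	outputDir := "./output" // packages land in ./output/packages/
	packageName := "mypackage"

	// Group input files by frame number.
	groups, err := manifest.ScanFiles(inputDir)
	if err != nil {
		log.Fatal(err)
	}

	// Compress each group into a frame and append it to mypackage_N,
	// rolling over to a new package file near the MaxInt32 size limit.
	b := manifest.NewBuilder(outputDir, packageName)
	b.SetCompressionLevel(manifest.DefaultCompressionLevel)
	m, err := b.Build(groups)
	if err != nil {
		log.Fatal(err)
	}

	// Persist the manifest next to the packages (assumed manifests/ path).
	manifestDir := filepath.Join(outputDir, "manifests")
	if err := os.MkdirAll(manifestDir, 0755); err != nil {
		log.Fatal(err)
	}
	if err := manifest.WriteFile(filepath.Join(manifestDir, packageName), m); err != nil {
		log.Fatal(err)
	}
	log.Printf("built %d files across %d packages", m.FileCount(), m.PackageCount())
}
```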
+func (m *Manifest) PackageCount() int { + return int(m.Header.PackageCount) +} + +// FileCount returns the number of files in this manifest. +func (m *Manifest) FileCount() int { + return len(m.FrameContents) +} + +// UnmarshalBinary decodes a manifest from binary data. +// Uses direct decoding for better performance. +func (m *Manifest) UnmarshalBinary(data []byte) error { + if len(data) < HeaderSize { + return fmt.Errorf("data too short for header") + } + + // Decode header inline + offset := 0 + m.Header.PackageCount = binary.LittleEndian.Uint32(data[offset:]) + offset += 4 + m.Header.Unk1 = binary.LittleEndian.Uint32(data[offset:]) + offset += 4 + m.Header.Unk2 = binary.LittleEndian.Uint64(data[offset:]) + offset += 8 + + // FrameContents section + decodeSection(&m.Header.FrameContents, data[offset:]) + offset += SectionSize + 16 // +16 for padding + + // Metadata section + decodeSection(&m.Header.Metadata, data[offset:]) + offset += SectionSize + 16 // +16 for padding + + // Frames section + decodeSection(&m.Header.Frames, data[offset:]) + offset += SectionSize + + // Decode FrameContents + count := int(m.Header.FrameContents.ElementCount) + m.FrameContents = make([]FrameContent, count) + for i := 0; i < count; i++ { + m.FrameContents[i].TypeSymbol = int64(binary.LittleEndian.Uint64(data[offset:])) + m.FrameContents[i].FileSymbol = int64(binary.LittleEndian.Uint64(data[offset+8:])) + m.FrameContents[i].FrameIndex = binary.LittleEndian.Uint32(data[offset+16:]) + m.FrameContents[i].DataOffset = binary.LittleEndian.Uint32(data[offset+20:]) + m.FrameContents[i].Size = binary.LittleEndian.Uint32(data[offset+24:]) + m.FrameContents[i].Alignment = binary.LittleEndian.Uint32(data[offset+28:]) + offset += FrameContentSize + } + + // Decode Metadata + count = int(m.Header.Metadata.ElementCount) + m.Metadata = make([]FileMetadata, count) + for i := 0; i < count; i++ { + m.Metadata[i].TypeSymbol = int64(binary.LittleEndian.Uint64(data[offset:])) + m.Metadata[i].FileSymbol = int64(binary.LittleEndian.Uint64(data[offset+8:])) + m.Metadata[i].Unk1 = int64(binary.LittleEndian.Uint64(data[offset+16:])) + m.Metadata[i].Unk2 = int64(binary.LittleEndian.Uint64(data[offset+24:])) + m.Metadata[i].AssetType = int64(binary.LittleEndian.Uint64(data[offset+32:])) + offset += FileMetadataSize + } + + // Decode Frames + count = int(m.Header.Frames.ElementCount) + m.Frames = make([]Frame, count) + for i := 0; i < count; i++ { + m.Frames[i].PackageIndex = binary.LittleEndian.Uint32(data[offset:]) + m.Frames[i].Offset = binary.LittleEndian.Uint32(data[offset+4:]) + m.Frames[i].CompressedSize = binary.LittleEndian.Uint32(data[offset+8:]) + m.Frames[i].Length = binary.LittleEndian.Uint32(data[offset+12:]) + offset += FrameSize + } + + return nil +} + +func decodeSection(s *Section, data []byte) { + s.Length = binary.LittleEndian.Uint64(data[0:]) + s.Unk1 = binary.LittleEndian.Uint64(data[8:]) + s.Unk2 = binary.LittleEndian.Uint64(data[16:]) + s.ElementSize = binary.LittleEndian.Uint64(data[24:]) + s.Count = binary.LittleEndian.Uint64(data[32:]) + s.ElementCount = binary.LittleEndian.Uint64(data[40:]) +} + +// MarshalBinary encodes a manifest to binary data. +// Pre-allocates buffer for better performance. +func (m *Manifest) MarshalBinary() ([]byte, error) { + buf := make([]byte, m.BinarySize()) + m.EncodeTo(buf) + return buf, nil +} + +// BinarySize returns the total binary size of the manifest. 
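As a quick sanity check on the fixed sizes above: an encoded manifest is the 192-byte header plus 32, 40, and 16 bytes per frame-content, metadata, and frame entry respectively, which is exactly what `BinarySize` computes. A tiny example using the exported constants, with the 10000-file / 500-frame shape from the benchmark fixture:

```go
package main

import (
	"fmt"

	"github.com/EchoTools/evrFileTools/pkg/manifest"
)

func main() {
	files, frames := 10000, 500
	total := manifest.HeaderSize +
		files*manifest.FrameContentSize + // 10000 * 32
		files*manifest.FileMetadataSize + // 10000 * 40
		frames*manifest.FrameSize // 500 * 16
	fmt.Println(total) // 728192 bytes before archive compression
}
```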
+func (m *Manifest) BinarySize() int { + return HeaderSize + + len(m.FrameContents)*FrameContentSize + + len(m.Metadata)*FileMetadataSize + + len(m.Frames)*FrameSize +} + +// EncodeTo writes the manifest to the given buffer. +// The buffer must be at least BinarySize() bytes. +func (m *Manifest) EncodeTo(buf []byte) { + offset := 0 + + // Encode header + binary.LittleEndian.PutUint32(buf[offset:], m.Header.PackageCount) + offset += 4 + binary.LittleEndian.PutUint32(buf[offset:], m.Header.Unk1) + offset += 4 + binary.LittleEndian.PutUint64(buf[offset:], m.Header.Unk2) + offset += 8 + + // FrameContents section + encodeSection(&m.Header.FrameContents, buf[offset:]) + offset += SectionSize + 16 + + // Metadata section + encodeSection(&m.Header.Metadata, buf[offset:]) + offset += SectionSize + 16 + + // Frames section + encodeSection(&m.Header.Frames, buf[offset:]) + offset += SectionSize + + // Encode FrameContents + for i := range m.FrameContents { + binary.LittleEndian.PutUint64(buf[offset:], uint64(m.FrameContents[i].TypeSymbol)) + binary.LittleEndian.PutUint64(buf[offset+8:], uint64(m.FrameContents[i].FileSymbol)) + binary.LittleEndian.PutUint32(buf[offset+16:], m.FrameContents[i].FrameIndex) + binary.LittleEndian.PutUint32(buf[offset+20:], m.FrameContents[i].DataOffset) + binary.LittleEndian.PutUint32(buf[offset+24:], m.FrameContents[i].Size) + binary.LittleEndian.PutUint32(buf[offset+28:], m.FrameContents[i].Alignment) + offset += FrameContentSize + } + + // Encode Metadata + for i := range m.Metadata { + binary.LittleEndian.PutUint64(buf[offset:], uint64(m.Metadata[i].TypeSymbol)) + binary.LittleEndian.PutUint64(buf[offset+8:], uint64(m.Metadata[i].FileSymbol)) + binary.LittleEndian.PutUint64(buf[offset+16:], uint64(m.Metadata[i].Unk1)) + binary.LittleEndian.PutUint64(buf[offset+24:], uint64(m.Metadata[i].Unk2)) + binary.LittleEndian.PutUint64(buf[offset+32:], uint64(m.Metadata[i].AssetType)) + offset += FileMetadataSize + } + + // Encode Frames + for i := range m.Frames { + binary.LittleEndian.PutUint32(buf[offset:], m.Frames[i].PackageIndex) + binary.LittleEndian.PutUint32(buf[offset+4:], m.Frames[i].Offset) + binary.LittleEndian.PutUint32(buf[offset+8:], m.Frames[i].CompressedSize) + binary.LittleEndian.PutUint32(buf[offset+12:], m.Frames[i].Length) + offset += FrameSize + } +} + +func encodeSection(s *Section, buf []byte) { + binary.LittleEndian.PutUint64(buf[0:], s.Length) + binary.LittleEndian.PutUint64(buf[8:], s.Unk1) + binary.LittleEndian.PutUint64(buf[16:], s.Unk2) + binary.LittleEndian.PutUint64(buf[24:], s.ElementSize) + binary.LittleEndian.PutUint64(buf[32:], s.Count) + binary.LittleEndian.PutUint64(buf[40:], s.ElementCount) +} + +// ReadFile reads and parses a manifest from a file. +func ReadFile(path string) (*Manifest, error) { + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("open manifest: %w", err) + } + defer f.Close() + + data, err := archive.ReadAll(f) + if err != nil { + return nil, fmt.Errorf("read archive: %w", err) + } + + manifest := &Manifest{} + if err := manifest.UnmarshalBinary(data); err != nil { + return nil, fmt.Errorf("parse manifest: %w", err) + } + + return manifest, nil +} + +// WriteFile writes a manifest to a file. 
+func WriteFile(path string, m *Manifest) error { + data, err := m.MarshalBinary() + if err != nil { + return fmt.Errorf("marshal manifest: %w", err) + } + + f, err := os.Create(path) + if err != nil { + return fmt.Errorf("create file: %w", err) + } + defer f.Close() + + if err := archive.Encode(f, data); err != nil { + return fmt.Errorf("encode archive: %w", err) + } + + return nil +} diff --git a/pkg/manifest/manifest_test.go b/pkg/manifest/manifest_test.go new file mode 100644 index 0000000..3e312b4 --- /dev/null +++ b/pkg/manifest/manifest_test.go @@ -0,0 +1,83 @@ +package manifest + +import ( + "testing" +) + +func TestManifest(t *testing.T) { + t.Run("MarshalUnmarshal", func(t *testing.T) { + original := &Manifest{ + Header: Header{ + PackageCount: 2, + FrameContents: Section{ + Length: 64, + ElementSize: 32, + Count: 2, + ElementCount: 2, + }, + Metadata: Section{ + Length: 80, + ElementSize: 40, + Count: 2, + ElementCount: 2, + }, + Frames: Section{ + Length: 32, + ElementSize: 16, + Count: 2, + ElementCount: 2, + }, + }, + FrameContents: []FrameContent{ + {TypeSymbol: 100, FileSymbol: 200, FrameIndex: 0, DataOffset: 0, Size: 1024, Alignment: 1}, + {TypeSymbol: 101, FileSymbol: 201, FrameIndex: 1, DataOffset: 0, Size: 2048, Alignment: 1}, + }, + Metadata: []FileMetadata{ + {TypeSymbol: 100, FileSymbol: 200}, + {TypeSymbol: 101, FileSymbol: 201}, + }, + Frames: []Frame{ + {PackageIndex: 0, Offset: 0, CompressedSize: 512, Length: 1024}, + {PackageIndex: 0, Offset: 512, CompressedSize: 1024, Length: 2048}, + }, + } + + data, err := original.MarshalBinary() + if err != nil { + t.Fatalf("marshal: %v", err) + } + + decoded := &Manifest{} + if err := decoded.UnmarshalBinary(data); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + if decoded.Header.PackageCount != original.Header.PackageCount { + t.Errorf("PackageCount: got %d, want %d", decoded.Header.PackageCount, original.Header.PackageCount) + } + + if len(decoded.FrameContents) != len(original.FrameContents) { + t.Errorf("FrameContents len: got %d, want %d", len(decoded.FrameContents), len(original.FrameContents)) + } + + if len(decoded.Frames) != len(original.Frames) { + t.Errorf("Frames len: got %d, want %d", len(decoded.Frames), len(original.Frames)) + } + }) + + t.Run("PackageCount", func(t *testing.T) { + m := &Manifest{Header: Header{PackageCount: 5}} + if m.PackageCount() != 5 { + t.Errorf("PackageCount: got %d, want 5", m.PackageCount()) + } + }) + + t.Run("FileCount", func(t *testing.T) { + m := &Manifest{ + FrameContents: make([]FrameContent, 100), + } + if m.FileCount() != 100 { + t.Errorf("FileCount: got %d, want 100", m.FileCount()) + } + }) +} diff --git a/pkg/manifest/package.go b/pkg/manifest/package.go new file mode 100644 index 0000000..de00e26 --- /dev/null +++ b/pkg/manifest/package.go @@ -0,0 +1,174 @@ +package manifest + +import ( + "fmt" + "io" + "os" + "path/filepath" + "strconv" + + "github.com/DataDog/zstd" +) + +// Package represents a multi-part package file set. +type Package struct { + manifest *Manifest + files []packageFile +} + +type packageFile interface { + io.Reader + io.ReaderAt + io.Seeker + io.Closer +} + +// OpenPackage opens a multi-part package from the given base path. +// The path should be the package name without the _N suffix. 
+func OpenPackage(manifest *Manifest, basePath string) (*Package, error) { + dir := filepath.Dir(basePath) + stem := filepath.Base(basePath) + count := manifest.PackageCount() + + pkg := &Package{ + manifest: manifest, + files: make([]packageFile, count), + } + + for i := range count { + path := filepath.Join(dir, fmt.Sprintf("%s_%d", stem, i)) + f, err := os.Open(path) + if err != nil { + pkg.Close() + return nil, fmt.Errorf("open package %d: %w", i, err) + } + pkg.files[i] = f + } + + return pkg, nil +} + +// Close closes all package files. +func (p *Package) Close() error { + var lastErr error + for _, f := range p.files { + if f != nil { + if err := f.Close(); err != nil { + lastErr = err + } + } + } + return lastErr +} + +// Manifest returns the associated manifest. +func (p *Package) Manifest() *Manifest { + return p.manifest +} + +// Extract extracts all files from the package to the output directory. +func (p *Package) Extract(outputDir string, opts ...ExtractOption) error { + cfg := &extractConfig{} + for _, opt := range opts { + opt(cfg) + } + + // Build frame index for O(1) lookup instead of O(n) scan per frame + frameIndex := make(map[uint32][]FrameContent) + for _, fc := range p.manifest.FrameContents { + frameIndex[fc.FrameIndex] = append(frameIndex[fc.FrameIndex], fc) + } + + ctx := zstd.NewCtx() + compressed := make([]byte, 32*1024*1024) + decompressed := make([]byte, 32*1024*1024) + + // Pre-create directory cache to avoid repeated MkdirAll calls + createdDirs := make(map[string]struct{}) + + for frameIdx, frame := range p.manifest.Frames { + if frame.Length == 0 || frame.CompressedSize == 0 { + continue + } + + // Ensure buffers are large enough + if int(frame.CompressedSize) > len(compressed) { + compressed = make([]byte, frame.CompressedSize) + } + if int(frame.Length) > len(decompressed) { + decompressed = make([]byte, frame.Length) + } + + // Read compressed data + file := p.files[frame.PackageIndex] + if _, err := file.Seek(int64(frame.Offset), io.SeekStart); err != nil { + return fmt.Errorf("seek frame %d: %w", frameIdx, err) + } + + if _, err := io.ReadFull(file, compressed[:frame.CompressedSize]); err != nil { + return fmt.Errorf("read frame %d: %w", frameIdx, err) + } + + // Decompress + if _, err := ctx.Decompress(decompressed[:frame.Length], compressed[:frame.CompressedSize]); err != nil { + return fmt.Errorf("decompress frame %d: %w", frameIdx, err) + } + + // Extract files from this frame using pre-built index + contents := frameIndex[uint32(frameIdx)] + for _, fc := range contents { + var fileName string + if cfg.decimalNames { + fileName = strconv.FormatInt(fc.FileSymbol, 10) + } else { + fileName = strconv.FormatUint(uint64(fc.FileSymbol), 16) + } + fileType := strconv.FormatUint(uint64(fc.TypeSymbol), 16) + + var basePath string + if cfg.preserveGroups { + basePath = filepath.Join(outputDir, strconv.FormatUint(uint64(fc.FrameIndex), 10), fileType) + } else { + basePath = filepath.Join(outputDir, fileType) + } + + // Only create directory if not already created + if _, exists := createdDirs[basePath]; !exists { + if err := os.MkdirAll(basePath, 0755); err != nil { + return fmt.Errorf("create dir %s: %w", basePath, err) + } + createdDirs[basePath] = struct{}{} + } + + filePath := filepath.Join(basePath, fileName) + if err := os.WriteFile(filePath, decompressed[fc.DataOffset:fc.DataOffset+fc.Size], 0644); err != nil { + return fmt.Errorf("write file %s: %w", filePath, err) + } + } + } + + return nil +} + +// extractConfig holds extraction options. 
+type extractConfig struct { + preserveGroups bool + decimalNames bool +} + +// ExtractOption configures extraction behavior. +type ExtractOption func(*extractConfig) + +// WithPreserveGroups preserves frame grouping in output directory structure. +func WithPreserveGroups(preserve bool) ExtractOption { + return func(c *extractConfig) { + c.preserveGroups = preserve + } +} + +// WithDecimalNames uses decimal format for filenames instead of hex. +func WithDecimalNames(decimal bool) ExtractOption { + return func(c *extractConfig) { + c.decimalNames = decimal + } +} diff --git a/pkg/manifest/scanner.go b/pkg/manifest/scanner.go new file mode 100644 index 0000000..5e7ceaa --- /dev/null +++ b/pkg/manifest/scanner.go @@ -0,0 +1,81 @@ +package manifest + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" +) + +// ScannedFile represents a file scanned from an input directory for building packages. +type ScannedFile struct { + TypeSymbol int64 + FileSymbol int64 + Path string + Size uint32 +} + +// ScanFiles walks the input directory and returns files grouped by chunk number. +// The directory structure is expected to be: /// +func ScanFiles(inputDir string) ([][]ScannedFile, error) { + var files [][]ScannedFile + + err := filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + + // Parse directory structure + dir := filepath.Dir(path) + parts := strings.Split(filepath.ToSlash(dir), "/") + if len(parts) < 3 { + return fmt.Errorf("invalid path structure: %s", path) + } + + chunkNum, err := strconv.ParseInt(parts[len(parts)-3], 10, 64) + if err != nil { + return fmt.Errorf("parse chunk number: %w", err) + } + + typeSymbol, err := strconv.ParseInt(parts[len(parts)-2], 10, 64) + if err != nil { + return fmt.Errorf("parse type symbol: %w", err) + } + + fileSymbol, err := strconv.ParseInt(filepath.Base(path), 10, 64) + if err != nil { + return fmt.Errorf("parse file symbol: %w", err) + } + + size := info.Size() + const maxUint32 = int64(^uint32(0)) + if size < 0 || size > maxUint32 { + return fmt.Errorf("file too large: %s (size %d exceeds %d bytes)", path, size, maxUint32) + } + + file := ScannedFile{ + TypeSymbol: typeSymbol, + FileSymbol: fileSymbol, + Path: path, + Size: uint32(size), + } + + // Grow slice if needed + for int(chunkNum) >= len(files) { + files = append(files, nil) + } + + files[chunkNum] = append(files[chunkNum], file) + return nil + }) + + if err != nil { + return nil, err + } + + return files, nil +} diff --git a/ready-at-dawn-echo-arena b/ready-at-dawn-echo-arena new file mode 120000 index 0000000..479d94b --- /dev/null +++ b/ready-at-dawn-echo-arena @@ -0,0 +1 @@ +/mnt/c/OculusLibrary/Software/ready-at-dawn-echo-arena \ No newline at end of file
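To round this out, a short sketch of the two extract options defined above in use; the manifest and package paths are placeholders for a real `_data` tree:

```go
package main

import (
	"log"

	"github.com/EchoTools/evrFileTools/pkg/manifest"
)

func main() {
	// Placeholder paths; point these at a real _data directory.
	m, err := manifest.ReadFile("./_data/manifests/48037dc70b0ecab2")
	if err != nil {
		log.Fatal(err)
	}

	pkg, err := manifest.OpenPackage(m, "./_data/packages/48037dc70b0ecab2")
	if err != nil {
		log.Fatal(err)
	}
	defer pkg.Close()

	// Keep the per-frame directories and write decimal file names instead
	// of the default hex.
	err = pkg.Extract("./extracted",
		manifest.WithPreserveGroups(true),
		manifest.WithDecimalNames(true),
	)
	if err != nil {
		log.Fatal(err)
	}
}
```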