From f5db2650bc021c80dd88326a93ea2dc36ad88f8f Mon Sep 17 00:00:00 2001 From: Rogdham Date: Sat, 29 Nov 2025 16:38:03 +0100 Subject: [PATCH] refactor: use compression.zstd (PEP-784) --- .github/workflows/build.yml | 263 ++------ .gitignore | 14 +- .gitmodules | 3 - CHANGELOG.md | 7 +- MANIFEST.in | 13 - README.md | 2 + build_script/pyzstd_build_cffi.py | 222 ------- build_script/pyzstd_pep517.py | 10 - docs/index.md | 2 + docs/pyzstd.rst | 90 +-- pyproject.toml | 63 +- setup.py | 248 ------- src/__init__.py | 251 -------- src/_c/__init__.py | 182 ------ src/_cffi/__init__.py | 25 - src/_cffi/common.py | 461 -------------- src/_cffi/compressor.py | 304 --------- src/_cffi/decompressor.py | 414 ------------ src/_cffi/dict.py | 248 ------- src/_cffi/file.py | 347 ---------- src/_cffi/output_buffer.py | 125 ---- src/_cffi/stream.py | 364 ----------- src/_zstdfile.py | 496 -------------- src/bin_ext/compressor.c | 599 ----------------- src/bin_ext/decompressor.c | 851 ------------------------- src/bin_ext/dict.c | 507 --------------- src/bin_ext/file.c | 765 ---------------------- src/bin_ext/macro_functions.h | 315 --------- src/bin_ext/output_buffer.h | 526 --------------- src/bin_ext/pyzstd.c | 628 ------------------ src/bin_ext/pyzstd.h | 663 ------------------- src/bin_ext/stream.c | 600 ----------------- src/pyzstd/__init__.py | 709 ++++++++++++++++++++ src/{ => pyzstd}/__init__.pyi | 4 - src/{ => pyzstd}/__main__.py | 14 +- src/{ => pyzstd}/_seekable_zstdfile.py | 9 +- src/{ => pyzstd}/py.typed | 0 tests/test_seekable.py | 11 +- tests/test_zstd.py | 546 +++------------- zstd | 1 - 40 files changed, 939 insertions(+), 9963 deletions(-) delete mode 100644 .gitmodules delete mode 100644 MANIFEST.in delete mode 100644 build_script/pyzstd_build_cffi.py delete mode 100644 build_script/pyzstd_pep517.py delete mode 100644 setup.py delete mode 100644 src/__init__.py delete mode 100644 src/_c/__init__.py delete mode 100644 src/_cffi/__init__.py delete mode 100644 
src/_cffi/common.py delete mode 100644 src/_cffi/compressor.py delete mode 100644 src/_cffi/decompressor.py delete mode 100644 src/_cffi/dict.py delete mode 100644 src/_cffi/file.py delete mode 100644 src/_cffi/output_buffer.py delete mode 100644 src/_cffi/stream.py delete mode 100644 src/_zstdfile.py delete mode 100644 src/bin_ext/compressor.c delete mode 100644 src/bin_ext/decompressor.c delete mode 100644 src/bin_ext/dict.c delete mode 100644 src/bin_ext/file.c delete mode 100644 src/bin_ext/macro_functions.h delete mode 100644 src/bin_ext/output_buffer.h delete mode 100644 src/bin_ext/pyzstd.c delete mode 100644 src/bin_ext/pyzstd.h delete mode 100644 src/bin_ext/stream.c create mode 100644 src/pyzstd/__init__.py rename src/{ => pyzstd}/__init__.pyi (99%) rename src/{ => pyzstd}/__main__.py (97%) rename src/{ => pyzstd}/_seekable_zstdfile.py (99%) rename src/{ => pyzstd}/py.typed (100%) delete mode 160000 zstd diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0689e89..5f54906 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,243 +11,88 @@ on: workflow_dispatch: env: - PYTHON_VER: "3.11" # Python to run test/cibuildwheel - CIBW_BUILD: > - cp310-* cp311-* cp312-* cp313-* cp313t-* cp314-* cp314t-* - pp310-* pp311-* - CIBW_ENABLE: cpython-freethreading pypy pypy-eol - CIBW_TEST_COMMAND: python -m unittest discover {project}/tests + PY_COLORS: 1 jobs: - test_cpython: - name: Test on ${{ matrix.platform }} ${{ matrix.architecture }} ${{ matrix.build_option }} - runs-on: ${{ matrix.platform }} - - strategy: - matrix: - platform: - - ubuntu-latest - - macos-latest - - windows-latest - build_option: - - "--warning-as-error" - - "--warning-as-error --multi-phase-init" - - "--warning-as-error --cffi" - architecture: - - x64 - include: - - platform: windows-latest - build_option: "--warning-as-error" - architecture: x86 - - platform: windows-latest - build_option: "--warning-as-error --multi-phase-init" - 
architecture: x86 - - platform: windows-latest - build_option: "--warning-as-error --cffi" - architecture: x86 - - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VER }} - architecture: ${{ matrix.architecture }} - - - name: Run test - run: | - python -m pip install cffi - python -m pip install --config-settings="--build-option=${{ matrix.build_option }}" -vv . - python -m unittest discover tests - - test_cpython_debug: - name: Test debug build on Ubuntu ${{ matrix.build_option }} - runs-on: ubuntu-latest - - strategy: - matrix: - build_option: - - "--warning-as-error --debug" - - "--warning-as-error --debug --multi-phase-init" - - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - - name: Run test - run: | - sudo apt-get update - sudo apt-get install python3-dbg - python3-dbg -m pip install --config-settings="--build-option=${{ matrix.build_option }}" -vv . - python3-dbg -m unittest discover tests - - test_pypy: - name: Test on ${{ matrix.py }} - runs-on: ubuntu-latest - - strategy: - matrix: - py: - - "pypy-3.10" - - "pypy-3.11" - - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.py }} - - - name: Build & test - run: | - sudo apt-get install -q -y zstd libzstd1 libzstd-dev - python -m pip install --config-settings="--build-option=--dynamic-link-zstd --warning-as-error" -v . 
- python -m unittest discover tests - - build_sdist: - name: Build sdist - if: startsWith(github.ref, 'refs/tags') || startsWith(github.head_ref, 'release-') || github.event_name == 'workflow_dispatch' + build: + name: Build runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VER }} - - - name: Build sdist - run: | - python -m pip install -U build - python -m build --sdist - + - uses: actions/checkout@v6 + with: + # fetch all commits for version computation + fetch-depth: 0 + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: "3.14" + - name: Install dependencies + run: python -m pip install -U build + - name: Build + run: python -m build - name: List distributions run: ls -lR dist - - - name: Upload sdist - uses: actions/upload-artifact@v4 - with: - name: cibw-sdist - path: dist/*.tar.gz - - build_wheels: - name: Build wheels on ${{ matrix.platform }} - if: startsWith(github.ref, 'refs/tags') || startsWith(github.head_ref, 'release-') || github.event_name == 'workflow_dispatch' - runs-on: ${{ matrix.platform }} - - strategy: - matrix: - platform: - - ubuntu-latest - - macos-latest - - windows-latest - - windows-11-arm - - env: - CIBW_ARCHS_MACOS: "x86_64 arm64" - - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VER }} - - - name: Build wheels - run: | - python -m pip install -U cibuildwheel - python -m cibuildwheel --output-dir wheelhouse - - - name: List distributions - run: ls -lR wheelhouse - shell: bash - - - name: Upload wheels - uses: actions/upload-artifact@v4 + - name: Save build artifacts + uses: actions/upload-artifact@v5 with: - name: cibw-wheels-${{ matrix.platform }} - path: wheelhouse/*.whl + name: build + path: dist + - name: Install sdist + run: python -m pip install dist/*.tar.gz + - name: Test + run: python -m 
unittest discover tests -v - build_arch_wheels: - name: Build wheels for ${{ matrix.arch }} (skip ${{ matrix.skip_image }}) - if: startsWith(github.ref, 'refs/tags') || startsWith(github.head_ref, 'release-') || github.event_name == 'workflow_dispatch' + tests-py: + name: Test | ${{ matrix.python }} runs-on: ubuntu-latest - + needs: + - build strategy: matrix: - arch: - - aarch64 - - ppc64le - - s390x - # Building in QEMU is very slow, so parallelize the tasks. - skip_image: - - musllinux - - manylinux - - env: - CIBW_ARCHS: ${{ matrix.arch }} - CIBW_SKIP: "*${{ matrix.skip_image }}*" - + python: + - "3.10" + - "3.11" + - "3.12" + - "3.13" + - "3.14" + - "pypy-3.10" + - "pypy-3.11" steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 + - name: Restore build artifacts + uses: actions/download-artifact@v6 with: - submodules: true - - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VER }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build wheels - run: | - python -m pip install -U cibuildwheel - python -m cibuildwheel --output-dir wheelhouse - - - name: List distributions - run: ls -lR wheelhouse - - - name: Upload wheels - uses: actions/upload-artifact@v4 + name: build + path: dist + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@v6 with: - name: cibw-wheels-${{ matrix.arch }}-${{ matrix.skip_image }} - path: wheelhouse/*.whl + python-version: ${{ matrix.python }} + - name: Install wheel + run: python -m pip install dist/*.whl + - name: Test + run: python -m unittest discover tests -v - upload_pypi: + publish: name: Publish to PyPI if: startsWith(github.ref, 'refs/tags') needs: - - build_sdist - - build_wheels - - build_arch_wheels + - build + - tests-py runs-on: ubuntu-latest environment: publish permissions: id-token: write # This permission is mandatory for trusted publishing - steps: - - name: Download wheels - uses: actions/download-artifact@v4 + - name: Restore build 
artifacts + uses: actions/download-artifact@v6 with: - pattern: cibw-* + name: build path: dist - merge-multiple: true - - name: List distributions run: ls -lR dist - - - name: Upload to PyPI + - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: - skip-existing: true verbose: true print-hash: true diff --git a/.gitignore b/.gitignore index 888ff5b..57dc714 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,11 @@ -/.vscode +/env +__pycache__ + /build /dist - -__pycache__ -*.o -*.pdb -*.so *.egg-info +/.eggs + +/.vscode -/LICENSE_zstd +/src/pyzstd/_version.py diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 8ef60ef..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "zstd"] - path = zstd - url = https://github.com/facebook/zstd.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fc7aea..f3a1bad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,12 @@ All notable changes to this project will be documented in this file. ## Unreleased +- The project has been completely refactored to use the Zstandard implementation from the standard library ([PEP-784](https://peps.python.org/pep-0784/)) +- The refactor has some minor impact on public APIs, such as changing the exception raised on invalid input +- Add `backports.zstd` dependency for Python before 3.14 +- Changes in build dependency: remove `setuptools` and C build toolchain, add `hatchling` and `hatch-vcs` +- Remove git submodule usage - Drop support for Python 3.9 and below -- Build free-threaded wheels for CPython 3.13 and 3.14, even if the feature is not supported -- Refactor SeekableZstdFile to make it independent of ZstdFile ## 0.18.0 (October 5, 2025) diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index bcce970..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,13 +0,0 @@ -include pyproject.toml -include CHANGELOG.md -include LICENSE -include README.md - -include zstd/LICENSE -recursive-include zstd/lib * -recursive-include src * 
-recursive-include tests * -recursive-include build_script * - -recursive-exclude * __pycache__ -recursive-exclude * *.pyc diff --git a/README.md b/README.md index 52ec20b..d1f3214 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ an API style similar to the `bz2`, `lzma`, and `zlib` modules. > We recommend new projects to use the standard library, and existing ones to consider > migrating. > +> `pyzstd` has used `compression.zstd` internally since version 0.19.0. +> > See [`pyzstd`'s documentation][doc] for details and a migration guide. [doc]: https://pyzstd.readthedocs.io/ diff --git a/build_script/pyzstd_build_cffi.py b/build_script/pyzstd_build_cffi.py deleted file mode 100644 index 01f5701..0000000 --- a/build_script/pyzstd_build_cffi.py +++ /dev/null @@ -1,222 +0,0 @@ -try: - import cffi -except ImportError: - # PyPy includes cffi by default - msg = ('\n To build the CFFI implementation ' - 'of pyzstd module, need cffi module.' - '\n On CPython, CFFI implementation ' - 'is slower than C implementation.\n') - print(msg) - raise - -ffibuilder = cffi.FFI() - -ffibuilder.cdef(""" -#define ZSTD_VERSION_NUMBER ... -#define ZSTD_CONTENTSIZE_UNKNOWN ... -#define ZSTD_CONTENTSIZE_ERROR ... - -typedef ... ZSTD_CDict; -typedef ... ZSTD_DDict; -typedef ... ZSTD_CCtx; -typedef ... 
ZSTD_DCtx; - -typedef struct { - size_t error; - int lowerBound; - int upperBound; -} ZSTD_bounds; - -typedef enum { - ZSTD_e_continue, - ZSTD_e_flush, - ZSTD_e_end -} ZSTD_EndDirective; - -typedef struct ZSTD_inBuffer_s { - const void* src; - size_t size; - size_t pos; -} ZSTD_inBuffer; - -typedef struct ZSTD_outBuffer_s { - void* dst; - size_t size; - size_t pos; -} ZSTD_outBuffer; - -typedef enum { - /* Compression parameters */ - ZSTD_c_compressionLevel, - - /* Advanced compression parameters */ - ZSTD_c_windowLog, - ZSTD_c_hashLog, - ZSTD_c_chainLog, - ZSTD_c_searchLog, - ZSTD_c_minMatch, - ZSTD_c_targetLength, - ZSTD_c_strategy, - ZSTD_c_targetCBlockSize, - - /* LDM mode parameters */ - ZSTD_c_enableLongDistanceMatching, - ZSTD_c_ldmHashLog, - ZSTD_c_ldmMinMatch, - ZSTD_c_ldmBucketSizeLog, - ZSTD_c_ldmHashRateLog, - - /* frame parameters */ - ZSTD_c_contentSizeFlag, - ZSTD_c_checksumFlag, - ZSTD_c_dictIDFlag, - - /* multi-threading parameters */ - ZSTD_c_nbWorkers, - ZSTD_c_jobSize, - ZSTD_c_overlapLog, -} ZSTD_cParameter; - -typedef enum { - ZSTD_d_windowLogMax, -} ZSTD_dParameter; - -typedef enum { - ZSTD_fast, - ZSTD_dfast, - ZSTD_greedy, - ZSTD_lazy, - ZSTD_lazy2, - ZSTD_btlazy2, - ZSTD_btopt, - ZSTD_btultra, - ZSTD_btultra2 -} ZSTD_strategy; - -typedef enum { - ZSTD_reset_session_only, - ... 
-} ZSTD_ResetDirective; - -size_t ZSTD_compressBound(size_t srcSize); -unsigned ZSTD_isError(size_t code); -const char* ZSTD_getErrorName(size_t code); -int ZSTD_minCLevel(void); -int ZSTD_maxCLevel(void); -int ZSTD_defaultCLevel(void); - -unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); -unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); -size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); - -unsigned ZSTD_versionNumber(void); -const char* ZSTD_versionString(void); - -ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); -ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); -size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); -size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); - -size_t ZSTD_CStreamInSize(void); -size_t ZSTD_CStreamOutSize(void); -size_t ZSTD_DStreamInSize(void); -size_t ZSTD_DStreamOutSize(void); - -ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, - int compressionLevel); -size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); -size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); -size_t ZSTD_freeCDict(ZSTD_CDict* CDict); -size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); - -ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); -size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -size_t ZSTD_freeDDict(ZSTD_DDict* ddict); -size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize); - -unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); - -ZSTD_CCtx* ZSTD_createCCtx(void); -size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); -size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); -size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long 
pledgedSrcSize); -size_t ZSTD_compressStream2(ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective endOp); - -ZSTD_DCtx* ZSTD_createDCtx(void); -size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); -size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); -size_t ZSTD_decompressStream(ZSTD_DCtx* dctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input); - -unsigned ZDICT_isError(size_t errorCode); -size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, - const size_t* samplesSizes, unsigned nbSamples); - - -typedef struct { - int compressionLevel; - unsigned notificationLevel; - unsigned dictID; -} ZDICT_params_t; - -size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize, - const void* dictContent, size_t dictContentSize, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_params_t parameters); - -extern int pyzstd_static_link; -""") - -source = """ -#include "zstd.h" -#include "zdict.h" - -#if ZSTD_VERSION_NUMBER < 10400 - #error "pyzstd module requires zstd v1.4.0+" -#endif - -#if ZSTD_VERSION_NUMBER < 10405 -typedef struct { - int compressionLevel; - unsigned notificationLevel; - unsigned dictID; -} ZDICT_params_t; - -size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize, - const void* dictContent, size_t dictContentSize, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_params_t parameters) -{ - return 0; -} -#endif - -#if ZSTD_VERSION_NUMBER < 10500 -int ZSTD_defaultCLevel(void) -{ - return ZSTD_CLEVEL_DEFAULT; -} -#endif - -#if ZSTD_VERSION_NUMBER < 10506 -typedef enum { - ZSTD_c_targetCBlockSize=130 -} PYZSTD_compat_c_targetCBlockSize; -#endif - -#ifdef PYZSTD_STATIC_LINK -int pyzstd_static_link = 1; -#else -int pyzstd_static_link = 0; -#endif -""" - -def get_extension(**kwargs): - ffibuilder.set_source(source=source, **kwargs) - return ffibuilder.distutils_extension() diff --git 
a/build_script/pyzstd_pep517.py b/build_script/pyzstd_pep517.py deleted file mode 100644 index 7b4b3f8..0000000 --- a/build_script/pyzstd_pep517.py +++ /dev/null @@ -1,10 +0,0 @@ -from setuptools import build_meta as _orig -from setuptools.build_meta import * - -def get_requires_for_build_wheel(config_settings=None): - requires = [] - if isinstance(config_settings, dict) and '--build-option' in config_settings: - v = config_settings['--build-option'] - if isinstance(v, (str, list)) and '--cffi' in v: - requires.append('cffi') - return _orig.get_requires_for_build_wheel(config_settings) + requires diff --git a/docs/index.md b/docs/index.md index bc47d68..4627b51 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,6 +14,8 @@ The `pyzstd` library was created by Ma Lin in 2020 to provide Python support for In 2025, an effort led by [Emma Smith](https://github.com/emmatyping) (now a CPython core developer) resulted in [PEP 784][] and the inclusion of the [`compression.zstd` module][compression.zstd] in the Python 3.14 standard library. The implementation was adapted from `pyzstd`, with its maintainer [Rogdham](https://github.com/rogdham) contributing directly to the effort. Rogdham also developed the [`backports.zstd` library][backports.zstd] which backports the `compression.zstd` APIs to older Python versions. +In version 0.19.0, `pyzstd` became a pure-Python package by using the `compression.zstd` module internally. + Recommendations: - **New projects**: use the standard library [`compression.zstd` module][compression.zstd], with [`backports.zstd`][backports.zstd] as a fallback for older Python versions. diff --git a/docs/pyzstd.rst b/docs/pyzstd.rst index c9014d8..7cdb477 100644 --- a/docs/pyzstd.rst +++ b/docs/pyzstd.rst @@ -9,11 +9,7 @@ The pyzstd module provides classes and functions for compressing and decompressi The API style is similar to Python's bz2/lzma/zlib modules. 
-* Includes the latest zstd library source code -* Can also dynamically link to zstd library provided by system, see :ref:`this note`. -* Has a CFFI implementation that can work with PyPy -* Support sub-interpreter on CPython 3.12+ -* :py:class:`ZstdFile` class has C language level performance +* Pure-Python package relying on the `compression.zstd` module internally (`PEP 784 <https://peps.python.org/pep-0784/>`_). * Supports `Zstandard Seekable Format `__ * Has a command line interface, ``python -m pyzstd --help``. @@ -1411,90 +1407,6 @@ Use zstd as a patching engine VER_2 = decompress(PATCH, zstd_dict=v1.as_prefix, option=option) -Build pyzstd module with options ->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> - -.. _build_pyzstd: - -.. note:: Build pyzstd module with options - - 1️⃣ If provide ``--avx2`` build option, it will build with AVX2/BMI2 instructions. In MSVC build (static link), this brings some performance improvements. GCC/CLANG builds already dynamically dispatch some functions for BMI2 instructions, so no significant improvement, or worse. - - .. sourcecode:: shell - - # 🟠 pyzstd 0.15.4+ and pip 22.1+ support PEP-517: - # build and install - pip install --config-settings="--build-option=--avx2" -v pyzstd-0.15.4.tar.gz - # build a redistributable wheel - pip wheel --config-settings="--build-option=--avx2" -v pyzstd-0.15.4.tar.gz - # 🟠 legacy commands: - # build and install - python setup.py install --avx2 - # build a redistributable wheel - python setup.py bdist_wheel --avx2 - - 2️⃣ Pyzstd module supports: - - * Dynamically link to zstd library (provided by system or a DLL library), then the zstd source code in ``zstd`` folder will be ignored. - * Provide a `CFFI `_ implementation that can work with PyPy. - - On CPython, provide these build options: - - #. no option: C implementation, statically link to zstd library. - #. ``--dynamic-link-zstd``: C implementation, dynamically link to zstd library. - #. ``--cffi``: CFFI implementation (slower), statically link to zstd library. - #. 
``--cffi --dynamic-link-zstd``: CFFI implementation (slower), dynamically link to zstd library. - - On PyPy, only CFFI implementation can be used, so ``--cffi`` is added implicitly. ``--dynamic-link-zstd`` is optional. - - .. sourcecode:: shell - - # 🟠 pyzstd 0.15.4+ and pip 22.1+ support PEP-517: - # build and install - pip3 install --config-settings="--build-option=--dynamic-link-zstd" -v pyzstd-0.15.4.tar.gz - # build a redistributable wheel - pip3 wheel --config-settings="--build-option=--dynamic-link-zstd" -v pyzstd-0.15.4.tar.gz - # specify more than one option - pip3 wheel --config-settings="--build-option=--dynamic-link-zstd --cffi" -v pyzstd-0.15.4.tar.gz - # 🟠 legacy commands: - # build and install - python3 setup.py install --dynamic-link-zstd - # build a redistributable wheel - python3 setup.py bdist_wheel --dynamic-link-zstd - - Some notes: - - * The wheels on `PyPI `_ use static linking, the packages on `Anaconda `_ use dynamic linking. - * No matter static or dynamic linking, pyzstd module requires zstd v1.4.0+. - * Static linking: Use zstd's official release without any change. If want to upgrade or downgrade the zstd library, just replace ``zstd`` folder. - * Dynamic linking: If new zstd API is used at compile-time, linking to lower version run-time zstd library will fail. Use v1.5.0 new API if possible. - - On Windows, there is no system-wide zstd library. Pyzstd module can dynamically link to a DLL library, modify ``setup.py``: - - .. sourcecode:: python - - # E:\zstd_dll folder has zstd.h / zdict.h / libzstd.lib that - # along with libzstd.dll - if DYNAMIC_LINK: - kwargs = { - 'include_dirs': ['E:\zstd_dll'], # .h directory - 'library_dirs': ['E:\zstd_dll'], # .lib directory - 'libraries': ['libzstd'], # lib name, not filename, for the linker. - ... - - And put ``libzstd.dll`` into one of these directories: - - * Directory added by `os.add_dll_directory() `_ function. 
(The unit-tests and the CLI can't utilize this) - * Python's root directory that has python.exe. - * %SystemRoot%\System32 - - Note that the above list doesn't include the current working directory and %PATH% directories. - - 3️⃣ Disable mremap output buffer on CPython+Linux. - - On CPython(3.5~3.12)+Linux, pyzstd uses another output buffer code that can utilize the ``mremap`` mechanism, which brings some performance improvements. If this causes problems, you may use ``--no-mremap`` option to disable this code. - - Deprecations >>>>>>>>>>>> diff --git a/pyproject.toml b/pyproject.toml index 8c774f3..a61a3ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,60 @@ +[project] +dynamic = ["version"] +name = "pyzstd" +authors = [ + { name = "Ma Lin", email = "malincns@163.com" }, + { name = "Rogdham", email = "contact@rogdham.net" }, +] +maintainers = [{ name = "Rogdham", email = "contact@rogdham.net" }] +description = "Support for Zstandard (zstd) compression" +readme = { file = "README.md", content-type = "text/markdown" } +keywords = [ + "zstandard", + "zstd", + "zst", + "compress", + "decompress", + "tar", + "file", + "seek", + "seekable", +] +license = "BSD-3-Clause" +license-files = ["LICENSE"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Operating System :: OS Independent", + "Intended Audience :: Developers", + "Topic :: System :: Archiving :: Compression", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", +] +requires-python = ">=3.10" +dependencies = [ + "backports.zstd>=1.0.0 ; python_version<'3.14'", + "typing-extensions>=4.13.2 ; python_version<'3.13'", +] + +[project.urls] +Homepage = "https://github.com/Rogdham/pyzstd" +Documentation = "https://pyzstd.readthedocs.io/" +Source = 
"https://github.com/Rogdham/pyzstd" + +# +# build +# + [build-system] -# setuptools 64+ support --build-option -requires = ["setuptools>=64"] -backend-path = ["build_script"] -build-backend = "pyzstd_pep517" +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[tool.hatch.build.hooks.vcs] +template = "__version__ = \"{version}\"\n" +version-file = "src/pyzstd/_version.py" + +[tool.hatch.version] +source = "vcs" diff --git a/setup.py b/setup.py deleted file mode 100644 index dc41471..0000000 --- a/setup.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 -import fnmatch -import os -import platform -import re -import sys -from setuptools import setup, Extension -from setuptools.command.build_ext import build_ext - -def read_stuff(): - ROOT_PATH = os.path.dirname(os.path.realpath(__file__)) - - # Read README.md - README_PATH = os.path.join(ROOT_PATH, 'README.md') - with open(README_PATH, 'r', encoding='utf-8') as file: - long_description = file.read() - - # Read module version - INIT_PATH = os.path.join(ROOT_PATH, 'src', '__init__.py') - with open(INIT_PATH, 'r', encoding='utf-8') as file: - file_content = file.read() - m = re.search(r'''__version__\s*=\s*(['"])(.*?)\1''', file_content) - module_version = m.group(2) - - # Create LICENSE_zstd - LICENSE_ZSTD_SRC = os.path.join(ROOT_PATH, 'zstd', 'LICENSE') - with open(LICENSE_ZSTD_SRC, 'r', encoding='utf-8') as file: - license_zstd = file.read() - LICENSE_ZSTD_DST = os.path.join(ROOT_PATH, 'LICENSE_zstd') - with open(LICENSE_ZSTD_DST, 'w', encoding='utf-8') as file: - file.write( - "Depending on how it is build, this package may distribute the zstd library,\n" - "partially or in its integrality, in source or binary form.\n\n" - "Its license is reproduced below.\n\n" - "---\n\n" - ) - file.write(license_zstd) - - return long_description, module_version - -def get_zstd_files_list(): - ret = [] - for sub_dir in ('common', 'compress', 'decompress', 'dictBuilder'): - directory = 'zstd/lib/' + 
sub_dir + '/' - dir_list = os.listdir(directory) - - # Source files - l = [directory + fn - for fn in dir_list - if fnmatch.fnmatch(fn, '*.[cCsS]')] - ret.extend(l) - return ret - -def has_option(option): - if option in sys.argv: - print(' * build pyzstd wheel with option:', option) - sys.argv = [s for s in sys.argv if s != option] - return True - else: - return False - -class pyzstd_build_ext(build_ext): - PYZSTD_AVX2 = False - PYZSTD_DEBUG = False - PYZSTD_WARNING_AS_ERROR = False - PYZSTD_CONFIG_MSG = '' - - def build_extensions(self): - # Print build config message in actual build - print(self.PYZSTD_CONFIG_MSG) - - # Accept assembly files - self.compiler.src_extensions.extend(['.s', '.S']) - # Build debug build - self.debug = self.PYZSTD_DEBUG - - if self.compiler.compiler_type in ('unix', 'mingw32', 'cygwin'): - # Remove -Wunreachable-code default args based on how Python was build - # see distutils.sysconfig.get_config_var("CFLAGS") - # see https://github.com/facebook/zstd/issues/4308 - self.compiler.compiler = [part for part in self.compiler.compiler if part != '-Wunreachable-code'] - self.compiler.compiler_so = [part for part in self.compiler.compiler_so if part != '-Wunreachable-code'] - - for extension in self.extensions: - if self.compiler.compiler_type in ('unix', 'mingw32', 'cygwin'): - # -g0: - # Level 0 produces no debug information at all. This reduces - # the size of GCC wheels. By default CPython won't print any - # C stack trace, so -g0 and -g2 are same for most users. - # -flto: - # This option runs the standard link-time optimizer. To use the - # link-time optimizer, -flto and optimization options should be - # specified at compile time and during the final link. 
- more_options = ['-g0', '-flto'] - if self.PYZSTD_AVX2: - instrs = ['-mavx2', '-mlzcnt', '-mbmi', '-mbmi2'] - more_options.extend(instrs) - if self.PYZSTD_WARNING_AS_ERROR: - more_options.append('-Werror') - extension.extra_compile_args.extend(more_options) - extension.extra_link_args.extend(['-g0', '-flto']) - elif self.compiler.compiler_type == 'msvc': - # Remove .S source files, they use gcc/clang syntax. - extension.sources = [i for i in extension.sources - if not fnmatch.fnmatch(i, '*.[sS]')] - - # /Ob3: More aggressive inlining than /Ob2. - # /GF: Eliminates duplicate strings. - # /Gy: Does function level linking. - # /Ob3 is a bit faster on the whole. In setuptools v56.1+, - # /GF and /Gy are enabled by default, they reduce the size - # of MSVC wheels. - more_options = ['/Ob3', '/GF', '/Gy'] - if self.PYZSTD_AVX2: - more_options.append('/arch:AVX2') - if self.PYZSTD_WARNING_AS_ERROR: - more_options.append('/WX') - extension.extra_compile_args.extend(more_options) - super().build_extensions() - -def do_setup(): - # Read stuff - long_description, module_version = read_stuff() - - # Parse options - pyzstd_build_ext.PYZSTD_AVX2 = has_option('--avx2') - pyzstd_build_ext.PYZSTD_DEBUG = has_option('--debug') - pyzstd_build_ext.PYZSTD_WARNING_AS_ERROR = has_option('--warning-as-error') - - DYNAMIC_LINK = has_option('--dynamic-link-zstd') - CFFI = has_option('--cffi') or platform.python_implementation() == 'PyPy' - MULTI_PHASE_INIT = has_option('--multi-phase-init') - NO_MREMAP = has_option('--no-mremap') - - # Build config message - pyzstd_build_ext.PYZSTD_CONFIG_MSG = \ - ('+--------------------------------------------+\n' - '| Pyzstd build config |\n' - '+-------------------------+------------------+\n' - '| Pyzstd version | {!s:<16} |\n' - '+-------------------------+------------------+\n' - '| Implementation | {!s:<16} |\n' - '+-------------------------+------------------+\n' - '| Link to zstd library | {!s:<16} |\n' - 
'+-------------------------+------------------+\n' - '| Enable AVX2/BMI2 | {!s:<16} |\n' - '+-------------------------+------------------+\n' - '| Debug build | {!s:<16} |\n' - '+-------------------------+------------------+\n' - '| Warning as error | {!s:<16} |\n' - '+-------------------------+------------------+').format( - module_version, - 'CFFI' if CFFI else 'C', - 'Dynamically link' if DYNAMIC_LINK else 'Statically link', - pyzstd_build_ext.PYZSTD_AVX2, - pyzstd_build_ext.PYZSTD_DEBUG, - pyzstd_build_ext.PYZSTD_WARNING_AS_ERROR) - - if DYNAMIC_LINK: - kwargs = { - 'include_dirs': [], # .h directory - 'library_dirs': [], # .lib directory - 'libraries': ['zstd'], # lib name, not filename, for the linker. - 'sources': [], - 'define_macros': [] - } - else: # Statically link to zstd lib - kwargs = { - 'include_dirs': ['zstd/lib', - # For zstd 1.4.x: - 'zstd/lib/common', - 'zstd/lib/dictBuilder'], - 'library_dirs': [], - 'libraries': [], - 'sources': get_zstd_files_list(), - 'define_macros': [('PYZSTD_STATIC_LINK', None), - # Enable multi-threaded compression - ('ZSTD_MULTITHREAD', None)] - } - - if CFFI: - # Packages - packages = ['pyzstd', 'pyzstd._cffi'] - - # Binary extension - kwargs['module_name'] = 'pyzstd._cffi._cffi_zstd' - - sys.path.append('build_script') - import pyzstd_build_cffi - binary_extension = pyzstd_build_cffi.get_extension(**kwargs) - else: # C implementation - # Packages - packages = ['pyzstd', 'pyzstd._c'] - - # Binary extension - kwargs['name'] = 'pyzstd._c._zstd' - kwargs['sources'].append('src/bin_ext/pyzstd.c') - if MULTI_PHASE_INIT: - # Use multi-phase initialization (PEP-489) on CPython 3.11. - # On CPython 3.12+, it's always enabled. 
- kwargs['define_macros'].append(('USE_MULTI_PHASE_INIT', None)) - if NO_MREMAP: - # Disable mremap output buffer on Linux - kwargs['define_macros'].append(('PYZSTD_NO_MREMAP', None)) - - binary_extension = Extension(**kwargs) - - setup( - name='pyzstd', - version=module_version, - description=("Python bindings to Zstandard (zstd) compression library."), - long_description=long_description, - long_description_content_type='text/markdown', - author='Ma Lin', - author_email='malincns@163.com', - maintainer="Rogdham", - maintainer_email="contact@rogdham.net", - url='https://github.com/Rogdham/pyzstd', - license='BSD-3-Clause', - python_requires='>=3.10', - install_requires=["typing-extensions>=4.13.2 ; python_version<'3.13'"], - - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Topic :: System :: Archiving :: Compression", - "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", - ], - keywords='zstandard zstd zst compress decompress tar file seekable format', - package_dir={'pyzstd': 'src'}, - packages=packages, - package_data={'pyzstd': ['__init__.pyi', 'py.typed']}, - - ext_modules=[binary_extension], - cmdclass={'build_ext': pyzstd_build_ext}, - - test_suite='tests' - ) - -if __name__ == '__main__': - do_setup() diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index cb30ee6..0000000 --- a/src/__init__.py +++ /dev/null @@ -1,251 +0,0 @@ -try: - # Import C implementation - from ._c import ( - CParameter, - DParameter, - EndlessZstdDecompressor, - PYZSTD_CONFIG, - RichMemZstdCompressor as _RichMemZstdCompressor, - Strategy, - ZstdCompressor, - ZstdDecompressor, - 
ZstdDict, - ZstdError, - ZstdFileReader as _ZstdFileReader, - ZstdFileWriter as _ZstdFileWriter, - _ZSTD_CStreamSizes, - _ZSTD_DStreamSizes, - _finalize_dict, - _train_dict, - compress_stream as _compress_stream, - compressionLevel_values, - decompress, - decompress_stream as _decompress_stream, - get_frame_info, - get_frame_size, - zstd_version, - zstd_version_info - ) -except ImportError: - try: - # Import CFFI implementation - from ._cffi import ( - CParameter, - DParameter, - EndlessZstdDecompressor, - PYZSTD_CONFIG, - RichMemZstdCompressor as _RichMemZstdCompressor, - Strategy, - ZstdCompressor, - ZstdDecompressor, - ZstdDict, - ZstdError, - ZstdFileReader as _ZstdFileReader, - ZstdFileWriter as _ZstdFileWriter, - _ZSTD_CStreamSizes, - _ZSTD_DStreamSizes, - _finalize_dict, - _train_dict, - compress_stream as _compress_stream, - compressionLevel_values, - decompress, - decompress_stream as _decompress_stream, - get_frame_info, - get_frame_size, - zstd_version, - zstd_version_info - ) - except ImportError: - raise ImportError( - "\n\npyzstd module: Can't import compiled .so/.pyd file.\n" - "1, If pyzstd module is dynamically linked to zstd library: Make sure\n" - " not to remove zstd library, and the run-time zstd library's version\n" - " can't be lower than that at compile-time; On Windows, the directory\n" - " that has libzstd.dll should be added by os.add_dll_directory() function.\n" - "2, Please install pyzstd module through pip, to ensure that compiled\n" - " .so/.pyd file matches the architecture/OS/Python.\n") -from ._zstdfile import ZstdFile, open -from ._seekable_zstdfile import SeekableFormatError, SeekableZstdFile - -from functools import wraps - -try: - from warnings import deprecated -except ImportError: - from typing_extensions import deprecated - - -__version__ = '0.18.0' - -__doc__ = '''\ -Python bindings to Zstandard (zstd) compression library, the API style is -similar to Python's bz2/lzma/zlib modules. 
- -Command line interface of this module: python -m pyzstd --help - -Documentation: https://pyzstd.readthedocs.io -GitHub: https://github.com/Rogdham/pyzstd -PyPI: https://pypi.org/project/pyzstd''' - -__all__ = ('ZstdCompressor', 'RichMemZstdCompressor', - 'ZstdDecompressor', 'EndlessZstdDecompressor', - 'CParameter', 'DParameter', 'Strategy', 'ZstdError', - 'compress', 'richmem_compress', 'decompress', - 'compress_stream', 'decompress_stream', - 'ZstdDict', 'train_dict', 'finalize_dict', - 'get_frame_info', 'get_frame_size', 'ZstdFile', 'open', - 'zstd_version', 'zstd_version_info', - 'zstd_support_multithread', 'compressionLevel_values', - 'SeekableZstdFile', 'SeekableFormatError') - - -zstd_support_multithread = (CParameter.nbWorkers.bounds() != (0, 0)) - - -def compress(data, level_or_option=None, zstd_dict=None): - """Compress a block of data, return a bytes object. - - Compressing b'' will get an empty content frame (9 bytes or more). - - Parameters - data: A bytes-like object, data to be compressed. - level_or_option: When it's an int object, it represents compression level. - When it's a dict object, it contains advanced compression - parameters. - zstd_dict: A ZstdDict object, pre-trained dictionary for compression. - """ - comp = ZstdCompressor(level_or_option, zstd_dict) - return comp.compress(data, ZstdCompressor.FLUSH_FRAME) - - -@deprecated("See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives to pyzstd.richmem_compress") -def richmem_compress(data, level_or_option=None, zstd_dict=None): - """Compress a block of data, return a bytes object. - - Use rich memory mode, it's faster than compress() in some cases, but - allocates more memory. - - Compressing b'' will get an empty content frame (9 bytes or more). - - Parameters - data: A bytes-like object, data to be compressed. - level_or_option: When it's an int object, it represents compression level. - When it's a dict object, it contains advanced compression - parameters. 
- zstd_dict: A ZstdDict object, pre-trained dictionary for compression. - """ - comp = _RichMemZstdCompressor(level_or_option, zstd_dict) - return comp.compress(data) - - -def _nbytes(dat): - if isinstance(dat, (bytes, bytearray)): - return len(dat) - with memoryview(dat) as mv: - return mv.nbytes - - -def train_dict(samples, dict_size): - """Train a zstd dictionary, return a ZstdDict object. - - Parameters - samples: An iterable of samples, a sample is a bytes-like object - represents a file. - dict_size: The dictionary's maximum size, in bytes. - """ - # Check argument's type - if not isinstance(dict_size, int): - raise TypeError('dict_size argument should be an int object.') - - # Prepare data - chunks = [] - chunk_sizes = [] - for chunk in samples: - chunks.append(chunk) - chunk_sizes.append(_nbytes(chunk)) - - chunks = b''.join(chunks) - if not chunks: - raise ValueError("The samples are empty content, can't train dictionary.") - - # samples_bytes: samples be stored concatenated in a single flat buffer. - # samples_size_list: a list of each sample's size. - # dict_size: size of the dictionary, in bytes. - dict_content = _train_dict(chunks, chunk_sizes, dict_size) - - return ZstdDict(dict_content) - - -def finalize_dict(zstd_dict, samples, dict_size, level): - """Finalize a zstd dictionary, return a ZstdDict object. - - Given a custom content as a basis for dictionary, and a set of samples, - finalize dictionary by adding headers and statistics according to the zstd - dictionary format. - - You may compose an effective dictionary content by hand, which is used as - basis dictionary, and use some samples to finalize a dictionary. The basis - dictionary can be a "raw content" dictionary, see is_raw parameter in - ZstdDict.__init__ method. - - Parameters - zstd_dict: A ZstdDict object, basis dictionary. - samples: An iterable of samples, a sample is a bytes-like object - represents a file. - dict_size: The dictionary's maximum size, in bytes. 
- level: The compression level expected to use in production. The - statistics for each compression level differ, so tuning the - dictionary for the compression level can help quite a bit. - """ - if zstd_version_info < (1, 4, 5): - msg = ("This function only available when the underlying zstd " - "library's version is greater than or equal to v1.4.5, " - "the current underlying zstd library's version is v%s.") % zstd_version - raise NotImplementedError(msg) - - # Check arguments' type - if not isinstance(zstd_dict, ZstdDict): - raise TypeError('zstd_dict argument should be a ZstdDict object.') - if not isinstance(dict_size, int): - raise TypeError('dict_size argument should be an int object.') - if not isinstance(level, int): - raise TypeError('level argument should be an int object.') - - # Prepare data - chunks = [] - chunk_sizes = [] - for chunk in samples: - chunks.append(chunk) - chunk_sizes.append(_nbytes(chunk)) - - chunks = b''.join(chunks) - if not chunks: - raise ValueError("The samples are empty content, can't finalize dictionary.") - - # custom_dict_bytes: existing dictionary. - # samples_bytes: samples be stored concatenated in a single flat buffer. - # samples_size_list: a list of each sample's size. - # dict_size: maximal size of the dictionary, in bytes. - # compression_level: compression level expected to use in production. 
- dict_content = _finalize_dict(zstd_dict.dict_content, - chunks, chunk_sizes, - dict_size, level) - - return ZstdDict(dict_content) - - -@wraps(_compress_stream) -@deprecated("See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives to pyzstd.compress_stream") -def compress_stream(*args, **kwargs): - return _compress_stream(*args, **kwargs) - -@wraps(_decompress_stream) -@deprecated("See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives to pyzstd.decompress_stream") -def decompress_stream(*args, **kwargs): - return _decompress_stream(*args, **kwargs) - -@deprecated("See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives to pyzstd.RichMemZstdCompressor") -class RichMemZstdCompressor(_RichMemZstdCompressor): - pass - -RichMemZstdCompressor.__doc__ = _RichMemZstdCompressor.__doc__ diff --git a/src/_c/__init__.py b/src/_c/__init__.py deleted file mode 100644 index f5abdfc..0000000 --- a/src/_c/__init__.py +++ /dev/null @@ -1,182 +0,0 @@ -from collections import namedtuple -from enum import IntEnum - -from ._zstd import ( - EndlessZstdDecompressor, - PYZSTD_CONFIG, - RichMemZstdCompressor, - ZstdCompressor, - ZstdDecompressor, - ZstdDict, - ZstdError, - ZstdFileReader, - ZstdFileWriter, - _ZSTD_CStreamSizes, - _ZSTD_DStreamSizes, - _ZSTD_btlazy2, - _ZSTD_btopt, - _ZSTD_btultra, - _ZSTD_btultra2, - _ZSTD_c_chainLog, - _ZSTD_c_checksumFlag, - _ZSTD_c_compressionLevel, - _ZSTD_c_contentSizeFlag, - _ZSTD_c_dictIDFlag, - _ZSTD_c_enableLongDistanceMatching, - _ZSTD_c_hashLog, - _ZSTD_c_jobSize, - _ZSTD_c_ldmBucketSizeLog, - _ZSTD_c_ldmHashLog, - _ZSTD_c_ldmHashRateLog, - _ZSTD_c_ldmMinMatch, - _ZSTD_c_minMatch, - _ZSTD_c_nbWorkers, - _ZSTD_c_overlapLog, - _ZSTD_c_searchLog, - _ZSTD_c_strategy, - _ZSTD_c_targetCBlockSize, - _ZSTD_c_targetLength, - _ZSTD_c_windowLog, - _ZSTD_d_windowLogMax, - _ZSTD_dfast, - _ZSTD_fast, - _ZSTD_greedy, - _ZSTD_lazy, - _ZSTD_lazy2, - _compressionLevel_values, - _finalize_dict, - 
_get_frame_info, - _get_param_bounds, - _set_parameter_types, - _train_dict, - compress_stream, - decompress, - decompress_stream, - get_frame_size, - zstd_version, - zstd_version_info -) - -__all__ = (# From this file - 'compressionLevel_values', 'get_frame_info', - 'CParameter', 'DParameter', 'Strategy', - # From _zstd - 'ZstdCompressor', 'RichMemZstdCompressor', - 'ZstdDecompressor', 'EndlessZstdDecompressor', - 'ZstdDict', 'ZstdError', 'decompress', 'get_frame_size', - 'compress_stream', 'decompress_stream', - 'zstd_version', 'zstd_version_info', - '_train_dict', '_finalize_dict', - 'ZstdFileReader', 'ZstdFileWriter', - '_ZSTD_CStreamSizes', '_ZSTD_DStreamSizes', - 'PYZSTD_CONFIG') - - -# compressionLevel_values -_nt_values = namedtuple('values', ['default', 'min', 'max']) -compressionLevel_values = _nt_values(*_compressionLevel_values) - - -_nt_frame_info = namedtuple('frame_info', - ['decompressed_size', 'dictionary_id']) - -def get_frame_info(frame_buffer): - """Get zstd frame information from a frame header. - - Parameter - frame_buffer: A bytes-like object. It should starts from the beginning of - a frame, and needs to include at least the frame header (6 to - 18 bytes). - - Return a two-items namedtuple: (decompressed_size, dictionary_id) - - If decompressed_size is None, decompressed size is unknown. - - dictionary_id is a 32-bit unsigned integer value. 0 means dictionary ID was - not recorded in the frame header, the frame may or may not need a dictionary - to be decoded, and the ID of such a dictionary is not specified. - - It's possible to append more items to the namedtuple in the future.""" - - ret_tuple = _get_frame_info(frame_buffer) - return _nt_frame_info(*ret_tuple) - - -class _UnsupportedCParameter: - def __set_name__(self, _, name): - self.name = name - - def __get__(self, *_, **__): - msg = ("%s CParameter only available when the underlying " - "zstd library's version is greater than or equal to v1.5.6. 
" - "At pyzstd module's run-time, zstd version is %s.") % \ - (self.name, zstd_version) - raise NotImplementedError(msg) - - -class CParameter(IntEnum): - """Compression parameters""" - - compressionLevel = _ZSTD_c_compressionLevel - windowLog = _ZSTD_c_windowLog - hashLog = _ZSTD_c_hashLog - chainLog = _ZSTD_c_chainLog - searchLog = _ZSTD_c_searchLog - minMatch = _ZSTD_c_minMatch - targetLength = _ZSTD_c_targetLength - strategy = _ZSTD_c_strategy - if zstd_version_info >= (1, 5, 6): - targetCBlockSize = _ZSTD_c_targetCBlockSize - else: - targetCBlockSize = _UnsupportedCParameter() - - enableLongDistanceMatching = _ZSTD_c_enableLongDistanceMatching - ldmHashLog = _ZSTD_c_ldmHashLog - ldmMinMatch = _ZSTD_c_ldmMinMatch - ldmBucketSizeLog = _ZSTD_c_ldmBucketSizeLog - ldmHashRateLog = _ZSTD_c_ldmHashRateLog - - contentSizeFlag = _ZSTD_c_contentSizeFlag - checksumFlag = _ZSTD_c_checksumFlag - dictIDFlag = _ZSTD_c_dictIDFlag - - nbWorkers = _ZSTD_c_nbWorkers - jobSize = _ZSTD_c_jobSize - overlapLog = _ZSTD_c_overlapLog - - def bounds(self): - """Return lower and upper bounds of a compression parameter, both inclusive.""" - # 1 means compression parameter - return _get_param_bounds(1, self.value) - - -class DParameter(IntEnum): - """Decompression parameters""" - - windowLogMax = _ZSTD_d_windowLogMax - - def bounds(self): - """Return lower and upper bounds of a decompression parameter, both inclusive.""" - # 0 means decompression parameter - return _get_param_bounds(0, self.value) - - -class Strategy(IntEnum): - """Compression strategies, listed from fastest to strongest. - - Note : new strategies _might_ be added in the future, only the order - (from fast to strong) is guaranteed. 
- """ - fast = _ZSTD_fast - dfast = _ZSTD_dfast - greedy = _ZSTD_greedy - lazy = _ZSTD_lazy - lazy2 = _ZSTD_lazy2 - btlazy2 = _ZSTD_btlazy2 - btopt = _ZSTD_btopt - btultra = _ZSTD_btultra - btultra2 = _ZSTD_btultra2 - - -# Set CParameter/DParameter types for validity check -_set_parameter_types(CParameter, DParameter) diff --git a/src/_cffi/__init__.py b/src/_cffi/__init__.py deleted file mode 100644 index 98caca3..0000000 --- a/src/_cffi/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -from .common import ZstdError, CParameter, DParameter, Strategy, \ - get_frame_info, get_frame_size, \ - zstd_version, zstd_version_info, \ - compressionLevel_values, \ - _train_dict, _finalize_dict, \ - _ZSTD_CStreamSizes, _ZSTD_DStreamSizes, \ - PYZSTD_CONFIG -from .dict import ZstdDict -from .compressor import ZstdCompressor, RichMemZstdCompressor -from .decompressor import ZstdDecompressor, EndlessZstdDecompressor, decompress -from .stream import compress_stream, decompress_stream -from .file import ZstdFileReader, ZstdFileWriter - -__all__ = ('ZstdCompressor', 'RichMemZstdCompressor', - 'ZstdDecompressor', 'EndlessZstdDecompressor', - 'ZstdDict', 'ZstdError', - 'CParameter', 'DParameter', 'Strategy', - 'decompress', 'get_frame_info', 'get_frame_size', - 'compress_stream', 'decompress_stream', - 'zstd_version', 'zstd_version_info', - 'compressionLevel_values', - '_train_dict', '_finalize_dict', - 'ZstdFileReader', 'ZstdFileWriter', - '_ZSTD_CStreamSizes', '_ZSTD_DStreamSizes', - 'PYZSTD_CONFIG') diff --git a/src/_cffi/common.py b/src/_cffi/common.py deleted file mode 100644 index c8afb2c..0000000 --- a/src/_cffi/common.py +++ /dev/null @@ -1,461 +0,0 @@ -import sys -from collections import namedtuple -from enum import IntEnum - -from ._cffi_zstd import ffi, lib as m - -PYZSTD_CONFIG = (64 if sys.maxsize > 2**32 else 32, - 'cffi', bool(m.pyzstd_static_link), False, False) - -_ZSTD_CStreamSizes = (m.ZSTD_CStreamInSize(), m.ZSTD_CStreamOutSize()) -_ZSTD_DStreamSizes = 
(m.ZSTD_DStreamInSize(), m.ZSTD_DStreamOutSize()) - -zstd_version = ffi.string(m.ZSTD_versionString()).decode('ascii') -zstd_version_info = tuple(int(i) for i in zstd_version.split('.')) - -_nt_values = namedtuple('values', ['default', 'min', 'max']) -compressionLevel_values = _nt_values(m.ZSTD_defaultCLevel(), - m.ZSTD_minCLevel(), - m.ZSTD_maxCLevel()) - -_new_nonzero = ffi.new_allocator(should_clear_after_alloc=False) - -def _nbytes(dat): - if isinstance(dat, (bytes, bytearray)): - return len(dat) - with memoryview(dat) as mv: - return mv.nbytes - -class ZstdError(Exception): - "Call to the underlying zstd library failed." - pass - -def _get_param_bounds(is_compress, key): - # Get parameter bounds - if is_compress: - bounds = m.ZSTD_cParam_getBounds(key) - if m.ZSTD_isError(bounds.error): - _set_zstd_error(_ErrorType.ERR_GET_C_BOUNDS, bounds.error) - else: - bounds = m.ZSTD_dParam_getBounds(key) - if m.ZSTD_isError(bounds.error): - _set_zstd_error(_ErrorType.ERR_GET_D_BOUNDS, bounds.error) - - return (bounds.lowerBound, bounds.upperBound) - - -class _UnsupportedCParameter: - def __set_name__(self, _, name): - self.name = name - - def __get__(self, *_, **__): - msg = ("%s CParameter only available when the underlying " - "zstd library's version is greater than or equal to v1.5.6. 
" - "At pyzstd module's run-time, zstd version is %s.") % \ - (self.name, zstd_version) - raise NotImplementedError(msg) - - -class CParameter(IntEnum): - """Compression parameters""" - - compressionLevel = m.ZSTD_c_compressionLevel - windowLog = m.ZSTD_c_windowLog - hashLog = m.ZSTD_c_hashLog - chainLog = m.ZSTD_c_chainLog - searchLog = m.ZSTD_c_searchLog - minMatch = m.ZSTD_c_minMatch - targetLength = m.ZSTD_c_targetLength - strategy = m.ZSTD_c_strategy - if zstd_version_info >= (1, 5, 6): - targetCBlockSize = m.ZSTD_c_targetCBlockSize - else: - targetCBlockSize = _UnsupportedCParameter() - - enableLongDistanceMatching = m.ZSTD_c_enableLongDistanceMatching - ldmHashLog = m.ZSTD_c_ldmHashLog - ldmMinMatch = m.ZSTD_c_ldmMinMatch - ldmBucketSizeLog = m.ZSTD_c_ldmBucketSizeLog - ldmHashRateLog = m.ZSTD_c_ldmHashRateLog - - contentSizeFlag = m.ZSTD_c_contentSizeFlag - checksumFlag = m.ZSTD_c_checksumFlag - dictIDFlag = m.ZSTD_c_dictIDFlag - - nbWorkers = m.ZSTD_c_nbWorkers - jobSize = m.ZSTD_c_jobSize - overlapLog = m.ZSTD_c_overlapLog - - def bounds(self): - """Return lower and upper bounds of a compression parameter, both inclusive.""" - # 1 means compression parameter - return _get_param_bounds(1, self.value) - -class DParameter(IntEnum): - """Decompression parameters""" - - windowLogMax = m.ZSTD_d_windowLogMax - - def bounds(self): - """Return lower and upper bounds of a decompression parameter, both inclusive.""" - # 0 means decompression parameter - return _get_param_bounds(0, self.value) - -class Strategy(IntEnum): - """Compression strategies, listed from fastest to strongest. - - Note : new strategies _might_ be added in the future, only the order - (from fast to strong) is guaranteed. 
- """ - fast = m.ZSTD_fast - dfast = m.ZSTD_dfast - greedy = m.ZSTD_greedy - lazy = m.ZSTD_lazy - lazy2 = m.ZSTD_lazy2 - btlazy2 = m.ZSTD_btlazy2 - btopt = m.ZSTD_btopt - btultra = m.ZSTD_btultra - btultra2 = m.ZSTD_btultra2 - -class _ErrorType: - ERR_DECOMPRESS=0 - ERR_COMPRESS=1 - ERR_SET_PLEDGED_INPUT_SIZE=2 - - ERR_LOAD_D_DICT=3 - ERR_LOAD_C_DICT=4 - - ERR_GET_C_BOUNDS=5 - ERR_GET_D_BOUNDS=6 - ERR_SET_C_LEVEL=7 - - ERR_TRAIN_DICT=8 - ERR_FINALIZE_DICT=9 - - _TYPE_MSG = ( - "Unable to decompress zstd data: %s", - "Unable to compress zstd data: %s", - "Unable to set pledged uncompressed content size: %s", - - "Unable to load zstd dictionary or prefix for decompression: %s", - "Unable to load zstd dictionary or prefix for compression: %s", - - "Unable to get zstd compression parameter bounds: %s", - "Unable to get zstd decompression parameter bounds: %s", - "Unable to set zstd compression level: %s", - - "Unable to train zstd dictionary: %s", - "Unable to finalize zstd dictionary: %s") - - @staticmethod - def get_type_msg(type): - return _ErrorType._TYPE_MSG[type] - -def _set_zstd_error(type, zstd_ret): - msg = _ErrorType.get_type_msg(type) % \ - ffi.string(m.ZSTD_getErrorName(zstd_ret)).decode('utf-8') - raise ZstdError(msg) - -def _set_parameter_error(is_compress, key, value): - COMPRESS_PARAMETERS = \ - {m.ZSTD_c_compressionLevel: "compressionLevel", - m.ZSTD_c_windowLog: "windowLog", - m.ZSTD_c_hashLog: "hashLog", - m.ZSTD_c_chainLog: "chainLog", - m.ZSTD_c_searchLog: "searchLog", - m.ZSTD_c_minMatch: "minMatch", - m.ZSTD_c_targetLength: "targetLength", - m.ZSTD_c_strategy: "strategy", - - m.ZSTD_c_enableLongDistanceMatching: "enableLongDistanceMatching", - m.ZSTD_c_ldmHashLog: "ldmHashLog", - m.ZSTD_c_ldmMinMatch: "ldmMinMatch", - m.ZSTD_c_ldmBucketSizeLog: "ldmBucketSizeLog", - m.ZSTD_c_ldmHashRateLog: "ldmHashRateLog", - - m.ZSTD_c_contentSizeFlag: "contentSizeFlag", - m.ZSTD_c_checksumFlag: "checksumFlag", - m.ZSTD_c_dictIDFlag: "dictIDFlag", - - 
m.ZSTD_c_nbWorkers: "nbWorkers", - m.ZSTD_c_jobSize: "jobSize", - m.ZSTD_c_overlapLog: "overlapLog"} - if zstd_version_info >= (1, 5, 6): - COMPRESS_PARAMETERS[m.ZSTD_c_targetCBlockSize] = "targetCBlockSize" - - DECOMPRESS_PARAMETERS = {m.ZSTD_d_windowLogMax: "windowLogMax"} - - if is_compress: - parameters = COMPRESS_PARAMETERS - type_msg = "compression" - else: - parameters = DECOMPRESS_PARAMETERS - type_msg = "decompression" - - # Find parameter's name - name = parameters.get(key) - # Unknown parameter - if name is None: - name = 'unknown parameter (key %d)' % key - - # Get parameter bounds - if is_compress: - bounds = m.ZSTD_cParam_getBounds(key) - else: - bounds = m.ZSTD_dParam_getBounds(key) - if m.ZSTD_isError(bounds.error): - msg = 'Zstd %s parameter "%s" is invalid. (zstd v%s)' % \ - (type_msg, name, zstd_version) - raise ZstdError(msg) - - # Error message - msg = ('Error when setting zstd %s parameter "%s", it ' - 'should %d <= value <= %d, provided value is %d. ' - '(zstd v%s, %d-bit build)') % \ - (type_msg, name, - bounds.lowerBound, bounds.upperBound, value, - zstd_version, PYZSTD_CONFIG[0]) - raise ZstdError(msg) - -def _check_int32_value(value, name): - try: - if value > 2147483647 or value < -2147483648: - raise Exception - except: - raise ValueError("%s should be 32-bit signed int value." 
% name) - -# return: (compressionLevel, use_multithread) -def _set_c_parameters(cctx, level_or_option): - if isinstance(level_or_option, int): - _check_int32_value(level_or_option, "Compression level") - - # Set compression level - zstd_ret = m.ZSTD_CCtx_setParameter(cctx, m.ZSTD_c_compressionLevel, - level_or_option) - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_SET_C_LEVEL, zstd_ret) - - return level_or_option, False - - if isinstance(level_or_option, dict): - level = 0 # 0 means use zstd's default compression level - use_multithread = False - - for key, value in level_or_option.items(): - # Check key type - if type(key) == DParameter: - raise TypeError("Key of compression option dict should " - "NOT be DParameter.") - - # Both key & value should be 32-bit signed int - _check_int32_value(key, "Key of option dict") - _check_int32_value(value, "Value of option dict") - - if key == m.ZSTD_c_compressionLevel: - level = value - elif key == m.ZSTD_c_nbWorkers: - if value != 0: - use_multithread = True - - # Set parameter - zstd_ret = m.ZSTD_CCtx_setParameter(cctx, key, value) - if m.ZSTD_isError(zstd_ret): - _set_parameter_error(True, key, value) - - return level, use_multithread - - raise TypeError("level_or_option argument wrong type.") - -def _set_d_parameters(dctx, option): - if not isinstance(option, dict): - raise TypeError("option argument should be dict object.") - - for key, value in option.items(): - # Check key type - if type(key) == CParameter: - raise TypeError("Key of decompression option dict should " - "NOT be CParameter.") - - # Both key & value should be 32-bit signed int - _check_int32_value(key, "Key of option dict") - _check_int32_value(value, "Value of option dict") - - # Set parameter - zstd_ret = m.ZSTD_DCtx_setParameter(dctx, key, value) - if m.ZSTD_isError(zstd_ret): - _set_parameter_error(False, key, value) - -# Write output data to fp. -# If (out_b.pos == 0), do nothing. 
-def _write_to_fp(func_name, fp, out_mv, out_b): - if out_b.pos == 0: - return - - write_ret = fp.write(out_mv[:out_b.pos]) - if write_ret != out_b.pos: - msg = ("%s returned invalid length %d " - "(should be %d <= value <= %d)") % \ - (func_name, write_ret, out_b.pos, out_b.pos) - raise ValueError(msg) - -def _train_dict(samples_bytes, samples_size_list, dict_size): - # C code - if dict_size <= 0: - raise ValueError("dict_size argument should be positive number.") - - # Prepare chunk_sizes - _chunks_number = len(samples_size_list) - _sizes = _new_nonzero("size_t[]", _chunks_number) - if _sizes == ffi.NULL: - raise MemoryError - - _sizes_sum = 0 - for i, size in enumerate(samples_size_list): - _sizes[i] = size - _sizes_sum += size - - if _sizes_sum != _nbytes(samples_bytes): - msg = "The samples size list doesn't match the concatenation's size." - raise ValueError(msg) - - # Allocate dict buffer - _dst_dict_bytes = _new_nonzero("char[]", dict_size) - if _dst_dict_bytes == ffi.NULL: - raise MemoryError - - # Train - zstd_ret = m.ZDICT_trainFromBuffer(_dst_dict_bytes, dict_size, - ffi.from_buffer(samples_bytes), - _sizes, _chunks_number) - if m.ZDICT_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_TRAIN_DICT, zstd_ret) - - # Resize dict_buffer - b = ffi.buffer(_dst_dict_bytes)[:zstd_ret] - return b - -def _finalize_dict(custom_dict_bytes, - samples_bytes, samples_size_list, - dict_size, compression_level): - # If m.ZSTD_VERSION_NUMBER < 10405, m.ZDICT_finalizeDictionary() is an - # empty function defined in build_cffi.py. - # If m.ZSTD_versionNumber() < 10405, m.ZDICT_finalizeDictionary() doesn't - # exist in run-time zstd library. - if (m.ZSTD_VERSION_NUMBER < 10405 # compile-time version - or m.ZSTD_versionNumber() < 10405): # run-time version - msg = ("finalize_dict function only available when the underlying " - "zstd library's version is greater than or equal to v1.4.5. " - "At pyzstd module's compile-time, zstd version is %d. 
At " - "pyzstd module's run-time, zstd version is %d.") % \ - (m.ZSTD_VERSION_NUMBER, m.ZSTD_versionNumber()) - raise NotImplementedError(msg) - - # C code - if dict_size <= 0: - raise ValueError("dict_size argument should be positive number.") - - # Prepare chunk_sizes - _chunks_number = len(samples_size_list) - _sizes = _new_nonzero("size_t[]", _chunks_number) - if _sizes == ffi.NULL: - raise MemoryError - - _sizes_sum = 0 - for i, size in enumerate(samples_size_list): - _sizes[i] = size - _sizes_sum += size - - if _sizes_sum != _nbytes(samples_bytes): - msg = "The samples size list doesn't match the concatenation's size." - raise ValueError(msg) - - # Allocate dict buffer - _dst_dict_bytes = _new_nonzero("char[]", dict_size) - if _dst_dict_bytes == ffi.NULL: - raise MemoryError - - # Parameters - params = _new_nonzero("ZDICT_params_t *") - if params == ffi.NULL: - raise MemoryError - # Optimize for a specific zstd compression level, 0 means default. - params.compressionLevel = compression_level - # Write log to stderr, 0 = none. - params.notificationLevel = 0 - # Force dictID value, 0 means auto mode (32-bits random value). - params.dictID = 0 - - # Finalize - zstd_ret = m.ZDICT_finalizeDictionary( - _dst_dict_bytes, dict_size, - ffi.from_buffer(custom_dict_bytes), _nbytes(custom_dict_bytes), - ffi.from_buffer(samples_bytes), _sizes, _chunks_number, - params[0]) - if m.ZDICT_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_FINALIZE_DICT, zstd_ret) - - # Resize dict_buffer - b = ffi.buffer(_dst_dict_bytes)[:zstd_ret] - return b - -_nt_frame_info = namedtuple('frame_info', - ['decompressed_size', 'dictionary_id']) - -def get_frame_info(frame_buffer): - """Get zstd frame information from a frame header. - - Parameter - frame_buffer: A bytes-like object. It should starts from the beginning of - a frame, and needs to include at least the frame header (6 to - 18 bytes). 
- - Return a two-items namedtuple: (decompressed_size, dictionary_id) - - If decompressed_size is None, decompressed size is unknown. - - dictionary_id is a 32-bit unsigned integer value. 0 means dictionary ID was - not recorded in the frame header, the frame may or may not need a dictionary - to be decoded, and the ID of such a dictionary is not specified. - - It's possible to append more items to the namedtuple in the future. - """ - - decompressed_size = m.ZSTD_getFrameContentSize( - ffi.from_buffer(frame_buffer), len(frame_buffer)) - if decompressed_size == m.ZSTD_CONTENTSIZE_UNKNOWN: - decompressed_size = None - elif decompressed_size == m.ZSTD_CONTENTSIZE_ERROR: - msg = ("Error when getting information from the header of " - "a zstd frame. Make sure the frame_buffer argument " - "starts from the beginning of a frame, and its length " - "not less than the frame header (6~18 bytes).") - raise ZstdError(msg) - - dict_id = m.ZSTD_getDictID_fromFrame( - ffi.from_buffer(frame_buffer), len(frame_buffer)) - - ret = _nt_frame_info(decompressed_size, dict_id) - return ret - -def get_frame_size(frame_buffer): - """Get the size of a zstd frame, including frame header and 4-byte checksum if it - has. - - It will iterate all blocks' header within a frame, to accumulate the frame size. - - Parameter - frame_buffer: A bytes-like object, it should starts from the beginning of a - frame, and contains at least one complete frame. - """ - - frame_size = m.ZSTD_findFrameCompressedSize( - ffi.from_buffer(frame_buffer), len(frame_buffer)) - if m.ZSTD_isError(frame_size): - msg = ("Error when finding the compressed size of a zstd frame. " - "Make sure the frame_buffer argument starts from the " - "beginning of a frame, and its length not less than this " - "complete frame. 
Zstd error message: %s.") % \ - ffi.string(m.ZSTD_getErrorName(frame_size)).decode('utf-8') - raise ZstdError(msg) - - return frame_size diff --git a/src/_cffi/compressor.py b/src/_cffi/compressor.py deleted file mode 100644 index 6fb6f94..0000000 --- a/src/_cffi/compressor.py +++ /dev/null @@ -1,304 +0,0 @@ -from threading import Lock -from warnings import warn - -from .common import m, ffi, _new_nonzero, _nbytes, \ - ZstdError, _set_c_parameters, \ - _set_zstd_error, _ErrorType -from .dict import _load_c_dict -from .output_buffer import _BlocksOutputBuffer - -class _Compressor: - def __init__(self, level_or_option=None, zstd_dict=None): - self._use_multithread = False - self._lock = Lock() - level = 0 # 0 means use zstd's default compression level - - self._singleton_in_buf = _new_nonzero("ZSTD_inBuffer *") - if self._singleton_in_buf == ffi.NULL: - raise MemoryError - - self._singleton_out_buf = _new_nonzero("ZSTD_outBuffer *") - if self._singleton_out_buf == ffi.NULL: - raise MemoryError - - # Compression context - self._cctx = m.ZSTD_createCCtx() - if self._cctx == ffi.NULL: - raise ZstdError("Unable to create ZSTD_CCtx instance.") - - # Set compressLevel/option to compression context - if level_or_option is not None: - level, self._use_multithread = \ - _set_c_parameters(self._cctx, level_or_option) - - # Load dictionary to compression context - if zstd_dict is not None: - _load_c_dict(self._cctx, zstd_dict, level) - self.__dict = zstd_dict - - def __del__(self): - try: - m.ZSTD_freeCCtx(self._cctx) - self._cctx = ffi.NULL - except AttributeError: - pass - - def _compress_impl(self, data, end_directive, rich_mem): - # Input buffer - in_buf = self._singleton_in_buf - in_buf.src = ffi.from_buffer(data) - in_buf.size = _nbytes(data) - in_buf.pos = 0 - - # Output buffer - out_buf = self._singleton_out_buf - out = _BlocksOutputBuffer() - - # Initialize output buffer - if rich_mem: - init_size = m.ZSTD_compressBound(_nbytes(data)) - out.initWithSize(out_buf, -1, 
init_size) - else: - out.initAndGrow(out_buf, -1) - - while True: - # Compress - zstd_ret = m.ZSTD_compressStream2(self._cctx, out_buf, in_buf, end_directive) - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_COMPRESS, zstd_ret) - - # Finished - if zstd_ret == 0: - return out.finish(out_buf) - - # Output buffer should be exhausted, grow the buffer. - if out_buf.pos == out_buf.size: - out.grow(out_buf) - - def _compress_mt_continue_impl(self, data): - # Input buffer - in_buf = self._singleton_in_buf - in_buf.src = ffi.from_buffer(data) - in_buf.size = _nbytes(data) - in_buf.pos = 0 - - # Output buffer - out_buf = self._singleton_out_buf - out = _BlocksOutputBuffer() - out.initAndGrow(out_buf, -1) - - while True: - # Compress - while True: - zstd_ret = m.ZSTD_compressStream2(self._cctx, - out_buf, in_buf, - m.ZSTD_e_continue) - if (out_buf.pos == out_buf.size - or in_buf.pos == in_buf.size - or m.ZSTD_isError(zstd_ret)): - break - - # Check error - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_COMPRESS, zstd_ret) - - # Like ._compress_impl(), output as much as possible. - if out_buf.pos == out_buf.size: - out.grow(out_buf) - elif in_buf.pos == in_buf.size: - # Finished - return out.finish(out_buf) - - def __reduce__(self): - msg = "Cannot pickle %s object." % type(self) - raise TypeError(msg) - -class ZstdCompressor(_Compressor): - """A streaming compressor. Thread-safe at method level.""" - - CONTINUE = m.ZSTD_e_continue - """Used for mode parameter in .compress() method. - - Collect more data, encoder decides when to output compressed result, for optimal - compression ratio. Usually used for traditional streaming compression. - """ - - FLUSH_BLOCK = m.ZSTD_e_flush - """Used for mode parameter in .compress(), .flush() methods. - - Flush any remaining data, but don't close the current frame. Usually used for - communication scenarios. - - If there is data, it creates at least one new block, that can be decoded - immediately on reception. 
If no remaining data, no block is created, return b''. - - Note: Abuse of this mode will reduce compression ratio. Use it only when - necessary. - """ - - FLUSH_FRAME = m.ZSTD_e_end - """Used for mode parameter in .compress(), .flush() methods. - - Flush any remaining data, and close the current frame. Usually used for - traditional flush. - - Since zstd data consists of one or more independent frames, data can still be - provided after a frame is closed. - - Note: Abuse of this mode will reduce compression ratio, and some programs can - only decompress single frame data. Use it only when necessary. - """ - - def __init__(self, level_or_option=None, zstd_dict=None): - """Initialize a ZstdCompressor object. - - Parameters - level_or_option: When it's an int object, it represents the compression level. - When it's a dict object, it contains advanced compression - parameters. - zstd_dict: A ZstdDict object, pre-trained zstd dictionary. - """ - super().__init__(level_or_option=level_or_option, zstd_dict=zstd_dict) - self.__last_mode = m.ZSTD_e_end - - def compress(self, data, mode=CONTINUE): - """Provide data to the compressor object. - Return a chunk of compressed data if possible, or b'' otherwise. - - Parameters - data: A bytes-like object, data to be compressed. - mode: Can be these 3 values .CONTINUE, .FLUSH_BLOCK, .FLUSH_FRAME. 
- """ - if mode not in (ZstdCompressor.CONTINUE, - ZstdCompressor.FLUSH_BLOCK, - ZstdCompressor.FLUSH_FRAME): - msg = ("mode argument wrong value, it should be one of " - "ZstdCompressor.CONTINUE, ZstdCompressor.FLUSH_BLOCK, " - "ZstdCompressor.FLUSH_FRAME.") - raise ValueError(msg) - - with self._lock: - try: - if self._use_multithread and mode == ZstdCompressor.CONTINUE: - ret = self._compress_mt_continue_impl(data) - else: - ret = self._compress_impl(data, mode, False) - self.__last_mode = mode - return ret - except: - self.__last_mode = m.ZSTD_e_end - # Resetting cctx's session never fail - m.ZSTD_CCtx_reset(self._cctx, m.ZSTD_reset_session_only) - raise - - def flush(self, mode=FLUSH_FRAME): - """Flush any remaining data in internal buffer. - - Since zstd data consists of one or more independent frames, the compressor - object can still be used after this method is called. - - Parameter - mode: Can be these 2 values .FLUSH_FRAME, .FLUSH_BLOCK. - """ - if mode not in (ZstdCompressor.FLUSH_FRAME, ZstdCompressor.FLUSH_BLOCK): - msg = ("mode argument wrong value, it should be " - "ZstdCompressor.FLUSH_FRAME or ZstdCompressor.FLUSH_BLOCK.") - raise ValueError(msg) - - with self._lock: - try: - ret = self._compress_impl(b"", mode, False) - self.__last_mode = mode - return ret - except: - self.__last_mode = m.ZSTD_e_end - # Resetting cctx's session never fail - m.ZSTD_CCtx_reset(self._cctx, m.ZSTD_reset_session_only) - raise - - def _set_pledged_input_size(self, size): - """*This is an undocumented method, because it may be used incorrectly.* - - Set uncompressed content size of a frame, the size will be written into the - frame header. - 1, If called when (.last_mode != .FLUSH_FRAME), a RuntimeError will be raised. - 2, If the actual size doesn't match the value, a ZstdError will be raised, and - the last compressed chunk is likely to be lost. - 3, The size is only valid for one frame, then it restores to "unknown size". 
- - Parameter - size: Uncompressed content size of a frame, None means "unknown size". - """ - # Get size value - if size is None: - size = m.ZSTD_CONTENTSIZE_UNKNOWN - else: - try: - if size < 0 or size > 2**64-1: - raise Exception - except: - msg = ("size argument should be 64-bit unsigned integer " - "value, or None.") - raise ValueError(msg) - - with self._lock: - # Check the current mode - if self.__last_mode != m.ZSTD_e_end: - msg = ("._set_pledged_input_size() method must be called " - "when (.last_mode == .FLUSH_FRAME).") - raise RuntimeError(msg) - - # Set pledged content size - zstd_ret = m.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size) - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_SET_PLEDGED_INPUT_SIZE, zstd_ret) - - @property - def last_mode(self): - """The last mode used to this compressor object, its value can be .CONTINUE, - .FLUSH_BLOCK, .FLUSH_FRAME. Initialized to .FLUSH_FRAME. - - It can be used to get the current state of a compressor, such as, data flushed, - a frame ended. - """ - return self.__last_mode - -class RichMemZstdCompressor(_Compressor): - """A compressor use rich memory mode. It is designed to allocate more memory, - but faster in some cases. - """ - - def __init__(self, level_or_option=None, zstd_dict=None): - """Initialize a RichMemZstdCompressor object. - - Parameters - level_or_option: When it's an int object, it represents the compression level. - When it's a dict object, it contains advanced compression - parameters. - zstd_dict: A ZstdDict object, pre-trained zstd dictionary. - """ - super().__init__(level_or_option=level_or_option, zstd_dict=zstd_dict) - - if self._use_multithread: - msg = ('Currently "rich memory mode" has no effect on ' - 'zstd multi-threaded compression (set ' - '"CParameter.nbWorkers" >= 1), it will allocate ' - 'unnecessary memory.') - warn(msg, ResourceWarning, 1) - - def compress(self, data): - """Compress data using rich memory mode, return a single zstd frame. 
- - Compressing b'' will get an empty content frame (9 bytes or more). - - Parameter - data: A bytes-like object, data to be compressed. - """ - with self._lock: - try: - ret = self._compress_impl(data, m.ZSTD_e_end, True) - return ret - except: - # Resetting cctx's session never fail - m.ZSTD_CCtx_reset(self._cctx, m.ZSTD_reset_session_only) - raise diff --git a/src/_cffi/decompressor.py b/src/_cffi/decompressor.py deleted file mode 100644 index a268116..0000000 --- a/src/_cffi/decompressor.py +++ /dev/null @@ -1,414 +0,0 @@ -from threading import Lock - -from .common import m, ffi, ZstdError, \ - _new_nonzero, _set_d_parameters, \ - _set_zstd_error, _ErrorType -from .dict import _load_d_dict -from .output_buffer import _BlocksOutputBuffer - -_TYPE_DEC = 0 -_TYPE_ENDLESS_DEC = 1 - -class _Decompressor: - def __init__(self, zstd_dict=None, option=None): - self._lock = Lock() - self._needs_input = True - self._input_buffer = ffi.NULL - self._input_buffer_size = 0 - self._in_begin = 0 - self._in_end = 0 - - self._singleton_in_buf = _new_nonzero("ZSTD_inBuffer *") - if self._singleton_in_buf == ffi.NULL: - raise MemoryError - - self._singleton_out_buf = _new_nonzero("ZSTD_outBuffer *") - if self._singleton_out_buf == ffi.NULL: - raise MemoryError - - # Decompression context - self._dctx = m.ZSTD_createDCtx() - if self._dctx == ffi.NULL: - raise ZstdError("Unable to create ZSTD_DCtx instance.") - - # Load dictionary to compression context - if zstd_dict is not None: - _load_d_dict(self._dctx, zstd_dict) - self.__dict = zstd_dict - - # Set compressLevel/option to compression context - if option is not None: - _set_d_parameters(self._dctx, option) - - def __del__(self): - try: - m.ZSTD_freeDCtx(self._dctx) - self._dctx = ffi.NULL - except AttributeError: - pass - - @property - def needs_input(self): - """If the max_length output limit in .decompress() method has been reached, and - the decompressor has (or may has) unconsumed input data, it will be set to - False. 
In this case, pass b'' to .decompress() method may output further data. - """ - return self._needs_input - - def _decompress_impl(self, in_buf, max_length, initial_size): - # The first AFE check for setting .at_frame_edge flag, search "AFE" in - # decompressor.c to see details. - if self._type == _TYPE_ENDLESS_DEC: - if self._at_frame_edge and in_buf.pos == in_buf.size: - return b"" - - # Output buffer - out_buf = self._singleton_out_buf - out = _BlocksOutputBuffer() - if initial_size >= 0: - out.initWithSize(out_buf, max_length, initial_size) - else: - out.initAndGrow(out_buf, max_length) - - while True: - # Decompress - zstd_ret = m.ZSTD_decompressStream(self._dctx, out_buf, in_buf) - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_DECOMPRESS, zstd_ret) - - # Set .eof/.af_frame_edge flag - if self._type == _TYPE_DEC: - # ZstdDecompressor class stops when a frame is decompressed - if zstd_ret == 0: - self._eof = True - break - else: - # EndlessZstdDecompressor class supports multiple frames - self._at_frame_edge = (zstd_ret == 0) - - # The second AFE check for setting .at_frame_edge flag, search - # "AFE" in decompressor.c to see details. - if self._at_frame_edge and in_buf.pos == in_buf.size: - break - - # Need to check out before in. Maybe zstd's internal buffer still has - # a few bytes can be output, grow the buffer and continue. 
- if out_buf.pos == out_buf.size: - # Output buffer exhausted - - # Output buffer reached max_length - if out.reachedMaxLength(out_buf): - break - - # Grow output buffer - out.grow(out_buf) - elif in_buf.pos == in_buf.size: - # Finished - break - - return out.finish(out_buf) - - def _stream_decompress(self, data, max_length=-1): - self._lock.acquire() - try: - initial_buffer_size = -1 - in_buf = self._singleton_in_buf - - if self._type == _TYPE_DEC: - # Check .eof flag - if self._eof: - raise EOFError("Already at the end of a zstd frame.") - else: - # Fast path for the first frame - if self._at_frame_edge and self._in_begin == self._in_end: - # Read decompressed size - decompressed_size = m.ZSTD_getFrameContentSize(ffi.from_buffer(data), - len(data)) - - # Use ZSTD_findFrameCompressedSize() to check complete frame, - # prevent allocating too much memory for small input chunk. - if (decompressed_size not in (m.ZSTD_CONTENTSIZE_UNKNOWN, - m.ZSTD_CONTENTSIZE_ERROR) \ - and \ - not m.ZSTD_isError(m.ZSTD_findFrameCompressedSize(ffi.from_buffer(data), - len(data))) ): - initial_buffer_size = decompressed_size - - # Prepare input buffer w/wo unconsumed data - if self._in_begin == self._in_end: - # No unconsumed data - use_input_buffer = False - - in_buf.src = ffi.from_buffer(data) - in_buf.size = len(data) - in_buf.pos = 0 - elif len(data) == 0: - # Has unconsumed data, fast path for b"". 
- use_input_buffer = True - - in_buf.src = self._input_buffer + self._in_begin - in_buf.size = self._in_end - self._in_begin - in_buf.pos = 0 - else: - # Has unconsumed data - use_input_buffer = True - - # Unconsumed data size in input_buffer - used_now = self._in_end - self._in_begin - # Number of bytes we can append to input buffer - avail_now = self._input_buffer_size - self._in_end - # Number of bytes we can append if we move existing - # contents to beginning of buffer - avail_total = self._input_buffer_size - used_now - - if avail_total < len(data): - new_size = used_now + len(data) - # Allocate with new size - tmp = _new_nonzero("char[]", new_size) - if tmp == ffi.NULL: - raise MemoryError - - # Copy unconsumed data to the beginning of new buffer - ffi.memmove(tmp, - self._input_buffer+self._in_begin, - used_now) - - # Switch to new buffer - self._input_buffer = tmp - self._input_buffer_size = new_size - - # Set begin & end position - self._in_begin = 0 - self._in_end = used_now - elif avail_now < len(data): - # Move unconsumed data to the beginning - ffi.memmove(self._input_buffer, - self._input_buffer+self._in_begin, - used_now) - - # Set begin & end position - self._in_begin = 0 - self._in_end = used_now - - # Copy data to input buffer - ffi.memmove(self._input_buffer+self._in_end, - ffi.from_buffer(data), len(data)) - self._in_end += len(data) - - in_buf.src = self._input_buffer + self._in_begin - in_buf.size = used_now + len(data) - in_buf.pos = 0 - # Now in_buf.pos == 0 - - ret = self._decompress_impl(in_buf, max_length, initial_buffer_size) - - # Unconsumed input data - if in_buf.pos == in_buf.size: - if self._type == _TYPE_DEC: - if len(ret) == max_length or self._eof: - self._needs_input = False - else: - self._needs_input = True - else: - if len(ret) == max_length and not self._at_frame_edge: - self._needs_input = False - else: - self._needs_input = True - - if use_input_buffer: - # Clear input_buffer - self._in_begin = 0 - self._in_end = 0 - else: 
- data_size = in_buf.size - in_buf.pos - - self._needs_input = False - if self._type == _TYPE_ENDLESS_DEC: - self._at_frame_edge = False - - if not use_input_buffer: - # Discard buffer if it's too small - if (self._input_buffer == ffi.NULL - or self._input_buffer_size < data_size): - # Create new buffer - self._input_buffer = _new_nonzero("char[]", data_size) - if self._input_buffer == ffi.NULL: - self._input_buffer_size = 0 - raise MemoryError - # Set buffer size - self._input_buffer_size = data_size - - # Copy unconsumed data - ffi.memmove(self._input_buffer, in_buf.src+in_buf.pos, data_size) - self._in_begin = 0 - self._in_end = data_size - else: - # Use input buffer - self._in_begin += in_buf.pos - - return ret - except: - # Reset decompressor's states/session - self.__reset_session() - raise - finally: - self._lock.release() - - def __reset_session(self): - # Reset variables - self._in_begin = 0 - self._in_end = 0 - - self._needs_input = True - if self._type == _TYPE_DEC: - self._eof = False - self._unused_data = ffi.NULL - else: - self._at_frame_edge = True - - # Resetting session never fail - m.ZSTD_DCtx_reset(self._dctx, m.ZSTD_reset_session_only) - - def _reset_session(self): - """This is an undocumented method. Reset decompressor's states/session, don't - reset parameters and dictionary. - """ - with self._lock: - self.__reset_session() - - def __reduce__(self): - msg = "Cannot pickle %s object." % type(self) - raise TypeError(msg) - -class ZstdDecompressor(_Decompressor): - """A streaming decompressor, it stops after a frame is decompressed. - Thread-safe at method level.""" - - def __init__(self, zstd_dict=None, option=None): - """Initialize a ZstdDecompressor object. - - Parameters - zstd_dict: A ZstdDict object, pre-trained zstd dictionary. - option: A dict object that contains advanced decompression parameters. 
- """ - super().__init__(zstd_dict, option) - self._eof = False - self._unused_data = ffi.NULL - self._type = _TYPE_DEC - - def decompress(self, data, max_length=-1): - """Decompress data, return a chunk of decompressed data if possible, or b'' - otherwise. - - It stops after a frame is decompressed. - - Parameters - data: A bytes-like object, zstd data to be decompressed. - max_length: Maximum size of returned data. When it is negative, the size of - output buffer is unlimited. When it is nonnegative, returns at - most max_length bytes of decompressed data. - """ - return self._stream_decompress(data, max_length) - - @property - def eof(self): - """True means the end of the first frame has been reached. If decompress data - after that, an EOFError exception will be raised.""" - return self._eof - - @property - def unused_data(self): - """A bytes object. When ZstdDecompressor object stops after a frame is - decompressed, unused input data after the frame. Otherwise this will be b''.""" - with self._lock: - if not self._eof: - return b"" - else: - if self._unused_data == ffi.NULL: - if self._input_buffer == ffi.NULL: - self._unused_data = b"" - else: - self._unused_data = \ - ffi.buffer(self._input_buffer)[self._in_begin:self._in_end] - return self._unused_data - -class EndlessZstdDecompressor(_Decompressor): - """A streaming decompressor, accepts multiple concatenated frames. - Thread-safe at method level.""" - - def __init__(self, zstd_dict=None, option=None): - """Initialize an EndlessZstdDecompressor object. - - Parameters - zstd_dict: A ZstdDict object, pre-trained zstd dictionary. - option: A dict object that contains advanced decompression parameters. - """ - super().__init__(zstd_dict, option) - self._at_frame_edge = True - self._type = _TYPE_ENDLESS_DEC - - def decompress(self, data, max_length=-1): - """Decompress data, return a chunk of decompressed data if possible, or b'' - otherwise. 
- - Parameters - data: A bytes-like object, zstd data to be decompressed. - max_length: Maximum size of returned data. When it is negative, the size of - output buffer is unlimited. When it is nonnegative, returns at - most max_length bytes of decompressed data. - """ - return self._stream_decompress(data, max_length) - - @property - def at_frame_edge(self): - """True when both the input and output streams are at a frame edge, means a - frame is completely decoded and fully flushed, or the decompressor just be - initialized. - - This flag could be used to check data integrity in some cases. - """ - return self._at_frame_edge - -def decompress(data, zstd_dict=None, option=None): - """Decompress a zstd data, return a bytes object. - - Support multiple concatenated frames. - - Parameters - data: A bytes-like object, compressed zstd data. - zstd_dict: A ZstdDict object, pre-trained zstd dictionary. - option: A dict object, contains advanced decompression parameters. - """ - # EndlessZstdDecompressor - decomp = EndlessZstdDecompressor(zstd_dict, option) - - # Prepare input data - in_buf = decomp._singleton_in_buf - in_buf.src = ffi.from_buffer(data) - in_buf.size = len(data) - in_buf.pos = 0 - - # Get decompressed size - decompressed_size = m.ZSTD_getFrameContentSize(ffi.from_buffer(data), len(data)) - if decompressed_size not in (m.ZSTD_CONTENTSIZE_UNKNOWN, - m.ZSTD_CONTENTSIZE_ERROR): - initial_size = decompressed_size - else: - initial_size = -1 - - # Decompress - ret = decomp._decompress_impl(in_buf, -1, initial_size) - - # Check data integrity. at_frame_edge flag is True when the both the input - # and output streams are at a frame edge. - if not decomp._at_frame_edge or not in_buf.pos: - extra_msg = "." 
if (len(ret) == 0) \ - else (", if want to output these decompressed data, use " - "decompress_stream function or " - "EndlessZstdDecompressor class to decompress.") - msg = ("Decompression failed: zstd data ends in an incomplete " - "frame, maybe the input data was truncated. Decompressed " - "data is %d bytes%s") % (len(ret), extra_msg) - raise ZstdError(msg) - - return ret diff --git a/src/_cffi/dict.py b/src/_cffi/dict.py deleted file mode 100644 index 7a48764..0000000 --- a/src/_cffi/dict.py +++ /dev/null @@ -1,248 +0,0 @@ -from threading import Lock - -from .common import m, ffi, ZstdError, \ - _set_zstd_error, _ErrorType - -_DICT_TYPE_DIGESTED = 0 -_DICT_TYPE_UNDIGESTED = 1 -_DICT_TYPE_PREFIX = 2 - -class ZstdDict: - """Zstd dictionary, used for compression/decompression.""" - - def __init__(self, dict_content, is_raw=False): - """Initialize a ZstdDict object. - - Parameters - dict_content: A bytes-like object, dictionary's content. - is_raw: This parameter is for advanced user. True means dict_content - argument is a "raw content" dictionary, free of any format - restriction. False means dict_content argument is an ordinary - zstd dictionary, was created by zstd functions, follow a - specified format. - """ - self.__cdicts = {} - self.__ddict = ffi.NULL - self.__lock = Lock() - - # Check dict_content's type - try: - self.__dict_content = bytes(dict_content) - except: - raise TypeError("dict_content argument should be bytes-like object.") - - # Both ordinary dictionary and "raw content" dictionary should - # at least 8 bytes - if len(self.__dict_content) < 8: - raise ValueError('Zstd dictionary content should at least 8 bytes.') - - # Get dict_id, 0 means "raw content" dictionary. - self.__dict_id = m.ZSTD_getDictID_fromDict( - ffi.from_buffer(self.__dict_content), - len(self.__dict_content)) - - # Check validity for ordinary dictionary - if not is_raw and self.__dict_id == 0: - msg = ('The dict_content argument is not a valid zstd ' - 'dictionary. 
The first 4 bytes of a valid zstd dictionary ' - 'should be a magic number: b"\\x37\\xA4\\x30\\xEC".\n' - 'If you are an advanced user, and can be sure that ' - 'dict_content argument is a "raw content" zstd ' - 'dictionary, set is_raw parameter to True.') - raise ValueError(msg) - - def __del__(self): - try: - for level, cdict in self.__cdicts.items(): - m.ZSTD_freeCDict(cdict) - self.__cdicts[level] = ffi.NULL - except AttributeError: - pass - - try: - m.ZSTD_freeDDict(self.__ddict) - self.__ddict = ffi.NULL - except AttributeError: - pass - - @property - def dict_content(self): - """The content of zstd dictionary, a bytes object, it's the same as dict_content - argument in ZstdDict.__init__() method. It can be used with other programs. - """ - return self.__dict_content - - @property - def dict_id(self): - """ID of zstd dictionary, a 32-bit unsigned int value. - - Non-zero means ordinary dictionary, was created by zstd functions, follow - a specified format. - - 0 means a "raw content" dictionary, free of any format restriction, used - for advanced user. - """ - return self.__dict_id - - @property - def as_digested_dict(self): - """Load as a digested dictionary to compressor, by passing this attribute as - zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict) - 1, Some advanced compression parameters of compressor may be overridden - by parameters of digested dictionary. - 2, ZstdDict has a digested dictionaries cache for each compression level. - It's faster when loading again a digested dictionary with the same - compression level. - 3, No need to use this for decompression. - """ - return (self, _DICT_TYPE_DIGESTED) - - @property - def as_undigested_dict(self): - """Load as an undigested dictionary to compressor, by passing this attribute as - zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict) - 1, The advanced compression parameters of compressor will not be overridden. - 2, Loading an undigested dictionary is costly. 
If load an undigested dictionary - multiple times, consider reusing a compressor object. - 3, No need to use this for decompression. - """ - return (self, _DICT_TYPE_UNDIGESTED) - - @property - def as_prefix(self): - """Load as a prefix to compressor/decompressor, by passing this attribute as - zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix) - 1, Prefix is compatible with long distance matching, while dictionary is not. - 2, It only works for the first frame, then the compressor/decompressor will - return to no prefix state. - 3, When decompressing, must use the same prefix as when compressing. - """ - return (self, _DICT_TYPE_PREFIX) - - def __str__(self): - return '' % \ - (self.__dict_id, len(self.__dict_content)) - - def __len__(self): - return len(self.__dict_content) - - def __reduce__(self): - msg = ("ZstdDict object intentionally doesn't support pickle. If need " - "to save zstd dictionary to disk, please save .dict_content " - "attribute, it's a bytes object. So that the zstd dictionary " - "can be used with other programs.") - raise TypeError(msg) - - def _get_cdict(self, level): - with self.__lock: - # Already cached - if level in self.__cdicts: - cdict = self.__cdicts[level] - else: - # Create ZSTD_CDict instance - cdict = m.ZSTD_createCDict(ffi.from_buffer(self.__dict_content), - len(self.__dict_content), level) - if cdict == ffi.NULL: - msg = ("Failed to create ZSTD_CDict instance from zstd " - "dictionary content. Maybe the content is corrupted.") - raise ZstdError(msg) - self.__cdicts[level] = cdict - return cdict - - def _get_ddict(self): - # Already created - if self.__ddict != ffi.NULL: - return self.__ddict - - with self.__lock: - # Create ZSTD_DDict instance from dictionary content - self.__ddict = m.ZSTD_createDDict( - ffi.from_buffer(self.__dict_content), - len(self.__dict_content)) - - if self.__ddict == ffi.NULL: - msg = ("Failed to create ZSTD_DDict instance from zstd " - "dictionary content. 
Maybe the content is corrupted.") - raise ZstdError(msg) - - return self.__ddict - -def _load_c_dict(cctx, zstd_dict, level): - if isinstance(zstd_dict, ZstdDict): - # When compressing, use undigested dictionary by default. - zd = zstd_dict - type = _DICT_TYPE_UNDIGESTED - elif isinstance(zstd_dict, tuple) and \ - len(zstd_dict) == 2 and \ - isinstance(zstd_dict[0], ZstdDict) and \ - zstd_dict[1] in {_DICT_TYPE_DIGESTED, - _DICT_TYPE_UNDIGESTED, - _DICT_TYPE_PREFIX}: - zd = zstd_dict[0] - type = zstd_dict[1] - else: - raise TypeError("zstd_dict argument should be ZstdDict object.") - - if type == _DICT_TYPE_DIGESTED: - # Get ZSTD_CDict - c_dict = zd._get_cdict(level) - # Reference a prepared dictionary. - # It overrides some compression context's parameters. - zstd_ret = m.ZSTD_CCtx_refCDict(cctx, c_dict) - elif type == _DICT_TYPE_UNDIGESTED: - # Load a dictionary. - # It doesn't override compression context's parameters. - zstd_ret = m.ZSTD_CCtx_loadDictionary( - cctx, - ffi.from_buffer(zd.dict_content), - len(zd.dict_content)) - elif type == _DICT_TYPE_PREFIX: - # Reference as prefix - zstd_ret = m.ZSTD_CCtx_refPrefix( - cctx, - ffi.from_buffer(zd.dict_content), - len(zd.dict_content)) - else: - raise SystemError('_load_c_dict() impossible code path') - - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_LOAD_C_DICT, zstd_ret) - -def _load_d_dict(dctx, zstd_dict): - if isinstance(zstd_dict, ZstdDict): - # When decompressing, use digested dictionary by default. 
- zd = zstd_dict - type = _DICT_TYPE_DIGESTED - elif isinstance(zstd_dict, tuple) and \ - len(zstd_dict) == 2 and \ - isinstance(zstd_dict[0], ZstdDict) and \ - zstd_dict[1] in {_DICT_TYPE_DIGESTED, - _DICT_TYPE_UNDIGESTED, - _DICT_TYPE_PREFIX}: - zd = zstd_dict[0] - type = zstd_dict[1] - else: - raise TypeError("zstd_dict argument should be ZstdDict object.") - - if type == _DICT_TYPE_DIGESTED: - # Get ZSTD_DDict - d_dict = zd._get_ddict() - # Reference a prepared dictionary - zstd_ret = m.ZSTD_DCtx_refDDict(dctx, d_dict) - elif type == _DICT_TYPE_UNDIGESTED: - # Load a dictionary - zstd_ret = m.ZSTD_DCtx_loadDictionary( - dctx, - ffi.from_buffer(zd.dict_content), - len(zd.dict_content)) - elif type == _DICT_TYPE_PREFIX: - # Reference as prefix - zstd_ret = m.ZSTD_DCtx_refPrefix( - dctx, - ffi.from_buffer(zd.dict_content), - len(zd.dict_content)) - else: - raise SystemError('_load_d_dict() impossible code path') - - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_LOAD_D_DICT, zstd_ret) diff --git a/src/_cffi/file.py b/src/_cffi/file.py deleted file mode 100644 index 9c815e8..0000000 --- a/src/_cffi/file.py +++ /dev/null @@ -1,347 +0,0 @@ -from .common import m, ffi, ZstdError, _new_nonzero, _nbytes, \ - _set_c_parameters, _set_d_parameters, \ - _write_to_fp, _ZSTD_DStreamSizes, \ - _set_zstd_error, _ErrorType -from .dict import _load_c_dict, _load_d_dict -from .output_buffer import _BlocksOutputBuffer - -_ZSTD_DStreamOutSize = _ZSTD_DStreamSizes[1] - -class ZstdFileReader: - def __init__(self, fp, zstd_dict, option, read_size): - if read_size <= 0: - raise ValueError("read_size argument should > 0") - self._read_size = read_size - - # File states, the last three are public attributes. - self._fp = fp - self._sof = True # start of file - self.eof = False - self.pos = 0 # Decompressed position - self.size = -1 # File size, -1 means unknown. 
- - # Decompression states - self._needs_input = True - self._at_frame_edge = True - - # Input state, need to be initialized with 0. - self._in_buf = ffi.new("ZSTD_inBuffer *") - if self._in_buf == ffi.NULL: - raise MemoryError - # Output state - self._out_buf = _new_nonzero("ZSTD_outBuffer *") - if self._out_buf == ffi.NULL: - raise MemoryError - # Lazy create forward output buffer - self._tmp_output = ffi.NULL - - # Decompression context - self._dctx = m.ZSTD_createDCtx() - if self._dctx == ffi.NULL: - raise ZstdError("Unable to create ZSTD_DCtx instance.") - - # Load dictionary to decompression context - if zstd_dict is not None: - _load_d_dict(self._dctx, zstd_dict) - self.__dict = zstd_dict - - # Set option to decompression context - if option is not None: - _set_d_parameters(self._dctx, option) - - def __del__(self): - try: - m.ZSTD_freeDCtx(self._dctx) - self._dctx = ffi.NULL - except AttributeError: - pass - - def _decompress_into(self, out_b, fill_full): - # Return - if self.eof or out_b.size == out_b.pos: - return - - in_b = self._in_buf - orig_pos = out_b.pos - while True: - if in_b.size == in_b.pos and self._needs_input: - # Read - self._in_dat = self._fp.read(self._read_size) - # EOF - if not self._in_dat: - if self._at_frame_edge and not self._sof: - self.eof = True - self.pos += out_b.pos - orig_pos - self.size = self.pos - return - else: - raise EOFError("Compressed file ended before the " - "end-of-stream marker was reached") - in_b.src = ffi.from_buffer(self._in_dat) - in_b.size = _nbytes(self._in_dat) - in_b.pos = 0 - self._sof = False - - # Decompress - zstd_ret = m.ZSTD_decompressStream(self._dctx, out_b, in_b) - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_DECOMPRESS, zstd_ret) - - # Set flags - if zstd_ret == 0: - self._needs_input = True - self._at_frame_edge = True - else: - self._needs_input = (out_b.size != out_b.pos) - self._at_frame_edge = False - - if fill_full: - if out_b.size != out_b.pos: - continue - else: - 
self.pos += out_b.pos - orig_pos - return - else: - if out_b.pos != orig_pos: - self.pos += out_b.pos - orig_pos - return - - def readinto(self, b): - out_b = self._out_buf - out_b.dst = ffi.from_buffer(b) - out_b.size = _nbytes(b) - out_b.pos = 0 - - self._decompress_into(out_b, False) - return out_b.pos - - def readall(self): - out_b = self._out_buf - out = _BlocksOutputBuffer() - if self.size >= 0: - # Known file size - out.initWithSize(out_b, -1, self.size - self.pos) - else: - # Unknown file size - out.initAndGrow(out_b, -1) - - while True: - self._decompress_into(out_b, True) - if self.eof: - # Finished - return out.finish(out_b) - if out_b.size == out_b.pos: - # Grow output buffer - out.grow(out_b) - - # If obj is None, forward to EOF. - # If obj <= 0, do nothing. - def forward(self, offset): - # Lazy create forward output buffer - if self._tmp_output == ffi.NULL: - # ZSTD_outBuffer struct - self._out_tmp = _new_nonzero("ZSTD_outBuffer *") - if self._out_tmp == ffi.NULL: - raise MemoryError - # Forward output buffer - self._tmp_output = _new_nonzero("char[]", _ZSTD_DStreamOutSize) - if self._tmp_output == ffi.NULL: - raise MemoryError - # ZSTD_outBuffer.dst - self._out_tmp.dst = self._tmp_output - out_b = self._out_tmp - - # Forward to EOF - if offset is None: - out_b.size = _ZSTD_DStreamOutSize - while True: - out_b.pos = 0 - self._decompress_into(out_b, True) - if self.eof: - return - - # Forward to offset - while offset > 0: - out_b.size = min(_ZSTD_DStreamOutSize, offset) - out_b.pos = 0 - self._decompress_into(out_b, True) - - if self.eof: - return - offset -= out_b.pos - - def reset_session(self): - # Reset decompression states - self._needs_input = True - self._sof = True - self._at_frame_edge = True - self._in_buf.size = 0 - self._in_buf.pos = 0 - - # Resetting session never fail - m.ZSTD_DCtx_reset(self._dctx, m.ZSTD_reset_session_only) - -class ZstdFileWriter: - def __init__(self, fp, level_or_option, zstd_dict, write_size): - # File object - 
self._fp = fp - self._fp_has_flush = hasattr(fp, "flush") - - # States - self._last_mode = m.ZSTD_e_end - self._use_multithread = False - level = 0 # 0 means use zstd's default compression level - - # Write buffer - if write_size <= 0: - raise ValueError("write_size argument should > 0") - self._write_buffer_size = write_size - - self._write_buffer = _new_nonzero("char[]", write_size) - if self._write_buffer == ffi.NULL: - raise MemoryError - - # Singleton buffer objects - self._in_buf = _new_nonzero("ZSTD_inBuffer *") - if self._in_buf == ffi.NULL: - raise MemoryError - - self._out_buf = _new_nonzero("ZSTD_outBuffer *") - if self._out_buf == ffi.NULL: - raise MemoryError - - self._out_mv = memoryview(ffi.buffer(self._write_buffer)) - - # Compression context - self._cctx = m.ZSTD_createCCtx() - if self._cctx == ffi.NULL: - raise ZstdError("Unable to create ZSTD_CCtx instance.") - - # Set compressLevel/option to compression context - if level_or_option is not None: - level, self._use_multithread = \ - _set_c_parameters(self._cctx, level_or_option) - - # Load dictionary to compression context - if zstd_dict is not None: - _load_c_dict(self._cctx, zstd_dict, level) - self.__dict = zstd_dict - - def __del__(self): - try: - m.ZSTD_freeCCtx(self._cctx) - self._cctx = ffi.NULL - except AttributeError: - pass - - def write(self, data): - # Output size - output_size = 0 - - # Input buffer - in_b = self._in_buf - in_b.src = ffi.from_buffer(data) - in_b.size = _nbytes(data) - in_b.pos = 0 - - # Output buffer, out.pos will be set later. 
- out_b = self._out_buf - out_b.dst = self._write_buffer - out_b.size = self._write_buffer_size - - # State - self._last_mode = m.ZSTD_e_continue - - while True: - # Output position - out_b.pos = 0 - - # Compress - if not self._use_multithread: - zstd_ret = m.ZSTD_compressStream2(self._cctx, out_b, in_b, - m.ZSTD_e_continue) - else: - while True: - zstd_ret = m.ZSTD_compressStream2(self._cctx, out_b, in_b, - m.ZSTD_e_continue) - if (out_b.pos == out_b.size - or in_b.pos == in_b.size - or m.ZSTD_isError(zstd_ret)): - break - - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_COMPRESS, zstd_ret) - - # Accumulate output bytes - output_size += out_b.pos - - # Write output to fp - _write_to_fp("self._fp.write()", self._fp, - self._out_mv, out_b) - - # Finished - if not self._use_multithread: - # Single-thread compression + .CONTINUE mode - if zstd_ret == 0: - break - else: - # Multi-thread compression + .CONTINUE mode - if in_b.size == in_b.pos and \ - out_b.size != out_b.pos: - break - - return (in_b.size, output_size) - - def flush(self, mode): - # Mode argument - if mode not in (m.ZSTD_e_flush, m.ZSTD_e_end): - msg = ("mode argument wrong value, it should be " - "ZstdFile.FLUSH_BLOCK or ZstdFile.FLUSH_FRAME.") - raise ValueError(msg) - - # Don't generate empty content frame - if mode == self._last_mode: - return (0, 0) - - # Output size - output_size = 0 - - # Input buffer - in_b = self._in_buf - in_b.src = self._write_buffer - in_b.size = 0 - in_b.pos = 0 - - # Output buffer, out.pos will be set later. 
- out_b = self._out_buf - out_b.dst = self._write_buffer - out_b.size = self._write_buffer_size - - # State - self._last_mode = mode - - while True: - # Output position - out_b.pos = 0 - - # Compress - zstd_ret = m.ZSTD_compressStream2(self._cctx, out_b, in_b, mode) - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_COMPRESS, zstd_ret) - - # Accumulate output bytes - output_size += out_b.pos - - # Write output to fp - _write_to_fp("self._fp.write()", self._fp, - self._out_mv, out_b) - - # Finished - if zstd_ret == 0: - break - - # Flush - if self._fp_has_flush: - self._fp.flush() - - return (0, output_size) diff --git a/src/_cffi/output_buffer.py b/src/_cffi/output_buffer.py deleted file mode 100644 index 0ed7e0a..0000000 --- a/src/_cffi/output_buffer.py +++ /dev/null @@ -1,125 +0,0 @@ -from .common import m, ffi, _new_nonzero - -class _BlocksOutputBuffer: - KB = 1024 - MB = 1024 * 1024 - BUFFER_BLOCK_SIZE = ( - # If change this list, also change: - # The C implementation - # OutputBufferTestCase unittest - # If change the first blocks's size, also change: - # _32_KiB in __init__.py - # FileTestCase.test_decompress_limited() test - 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB, - 32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB, - 256*MB ) - MEM_ERR_MSG = "Unable to allocate output buffer." 
- - def initAndGrow(self, out, max_length): - # Get block size - if 0 <= max_length < self.BUFFER_BLOCK_SIZE[0]: - block_size = max_length - else: - block_size = self.BUFFER_BLOCK_SIZE[0] - - # The first block - block = _new_nonzero("char[]", block_size) - if block == ffi.NULL: - raise MemoryError - - # Create the list - self.list = [block] - - # Set variables - self.allocated = block_size - self.max_length = max_length - - out.dst = block - out.size = block_size - out.pos = 0 - - def initWithSize(self, out, max_length, init_size): - # Get block size - if 0 <= max_length < init_size: - block_size = max_length - else: - block_size = init_size - - # The first block - block = _new_nonzero("char[]", block_size) - if block == ffi.NULL: - raise MemoryError(self.MEM_ERR_MSG) - - # Create the list - self.list = [block] - - # Set variables - self.allocated = block_size - self.max_length = max_length - - out.dst = block - out.size = block_size - out.pos = 0 - - def grow(self, out): - # Ensure no gaps in the data - assert out.pos == out.size - - # Get block size - list_len = len(self.list) - if list_len < len(self.BUFFER_BLOCK_SIZE): - block_size = self.BUFFER_BLOCK_SIZE[list_len] - else: - block_size = self.BUFFER_BLOCK_SIZE[-1] - - # Check max_length - if self.max_length >= 0: - # If (rest == 0), should not grow the buffer. 
- rest = self.max_length - self.allocated - assert rest > 0 - - # block_size of the last block - if block_size > rest: - block_size = rest - - # Create the block - b = _new_nonzero("char[]", block_size) - if b == ffi.NULL: - raise MemoryError(self.MEM_ERR_MSG) - self.list.append(b) - - # Set variables - self.allocated += block_size - - out.dst = b - out.size = block_size - out.pos = 0 - - def reachedMaxLength(self, out): - # Ensure (data size == allocated size) - assert out.pos == out.size - - return self.allocated == self.max_length - - def finish(self, out): - # Fast path for single block - if (len(self.list) == 1 and out.pos == out.size) or \ - (len(self.list) == 2 and out.pos == 0): - return bytes(ffi.buffer(self.list[0])) - - # Final bytes object - data_size = self.allocated - (out.size-out.pos) - final = _new_nonzero("char[]", data_size) - if final == ffi.NULL: - raise MemoryError(self.MEM_ERR_MSG) - - # Memory copy - # Blocks except the last one - posi = 0 - for block in self.list[:-1]: - ffi.memmove(final+posi, block, len(block)) - posi += len(block) - # The last block - ffi.memmove(final+posi, self.list[-1], out.pos) - - return bytes(ffi.buffer(final)) \ No newline at end of file diff --git a/src/_cffi/stream.py b/src/_cffi/stream.py deleted file mode 100644 index 12de8ca..0000000 --- a/src/_cffi/stream.py +++ /dev/null @@ -1,364 +0,0 @@ -from .common import m, ffi, ZstdError, _new_nonzero, \ - _set_c_parameters, _set_d_parameters, \ - _set_zstd_error, _ErrorType, \ - _write_to_fp -from .dict import _load_c_dict, _load_d_dict - -def _invoke_callback(callback, in_mv, in_buf, callback_read_pos, - out_mv, out_buf, total_input_size, total_output_size): - # Only yield input data once - in_size = in_buf.size - callback_read_pos - callback_read_pos = in_buf.size - - # Don't yield empty data - if in_size == 0 and out_buf.pos == 0: - return callback_read_pos - - # memoryview - in_memoryview = in_mv[:in_size] - out_memoryview = out_mv[:out_buf.pos] - - # Callback - 
callback(total_input_size, total_output_size, - in_memoryview, out_memoryview) - - return callback_read_pos - -def compress_stream(input_stream, output_stream, *, - level_or_option = None, zstd_dict = None, - pledged_input_size = None, - read_size = m.ZSTD_CStreamInSize(), - write_size = m.ZSTD_CStreamOutSize(), - callback = None): - """Compresses input_stream and writes the compressed data to output_stream, it - doesn't close the streams. - - ---- - DEPRECATION NOTICE - The (de)compress_stream are deprecated and will be removed in a future version. - See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives - ---- - - If input stream is b'', nothing will be written to output stream. - - Return a tuple, (total_input, total_output), the items are int objects. - - Parameters - input_stream: Input stream that has a .readinto(b) method. - output_stream: Output stream that has a .write(b) method. If use callback - function, this parameter can be None. - level_or_option: When it's an int object, it represents the compression - level. When it's a dict object, it contains advanced compression - parameters. - zstd_dict: A ZstdDict object, pre-trained zstd dictionary. - pledged_input_size: If set this parameter to the size of input data, the - size will be written into the frame header. If the actual input data - doesn't match it, a ZstdError will be raised. - read_size: Input buffer size, in bytes. - write_size: Output buffer size, in bytes. - callback: A callback function that accepts four parameters: - (total_input, total_output, read_data, write_data), the first two are - int objects, the last two are readonly memoryview objects. 
- """ - level = 0 # 0 means use zstd's default compression level - use_multithread = False - total_input_size = 0 - total_output_size = 0 - - # Check arguments - if not hasattr(input_stream, "readinto"): - raise TypeError("input_stream argument should have a .readinto(b) method.") - if output_stream is not None: - if not hasattr(output_stream, "write"): - raise TypeError("output_stream argument should have a .write(b) method.") - else: - if callback is None: - msg = ("At least one of output_stream argument and " - "callback argument should be non-None.") - raise TypeError(msg) - - try: - if read_size <= 0 or write_size <= 0: - raise Exception - except: - msg = ("read_size argument and write_size argument should " - "be positive numbers.") - raise ValueError(msg) - - if pledged_input_size is not None: - try: - if pledged_input_size < 0 or pledged_input_size > 2**64-1: - raise Exception - except: - msg = ("pledged_input_size argument should be 64-bit " - "unsigned integer value.") - raise ValueError(msg) - - try: - # Initialize & set ZstdCompressor - cctx = m.ZSTD_createCCtx() - if cctx == ffi.NULL: - raise ZstdError("Unable to create ZSTD_CCtx instance.") - - if level_or_option is not None: - level, use_multithread = \ - _set_c_parameters(cctx, level_or_option) - if zstd_dict is not None: - _load_c_dict(cctx, zstd_dict, level) - - if pledged_input_size is not None: - zstd_ret = m.ZSTD_CCtx_setPledgedSrcSize(cctx, pledged_input_size) - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_COMPRESS, zstd_ret) - - # Input buffer, in.size and in.pos will be set later. - in_buf = _new_nonzero("ZSTD_inBuffer *") - if in_buf == ffi.NULL: - raise MemoryError - - _input_block = ffi.buffer(_new_nonzero("char[]", read_size)) - in_mv = memoryview(_input_block) - in_buf.src = ffi.from_buffer(_input_block) - - # Output buffer, out.pos will be set later. 
- out_buf = _new_nonzero("ZSTD_outBuffer *") - if out_buf == ffi.NULL: - raise MemoryError - - _output_block = ffi.buffer(_new_nonzero("char[]", write_size)) - out_mv = memoryview(_output_block) - out_buf.dst = ffi.from_buffer(_output_block) - out_buf.size = write_size - - # Read - while True: - # Invoke .readinto() method - read_bytes = input_stream.readinto(_input_block) - if read_bytes < 0 or read_bytes > read_size: - msg = ("input_stream.readinto() returned invalid length " - "%d (should be 0 <= value <= %d)") % \ - (read_bytes, read_size) - raise ValueError(msg) - - # Don't generate empty frame - if read_bytes == 0 and total_input_size == 0: - break - total_input_size += read_bytes - - in_buf.size = read_bytes - in_buf.pos = 0 - callback_read_pos = 0 - end_directive = m.ZSTD_e_end \ - if (read_bytes == 0) \ - else m.ZSTD_e_continue - - # Compress & write - while True: - # Output position - out_buf.pos = 0 - - # Compress - if use_multithread and end_directive == m.ZSTD_e_continue: - while True: - zstd_ret = m.ZSTD_compressStream2(cctx, out_buf, in_buf, m.ZSTD_e_continue) - if (out_buf.pos == out_buf.size - or in_buf.pos == in_buf.size - or m.ZSTD_isError(zstd_ret)): - break - else: - zstd_ret = m.ZSTD_compressStream2(cctx, out_buf, in_buf, end_directive) - - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_COMPRESS, zstd_ret) - - # Accumulate output bytes - total_output_size += out_buf.pos - - # Write all output to output_stream - if output_stream is not None: - _write_to_fp("output_stream.write()", output_stream, - out_mv, out_buf) - - # Invoke callback - if callback is not None: - callback_read_pos = _invoke_callback( - callback, in_mv, in_buf, callback_read_pos, - out_mv, out_buf, total_input_size, total_output_size) - - # Finished - if use_multithread and end_directive == m.ZSTD_e_continue: - if in_buf.pos == in_buf.size and \ - out_buf.pos != out_buf.size: - break - else: - if zstd_ret == 0: - break - - # Input stream ended - if read_bytes == 
0: - break - - return (total_input_size, total_output_size) - finally: - m.ZSTD_freeCCtx(cctx) - -def decompress_stream(input_stream, output_stream, *, - zstd_dict = None, option = None, - read_size = m.ZSTD_DStreamInSize(), - write_size = m.ZSTD_DStreamOutSize(), - callback = None): - """Decompresses input_stream and writes the decompressed data to output_stream, - it doesn't close the streams. - - ---- - DEPRECATION NOTICE - The (de)compress_stream are deprecated and will be removed in a future version. - See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives - ---- - - Supports multiple concatenated frames. - - Return a tuple, (total_input, total_output), the items are int objects. - - Parameters - input_stream: Input stream that has a .readinto(b) method. - output_stream: Output stream that has a .write(b) method. If use callback - function, this parameter can be None. - zstd_dict: A ZstdDict object, pre-trained zstd dictionary. - option: A dict object, contains advanced decompression parameters. - read_size: Input buffer size, in bytes. - write_size: Output buffer size, in bytes. - callback: A callback function that accepts four parameters: - (total_input, total_output, read_data, write_data), the first two are - int objects, the last two are readonly memoryview objects. 
- """ - at_frame_edge = True - total_input_size = 0 - total_output_size = 0 - - # Check arguments - if not hasattr(input_stream, "readinto"): - raise TypeError("input_stream argument should have a .readinto(b) method.") - if output_stream is not None: - if not hasattr(output_stream, "write"): - raise TypeError("output_stream argument should have a .write(b) method.") - else: - if callback is None: - msg = ("At least one of output_stream argument and " - "callback argument should be non-None.") - raise TypeError(msg) - - try: - if read_size <= 0 or write_size <= 0: - raise Exception - except: - msg = ("read_size argument and write_size argument should " - "be positive numbers.") - raise ValueError(msg) - - try: - # Initialize & set ZstdDecompressor - dctx = m.ZSTD_createDCtx() - if dctx == ffi.NULL: - raise ZstdError("Unable to create ZSTD_DCtx instance.") - - if zstd_dict is not None: - _load_d_dict(dctx, zstd_dict) - if option is not None: - _set_d_parameters(dctx, option) - - # Input buffer, in.size and in.pos will be set later. - in_buf = _new_nonzero("ZSTD_inBuffer *") - if in_buf == ffi.NULL: - raise MemoryError - - _input_block = ffi.buffer(_new_nonzero("char[]", read_size)) - in_mv = memoryview(_input_block) - in_buf.src = ffi.from_buffer(_input_block) - - # Output buffer, out.pos will be set later. 
- out_buf = _new_nonzero("ZSTD_outBuffer *") - if out_buf == ffi.NULL: - raise MemoryError - - _output_block = ffi.buffer(_new_nonzero("char[]", write_size)) - out_mv = memoryview(_output_block) - out_buf.dst = ffi.from_buffer(_output_block) - out_buf.size = write_size - - # Read - while True: - # Invoke .readinto() method - read_bytes = input_stream.readinto(_input_block) - if read_bytes < 0 or read_bytes > read_size: - msg = ("input_stream.readinto() returned invalid length " - "%d (should be 0 <= value <= %d)") % \ - (read_bytes, read_size) - raise ValueError(msg) - - total_input_size += read_bytes - - in_buf.size = read_bytes - in_buf.pos = 0 - callback_read_pos = 0 - - # Decompress & write - while True: - # AFE check for setting .at_frame_edge flag, search "AFE" in - # decompressor.c to see details. - if at_frame_edge and in_buf.pos == in_buf.size: - break - - # Output position - out_buf.pos = 0 - - # Decompress - zstd_ret = m.ZSTD_decompressStream(dctx, out_buf, in_buf) - if m.ZSTD_isError(zstd_ret): - _set_zstd_error(_ErrorType.ERR_DECOMPRESS, zstd_ret) - - # Set .af_frame_edge flag - at_frame_edge = (zstd_ret == 0) - - # Accumulate output bytes - total_output_size += out_buf.pos - - # Write all output to output_stream - if output_stream is not None: - _write_to_fp("output_stream.write()", output_stream, - out_mv, out_buf) - - # Invoke callback - if callback is not None: - callback_read_pos = _invoke_callback( - callback, in_mv, in_buf, callback_read_pos, - out_mv, out_buf, total_input_size, total_output_size) - - # Finished. When a frame is fully decoded, but not fully flushed, - # the last byte is kept as hostage, it will be released when all - # output is flushed. - if in_buf.pos == in_buf.size: - # If input stream ends in an incomplete frame, output as - # much as possible. - if (read_bytes == 0 - and not at_frame_edge - and out_buf.pos == out_buf.size): - continue - - break - - # Input stream ended - if read_bytes == 0: - # Check data integrity. 
at_frame_edge flag is 1 when both the - # input and output streams are at a frame edge. - if not at_frame_edge: - msg = ("Decompression failed: zstd data ends in an " - "incomplete frame, maybe the input data was " - "truncated. Total input %d bytes, total output " - "%d bytes.") % (total_input_size, total_output_size) - raise ZstdError(msg) - break - - return (total_input_size, total_output_size) - finally: - m.ZSTD_freeDCtx(dctx) diff --git a/src/_zstdfile.py b/src/_zstdfile.py deleted file mode 100644 index 553ab98..0000000 --- a/src/_zstdfile.py +++ /dev/null @@ -1,496 +0,0 @@ -import io -import warnings -from os import PathLike - -from pyzstd import ZstdCompressor, _ZstdFileReader, \ - _ZstdFileWriter, _ZSTD_DStreamSizes - -__all__ = ('ZstdFile', 'open') - -class _ZstdDecompressReader(io.RawIOBase): - """Adapt decompressor to RawIOBase reader API""" - - def __init__(self, fp, zstd_dict, option, read_size): - self._fp = fp - self._decomp = _ZstdFileReader(fp, zstd_dict, option, read_size) - - def close(self): - self._decomp = None - return super().close() - - def readable(self): - return True - - # Some file-like objects don't have .seekable(), invoke when necessary. - def seekable(self): - return self._fp.seekable() - - def tell(self): - return self._decomp.pos - - def readinto(self, b): - return self._decomp.readinto(b) - - def readall(self): - return self._decomp.readall() - - # If the new position is within io.BufferedReader's buffer, - # this method may not be called. 
- def seek(self, offset, whence=0): - # offset is absolute file position - if whence == 0: # SEEK_SET - pass - elif whence == 1: # SEEK_CUR - offset = self._decomp.pos + offset - elif whence == 2: # SEEK_END - if self._decomp.size < 0: - # Get file size - self._decomp.forward(None) - offset = self._decomp.size + offset - else: - raise ValueError("Invalid whence value: {}".format(whence)) - - # offset is bytes number to skip forward - if offset < self._decomp.pos: - # Rewind - self._decomp.eof = False - self._decomp.pos = 0 - self._decomp.reset_session() - self._fp.seek(0) - else: - offset -= self._decomp.pos - # If offset <= 0, .forward() method does nothing. - self._decomp.forward(offset) - - return self._decomp.pos - -_ZSTD_DStreamOutSize = _ZSTD_DStreamSizes[1] - -_MODE_CLOSED = 0 -_MODE_READ = 1 -_MODE_WRITE = 2 - -class _DeprecatedPlaceholder: - def __repr__(self): - return '' -_DEPRECATED_PLACEHOLDER = _DeprecatedPlaceholder() - -class ZstdFile(io.BufferedIOBase): - """A file object providing transparent zstd (de)compression. - - A ZstdFile can act as a wrapper for an existing file object, or refer - directly to a named file on disk. - - Note that ZstdFile provides a *binary* file interface - data read is - returned as bytes, and data to be written should be an object that - supports the Buffer Protocol. - """ - FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK - FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME - - _READER_CLASS = _ZstdDecompressReader - - def __init__(self, filename, mode="r", *, - level_or_option=None, zstd_dict=None, - read_size=_DEPRECATED_PLACEHOLDER, write_size=_DEPRECATED_PLACEHOLDER): - """Open a zstd compressed file in binary mode. - - filename can be either an actual file name (given as a str, bytes, or - PathLike object), in which case the named file is opened, or it can be - an existing file object to read from or write to. - - mode can be "r" for reading (default), "w" for (over)writing, "x" for - creating exclusively, or "a" for appending. 
These can equivalently be - given as "rb", "wb", "xb" and "ab" respectively. - - Parameters - level_or_option: When it's an int object, it represents compression - level. When it's a dict object, it contains advanced compression - parameters. Note, in read mode (decompression), it can only be a - dict object, that represents decompression option. It doesn't - support int type compression level in this case. - zstd_dict: A ZstdDict object, pre-trained dictionary for compression / - decompression. - """ - if read_size == _DEPRECATED_PLACEHOLDER: - read_size = 131075 - else: - warnings.warn("pyzstd.ZstdFile()'s read_size parameter is deprecated", DeprecationWarning, stacklevel=2) - if write_size == _DEPRECATED_PLACEHOLDER: - write_size = 131591 - else: - warnings.warn("pyzstd.ZstdFile()'s write_size parameter is deprecated", DeprecationWarning, stacklevel=2) - - self._fp = None - self._close_fp = False - self._mode = _MODE_CLOSED - - if not isinstance(mode, str): - raise ValueError('mode must be a str') - mode = mode.removesuffix('b') # handle rb, wb, xb, ab - - # Read or write mode - if mode == "r": - if not isinstance(level_or_option, (type(None), dict)): - raise TypeError( - ("In read mode (decompression), level_or_option argument " - "should be a dict object, that represents decompression " - "option. 
It doesn't support int type compression level " - "in this case.")) - if write_size != 131591: - raise ValueError( - "write_size argument is only valid in write modes.") - mode_code = _MODE_READ - elif mode in {"w", "a", "x"}: - if not isinstance(level_or_option, (type(None), int, dict)): - raise TypeError(("level_or_option argument " - "should be int or dict object.")) - if read_size != 131075: - raise ValueError( - "read_size argument is only valid in read mode.") - mode_code = _MODE_WRITE - else: - raise ValueError("Invalid mode: {!r}".format(mode)) - - # File object - if isinstance(filename, (str, bytes, PathLike)): - self._fp = io.open(filename, mode + "b") - self._close_fp = True - elif hasattr(filename, "read") or hasattr(filename, "write"): - self._fp = filename - else: - raise TypeError(("filename must be a str, bytes, " - "file or PathLike object")) - - # Set ._mode here for ._close_fp in .close(). If the following code - # fails, IOBase's cleanup code will call .close(), so that ._fp can - # be closed. - self._mode = mode_code - - # Reader or writer - if mode_code == _MODE_READ: - raw = self._READER_CLASS( - self._fp, - zstd_dict=zstd_dict, - option=level_or_option, - read_size=read_size) - self._buffer = io.BufferedReader(raw, _ZSTD_DStreamOutSize) - elif mode_code == _MODE_WRITE: - self._pos = 0 - self._writer = _ZstdFileWriter( - self._fp, - level_or_option=level_or_option, - zstd_dict=zstd_dict, - write_size=write_size) - - def close(self): - """Flush and close the file. - - May be called more than once without error. Once the file is - closed, any other operation on it will raise a ValueError. - """ - if self._mode == _MODE_CLOSED: - return - - try: - # In .__init__ method, if fails after setting ._mode attribute, - # these attributes don't exist. 
- if hasattr(self, "_buffer"): - try: - self._buffer.close() - finally: - # Set to None for ._check_mode() - self._buffer = None - elif hasattr(self, "_writer"): - try: - self.flush(self.FLUSH_FRAME) - finally: - # Set to None for ._check_mode() - self._writer = None - finally: - try: - if self._close_fp: - self._fp.close() - finally: - self._fp = None - self._close_fp = False - self._mode = _MODE_CLOSED - - # None argument means the file should be closed - def _check_mode(self, expected_mode=None): - # If closed, raise ValueError. - if self._mode == _MODE_CLOSED: - raise ValueError("I/O operation on closed file") - - # Check _MODE_READ/_MODE_WRITE mode - if expected_mode == _MODE_READ: - if self._mode != _MODE_READ: - raise io.UnsupportedOperation("File not open for reading") - elif expected_mode == _MODE_WRITE: - if self._mode != _MODE_WRITE: - raise io.UnsupportedOperation("File not open for writing") - - # Re-raise other AttributeError exception - raise - - # If modify this method, also modify SeekableZstdFile.write() method. - def write(self, data): - """Write a bytes-like object to the file. - - Returns the number of uncompressed bytes written, which is - always the length of data in bytes. Note that due to buffering, - the file on disk may not reflect the data written until .flush() - or .close() is called. - """ - # Compress & write - try: - input_size, _ = self._writer.write(data) - except AttributeError: - self._check_mode(_MODE_WRITE) - - self._pos += input_size - return input_size - - # If modify this method, also modify SeekableZstdFile.flush() method. - def flush(self, mode=FLUSH_BLOCK): - """Flush remaining data to the underlying stream. - - The mode argument can be ZstdFile.FLUSH_BLOCK, ZstdFile.FLUSH_FRAME. - Abuse of this method will reduce compression ratio, use it only when - necessary. - - If the program is interrupted afterwards, all data can be recovered. - To ensure saving to disk, also need to use os.fsync(fd). 
- - This method does nothing in reading mode. - """ - if self._mode != _MODE_WRITE: - # Like IOBase.flush(), do nothing in reading mode. - # TextIOWrapper.close() relies on this behavior. - if self._mode == _MODE_READ: - return - # Closed, raise ValueError. - self._check_mode() - - # Flush zstd block/frame, and write. - self._writer.flush(mode) - - def read(self, size=-1): - """Read up to size uncompressed bytes from the file. - - If size is negative or omitted, read until EOF is reached. - Returns b"" if the file is already at EOF. - """ - if size is None: - size = -1 - try: - return self._buffer.read(size) - except AttributeError: - self._check_mode(_MODE_READ) - - def read1(self, size=-1): - """Read up to size uncompressed bytes, while trying to avoid - making multiple reads from the underlying stream. Reads up to a - buffer's worth of data if size is negative. - - Returns b"" if the file is at EOF. - """ - if size < 0: - size = _ZSTD_DStreamOutSize - - try: - return self._buffer.read1(size) - except AttributeError: - self._check_mode(_MODE_READ) - - def readinto(self, b): - """Read bytes into b. - - Returns the number of bytes read (0 for EOF). - """ - try: - return self._buffer.readinto(b) - except AttributeError: - self._check_mode(_MODE_READ) - - def readinto1(self, b): - """Read bytes into b, while trying to avoid making multiple reads - from the underlying stream. - - Returns the number of bytes read (0 for EOF). - """ - try: - return self._buffer.readinto1(b) - except AttributeError: - self._check_mode(_MODE_READ) - - def readline(self, size=-1): - """Read a line of uncompressed bytes from the file. - - The terminating newline (if present) is retained. If size is - non-negative, no more than size bytes will be read (in which - case the line may be incomplete). Returns b'' if already at EOF. 
- """ - if size is None: - size = -1 - try: - return self._buffer.readline(size) - except AttributeError: - self._check_mode(_MODE_READ) - - def seek(self, offset, whence=io.SEEK_SET): - """Change the file position. - - The new position is specified by offset, relative to the - position indicated by whence. Possible values for whence are: - - 0: start of stream (default): offset must not be negative - 1: current stream position - 2: end of stream; offset must not be positive - - Returns the new file position. - - Note that seeking is emulated, so depending on the arguments, - this operation may be extremely slow. - """ - try: - # BufferedReader.seek() checks seekable - return self._buffer.seek(offset, whence) - except AttributeError: - self._check_mode(_MODE_READ) - - def peek(self, size=-1): - """Return buffered data without advancing the file position. - - Always returns at least one byte of data, unless at EOF. - The exact number of bytes returned is unspecified. - """ - # Relies on the undocumented fact that BufferedReader.peek() always - # returns at least one byte (except at EOF) - try: - return self._buffer.peek(size) - except AttributeError: - self._check_mode(_MODE_READ) - - def __iter__(self): - try: - self._buffer - except AttributeError: - self._check_mode(_MODE_READ) - return self - - def __next__(self): - ret = self._buffer.readline() - if ret: - return ret - raise StopIteration - - def tell(self): - """Return the current file position.""" - if self._mode == _MODE_READ: - return self._buffer.tell() - elif self._mode == _MODE_WRITE: - return self._pos - - # Closed, raise ValueError. - self._check_mode() - - def fileno(self): - """Return the file descriptor for the underlying file.""" - try: - return self._fp.fileno() - except AttributeError: - # Closed, raise ValueError. 
- self._check_mode() - - @property - def name(self): - """Return the file name for the underlying file.""" - try: - return self._fp.name - except AttributeError: - self._check_mode() - - @property - def closed(self): - """True if this file is closed.""" - return self._mode == _MODE_CLOSED - - def writable(self): - """Return whether the file was opened for writing.""" - if self._mode == _MODE_WRITE: - return True - elif self._mode == _MODE_READ: - return False - - # Closed, raise ValueError. - self._check_mode() - - def readable(self): - """Return whether the file was opened for reading.""" - if self._mode == _MODE_READ: - return True - elif self._mode == _MODE_WRITE: - return False - - # Closed, raise ValueError. - self._check_mode() - - def seekable(self): - """Return whether the file supports seeking.""" - if self._mode == _MODE_READ: - return self._buffer.seekable() - elif self._mode == _MODE_WRITE: - return False - - # Closed, raise ValueError. - self._check_mode() - - -# Copied from lzma module -def open(filename, mode="rb", *, level_or_option=None, zstd_dict=None, - encoding=None, errors=None, newline=None): - """Open a zstd compressed file in binary or text mode. - - filename can be either an actual file name (given as a str, bytes, or - PathLike object), in which case the named file is opened, or it can be an - existing file object to read from or write to. - - The mode parameter can be "r", "rb" (default), "w", "wb", "x", "xb", "a", - "ab" for binary mode, or "rt", "wt", "xt", "at" for text mode. - - The level_or_option and zstd_dict parameters specify the settings, as for - ZstdCompressor, ZstdDecompressor and ZstdFile. - - When using read mode (decompression), the level_or_option parameter can - only be a dict object, that represents decompression option. It doesn't - support int type compression level in this case. - - For binary mode, this function is equivalent to the ZstdFile constructor: - ZstdFile(filename, mode, ...). 
In this case, the encoding, errors and - newline parameters must not be provided. - - For text mode, an ZstdFile object is created, and wrapped in an - io.TextIOWrapper instance with the specified encoding, error handling - behavior, and line ending(s). - """ - - if "t" in mode: - if "b" in mode: - raise ValueError("Invalid mode: %r" % (mode,)) - else: - if encoding is not None: - raise ValueError("Argument 'encoding' not supported in binary mode") - if errors is not None: - raise ValueError("Argument 'errors' not supported in binary mode") - if newline is not None: - raise ValueError("Argument 'newline' not supported in binary mode") - - zstd_mode = mode.replace("t", "") - binary_file = ZstdFile(filename, zstd_mode, - level_or_option=level_or_option, zstd_dict=zstd_dict) - - if "t" in mode: - return io.TextIOWrapper(binary_file, encoding, errors, newline) - else: - return binary_file diff --git a/src/bin_ext/compressor.c b/src/bin_ext/compressor.c deleted file mode 100644 index 66f5030..0000000 --- a/src/bin_ext/compressor.c +++ /dev/null @@ -1,599 +0,0 @@ -#include "pyzstd.h" - -/* ----------------------- - ZstdCompressor code - ----------------------- */ -static PyObject * -ZstdCompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - ZstdCompressor *self; - self = (ZstdCompressor*)type->tp_alloc(type, 0); - if (self == NULL) { - goto error; - } - - /* Keep this first. Set module state to self. 
*/ - SET_STATE_TO_OBJ(type, self); - - assert(self->dict == NULL); - assert(self->use_multithread == 0); - assert(self->compression_level == 0); - assert(self->inited == 0); - - /* Compression context */ - self->cctx = ZSTD_createCCtx(); - if (self->cctx == NULL) { - STATE_FROM_OBJ(self); - PyErr_SetString(MS_MEMBER(ZstdError), - "Unable to create ZSTD_CCtx instance."); - goto error; - } - - /* Last mode */ - self->last_mode = ZSTD_e_end; - - /* Thread lock */ - self->lock = PyThread_allocate_lock(); - if (self->lock == NULL) { - PyErr_NoMemory(); - goto error; - } - return (PyObject*)self; - -error: - Py_XDECREF(self); - return NULL; -} - -static void -ZstdCompressor_dealloc(ZstdCompressor *self) -{ - /* Free compression context */ - ZSTD_freeCCtx(self->cctx); - - /* Py_XDECREF the dict after free the compression context */ - Py_XDECREF(self->dict); - - /* Thread lock */ - if (self->lock) { - PyThread_free_lock(self->lock); - } - - PyTypeObject *tp = Py_TYPE(self); - tp->tp_free((PyObject*)self); - Py_DECREF(tp); -} - -PyDoc_STRVAR(ZstdCompressor_doc, -"A streaming compressor. 
Thread-safe at method level.\n\n" -"ZstdCompressor.__init__(self, level_or_option=None, zstd_dict=None)\n" -"----\n" -"Initialize a ZstdCompressor object.\n\n" -"Parameters\n" -"level_or_option: When it's an int object, it represents the compression level.\n" -" When it's a dict object, it contains advanced compression\n" -" parameters.\n" -"zstd_dict: A ZstdDict object, pre-trained zstd dictionary."); - -static int -ZstdCompressor_init(ZstdCompressor *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"level_or_option", "zstd_dict", NULL}; - PyObject *level_or_option = Py_None; - PyObject *zstd_dict = Py_None; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "|OO:ZstdCompressor.__init__", kwlist, - &level_or_option, &zstd_dict)) { - return -1; - } - - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - - /* Set compressLevel/option to compression context */ - if (level_or_option != Py_None) { - if (set_c_parameters(self, level_or_option) < 0) { - return -1; - } - } - - /* Load dictionary to compression context */ - if (zstd_dict != Py_None) { - if (load_c_dict(self, zstd_dict) < 0) { - return -1; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - - return 0; -} - -FORCE_INLINE PyObject * -compress_impl(ZstdCompressor *self, Py_buffer *data, - const ZSTD_EndDirective end_directive, const int rich_mem) -{ - ZSTD_inBuffer in; - ZSTD_outBuffer out; - PYZSTD_OUTPUT_BUFFER(buffer); - size_t zstd_ret; - PyObject *ret; - - /* Prepare input & output buffers */ - if (data != NULL) { - in.src = data->buf; - in.size = data->len; - in.pos = 0; - } else { - in.src = ∈ - in.size = 0; - in.pos = 0; - } - - if (rich_mem) { - /* Calculate output buffer's size */ - size_t output_buffer_size = ZSTD_compressBound(in.size); - if (output_buffer_size > (size_t) PY_SSIZE_T_MAX) { - PyErr_NoMemory(); - goto error; - } - - if 
(OutputBuffer_InitWithSize(&buffer, &out, -1, - (Py_ssize_t) output_buffer_size) < 0) { - goto error; - } - } else { - if (OutputBuffer_InitAndGrow(&buffer, &out, -1) < 0) { - goto error; - } - } - - /* zstd stream compress */ - while (1) { - Py_BEGIN_ALLOW_THREADS - zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, end_directive); - Py_END_ALLOW_THREADS - - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - STATE_FROM_OBJ(self); - set_zstd_error(MODULE_STATE, ERR_COMPRESS, zstd_ret); - goto error; - } - - /* Finished */ - if (zstd_ret == 0) { - break; - } - - /* Output buffer should be exhausted, grow the buffer. */ - assert(out.pos == out.size); - if (out.pos == out.size) { - if (OutputBuffer_Grow(&buffer, &out) < 0) { - goto error; - } - } - } - - /* Return a bytes object */ - ret = OutputBuffer_Finish(&buffer, &out); - if (ret != NULL) { - return ret; - } - -error: - OutputBuffer_OnError(&buffer); - return NULL; -} - -static PyObject * -compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) -{ - ZSTD_inBuffer in; - ZSTD_outBuffer out; - PYZSTD_OUTPUT_BUFFER(buffer); - size_t zstd_ret; - PyObject *ret; - - /* Prepare input & output buffers */ - in.src = data->buf; - in.size = data->len; - in.pos = 0; - - if (OutputBuffer_InitAndGrow(&buffer, &out, -1) < 0) { - goto error; - } - - /* zstd stream compress */ - while (1) { - Py_BEGIN_ALLOW_THREADS - do { - zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, ZSTD_e_continue); - } while (out.pos != out.size && in.pos != in.size && !ZSTD_isError(zstd_ret)); - Py_END_ALLOW_THREADS - - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - STATE_FROM_OBJ(self); - set_zstd_error(MODULE_STATE, ERR_COMPRESS, zstd_ret); - goto error; - } - - /* Like compress_impl(), output as much as possible. 
*/ - if (out.pos == out.size) { - if (OutputBuffer_Grow(&buffer, &out) < 0) { - goto error; - } - } else if (in.pos == in.size) { - /* Finished */ - assert(mt_continue_should_break(&in, &out)); - break; - } - } - - /* Return a bytes object */ - ret = OutputBuffer_Finish(&buffer, &out); - if (ret != NULL) { - return ret; - } - -error: - OutputBuffer_OnError(&buffer); - return NULL; -} - -PyDoc_STRVAR(ZstdCompressor_compress_doc, -"compress(data, mode=ZstdCompressor.CONTINUE)\n" -"----\n" -"Provide data to the compressor object.\n" -"Return a chunk of compressed data if possible, or b'' otherwise.\n\n" -"Parameters\n" -"data: A bytes-like object, data to be compressed.\n" -"mode: Can be these 3 values .CONTINUE, .FLUSH_BLOCK, .FLUSH_FRAME."); - -static PyObject * -ZstdCompressor_compress(ZstdCompressor *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"data", "mode", NULL}; - Py_buffer data; - int mode = ZSTD_e_continue; - - PyObject *ret; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "y*|i:ZstdCompressor.compress", kwlist, - &data, &mode)) { - return NULL; - } - - /* Check mode value */ - if (mode != ZSTD_e_continue && - mode != ZSTD_e_flush && - mode != ZSTD_e_end) - { - PyErr_SetString(PyExc_ValueError, - "mode argument wrong value, it should be one of " - "ZstdCompressor.CONTINUE, ZstdCompressor.FLUSH_BLOCK, " - "ZstdCompressor.FLUSH_FRAME."); - PyBuffer_Release(&data); - return NULL; - } - - /* Thread-safe code */ - ACQUIRE_LOCK(self); - - /* Compress */ - if (self->use_multithread && mode == ZSTD_e_continue) { - ret = compress_mt_continue_impl(self, &data); - } else { - ret = compress_impl(self, &data, mode, 0); - } - - if (ret) { - self->last_mode = mode; - } else { - self->last_mode = ZSTD_e_end; - - /* Resetting cctx's session never fail */ - ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); - } - RELEASE_LOCK(self); - - PyBuffer_Release(&data); - return ret; -} - -PyDoc_STRVAR(ZstdCompressor_flush_doc, 
-"flush(mode=ZstdCompressor.FLUSH_FRAME)\n" -"----\n" -"Flush any remaining data in internal buffer.\n\n" -"Since zstd data consists of one or more independent frames, the compressor\n" -"object can still be used after this method is called.\n\n" -"Parameter\n" -"mode: Can be these 2 values .FLUSH_FRAME, .FLUSH_BLOCK."); - -static PyObject * -ZstdCompressor_flush(ZstdCompressor *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"mode", NULL}; - int mode = ZSTD_e_end; - - PyObject *ret; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "|i:ZstdCompressor.flush", kwlist, - &mode)) { - return NULL; - } - - /* Check mode value */ - if (mode != ZSTD_e_end && mode != ZSTD_e_flush) { - PyErr_SetString(PyExc_ValueError, - "mode argument wrong value, it should be " - "ZstdCompressor.FLUSH_FRAME or " - "ZstdCompressor.FLUSH_BLOCK."); - return NULL; - } - - /* Thread-safe code */ - ACQUIRE_LOCK(self); - ret = compress_impl(self, NULL, mode, 0); - - if (ret) { - self->last_mode = mode; - } else { - self->last_mode = ZSTD_e_end; - - /* Resetting cctx's session never fail */ - ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); - } - RELEASE_LOCK(self); - - return ret; -} - -PyDoc_STRVAR(ZstdCompressor_set_pledged_input_size_doc, -"_set_pledged_input_size(size)\n" -"----\n" -"*This is an undocumented method, because it may be used incorrectly.*\n\n" -"Set uncompressed content size of a frame, the size will be written into the\n" -"frame header.\n" -"1, If called when (.last_mode != .FLUSH_FRAME), a RuntimeError will be raised.\n" -"2, If the actual size doesn't match the value, a ZstdError will be raised, and\n" -" the last compressed chunk is likely to be lost.\n" -"3, The size is only valid for one frame, then it restores to \"unknown size\".\n\n" -"Parameter\n" -"size: Uncompressed content size of a frame, None means \"unknown size\"."); - -static PyObject * -ZstdCompressor_set_pledged_input_size(ZstdCompressor *self, PyObject *size) -{ - uint64_t 
pledged_size; - size_t zstd_ret; - PyObject *ret; - - /* Get size value */ - if (size == Py_None) { - pledged_size = ZSTD_CONTENTSIZE_UNKNOWN; - } else { - pledged_size = PyLong_AsUnsignedLongLong(size); - if (pledged_size == (uint64_t)-1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "size argument should be 64-bit unsigned integer " - "value, or None."); - return NULL; - } - } - - /* Thread-safe code */ - ACQUIRE_LOCK(self); - - /* Check the current mode */ - if (self->last_mode != ZSTD_e_end) { - PyErr_SetString(PyExc_RuntimeError, - "._set_pledged_input_size() method must be called " - "when (.last_mode == .FLUSH_FRAME)."); - goto error; - } - - /* Set pledged content size */ - zstd_ret = ZSTD_CCtx_setPledgedSrcSize(self->cctx, pledged_size); - if (ZSTD_isError(zstd_ret)) { - STATE_FROM_OBJ(self); - set_zstd_error(MODULE_STATE, ERR_SET_PLEDGED_INPUT_SIZE, zstd_ret); - goto error; - } - - /* Return None */ - ret = Py_None; - Py_INCREF(ret); - goto success; - -error: - ret = NULL; -success: - RELEASE_LOCK(self); - return ret; -} - -static PyMethodDef ZstdCompressor_methods[] = { - {"compress", (PyCFunction)ZstdCompressor_compress, - METH_VARARGS|METH_KEYWORDS, ZstdCompressor_compress_doc}, - - {"flush", (PyCFunction)ZstdCompressor_flush, - METH_VARARGS|METH_KEYWORDS, ZstdCompressor_flush_doc}, - - {"_set_pledged_input_size", (PyCFunction)ZstdCompressor_set_pledged_input_size, - METH_O, ZstdCompressor_set_pledged_input_size_doc}, - - {"__reduce__", (PyCFunction)reduce_cannot_pickle, - METH_NOARGS, reduce_cannot_pickle_doc}, - - {0} -}; - -PyDoc_STRVAR(ZstdCompressor_last_mode_doc, -"The last mode used to this compressor object, its value can be .CONTINUE,\n" -".FLUSH_BLOCK, .FLUSH_FRAME. 
Initialized to .FLUSH_FRAME.\n\n" -"It can be used to get the current state of a compressor, such as, data flushed,\n" -"a frame ended."); - -static PyMemberDef ZstdCompressor_members[] = { - {"last_mode", T_INT, offsetof(ZstdCompressor, last_mode), - READONLY, ZstdCompressor_last_mode_doc}, - {0} -}; - -static PyType_Slot zstdcompressor_slots[] = { - {Py_tp_new, ZstdCompressor_new}, - {Py_tp_dealloc, ZstdCompressor_dealloc}, - {Py_tp_init, ZstdCompressor_init}, - {Py_tp_methods, ZstdCompressor_methods}, - {Py_tp_members, ZstdCompressor_members}, - {Py_tp_doc, (char*)ZstdCompressor_doc}, - {0} -}; - -static PyType_Spec zstdcompressor_type_spec = { - .name = "pyzstd.ZstdCompressor", - .basicsize = sizeof(ZstdCompressor), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .slots = zstdcompressor_slots, -}; - -/* ------------------------------ - RichMemZstdCompressor code - ------------------------------ */ -static int -RichMemZstdCompressor_init(ZstdCompressor *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"level_or_option", "zstd_dict", NULL}; - PyObject *level_or_option = Py_None; - PyObject *zstd_dict = Py_None; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "|OO:RichMemZstdCompressor.__init__", kwlist, - &level_or_option, &zstd_dict)) { - return -1; - } - - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - - /* Set compressLevel/option to compression context */ - if (level_or_option != Py_None) { - if (set_c_parameters(self, level_or_option) < 0) { - return -1; - } - } - - /* Check effective condition */ - if (self->use_multithread) { - char *msg = "Currently \"rich memory mode\" has no effect on " - "zstd multi-threaded compression (set " - "\"CParameter.nbWorkers\" >= 1), it will allocate " - "unnecessary memory."; - if (PyErr_WarnEx(PyExc_ResourceWarning, msg, 1) < 0) { - return -1; - } - } - - /* Load dictionary to compression context */ 
- if (zstd_dict != Py_None) { - if (load_c_dict(self, zstd_dict) < 0) { - return -1; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - - return 0; -} - -PyDoc_STRVAR(RichMemZstdCompressor_compress_doc, -"compress(data)\n" -"----\n" -"Compress data using rich memory mode, return a single zstd frame.\n\n" -"Compressing b'' will get an empty content frame (9 bytes or more).\n\n" -"Parameter\n" -"data: A bytes-like object, data to be compressed."); - -static PyObject * -RichMemZstdCompressor_compress(ZstdCompressor *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"data", NULL}; - Py_buffer data; - - PyObject *ret; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "y*:RichMemZstdCompressor.compress", kwlist, - &data)) { - return NULL; - } - - /* Thread-safe code */ - ACQUIRE_LOCK(self); - - ret = compress_impl(self, &data, ZSTD_e_end, 1); - if (ret == NULL) { - /* Resetting cctx's session never fail */ - ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); - } - - RELEASE_LOCK(self); - - PyBuffer_Release(&data); - return ret; -} - -static PyMethodDef RichMem_ZstdCompressor_methods[] = { - {"compress", (PyCFunction)RichMemZstdCompressor_compress, - METH_VARARGS|METH_KEYWORDS, RichMemZstdCompressor_compress_doc}, - - {"__reduce__", (PyCFunction)reduce_cannot_pickle, - METH_NOARGS, reduce_cannot_pickle_doc}, - - {0} -}; - -PyDoc_STRVAR(RichMemZstdCompressor_doc, -"A compressor use rich memory mode. 
It is designed to allocate more memory,\n" -"but faster in some cases.\n\n" -"RichMemZstdCompressor.__init__(self, level_or_option=None, zstd_dict=None)\n" -"----\n" -"Initialize a RichMemZstdCompressor object.\n\n" -"Parameters\n" -"level_or_option: When it's an int object, it represents the compression level.\n" -" When it's a dict object, it contains advanced compression\n" -" parameters.\n" -"zstd_dict: A ZstdDict object, pre-trained zstd dictionary."); - -static PyType_Slot richmem_zstdcompressor_slots[] = { - {Py_tp_new, ZstdCompressor_new}, - {Py_tp_dealloc, ZstdCompressor_dealloc}, - {Py_tp_init, RichMemZstdCompressor_init}, - {Py_tp_methods, RichMem_ZstdCompressor_methods}, - {Py_tp_doc, (char*)RichMemZstdCompressor_doc}, - {0} -}; - -static PyType_Spec richmem_zstdcompressor_type_spec = { - .name = "pyzstd.RichMemZstdCompressor", - .basicsize = sizeof(ZstdCompressor), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .slots = richmem_zstdcompressor_slots, -}; diff --git a/src/bin_ext/decompressor.c b/src/bin_ext/decompressor.c deleted file mode 100644 index f88113a..0000000 --- a/src/bin_ext/decompressor.c +++ /dev/null @@ -1,851 +0,0 @@ -#include "pyzstd.h" - -/* ----------------------------- - Decompress implementation - ----------------------------- */ -typedef enum { - TYPE_DECOMPRESSOR, /* , ZstdDecompressor class */ - TYPE_ENDLESS_DECOMPRESSOR, /* , EndlessZstdDecompressor class */ -} decompress_type; - -/* Decompress implementation for , , pseudo code: - - initialize_output_buffer - while True: - decompress_data - set_object_flag # .eof for , .at_frame_edge for . - - if output_buffer_exhausted: - if output_buffer_reached_max_length: - finish - grow_output_buffer - elif input_buffer_exhausted: - finish - - ZSTD_decompressStream()'s size_t return value: - - 0 when a frame is completely decoded and fully flushed, zstd's internal - buffer has no data. - - An error code, which can be tested using ZSTD_isError(). 
- - Or any other value > 0, which means there is still some decoding or - flushing to do to complete current frame. - - Note, decompressing "an empty input" in any case will make it > 0. - - supports multiple frames, has an .at_frame_edge flag, it means both the - input and output streams are at a frame edge. The flag can be set by this - statement: - - .at_frame_edge = (zstd_ret == 0) ? 1 : 0 - - But if decompressing "an empty input" at "a frame edge", zstd_ret will be - non-zero, then .at_frame_edge will be wrongly set to false. To solve this - problem, two AFE checks are needed to ensure that: when at "a frame edge", - empty input will not be decompressed. - - // AFE check - if (self->at_frame_edge && in->pos == in->size) { - finish - } - - In , if .at_frame_edge is eventually set to true, but input stream has - unconsumed data (in->pos < in->size), then the outer function - stream_decompress() will set .at_frame_edge to false. In this case, - although the output stream is at a frame edge, for the caller, the input - stream is not at a frame edge, see below diagram. This behavior does not - affect the next AFE check, since (in->pos < in->size). 
- - input stream: --------------|--- - ^ - output stream: ====================| - ^ -*/ -FORCE_INLINE PyObject * -decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, - const Py_ssize_t max_length, - const Py_ssize_t initial_size, - const decompress_type type) -{ - size_t zstd_ret; - ZSTD_outBuffer out; - PYZSTD_OUTPUT_BUFFER(buffer); - PyObject *ret; - - /* The first AFE check for setting .at_frame_edge flag */ - if (type == TYPE_ENDLESS_DECOMPRESSOR) { - if (self->at_frame_edge && in->pos == in->size) { - STATE_FROM_OBJ(self); - ret = MS_MEMBER(empty_bytes); - Py_INCREF(ret); - return ret; - } - } - - /* Initialize the output buffer */ - if (initial_size >= 0) { - if (OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) { - goto error; - } - } else { - if (OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) { - goto error; - } - } - assert(out.pos == 0); - - while (1) { - /* Decompress */ - Py_BEGIN_ALLOW_THREADS - zstd_ret = ZSTD_decompressStream(self->dctx, &out, in); - Py_END_ALLOW_THREADS - - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - STATE_FROM_OBJ(self); - set_zstd_error(MODULE_STATE, ERR_DECOMPRESS, zstd_ret); - goto error; - } - - /* Set .eof/.af_frame_edge flag */ - if (type == TYPE_DECOMPRESSOR) { - /* ZstdDecompressor class stops when a frame is decompressed */ - if (zstd_ret == 0) { - self->eof = 1; - break; - } - } else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - /* EndlessZstdDecompressor class supports multiple frames */ - self->at_frame_edge = (zstd_ret == 0) ? 1 : 0; - - /* The second AFE check for setting .at_frame_edge flag */ - if (self->at_frame_edge && in->pos == in->size) { - break; - } - } - - /* Need to check out before in. Maybe zstd's internal buffer still has - a few bytes can be output, grow the buffer and continue. 
*/ - if (out.pos == out.size) { - /* Output buffer exhausted */ - - /* Output buffer reached max_length */ - if (OutputBuffer_ReachedMaxLength(&buffer, &out)) { - break; - } - - /* Grow output buffer */ - if (OutputBuffer_Grow(&buffer, &out) < 0) { - goto error; - } - assert(out.pos == 0); - - } else if (in->pos == in->size) { - /* Finished */ - break; - } - } - - /* Return a bytes object */ - ret = OutputBuffer_Finish(&buffer, &out); - if (ret != NULL) { - return ret; - } - -error: - OutputBuffer_OnError(&buffer); - return NULL; -} - -FORCE_INLINE void -decompressor_reset_session(ZstdDecompressor *self, - const decompress_type type) -{ - /* Reset variables */ - self->in_begin = 0; - self->in_end = 0; - - if (type == TYPE_DECOMPRESSOR) { - Py_CLEAR(self->unused_data); - } - - /* Reset variables in one operation */ - self->needs_input = 1; - self->at_frame_edge = 1; - self->eof = 0; - self->_unused_char_for_align = 0; - - /* Resetting session never fail */ - ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only); -} - -/* For ZstdDecompressor, EndlessZstdDecompressor. 
*/ -FORCE_INLINE PyObject * -stream_decompress(ZstdDecompressor *self, PyObject *args, PyObject *kwargs, - const decompress_type type) -{ - static char *kwlist[] = {"data", "max_length", NULL}; - Py_buffer data; - Py_ssize_t max_length = -1; - - Py_ssize_t initial_buffer_size = -1; - ZSTD_inBuffer in; - PyObject *ret = NULL; - int use_input_buffer; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "y*|n:ZstdDecompressor.decompress", kwlist, - &data, &max_length)) { - return NULL; - } - - /* Thread-safe code */ - ACQUIRE_LOCK(self); - - if (type == TYPE_DECOMPRESSOR) { - /* Check .eof flag */ - if (self->eof) { - PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame."); - assert(ret == NULL); - goto success; - } - } else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - /* Fast path for the first frame */ - if (self->at_frame_edge && self->in_begin == self->in_end) { - /* Read decompressed size */ - uint64_t decompressed_size = ZSTD_getFrameContentSize(data.buf, data.len); - - /* These two zstd constants always > PY_SSIZE_T_MAX: - ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1) - ZSTD_CONTENTSIZE_ERROR is (0ULL - 2) - - Use ZSTD_findFrameCompressedSize() to check complete frame, - prevent allocating too much memory for small input chunk. 
*/ - - if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX && - !ZSTD_isError(ZSTD_findFrameCompressedSize(data.buf, data.len)) ) - { - initial_buffer_size = (Py_ssize_t) decompressed_size; - } - } - } - - /* Prepare input buffer w/wo unconsumed data */ - if (self->in_begin == self->in_end) { - /* No unconsumed data */ - use_input_buffer = 0; - - in.src = data.buf; - in.size = data.len; - in.pos = 0; - } else if (data.len == 0) { - /* Has unconsumed data, fast path for b'' */ - assert(self->in_begin < self->in_end); - - use_input_buffer = 1; - - in.src = self->input_buffer + self->in_begin; - in.size = self->in_end - self->in_begin; - in.pos = 0; - } else { - /* Has unconsumed data */ - use_input_buffer = 1; - - /* Unconsumed data size in input_buffer */ - const size_t used_now = self->in_end - self->in_begin; - assert(self->in_end > self->in_begin); - - /* Number of bytes we can append to input buffer */ - const size_t avail_now = self->input_buffer_size - self->in_end; - assert(self->input_buffer_size >= self->in_end); - - /* Number of bytes we can append if we move existing contents to - beginning of buffer */ - const size_t avail_total = self->input_buffer_size - used_now; - assert(self->input_buffer_size >= used_now); - - if (avail_total < (size_t) data.len) { - char *tmp; - const size_t new_size = used_now + data.len; - - /* Allocate with new size */ - tmp = PyMem_Malloc(new_size); - if (tmp == NULL) { - PyErr_NoMemory(); - goto error; - } - - /* Copy unconsumed data to the beginning of new buffer */ - memcpy(tmp, - self->input_buffer + self->in_begin, - used_now); - - /* Switch to new buffer */ - PyMem_Free(self->input_buffer); - self->input_buffer = tmp; - self->input_buffer_size = new_size; - - /* Set begin & end position */ - self->in_begin = 0; - self->in_end = used_now; - } else if (avail_now < (size_t) data.len) { - /* Move unconsumed data to the beginning. - Overlap is possible, so use memmove(). 
*/ - memmove(self->input_buffer, - self->input_buffer + self->in_begin, - used_now); - - /* Set begin & end position */ - self->in_begin = 0; - self->in_end = used_now; - } - - /* Copy data to input buffer */ - memcpy(self->input_buffer + self->in_end, data.buf, data.len); - self->in_end += data.len; - - in.src = self->input_buffer + self->in_begin; - in.size = used_now + data.len; - in.pos = 0; - } - assert(in.pos == 0); - - /* Decompress */ - ret = decompress_impl(self, &in, - max_length, initial_buffer_size, - type); - if (ret == NULL) { - goto error; - } - - /* Unconsumed input data */ - if (in.pos == in.size) { - if (type == TYPE_DECOMPRESSOR) { - if (Py_SIZE(ret) == max_length || self->eof) { - self->needs_input = 0; - } else { - self->needs_input = 1; - } - } else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - if (Py_SIZE(ret) == max_length && !self->at_frame_edge) { - self->needs_input = 0; - } else { - self->needs_input = 1; - } - } - - if (use_input_buffer) { - /* Clear input_buffer */ - self->in_begin = 0; - self->in_end = 0; - } - } else { - const size_t data_size = in.size - in.pos; - - /*if (type == DECOMPRESSOR) { - if (self->eof) { - self->needs_input = 0; - } else { - self->needs_input = 0; - } - } else if (type == ENDLESS_DECOMPRESSOR) { - self->needs_input = 0; - }*/ - self->needs_input = 0; - - if (type == TYPE_ENDLESS_DECOMPRESSOR) { - /*if (self->at_frame_edge) { - self->at_frame_edge = 0; - }*/ - self->at_frame_edge = 0; - } - - if (!use_input_buffer) { - /* Discard buffer if it's too small - (resizing it may needlessly copy the current contents) */ - if (self->input_buffer != NULL && - self->input_buffer_size < data_size) - { - PyMem_Free(self->input_buffer); - self->input_buffer = NULL; - self->input_buffer_size = 0; - } - - /* Allocate if necessary */ - if (self->input_buffer == NULL) { - self->input_buffer = PyMem_Malloc(data_size); - if (self->input_buffer == NULL) { - PyErr_NoMemory(); - goto error; - } - self->input_buffer_size = 
data_size; - } - - /* Copy unconsumed data */ - memcpy(self->input_buffer, (char*)in.src + in.pos, data_size); - self->in_begin = 0; - self->in_end = data_size; - } else { - /* Use input buffer */ - self->in_begin += in.pos; - } - } - - goto success; - -error: - /* Reset decompressor's states/session */ - decompressor_reset_session(self, type); - - Py_CLEAR(ret); -success: - RELEASE_LOCK(self); - - PyBuffer_Release(&data); - return ret; -} - -/* ------------------------- - ZstdDecompressor code - ------------------------- */ -static PyObject * -ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - ZstdDecompressor *self; - self = (ZstdDecompressor*)type->tp_alloc(type, 0); - if (self == NULL) { - goto error; - } - - /* Keep this first. Set module state to self. */ - SET_STATE_TO_OBJ(type, self); - - assert(self->dict == NULL); - assert(self->input_buffer == NULL); - assert(self->input_buffer_size == 0); - assert(self->in_begin == 0); - assert(self->in_end == 0); - assert(self->unused_data == NULL); - assert(self->eof == 0); - assert(self->inited == 0); - - /* needs_input flag */ - self->needs_input = 1; - - /* at_frame_edge flag */ - self->at_frame_edge = 1; - - /* Decompression context */ - self->dctx = ZSTD_createDCtx(); - if (self->dctx == NULL) { - STATE_FROM_OBJ(self); - PyErr_SetString(MS_MEMBER(ZstdError), - "Unable to create ZSTD_DCtx instance."); - goto error; - } - - /* Thread lock */ - self->lock = PyThread_allocate_lock(); - if (self->lock == NULL) { - PyErr_NoMemory(); - goto error; - } - return (PyObject*)self; - -error: - Py_XDECREF(self); - return NULL; -} - -static void -ZstdDecompressor_dealloc(ZstdDecompressor *self) -{ - /* Free decompression context */ - ZSTD_freeDCtx(self->dctx); - - /* Py_XDECREF the dict after free decompression context */ - Py_XDECREF(self->dict); - - /* Free unconsumed input data buffer */ - PyMem_Free(self->input_buffer); - - /* Free unused data */ - Py_XDECREF(self->unused_data); - - /* Free 
thread lock */ - if (self->lock) { - PyThread_free_lock(self->lock); - } - - PyTypeObject *tp = Py_TYPE(self); - tp->tp_free((PyObject*)self); - Py_DECREF(tp); -} - -PyDoc_STRVAR(ZstdDecompressor_doc, -"A streaming decompressor, it stops after a frame is decompressed.\n" -"Thread-safe at method level.\n\n" -"ZstdDecompressor.__init__(self, zstd_dict=None, option=None)\n" -"----\n" -"Initialize a ZstdDecompressor object.\n\n" -"Parameters\n" -"zstd_dict: A ZstdDict object, pre-trained zstd dictionary.\n" -"option: A dict object that contains advanced decompression parameters."); - -static int -ZstdDecompressor_init(ZstdDecompressor *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"zstd_dict", "option", NULL}; - PyObject *zstd_dict = Py_None; - PyObject *option = Py_None; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "|OO:ZstdDecompressor.__init__", kwlist, - &zstd_dict, &option)) { - return -1; - } - - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - - /* Load dictionary to decompression context */ - if (zstd_dict != Py_None) { - if (load_d_dict(self, zstd_dict) < 0) { - return -1; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - - /* Set option to decompression context */ - if (option != Py_None) { - if (set_d_parameters(self, option) < 0) { - return -1; - } - } - - return 0; -} - -static PyObject * -unused_data_get(ZstdDecompressor *self, void *Py_UNUSED(ignored)) -{ - PyObject *ret; - - /* Thread-safe code */ - ACQUIRE_LOCK(self); - - if (!self->eof) { - STATE_FROM_OBJ(self); - ret = MS_MEMBER(empty_bytes); - Py_INCREF(ret); - } else { - if (self->unused_data == NULL) { - self->unused_data = PyBytes_FromStringAndSize( - self->input_buffer + self->in_begin, - self->in_end - self->in_begin); - ret = self->unused_data; - Py_XINCREF(ret); - } else { - ret = self->unused_data; - Py_INCREF(ret); - } - } - - 
RELEASE_LOCK(self); - - return ret; -} - -PyDoc_STRVAR(ZstdDecompressor_decompress_doc, -"decompress(data, max_length=-1)\n" -"----\n" -"Decompress data, return a chunk of decompressed data if possible, or b''\n" -"otherwise.\n\n" -"It stops after a frame is decompressed.\n\n" -"Parameters\n" -"data: A bytes-like object, zstd data to be decompressed.\n" -"max_length: Maximum size of returned data. When it is negative, the size of\n" -" output buffer is unlimited. When it is nonnegative, returns at\n" -" most max_length bytes of decompressed data."); - -static PyObject * -ZstdDecompressor_decompress(ZstdDecompressor *self, PyObject *args, PyObject *kwargs) -{ - return stream_decompress(self, args, kwargs, TYPE_DECOMPRESSOR); -} - -PyDoc_STRVAR(ZstdDecompressor_reset_session_doc, -"_reset_session()\n" -"----\n" -"This is an undocumented method. Reset decompressor's states/session, don't\n" -"reset parameters and dictionary."); - -static PyObject * -ZstdDecompressor_reset_session(ZstdDecompressor *self) -{ - /* Thread-safe code */ - ACQUIRE_LOCK(self); - decompressor_reset_session(self, TYPE_DECOMPRESSOR); - RELEASE_LOCK(self); - - Py_RETURN_NONE; -} - -static PyMethodDef ZstdDecompressor_methods[] = { - {"decompress", (PyCFunction)ZstdDecompressor_decompress, - METH_VARARGS|METH_KEYWORDS, ZstdDecompressor_decompress_doc}, - - {"_reset_session", (PyCFunction)ZstdDecompressor_reset_session, - METH_NOARGS, ZstdDecompressor_reset_session_doc}, - - {"__reduce__", (PyCFunction)reduce_cannot_pickle, - METH_NOARGS, reduce_cannot_pickle_doc}, - - {0} -}; - -PyDoc_STRVAR(ZstdDecompressor_eof_doc, -"True means the end of the first frame has been reached. If decompress data\n" -"after that, an EOFError exception will be raised."); - -PyDoc_STRVAR(ZstdDecompressor_needs_input_doc, -"If the max_length output limit in .decompress() method has been reached, and\n" -"the decompressor has (or may has) unconsumed input data, it will be set to\n" -"False. 
In this case, pass b'' to .decompress() method may output further data."); - -PyDoc_STRVAR(ZstdDecompressor_unused_data_doc, -"A bytes object. When ZstdDecompressor object stops after a frame is\n" -"decompressed, unused input data after the frame. Otherwise this will be b''."); - -static PyMemberDef ZstdDecompressor_members[] = { - {"eof", T_BOOL, offsetof(ZstdDecompressor, eof), - READONLY, ZstdDecompressor_eof_doc}, - - {"needs_input", T_BOOL, offsetof(ZstdDecompressor, needs_input), - READONLY, ZstdDecompressor_needs_input_doc}, - - {0} -}; - -static PyGetSetDef ZstdDecompressor_getset[] = { - {"unused_data", (getter)unused_data_get, NULL, - ZstdDecompressor_unused_data_doc}, - - {0} -}; - -static PyType_Slot ZstdDecompressor_slots[] = { - {Py_tp_new, ZstdDecompressor_new}, - {Py_tp_dealloc, ZstdDecompressor_dealloc}, - {Py_tp_init, ZstdDecompressor_init}, - {Py_tp_methods, ZstdDecompressor_methods}, - {Py_tp_members, ZstdDecompressor_members}, - {Py_tp_getset, ZstdDecompressor_getset}, - {Py_tp_doc, (char*)ZstdDecompressor_doc}, - {0} -}; - -static PyType_Spec ZstdDecompressor_type_spec = { - .name = "pyzstd.ZstdDecompressor", - .basicsize = sizeof(ZstdDecompressor), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .slots = ZstdDecompressor_slots, -}; - -/* ------------------------------- - EndlessZstdDecompressor code - ------------------------------- */ -PyDoc_STRVAR(EndlessZstdDecompressor_doc, -"A streaming decompressor, accepts multiple concatenated frames.\n" -"Thread-safe at method level.\n\n" -"EndlessZstdDecompressor.__init__(self, zstd_dict=None, option=None)\n" -"----\n" -"Initialize an EndlessZstdDecompressor object.\n\n" -"Parameters\n" -"zstd_dict: A ZstdDict object, pre-trained zstd dictionary.\n" -"option: A dict object that contains advanced decompression parameters."); - -PyDoc_STRVAR(EndlessZstdDecompressor_decompress_doc, -"decompress(data, max_length=-1)\n" -"----\n" -"Decompress data, return a chunk of decompressed data if possible, 
or b''\n" -"otherwise.\n\n" -"Parameters\n" -"data: A bytes-like object, zstd data to be decompressed.\n" -"max_length: Maximum size of returned data. When it is negative, the size of\n" -" output buffer is unlimited. When it is nonnegative, returns at\n" -" most max_length bytes of decompressed data."); - -static PyObject * -EndlessZstdDecompressor_decompress(ZstdDecompressor *self, PyObject *args, PyObject *kwargs) -{ - return stream_decompress(self, args, kwargs, TYPE_ENDLESS_DECOMPRESSOR); -} - -static PyObject * -EndlessZstdDecompressor_reset_session(ZstdDecompressor *self) -{ - /* Thread-safe code */ - ACQUIRE_LOCK(self); - decompressor_reset_session(self, TYPE_ENDLESS_DECOMPRESSOR); - RELEASE_LOCK(self); - - Py_RETURN_NONE; -} - -static PyMethodDef EndlessZstdDecompressor_methods[] = { - {"decompress", (PyCFunction)EndlessZstdDecompressor_decompress, - METH_VARARGS|METH_KEYWORDS, EndlessZstdDecompressor_decompress_doc}, - - {"_reset_session", (PyCFunction)EndlessZstdDecompressor_reset_session, - METH_NOARGS, ZstdDecompressor_reset_session_doc}, - - {"__reduce__", (PyCFunction)reduce_cannot_pickle, - METH_NOARGS, reduce_cannot_pickle_doc}, - - {0} -}; - -PyDoc_STRVAR(EndlessZstdDecompressor_at_frame_edge_doc, -"True when both the input and output streams are at a frame edge, means a frame is\n" -"completely decoded and fully flushed, or the decompressor just be initialized.\n\n" -"This flag could be used to check data integrity in some cases."); - -static PyMemberDef EndlessZstdDecompressor_members[] = { - {"at_frame_edge", T_BOOL, offsetof(ZstdDecompressor, at_frame_edge), - READONLY, EndlessZstdDecompressor_at_frame_edge_doc}, - - {"needs_input", T_BOOL, offsetof(ZstdDecompressor, needs_input), - READONLY, ZstdDecompressor_needs_input_doc}, - - {0} -}; - -static PyType_Slot EndlessZstdDecompressor_slots[] = { - {Py_tp_new, ZstdDecompressor_new}, - {Py_tp_dealloc, ZstdDecompressor_dealloc}, - {Py_tp_init, ZstdDecompressor_init}, - {Py_tp_methods, 
EndlessZstdDecompressor_methods}, - {Py_tp_members, EndlessZstdDecompressor_members}, - {Py_tp_doc, (char*)EndlessZstdDecompressor_doc}, - {0} -}; - -static PyType_Spec EndlessZstdDecompressor_type_spec = { - .name = "pyzstd.EndlessZstdDecompressor", - .basicsize = sizeof(ZstdDecompressor), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .slots = EndlessZstdDecompressor_slots, -}; - -PyDoc_STRVAR(decompress_doc, -"decompress(data, zstd_dict=None, option=None)\n" -"----\n" -"Decompress a zstd data, return a bytes object.\n\n" -"Support multiple concatenated frames.\n\n" -"Parameters\n" -"data: A bytes-like object, compressed zstd data.\n" -"zstd_dict: A ZstdDict object, pre-trained zstd dictionary.\n" -"option: A dict object, contains advanced decompression parameters."); - -static PyObject * -decompress(PyObject *module, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"data", "zstd_dict", "option", NULL}; - Py_buffer data; - PyObject *zstd_dict = Py_None; - PyObject *option = Py_None; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "y*|OO:decompress", kwlist, - &data, &zstd_dict, &option)) { - return NULL; - } - - uint64_t decompressed_size; - Py_ssize_t initial_size; - ZstdDecompressor self = {0}; - ZSTD_inBuffer in; - STATE_FROM_MODULE(module); - PyObject *ret = NULL; - - /* Initialize & set ZstdDecompressor */ - self.dctx = ZSTD_createDCtx(); - if (self.dctx == NULL) { - PyErr_SetString(MS_MEMBER(ZstdError), - "Unable to create ZSTD_DCtx instance."); - goto error; - } - self.at_frame_edge = 1; -#ifdef USE_MULTI_PHASE_INIT - self.module_state = MODULE_STATE; -#endif - - /* Load dictionary to decompression context */ - if (zstd_dict != Py_None) { - if (load_d_dict(&self, zstd_dict) < 0) { - goto error; - } - } - - /* Set option to decompression context */ - if (option != Py_None) { - if (set_d_parameters(&self, option) < 0) { - goto error; - } - } - - /* Prepare input data */ - in.src = data.buf; - in.size = data.len; - in.pos = 0; - - 
/* Get decompressed size */ - decompressed_size = ZSTD_getFrameContentSize(data.buf, data.len); - /* These two zstd constants always > PY_SSIZE_T_MAX: - ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1) - ZSTD_CONTENTSIZE_ERROR is (0ULL - 2) */ - if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX) { - initial_size = (Py_ssize_t) decompressed_size; - } else { - initial_size = -1; - } - - /* Decompress */ - ret = decompress_impl(&self, &in, -1, initial_size, - TYPE_ENDLESS_DECOMPRESSOR); - if (ret == NULL) { - goto error; - } - - /* Check data integrity. at_frame_edge flag is 1 when both the input and - output streams are at a frame edge. */ - if (self.at_frame_edge == 0 || in.pos == 0) { - char *extra_msg = (Py_SIZE(ret) == 0) ? "." : - ", if want to output these decompressed data, use " - "decompress_stream function or " - "EndlessZstdDecompressor class to decompress."; - PyErr_Format(MS_MEMBER(ZstdError), - "Decompression failed: zstd data ends in an incomplete " - "frame, maybe the input data was truncated. Decompressed " - "data is %zd bytes%s", - Py_SIZE(ret), extra_msg); - goto error; - } - - goto success; - -error: - Py_CLEAR(ret); -success: - /* Free decompression context */ - ZSTD_freeDCtx(self.dctx); - /* Release data */ - PyBuffer_Release(&data); - return ret; -} diff --git a/src/bin_ext/dict.c b/src/bin_ext/dict.c deleted file mode 100644 index b2a3dce..0000000 --- a/src/bin_ext/dict.c +++ /dev/null @@ -1,507 +0,0 @@ -#include "pyzstd.h" - -/* ----------------- - ZstdDict code - ----------------- */ -static PyObject * -ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - ZstdDict *self; - self = (ZstdDict*)type->tp_alloc(type, 0); - if (self == NULL) { - goto error; - } - - /* Keep this first. Set module state to self. 
*/ - SET_STATE_TO_OBJ(type, self); - - assert(self->dict_content == NULL); - assert(self->dict_id == 0); - assert(self->d_dict == NULL); - assert(self->inited == 0); - - /* ZSTD_CDict dict */ - self->c_dicts = PyDict_New(); - if (self->c_dicts == NULL) { - goto error; - } - - /* Thread lock */ - self->lock = PyThread_allocate_lock(); - if (self->lock == NULL) { - PyErr_NoMemory(); - goto error; - } - return (PyObject*)self; - -error: - Py_XDECREF(self); - return NULL; -} - -static void -ZstdDict_dealloc(ZstdDict *self) -{ - /* Free ZSTD_CDict instances */ - Py_XDECREF(self->c_dicts); - - /* Free ZSTD_DDict instance */ - ZSTD_freeDDict(self->d_dict); - - /* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */ - Py_XDECREF(self->dict_content); - - /* Free thread lock */ - if (self->lock) { - PyThread_free_lock(self->lock); - } - - PyTypeObject *tp = Py_TYPE(self); - tp->tp_free((PyObject*)self); - Py_DECREF(tp); -} - -static int -ZstdDict_init(ZstdDict *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"dict_content", "is_raw", NULL}; - PyObject *dict_content; - int is_raw = 0; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "O|p:ZstdDict.__init__", kwlist, - &dict_content, &is_raw)) { - return -1; - } - - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - - /* Check dict_content's type */ - self->dict_content = PyBytes_FromObject(dict_content); - if (self->dict_content == NULL) { - PyErr_SetString(PyExc_TypeError, - "dict_content argument should be bytes-like object."); - return -1; - } - - /* Both ordinary dictionary and "raw content" dictionary should - at least 8 bytes */ - if (Py_SIZE(self->dict_content) < 8) { - PyErr_SetString(PyExc_ValueError, - "Zstd dictionary content should at least 8 bytes."); - return -1; - } - - /* Get dict_id, 0 means "raw content" dictionary. 
*/ - self->dict_id = ZSTD_getDictID_fromDict(PyBytes_AS_STRING(self->dict_content), - Py_SIZE(self->dict_content)); - - /* Check validity for ordinary dictionary */ - if (!is_raw && self->dict_id == 0) { - char *msg = "The dict_content argument is not a valid zstd " - "dictionary. The first 4 bytes of a valid zstd dictionary " - "should be a magic number: b'\\x37\\xA4\\x30\\xEC'.\n" - "If you are an advanced user, and can be sure that " - "dict_content argument is a \"raw content\" zstd " - "dictionary, set is_raw parameter to True."; - PyErr_SetString(PyExc_ValueError, msg); - return -1; - } - - return 0; -} - -static PyObject * -ZstdDict_reduce(ZstdDict *self) -{ - /* return Py_BuildValue("O(On)", Py_TYPE(self), - self->dict_content, - self->dict_id == 0); - v0.15.7 added .as_* attributes, pickle will cause more confusion. */ - PyErr_SetString(PyExc_TypeError, - "ZstdDict object intentionally doesn't support pickle. If need " - "to save zstd dictionary to disk, please save .dict_content " - "attribute, it's a bytes object. So that the zstd dictionary " - "can be used with other programs."); - return NULL; -} - -static PyMethodDef ZstdDict_methods[] = { - {"__reduce__", (PyCFunction)ZstdDict_reduce, - METH_NOARGS, reduce_cannot_pickle_doc}, - - {0} -}; - -PyDoc_STRVAR(ZstdDict_dict_doc, -"Zstd dictionary, used for compression/decompression.\n\n" -"ZstdDict.__init__(self, dict_content, is_raw=False)\n" -"----\n" -"Initialize a ZstdDict object.\n\n" -"Parameters\n" -"dict_content: A bytes-like object, dictionary's content.\n" -"is_raw: This parameter is for advanced user. True means dict_content\n" -" argument is a \"raw content\" dictionary, free of any format\n" -" restriction. 
False means dict_content argument is an ordinary\n" -" zstd dictionary, was created by zstd functions, follow a\n" -" specified format."); - -PyDoc_STRVAR(ZstdDict_dictid_doc, -"ID of zstd dictionary, a 32-bit unsigned int value.\n\n" -"Non-zero means ordinary dictionary, was created by zstd functions, follow\n" -"a specified format.\n\n" -"0 means a \"raw content\" dictionary, free of any format restriction, used\n" -"for advanced user."); - -PyDoc_STRVAR(ZstdDict_dictcontent_doc, -"The content of zstd dictionary, a bytes object, it's the same as dict_content\n" -"argument in ZstdDict.__init__() method. It can be used with other programs."); - -static PyObject * -ZstdDict_str(ZstdDict *dict) -{ - char buf[64]; - PyOS_snprintf(buf, sizeof(buf), - "", - dict->dict_id, Py_SIZE(dict->dict_content)); - - return PyUnicode_FromString(buf); -} - -static PyMemberDef ZstdDict_members[] = { - {"dict_id", T_UINT, offsetof(ZstdDict, dict_id), READONLY, ZstdDict_dictid_doc}, - {"dict_content", T_OBJECT_EX, offsetof(ZstdDict, dict_content), READONLY, ZstdDict_dictcontent_doc}, - {0} -}; - -PyDoc_STRVAR(ZstdDict_as_digested_dict_doc, -"Load as a digested dictionary to compressor, by passing this attribute as\n" -"zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict)\n" -"1, Some advanced compression parameters of compressor may be overridden\n" -" by parameters of digested dictionary.\n" -"2, ZstdDict has a digested dictionaries cache for each compression level.\n" -" It's faster when loading again a digested dictionary with the same\n" -" compression level.\n" -"3, No need to use this for decompression."); - -static PyObject * -ZstdDict_as_digested_dict_get(ZstdDict *self, void *Py_UNUSED(ignored)) -{ - return Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED); -} - -PyDoc_STRVAR(ZstdDict_as_undigested_dict_doc, -"Load as an undigested dictionary to compressor, by passing this attribute as\n" -"zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict)\n" -"1, The 
advanced compression parameters of compressor will not be overridden.\n" -"2, Loading an undigested dictionary is costly. If load an undigested dictionary\n" -" multiple times, consider reusing a compressor object.\n" -"3, No need to use this for decompression."); - -static PyObject * -ZstdDict_as_undigested_dict_get(ZstdDict *self, void *Py_UNUSED(ignored)) -{ - return Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED); -} - -PyDoc_STRVAR(ZstdDict_as_prefix_doc, -"Load as a prefix to compressor/decompressor, by passing this attribute as\n" -"zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix)\n" -"1, Prefix is compatible with long distance matching, while dictionary is not.\n" -"2, It only works for the first frame, then the compressor/decompressor will\n" -" return to no prefix state.\n" -"3, When decompressing, must use the same prefix as when compressing."); - -static PyObject * -ZstdDict_as_prefix_get(ZstdDict *self, void *Py_UNUSED(ignored)) -{ - return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX); -} - -static PyGetSetDef ZstdDict_getset[] = { - {"as_digested_dict", (getter)ZstdDict_as_digested_dict_get, - NULL, ZstdDict_as_digested_dict_doc}, - - {"as_undigested_dict", (getter)ZstdDict_as_undigested_dict_get, - NULL, ZstdDict_as_undigested_dict_doc}, - - {"as_prefix", (getter)ZstdDict_as_prefix_get, - NULL, ZstdDict_as_prefix_doc}, - - {0} -}; - -static Py_ssize_t -ZstdDict_length(ZstdDict *self) -{ - assert(PyBytes_Check(self->dict_content)); - return Py_SIZE(self->dict_content); -} - -static PyType_Slot zstddict_slots[] = { - {Py_tp_methods, ZstdDict_methods}, - {Py_tp_members, ZstdDict_members}, - {Py_tp_getset, ZstdDict_getset}, - {Py_tp_new, ZstdDict_new}, - {Py_tp_dealloc, ZstdDict_dealloc}, - {Py_tp_init, ZstdDict_init}, - {Py_tp_str, ZstdDict_str}, - {Py_tp_doc, (char*)ZstdDict_dict_doc}, - {Py_sq_length, ZstdDict_length}, - {0} -}; - -static PyType_Spec zstddict_type_spec = { - .name = "pyzstd.ZstdDict", - .basicsize = sizeof(ZstdDict), - .flags 
= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .slots = zstddict_slots, -}; - -/* ------------------------- - Train dictionary code - ------------------------- */ -PyDoc_STRVAR(_train_dict_doc, -"Internal function, train a zstd dictionary."); - -static PyObject * -_train_dict(PyObject *module, PyObject *args) -{ - PyBytesObject *samples_bytes; - PyObject *samples_size_list; - Py_ssize_t dict_size; - - Py_ssize_t chunks_number; - size_t *chunk_sizes = NULL; - PyObject *dst_dict_bytes = NULL; - size_t zstd_ret; - Py_ssize_t sizes_sum; - Py_ssize_t i; - - if (!PyArg_ParseTuple(args, "SOn:_train_dict", - &samples_bytes, &samples_size_list, &dict_size)) { - return NULL; - } - - /* Check arguments */ - if (dict_size <= 0) { - PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); - return NULL; - } - - if (!PyList_Check(samples_size_list)) { - PyErr_SetString(PyExc_TypeError, - "samples_size_list argument should be a list."); - return NULL; - } - - chunks_number = Py_SIZE(samples_size_list); - if ((size_t) chunks_number > UINT32_MAX) { - PyErr_SetString(PyExc_ValueError, - "The number of samples should <= UINT32_MAX."); - return NULL; - } - - /* Prepare chunk_sizes */ - chunk_sizes = PyMem_Malloc(chunks_number * sizeof(size_t)); - if (chunk_sizes == NULL) { - PyErr_NoMemory(); - goto error; - } - - sizes_sum = 0; - for (i = 0; i < chunks_number; i++) { - PyObject *size = PyList_GET_ITEM(samples_size_list, i); - chunk_sizes[i] = PyLong_AsSize_t(size); - if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Items in samples_size_list should be an int " - "object, with a size_t value."); - goto error; - } - sizes_sum += chunk_sizes[i]; - } - - if (sizes_sum != Py_SIZE(samples_bytes)) { - PyErr_SetString(PyExc_ValueError, - "The samples size list doesn't match the concatenation's size."); - goto error; - } - - /* Allocate dict buffer */ - dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size); - if 
(dst_dict_bytes == NULL) { - goto error; - } - - /* Train the dictionary */ - Py_BEGIN_ALLOW_THREADS - zstd_ret = ZDICT_trainFromBuffer(PyBytes_AS_STRING(dst_dict_bytes), dict_size, - PyBytes_AS_STRING(samples_bytes), - chunk_sizes, (uint32_t)chunks_number); - Py_END_ALLOW_THREADS - - /* Check zstd dict error */ - if (ZDICT_isError(zstd_ret)) { - STATE_FROM_MODULE(module); - set_zstd_error(MODULE_STATE, ERR_TRAIN_DICT, zstd_ret); - goto error; - } - - /* Resize dict_buffer */ - if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) { - goto error; - } - - goto success; - -error: - Py_CLEAR(dst_dict_bytes); - -success: - PyMem_Free(chunk_sizes); - return dst_dict_bytes; -} - -PyDoc_STRVAR(_finalize_dict_doc, -"Internal function, finalize a zstd dictionary."); - -static PyObject * -_finalize_dict(PyObject *module, PyObject *args) -{ -#if ZSTD_VERSION_NUMBER < 10405 - PyErr_Format(PyExc_NotImplementedError, - "_finalize_dict function only available when the underlying " - "zstd library's version is greater than or equal to v1.4.5. " - "At pyzstd module's compile-time, zstd version < v1.4.5. At " - "pyzstd module's run-time, zstd version is v%s.", - ZSTD_versionString()); - return NULL; -#else - if (ZSTD_versionNumber() < 10405) { - /* Must be dynamically linked */ - PyErr_Format(PyExc_NotImplementedError, - "_finalize_dict function only available when the underlying " - "zstd library's version is greater than or equal to v1.4.5. " - "At pyzstd module's compile-time, zstd version >= v1.4.5. 
At " - "pyzstd module's run-time, zstd version is v%s.", - ZSTD_versionString()); - return NULL; - } - - PyBytesObject *custom_dict_bytes; - PyBytesObject *samples_bytes; - PyObject *samples_size_list; - Py_ssize_t dict_size; - int compression_level; - - Py_ssize_t chunks_number; - size_t *chunk_sizes = NULL; - PyObject *dst_dict_bytes = NULL; - size_t zstd_ret; - ZDICT_params_t params; - Py_ssize_t sizes_sum; - Py_ssize_t i; - - if (!PyArg_ParseTuple(args, "SSOni:_finalize_dict", - &custom_dict_bytes, &samples_bytes, &samples_size_list, - &dict_size, &compression_level)) { - return NULL; - } - - /* Check arguments */ - if (dict_size <= 0) { - PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); - return NULL; - } - - if (!PyList_Check(samples_size_list)) { - PyErr_SetString(PyExc_TypeError, - "samples_size_list argument should be a list."); - return NULL; - } - - chunks_number = Py_SIZE(samples_size_list); - if ((size_t) chunks_number > UINT32_MAX) { - PyErr_SetString(PyExc_ValueError, - "The number of samples should <= UINT32_MAX."); - return NULL; - } - - /* Prepare chunk_sizes */ - chunk_sizes = PyMem_Malloc(chunks_number * sizeof(size_t)); - if (chunk_sizes == NULL) { - PyErr_NoMemory(); - goto error; - } - - sizes_sum = 0; - for (i = 0; i < chunks_number; i++) { - PyObject *size = PyList_GET_ITEM(samples_size_list, i); - chunk_sizes[i] = PyLong_AsSize_t(size); - if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Items in samples_size_list should be an int " - "object, with a size_t value."); - goto error; - } - sizes_sum += chunk_sizes[i]; - } - - if (sizes_sum != Py_SIZE(samples_bytes)) { - PyErr_SetString(PyExc_ValueError, - "The samples size list doesn't match the concatenation's size."); - goto error; - } - - /* Allocate dict buffer */ - dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size); - if (dst_dict_bytes == NULL) { - goto error; - } - - /* Parameters */ - - /* 
Optimize for a specific zstd compression level, 0 means default. */ - params.compressionLevel = compression_level; - /* Write log to stderr, 0 = none. */ - params.notificationLevel = 0; - /* Force dictID value, 0 means auto mode (32-bits random value). */ - params.dictID = 0; - - /* Finalize the dictionary */ - Py_BEGIN_ALLOW_THREADS - zstd_ret = ZDICT_finalizeDictionary( - PyBytes_AS_STRING(dst_dict_bytes), dict_size, - PyBytes_AS_STRING(custom_dict_bytes), Py_SIZE(custom_dict_bytes), - PyBytes_AS_STRING(samples_bytes), chunk_sizes, - (uint32_t)chunks_number, params); - Py_END_ALLOW_THREADS - - /* Check zstd dict error */ - if (ZDICT_isError(zstd_ret)) { - STATE_FROM_MODULE(module); - set_zstd_error(MODULE_STATE, ERR_FINALIZE_DICT, zstd_ret); - goto error; - } - - /* Resize dict_buffer */ - if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) { - goto error; - } - - goto success; - -error: - Py_CLEAR(dst_dict_bytes); - -success: - PyMem_Free(chunk_sizes); - return dst_dict_bytes; -#endif -} diff --git a/src/bin_ext/file.c b/src/bin_ext/file.c deleted file mode 100644 index 0911ea4..0000000 --- a/src/bin_ext/file.c +++ /dev/null @@ -1,765 +0,0 @@ -#include "pyzstd.h" - -/* This file has two classes: - 1, ZstdFileReader is expected to be used with io.BufferedReader. - 2, ZstdFileWriter is expected to be used with ZstdFile/SeekableZstdFile. -*/ - -typedef struct { - PyObject_HEAD - - /* Decompression context */ - ZSTD_DCtx *dctx; - /* ZstdDict object in use */ - PyObject *dict; - - /* Read chunk size, an int object. */ - PyObject *read_size; - - /* File states. On Linux/macOS/Windows, Py_off_t is signed, so - ZstdFile/SeekableZstdFile use int64_t as file position/size. */ - PyObject *fp; /* File object */ - int sof; /* At SOF, 0 or 1. */ - int eof; /* At EOF, 0 or 1. */ - int64_t pos; /* Decompressed position, >= 0. */ - int64_t size; /* File size, -1 means unknown. */ - - /* Decompression states, 0 or 1. 
*/ - int needs_input; - int at_frame_edge; - - /* Input state, in.size/in.pos need to be initialized with 0. */ - PyObject *in_dat; - ZSTD_inBuffer in; - - /* Lazy create forward output buffer */ - char *tmp_output; - -#ifdef USE_MULTI_PHASE_INIT - _zstd_state *module_state; -#endif -} ZstdFileReader; - -typedef struct { - PyObject_HEAD - - /* Compression context */ - ZSTD_CCtx *cctx; - /* ZstdDict object in use */ - PyObject *dict; - - PyObject *fp; /* File object */ - int fp_has_flush; /* fp has .flush() method, 0 or 1. */ - - /* Last mode, initialized to ZSTD_e_end. */ - int last_mode; - - /* Use multi-threaded compression, 0 or 1. */ - int use_multithread; - - /* Compression level */ - int compression_level; - - /* Write buffer */ - char *write_buffer; - size_t write_buffer_size; - -#ifdef USE_MULTI_PHASE_INIT - _zstd_state *module_state; -#endif -} ZstdFileWriter; - -/* Generate 4 functions using macro: - 1, file_set_c_parameters(ZstdFileWriter *self, PyObject *level_or_option) - 2, file_load_c_dict(ZstdFileWriter *self, PyObject *dict) - 3, file_set_d_parameters(ZstdFileReader *self, PyObject *option) - 4, file_load_d_dict(ZstdFileReader *self, PyObject *dict) */ -#undef PYZSTD_C_CLASS -#define PYZSTD_C_CLASS ZstdFileWriter -#undef PYZSTD_D_CLASS -#define PYZSTD_D_CLASS ZstdFileReader -#undef PYZSTD_FUN_PREFIX -#define PYZSTD_FUN_PREFIX(F) file_##F -#include "macro_functions.h" - -/* ----------------------- - ZstdFileReader code - ----------------------- */ -static int -ZstdFileReader_init(ZstdFileReader *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"fp", "zstd_dict", "option", - "read_size", NULL}; - PyObject *fp; - PyObject *zstd_dict; - PyObject *option; - PyObject *read_size; - - assert(ZSTD_DStreamInSize() == 131075); - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "OOOO:ZstdFileReader.__init__", kwlist, - &fp, &zstd_dict, &option, &read_size)) { - return -1; - } - - /* Keep this first. Set module state to self. 
*/ - SET_STATE_TO_OBJ(Py_TYPE(self), self); - - assert(self->dctx == NULL); - assert(self->dict == NULL); - assert(self->read_size == NULL); - assert(self->fp == NULL); - assert(self->sof == 0); - assert(self->eof == 0); - assert(self->pos == 0); - assert(self->size == 0); - assert(self->needs_input == 0); - assert(self->at_frame_edge == 0); - assert(self->in_dat == NULL); - assert(self->in.size == 0); - assert(self->in.pos == 0); - assert(self->tmp_output == NULL); - - /* Read chunk size */ - { - Py_ssize_t v = PyLong_AsSsize_t(read_size); - if (v <= 0) { - if (v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, - "read_size argument should be integer"); - goto error; - } - PyErr_SetString(PyExc_ValueError, - "read_size argument should > 0"); - goto error; - } - } - Py_INCREF(read_size); - self->read_size = read_size; - - /* File states */ - Py_INCREF(fp); - self->fp = fp; - self->sof = 1; - self->size = -1; - - /* Decompression states */ - self->needs_input = 1; - self->at_frame_edge = 1; - - /* Decompression context */ - self->dctx = ZSTD_createDCtx(); - if (self->dctx == NULL) { - STATE_FROM_OBJ(self); - PyErr_SetString(MS_MEMBER(ZstdError), - "Unable to create ZSTD_DCtx instance."); - goto error; - } - - /* Load dictionary to decompression context */ - if (zstd_dict != Py_None) { - if (file_load_d_dict(self, zstd_dict) < 0) { - goto error; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - - /* Set option to decompression context */ - if (option != Py_None) { - if (file_set_d_parameters(self, option) < 0) { - goto error; - } - } - return 0; -error: - return -1; -} - -static void -ZstdFileReader_dealloc(ZstdFileReader *self) -{ - /* Free decompression context */ - ZSTD_freeDCtx(self->dctx); - /* Py_XDECREF the dict after free decompression context */ - Py_XDECREF(self->dict); - - Py_XDECREF(self->read_size); - Py_XDECREF(self->fp); - Py_XDECREF(self->in_dat); - PyMem_Free(self->tmp_output); - - PyTypeObject 
*tp = Py_TYPE(self); - tp->tp_free((PyObject*)self); - Py_DECREF(tp); -} - -/* If fill_full is true, fill the output buffer. - If fill_full is false, only output once, then exit. - On success, return 0. - On failure, return -1. */ -FORCE_INLINE int -decompress_into(ZstdFileReader *self, - ZSTD_outBuffer *out, const int fill_full) -{ - Py_buffer buf; - const size_t orig_pos = out->pos; - size_t zstd_ret; - - /* Return */ - if (self->eof || out->size == out->pos) { - return 0; - } - - while (1) { - if (self->in.size == self->in.pos && self->needs_input) { - void *read_buf; - Py_ssize_t read_len; - - /* Read */ - Py_XDECREF(self->in_dat); - { - STATE_FROM_OBJ(self); - self->in_dat = invoke_method_one_arg( - self->fp, - MS_MEMBER(str_read), - self->read_size); - if (self->in_dat == NULL) { - return -1; - } - } - - /* Get address and length */ - if (PyObject_GetBuffer(self->in_dat, &buf, PyBUF_SIMPLE) < 0) { - return -1; - } - read_buf = buf.buf; - read_len = buf.len; - PyBuffer_Release(&buf); - - /* EOF */ - if (read_len == 0) { - if (self->at_frame_edge && !self->sof) { - self->eof = 1; - self->pos += out->pos - orig_pos; - self->size = self->pos; - return 0; - } else { - PyErr_SetString(PyExc_EOFError, - "Compressed file ended before the " - "end-of-stream marker was reached"); - return -1; - } - } - self->in.src = read_buf; - self->in.size = read_len; - self->in.pos = 0; - self->sof = 0; - } - - /* Decompress */ - Py_BEGIN_ALLOW_THREADS - zstd_ret = ZSTD_decompressStream(self->dctx, out, &self->in); - Py_END_ALLOW_THREADS - - if (ZSTD_isError(zstd_ret)) { - STATE_FROM_OBJ(self); - set_zstd_error(MODULE_STATE, ERR_DECOMPRESS, zstd_ret); - return -1; - } - - /* Set flags */ - if (zstd_ret == 0) { - self->needs_input = 1; - self->at_frame_edge = 1; - } else { - self->needs_input = (out->size != out->pos); - self->at_frame_edge = 0; - } - - if (fill_full) { - if (out->size != out->pos) { - continue; - } else { - self->pos += out->pos - orig_pos; - return 0; - } - } else 
{ - if (out->pos != orig_pos) { - self->pos += out->pos - orig_pos; - return 0; - } - } - } -} - -static PyObject * -ZstdFileReader_readinto(ZstdFileReader *self, PyObject *arg) -{ - ZSTD_outBuffer out; - Py_buffer buf; - - if (PyObject_GetBuffer(arg, &buf, PyBUF_WRITABLE) < 0) { - return NULL; - } - out.dst = buf.buf; - out.size = buf.len; - out.pos = 0; - PyBuffer_Release(&buf); - - if (decompress_into(self, &out, 0) < 0) { - return NULL; - } - return PyLong_FromSize_t(out.pos); -} - -static PyObject * -ZstdFileReader_readall(ZstdFileReader *self) -{ - PYZSTD_OUTPUT_BUFFER(buffer); - ZSTD_outBuffer out; - PyObject *ret; - - if (self->size >= 0) { - /* Known file size */ - const int64_t length = self->size - self->pos; - if (length > (int64_t)PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - goto error; - } - if (OutputBuffer_InitWithSize(&buffer, &out, -1, - (Py_ssize_t)length) < 0) { - goto error; - } - } else { - /* Unknown file size */ - if (OutputBuffer_InitAndGrow(&buffer, &out, -1) < 0) { - goto error; - } - } - - while (1) { - if (decompress_into(self, &out, 1) < 0) { - goto error; - } - - if (self->eof) { - /* Finished */ - break; - } - if (out.size == out.pos) { - /* Grow output buffer */ - if (OutputBuffer_Grow(&buffer, &out) < 0) { - goto error; - } - } - } - ret = OutputBuffer_Finish(&buffer, &out); - if (ret != NULL) { - return ret; - } - -error: - OutputBuffer_OnError(&buffer); - return NULL; -} - -/* If obj is None, forward to EOF. - If obj <= 0, do nothing. 
*/ -static PyObject * -ZstdFileReader_forward(ZstdFileReader *self, PyObject *arg) -{ - ZSTD_outBuffer out; - const size_t DStreamOutSize = ZSTD_DStreamOutSize(); - - /* Lazy create forward output buffer */ - if (self->tmp_output == NULL) { - self->tmp_output = PyMem_Malloc(DStreamOutSize); - if (self->tmp_output == NULL) { - PyErr_NoMemory(); - return NULL; - } - } - out.dst = self->tmp_output; - - if (arg == Py_None) { - /* Forward to EOF */ - out.size = DStreamOutSize; - while (1) { - out.pos = 0; - if (decompress_into(self, &out, 1) < 0) { - return NULL; - } - if (self->eof) { - Py_RETURN_NONE; - } - } - } else { - /* Offset argument */ - int64_t offset = PyLong_AsLongLong(arg); - if (offset == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, - "offset argument should be int64_t integer"); - return NULL; - } - - /* Forward to offset */ - while (offset > 0) { - out.size = (size_t) Py_MIN((int64_t)DStreamOutSize, offset); - out.pos = 0; - if (decompress_into(self, &out, 1) < 0) { - return NULL; - } - if (self->eof) { - Py_RETURN_NONE; - } - offset -= out.pos; - } - Py_RETURN_NONE; - } -} - -static PyObject * -ZstdFileReader_reset_session(ZstdFileReader *self) -{ - /* Reset decompression states */ - self->needs_input = 1; - self->sof = 1; - self->at_frame_edge = 1; - self->in.size = 0; - self->in.pos = 0; - - /* Resetting session never fail */ - ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only); - - Py_RETURN_NONE; -} - -static PyMethodDef ZstdFileReader_methods[] = { - {"readinto", (PyCFunction)ZstdFileReader_readinto, METH_O}, - {"readall", (PyCFunction)ZstdFileReader_readall, METH_NOARGS}, - {"forward", (PyCFunction)ZstdFileReader_forward, METH_O}, - {"reset_session", (PyCFunction)ZstdFileReader_reset_session, METH_NOARGS}, - {0} -}; - -static PyMemberDef ZstdFileReader_members[] = { - {"eof", T_INT, offsetof(ZstdFileReader, eof), 0}, - {"pos", T_LONGLONG, offsetof(ZstdFileReader, pos), 0}, - {"size", T_LONGLONG, offsetof(ZstdFileReader, size), 
0}, - {0} -}; - -static PyType_Slot ZstdFileReader_slots[] = { - {Py_tp_init, ZstdFileReader_init}, - {Py_tp_dealloc, ZstdFileReader_dealloc}, - {Py_tp_methods, ZstdFileReader_methods}, - {Py_tp_members, ZstdFileReader_members}, - {0} -}; - -static PyType_Spec ZstdFileReader_type_spec = { - .name = "pyzstd.zstdfile.ZstdFileReader", - .basicsize = sizeof(ZstdFileReader), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .slots = ZstdFileReader_slots, -}; - -/* ----------------------- - ZstdFileWriter code - ----------------------- */ -static int -ZstdFileWriter_init(ZstdFileWriter *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"fp", "level_or_option", "zstd_dict", - "write_size", NULL}; - PyObject *fp; - PyObject *level_or_option; - PyObject *zstd_dict; - Py_ssize_t write_size; - - assert(ZSTD_CStreamOutSize() == 131591); - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "OOOn:ZstdFileWriter.__init__", kwlist, - &fp, &level_or_option, - &zstd_dict, &write_size)) { - return -1; - } - - /* Keep this first. Set module state to self. 
*/ - SET_STATE_TO_OBJ(Py_TYPE(self), self); - STATE_FROM_OBJ(self); - - assert(self->cctx == NULL); - assert(self->dict == NULL); - assert(self->fp == NULL); - assert(self->fp_has_flush == 0); - assert(self->last_mode == 0); - assert(self->use_multithread == 0); - assert(self->compression_level == 0); - assert(self->write_buffer == NULL); - assert(self->write_buffer_size == 0); - - /* File object */ - Py_INCREF(fp); - self->fp = fp; - self->fp_has_flush = PyObject_HasAttr(fp, MS_MEMBER(str_flush)); - - /* Last mode */ - self->last_mode = ZSTD_e_end; - - /* Write buffer */ - if (write_size <= 0) { - PyErr_SetString(PyExc_ValueError, - "write_size argument should > 0"); - goto error; - } - self->write_buffer_size = (size_t)write_size; - - self->write_buffer = PyMem_Malloc(write_size); - if (self->write_buffer == NULL) { - PyErr_NoMemory(); - goto error; - } - - /* Compression context */ - self->cctx = ZSTD_createCCtx(); - if (self->cctx == NULL) { - PyErr_SetString(MS_MEMBER(ZstdError), - "Unable to create ZSTD_CCtx instance."); - goto error; - } - - /* Set compressLevel/option to compression context */ - if (level_or_option != Py_None) { - if (file_set_c_parameters(self, level_or_option) < 0) { - goto error; - } - } - - /* Load dictionary to compression context */ - if (zstd_dict != Py_None) { - if (file_load_c_dict(self, zstd_dict) < 0) { - goto error; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - return 0; -error: - return -1; -} - -static void -ZstdFileWriter_dealloc(ZstdFileWriter *self) -{ - /* Free compression context */ - ZSTD_freeCCtx(self->cctx); - /* Py_XDECREF the dict after free the compression context */ - Py_XDECREF(self->dict); - - Py_XDECREF(self->fp); - PyMem_Free(self->write_buffer); - - PyTypeObject *tp = Py_TYPE(self); - tp->tp_free((PyObject*)self); - Py_DECREF(tp); -} - -static PyObject * -ZstdFileWriter_write(ZstdFileWriter *self, PyObject *arg) -{ - ZSTD_inBuffer in; - ZSTD_outBuffer out; - uint64_t 
output_size = 0; - Py_buffer buf; - size_t zstd_ret; - PyObject *ret; - STATE_FROM_OBJ(self); - - /* Input buffer */ - if (PyObject_GetBuffer(arg, &buf, PyBUF_SIMPLE) < 0) { - goto error; - } - in.src = buf.buf; - in.size = buf.len; - in.pos = 0; - PyBuffer_Release(&buf); - - /* Output buffer, out.pos will be set later. */ - out.dst = self->write_buffer; - out.size = self->write_buffer_size; - - /* State */ - self->last_mode = ZSTD_e_continue; - - /* Compress & write */ - while (1) { - /* Output position */ - out.pos = 0; - - /* Compress */ - Py_BEGIN_ALLOW_THREADS - if (!self->use_multithread) { - zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, ZSTD_e_continue); - } else { - do { - zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, ZSTD_e_continue); - } while (out.pos != out.size && in.pos != in.size && !ZSTD_isError(zstd_ret)); - } - Py_END_ALLOW_THREADS - - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(MODULE_STATE, ERR_COMPRESS, zstd_ret); - goto error; - } - - /* Accumulate output bytes */ - output_size += out.pos; - - /* Write output to fp */ - if (write_to_fp(MODULE_STATE, "self._fp.write()", - self->fp, &out) < 0) { - goto error; - } - - /* Finished */ - if (!self->use_multithread) { - /* Single-thread compression + .CONTINUE mode */ - if (zstd_ret == 0) { - break; - } - } else { - /* Multi-thread compression + .CONTINUE mode */ - if (mt_continue_should_break(&in, &out)) { - break; - } - } - } - - ret = Py_BuildValue("KK", (uint64_t)in.size, output_size); - if (ret != NULL) { - return ret; - } -error: - return NULL; -} - -static PyObject * -ZstdFileWriter_flush(ZstdFileWriter *self, PyObject *arg) -{ - int mode; - ZSTD_inBuffer in; - ZSTD_outBuffer out; - uint64_t output_size = 0; - size_t zstd_ret; - PyObject *ret; - STATE_FROM_OBJ(self); - - /* Mode argument */ - mode = PyLong_AsInt(arg); - - assert(ZSTD_e_flush == 1 && ZSTD_e_end == 2); - if (mode != ZSTD_e_flush && mode != ZSTD_e_end) { - /* Wrong type */ - if (mode == -1 && 
PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, "mode should be int type"); - goto error; - } - /* Wrong value */ - PyErr_SetString(PyExc_ValueError, - "mode argument wrong value, it should be " - "ZstdFile.FLUSH_BLOCK or ZstdFile.FLUSH_FRAME."); - goto error; - } - - /* Don't generate empty content frame */ - if (mode == self->last_mode) { - goto finish; - } - - /* Input buffer */ - in.src = &in; - in.size = 0; - in.pos = 0; - - /* Output buffer, out.pos will be set later. */ - out.dst = self->write_buffer; - out.size = self->write_buffer_size; - - /* State */ - self->last_mode = mode; - - /* Compress & write */ - while (1) { - /* Output position */ - out.pos = 0; - - /* Compress */ - Py_BEGIN_ALLOW_THREADS - zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, mode); - Py_END_ALLOW_THREADS - - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(MODULE_STATE, ERR_COMPRESS, zstd_ret); - goto error; - } - - /* Accumulate output bytes */ - output_size += out.pos; - - /* Write output to fp */ - if (write_to_fp(MODULE_STATE, "self._fp.write()", - self->fp, &out) < 0) { - goto error; - } - - /* Finished */ - if (zstd_ret == 0) { - break; - } - } - - /* Flush */ - if (self->fp_has_flush) { - ret = invoke_method_no_arg(self->fp, MS_MEMBER(str_flush)); - if (ret == NULL) { - goto error; - } - Py_DECREF(ret); - } - -finish: - ret = Py_BuildValue("IK", (uint32_t)0, output_size); - if (ret != NULL) { - return ret; - } -error: - return NULL; -} - -static PyMethodDef ZstdFileWriter_methods[] = { - {"write", (PyCFunction)ZstdFileWriter_write, METH_O}, - {"flush", (PyCFunction)ZstdFileWriter_flush, METH_O}, - {0} -}; - -static PyType_Slot ZstdFileWriter_slots[] = { - {Py_tp_init, ZstdFileWriter_init}, - {Py_tp_dealloc, ZstdFileWriter_dealloc}, - {Py_tp_methods, ZstdFileWriter_methods}, - {0} -}; - -static PyType_Spec ZstdFileWriter_type_spec = { - .name = "pyzstd.zstdfile.ZstdFileWriter", - .basicsize = sizeof(ZstdFileWriter), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, 
- .slots = ZstdFileWriter_slots, -}; diff --git a/src/bin_ext/macro_functions.h b/src/bin_ext/macro_functions.h deleted file mode 100644 index 41f0d58..0000000 --- a/src/bin_ext/macro_functions.h +++ /dev/null @@ -1,315 +0,0 @@ -#include "pyzstd.h" - -/* Generate functions using macros - PYZSTD_C_CLASS: compressor class struct - PYZSTD_D_CLASS: decompressor class struct - PYZSTD_FUN_PREFIX: add prefix to function names */ - -/* Set compressLevel or compression parameters to compression context */ -static int -PYZSTD_FUN_PREFIX(set_c_parameters)(PYZSTD_C_CLASS *self, PyObject *level_or_option) -{ - size_t zstd_ret; - STATE_FROM_OBJ(self); - - /* Integer compression level */ - if (PyLong_Check(level_or_option)) { - const int level = PyLong_AsInt(level_or_option); - if (level == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Compression level should be 32-bit signed int value."); - return -1; - } - - /* Save for generating ZSTD_CDICT */ - self->compression_level = level; - - /* Set compressionLevel to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, - ZSTD_c_compressionLevel, - level); - - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(MODULE_STATE, ERR_SET_C_LEVEL, zstd_ret); - return -1; - } - return 0; - } - - /* Options dict */ - if (PyDict_Check(level_or_option)) { - PyObject *key, *value; - Py_ssize_t pos = 0; - - while (PyDict_Next(level_or_option, &pos, &key, &value)) { - /* Check key type */ - if (Py_TYPE(key) == MS_MEMBER(DParameter_type)) { - PyErr_SetString(PyExc_TypeError, - "Key of compression option dict should " - "NOT be DParameter."); - return -1; - } - - /* Both key & value should be 32-bit signed int */ - const int key_v = PyLong_AsInt(key); - if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of option dict should be 32-bit signed int value."); - return -1; - } - - const int value_v = PyLong_AsInt(value); - if (value_v == -1 && PyErr_Occurred()) { - 
PyErr_SetString(PyExc_ValueError, - "Value of option dict should be 32-bit signed int value."); - return -1; - } - - if (key_v == ZSTD_c_compressionLevel) { - /* Save for generating ZSTD_CDICT */ - self->compression_level = value_v; - } else if (key_v == ZSTD_c_nbWorkers) { - /* From zstd library doc: - 1. When nbWorkers >= 1, triggers asynchronous mode when - used with ZSTD_compressStream2(). - 2, Default value is `0`, aka "single-threaded mode" : no - worker is spawned, compression is performed inside - caller's thread, all invocations are blocking. */ - if (value_v != 0) { - self->use_multithread = 1; - } - } - - /* Set parameter to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); - if (ZSTD_isError(zstd_ret)) { - set_parameter_error(MODULE_STATE, 1, key_v, value_v); - return -1; - } - } - return 0; - } - - /* Wrong type */ - PyErr_SetString(PyExc_TypeError, "level_or_option argument wrong type."); - return -1; -} - -/* Load dictionary or prefix to compression context */ -static int -PYZSTD_FUN_PREFIX(load_c_dict)(PYZSTD_C_CLASS *self, PyObject *dict) -{ - size_t zstd_ret; - STATE_FROM_OBJ(self); - ZstdDict *zd; - int type, ret; - - /* Check ZstdDict */ - ret = PyObject_IsInstance(dict, (PyObject*)MS_MEMBER(ZstdDict_type)); - if (ret < 0) { - return -1; - } else if (ret > 0) { - /* When compressing, use undigested dictionary by default. */ - zd = (ZstdDict*)dict; - type = DICT_TYPE_UNDIGESTED; - goto load; - } - - /* Check (ZstdDict, type) */ - if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { - /* Check ZstdDict */ - ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), - (PyObject*)MS_MEMBER(ZstdDict_type)); - if (ret < 0) { - return -1; - } else if (ret > 0) { - /* type == -1 may indicate an error. 
*/ - type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED || - type == DICT_TYPE_UNDIGESTED || - type == DICT_TYPE_PREFIX) - { - assert(type >= 0); - zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - goto load; - } - } - } - - /* Wrong type */ - PyErr_SetString(PyExc_TypeError, - "zstd_dict argument should be ZstdDict object."); - return -1; - -load: - if (type == DICT_TYPE_DIGESTED) { - /* Get ZSTD_CDict */ - ZSTD_CDict *c_dict = _get_CDict(zd, self->compression_level); - if (c_dict == NULL) { - return -1; - } - /* Reference a prepared dictionary. - It overrides some compression context's parameters. */ - zstd_ret = ZSTD_CCtx_refCDict(self->cctx, c_dict); - } else if (type == DICT_TYPE_UNDIGESTED) { - /* Load a dictionary. - It doesn't override compression context's parameters. */ - zstd_ret = ZSTD_CCtx_loadDictionary( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - } else if (type == DICT_TYPE_PREFIX) { - /* Load a prefix */ - zstd_ret = ZSTD_CCtx_refPrefix( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - } else { - /* Impossible code path */ - PyErr_SetString(PyExc_SystemError, - "load_c_dict() impossible code path"); - return -1; - } - - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(MODULE_STATE, ERR_LOAD_C_DICT, zstd_ret); - return -1; - } - return 0; -} - -/* Set decompression parameters to decompression context */ -static int -PYZSTD_FUN_PREFIX(set_d_parameters)(PYZSTD_D_CLASS *self, PyObject *option) -{ - size_t zstd_ret; - PyObject *key, *value; - Py_ssize_t pos; - STATE_FROM_OBJ(self); - - if (!PyDict_Check(option)) { - PyErr_SetString(PyExc_TypeError, - "option argument should be dict object."); - return -1; - } - - pos = 0; - while (PyDict_Next(option, &pos, &key, &value)) { - /* Check key type */ - if (Py_TYPE(key) == MS_MEMBER(CParameter_type)) { - PyErr_SetString(PyExc_TypeError, - "Key of decompression option dict should " - "NOT be 
CParameter."); - return -1; - } - - /* Both key & value should be 32-bit signed int */ - const int key_v = PyLong_AsInt(key); - if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of option dict should be 32-bit signed integer value."); - return -1; - } - - const int value_v = PyLong_AsInt(value); - if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Value of option dict should be 32-bit signed integer value."); - return -1; - } - - /* Set parameter to compression context */ - zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); - - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - set_parameter_error(MODULE_STATE, 0, key_v, value_v); - return -1; - } - } - return 0; -} - -/* Load dictionary or prefix to decompression context */ -static int -PYZSTD_FUN_PREFIX(load_d_dict)(PYZSTD_D_CLASS *self, PyObject *dict) -{ - size_t zstd_ret; - STATE_FROM_OBJ(self); - ZstdDict *zd; - int type, ret; - - /* Check ZstdDict */ - ret = PyObject_IsInstance(dict, (PyObject*)MS_MEMBER(ZstdDict_type)); - if (ret < 0) { - return -1; - } else if (ret > 0) { - /* When decompressing, use digested dictionary by default. */ - zd = (ZstdDict*)dict; - type = DICT_TYPE_DIGESTED; - goto load; - } - - /* Check (ZstdDict, type) */ - if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { - /* Check ZstdDict */ - ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), - (PyObject*)MS_MEMBER(ZstdDict_type)); - if (ret < 0) { - return -1; - } else if (ret > 0) { - /* type == -1 may indicate an error. 
*/ - type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED || - type == DICT_TYPE_UNDIGESTED || - type == DICT_TYPE_PREFIX) - { - assert(type >= 0); - zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - goto load; - } - } - } - - /* Wrong type */ - PyErr_SetString(PyExc_TypeError, - "zstd_dict argument should be ZstdDict object."); - return -1; - -load: - if (type == DICT_TYPE_DIGESTED) { - /* Get ZSTD_DDict */ - ZSTD_DDict *d_dict = _get_DDict(zd); - if (d_dict == NULL) { - return -1; - } - /* Reference a prepared dictionary */ - zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict); - } else if (type == DICT_TYPE_UNDIGESTED) { - /* Load a dictionary */ - zstd_ret = ZSTD_DCtx_loadDictionary( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - } else if (type == DICT_TYPE_PREFIX) { - /* Load a prefix */ - zstd_ret = ZSTD_DCtx_refPrefix( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - } else { - /* Impossible code path */ - PyErr_SetString(PyExc_SystemError, - "load_d_dict() impossible code path"); - return -1; - } - - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(MODULE_STATE, ERR_LOAD_D_DICT, zstd_ret); - return -1; - } - return 0; -} diff --git a/src/bin_ext/output_buffer.h b/src/bin_ext/output_buffer.h deleted file mode 100644 index 38bfb2c..0000000 --- a/src/bin_ext/output_buffer.h +++ /dev/null @@ -1,526 +0,0 @@ -#include "pyzstd.h" - -/* This file has two codes: - 1, mremap output buffer - When realloc, mremap can avoid memcpy, so this code uses - _PyBytes_Resize to extend output buffer. - 2, Blocks output buffer - This code uses blocks to represent output buffer, it can - provide decent performance on systems without mremap. */ - -/* Only use mremap output buffer on Linux. - On macOS, mremap can only be used for shrinking, can't be used for expanding. - CPython 3.13+ use mimalloc, currently it doesn't support mremap. - 0x030D0000 is Python 3.13. 
This condition can be removed when: - 1, CPython no longer uses mimalloc. - 2, CPython's mimalloc supports mremap. - 3, _PyBytes_FromSize() uses PyMem_RawMalloc(), rather than PyObject_Malloc(). */ -#if defined(__linux__) && defined(_GNU_SOURCE) && \ - PY_VERSION_HEX < 0x030D0000 && !defined(PYZSTD_NO_MREMAP) -# define MREMAP_OUTPUT_BUFFER -#else -# define BLOCKS_OUTPUT_BUFFER -#endif - -#define KB (1024) -#define MB (1024*KB) -static const char unable_allocate_msg[] = "Unable to allocate output buffer."; - -/* Resize a bytes object. - Return 0 on success. - Return -1 on failure, and *obj is set to NULL. */ -FORCE_INLINE int -resize_bytes(PyObject **obj, - const Py_ssize_t old_size, - const Py_ssize_t new_size, - const int RESIZE_FOR_0_SIZE) -{ - assert(Py_SIZE(*obj) == old_size); - - if (old_size == 0 && PY_VERSION_HEX < 0x030800B1) { - /* In CPython 3.7-, 0-length bytes object can't be resized, - see bpo-33817. 0x030800B1 is 3.8 Beta 1. */ - if (RESIZE_FOR_0_SIZE) { - Py_DECREF(*obj); - *obj = PyBytes_FromStringAndSize(NULL, new_size); - if (*obj == NULL) { - return -1; - } - } else { - assert(new_size == 0); - } - } else { - /* Resize */ - if (_PyBytes_Resize(obj, new_size) < 0) { - /* *obj is set to NULL */ - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - return -1; - } - } - return 0; -} - -#if defined(MREMAP_OUTPUT_BUFFER) -/* ----------------------------- - mremap output buffer code - ----------------------------- */ -#define PYZSTD_OB_INIT_SIZE (16*KB) - -typedef struct { - /* Bytes object */ - PyObject *obj; - /* Max length of the buffer, negative number for unlimited length. */ - Py_ssize_t max_length; -} MremapBuffer; -#define PYZSTD_OUTPUT_BUFFER(BUFFER) \ - MremapBuffer BUFFER = {.obj = NULL}; - -/* Initialize the buffer, and grow the buffer. - max_length: Max length of the buffer, -1 for unlimited length. 
- Return 0 on success - Return -1 on failure */ -static inline int -OutputBuffer_InitAndGrow(MremapBuffer *buffer, ZSTD_outBuffer *ob, - const Py_ssize_t max_length) -{ - PyObject *b; - Py_ssize_t b_size; - - /* Ensure .obj was set to NULL */ - assert(buffer->obj == NULL); - - /* Initial size */ - if (0 <= max_length && max_length < PYZSTD_OB_INIT_SIZE) { - b_size = max_length; - } else { - b_size = PYZSTD_OB_INIT_SIZE; - } - - /* bytes object */ - b = PyBytes_FromStringAndSize(NULL, b_size); - if (b == NULL) { - return -1; - } - - /* Set variables */ - buffer->obj = b; - buffer->max_length = max_length; - - ob->dst = PyBytes_AS_STRING(b); - ob->size = (size_t) b_size; - ob->pos = 0; - return 0; -} - -/* Initialize the buffer, with an initial size. - init_size: the initial size. - Return 0 on success - Return -1 on failure */ -static inline int -OutputBuffer_InitWithSize(MremapBuffer *buffer, ZSTD_outBuffer *ob, - const Py_ssize_t max_length, - const Py_ssize_t init_size) -{ - PyObject *b; - Py_ssize_t b_size; - - /* Ensure .obj was set to NULL */ - assert(buffer->obj == NULL); - - /* Initial size */ - if (0 <= max_length && max_length < init_size) { - b_size = max_length; - } else { - b_size = init_size; - } - - /* bytes object */ - b = PyBytes_FromStringAndSize(NULL, b_size); - if (b == NULL) { - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - return -1; - } - - /* Set variables */ - buffer->obj = b; - buffer->max_length = max_length; - - ob->dst = PyBytes_AS_STRING(b); - ob->size = (size_t) b_size; - ob->pos = 0; - return 0; -} - -/* Grow the buffer. The avail_out must be 0, please check it before calling. 
- Return 0 on success - Return -1 on failure */ -static inline int -OutputBuffer_Grow(MremapBuffer *buffer, ZSTD_outBuffer *ob) -{ - Py_ssize_t new_size; - const Py_ssize_t old_size = Py_SIZE(buffer->obj); - const Py_ssize_t max_length = buffer->max_length; - - /* Ensure no gaps in the data */ - assert(ob->pos == ob->size); - - /* Get new size, note that it can't be 0. - This growth works well on 64-bit Ubuntu 22.04 (glibc 2.35). */ - if (old_size == 0) { - new_size = PYZSTD_OB_INIT_SIZE; - } else if (old_size <= 16*KB) { - new_size = 64*KB; - } else if (old_size <= 64*KB) { - new_size = 128*KB; - } else if (old_size <= 64*MB) { - new_size = old_size + 128*KB; - } else { - new_size = old_size + (old_size >> 6); - - /* Check overflow. - In 32-bit build, at most 32MiB (~2GiB >> 6) may be wasted. */ - if (new_size < 0) { - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - return -1; - } - } - - /* Check max_length */ - if (0 <= max_length && max_length < new_size) { - new_size = max_length; - assert(new_size > old_size); - } - - /* Resize */ - if (resize_bytes(&buffer->obj, old_size, new_size, 1) < 0) { - return -1; - } - - /* Set variables */ - ob->dst = PyBytes_AS_STRING(buffer->obj) + old_size; - ob->size = (size_t)(new_size - old_size); - ob->pos = 0; - return 0; -} - -/* Whether the output data has reached max_length. - The avail_out must be 0, please check it before calling. */ -static inline int -OutputBuffer_ReachedMaxLength(MremapBuffer *buffer, ZSTD_outBuffer *ob) -{ - /* Ensure (data size == allocated size) */ - assert(ob->pos == ob->size); - - return Py_SIZE(buffer->obj) == buffer->max_length; -} - -/* Finish the buffer. 
- Return a bytes object on success - Return NULL on failure */ -static inline PyObject * -OutputBuffer_Finish(MremapBuffer *buffer, ZSTD_outBuffer *ob) -{ - PyObject *ret; - const Py_ssize_t old_size = Py_SIZE(buffer->obj); - const Py_ssize_t new_size = old_size - (ob->size - ob->pos); - - /* Resize */ - if (resize_bytes(&buffer->obj, old_size, new_size, 0) < 0) { - return NULL; - } - - ret = buffer->obj; - buffer->obj = NULL; - return ret; -} - -/* Clean up the buffer */ -static inline void -OutputBuffer_OnError(MremapBuffer *buffer) -{ - Py_CLEAR(buffer->obj); -} - -#elif defined(BLOCKS_OUTPUT_BUFFER) -/* ----------------------------- - Blocks output buffer code - ----------------------------- */ -typedef struct { - /* List of blocks */ - PyObject *list; - /* Number of whole allocated size */ - Py_ssize_t allocated; - /* Max length of the buffer, negative number for unlimited length. */ - Py_ssize_t max_length; -} BlocksBuffer; -#define PYZSTD_OUTPUT_BUFFER(BUFFER) \ - BlocksBuffer BUFFER = {.list = NULL}; - -/* Block size sequence */ -static const Py_ssize_t BUFFER_BLOCK_SIZE[] = - /* If change this list, also change: - The CFFI implementation - OutputBufferTestCase unittest - If change the first blocks's size, also change: - _32_KiB in ZstdFile/SeekableZstdFile - FileTestCase.test_decompress_limited() test */ - { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB, - 32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB, - 256*MB }; - -/* According to the block sizes defined by BUFFER_BLOCK_SIZE, the whole - allocated size growth step is: - 1 32 KB +32 KB - 2 96 KB +64 KB - 3 352 KB +256 KB - 4 1.34 MB +1 MB - 5 5.34 MB +4 MB - 6 13.34 MB +8 MB - 7 29.34 MB +16 MB - 8 45.34 MB +16 MB - 9 77.34 MB +32 MB - 10 109.34 MB +32 MB - 11 141.34 MB +32 MB - 12 173.34 MB +32 MB - 13 237.34 MB +64 MB - 14 301.34 MB +64 MB - 15 429.34 MB +128 MB - 16 557.34 MB +128 MB - 17 813.34 MB +256 MB - 18 1069.34 MB +256 MB - 19 1325.34 MB +256 MB - 20 1581.34 MB +256 MB - 21 
1837.34 MB +256 MB - 22 2093.34 MB +256 MB - ... */ - -/* Initialize the buffer, and grow the buffer. - max_length: Max length of the buffer, -1 for unlimited length. - Return 0 on success - Return -1 on failure */ -static inline int -OutputBuffer_InitAndGrow(BlocksBuffer *buffer, ZSTD_outBuffer *ob, - const Py_ssize_t max_length) -{ - PyObject *b; - Py_ssize_t block_size; - - /* Ensure .list was set to NULL */ - assert(buffer->list == NULL); - - /* Get block size */ - if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) { - block_size = max_length; - } else { - block_size = BUFFER_BLOCK_SIZE[0]; - } - - /* The first block */ - b = PyBytes_FromStringAndSize(NULL, block_size); - if (b == NULL) { - return -1; - } - - /* Create the list */ - buffer->list = PyList_New(1); - if (buffer->list == NULL) { - Py_DECREF(b); - return -1; - } - PyList_SET_ITEM(buffer->list, 0, b); - - /* Set variables */ - buffer->allocated = block_size; - buffer->max_length = max_length; - - ob->dst = PyBytes_AS_STRING(b); - ob->size = (size_t) block_size; - ob->pos = 0; - return 0; -} - -/* Initialize the buffer, with an initial size. - init_size: the initial size. 
- Return 0 on success - Return -1 on failure */ -static inline int -OutputBuffer_InitWithSize(BlocksBuffer *buffer, ZSTD_outBuffer *ob, - const Py_ssize_t max_length, - const Py_ssize_t init_size) -{ - PyObject *b; - Py_ssize_t block_size; - - /* Ensure .list was set to NULL */ - assert(buffer->list == NULL); - - /* Get block size */ - if (0 <= max_length && max_length < init_size) { - block_size = max_length; - } else { - block_size = init_size; - } - - /* The first block */ - b = PyBytes_FromStringAndSize(NULL, block_size); - if (b == NULL) { - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - return -1; - } - - /* Create the list */ - buffer->list = PyList_New(1); - if (buffer->list == NULL) { - Py_DECREF(b); - return -1; - } - PyList_SET_ITEM(buffer->list, 0, b); - - /* Set variables */ - buffer->allocated = block_size; - buffer->max_length = max_length; - - ob->dst = PyBytes_AS_STRING(b); - ob->size = (size_t) block_size; - ob->pos = 0; - return 0; -} - -/* Grow the buffer. The avail_out must be 0, please check it before calling. - Return 0 on success - Return -1 on failure */ -static inline int -OutputBuffer_Grow(BlocksBuffer *buffer, ZSTD_outBuffer *ob) -{ - PyObject *b; - const Py_ssize_t list_len = Py_SIZE(buffer->list); - Py_ssize_t block_size; - int append_ret; - - /* Ensure no gaps in the data */ - assert(ob->pos == ob->size); - - /* Get block size */ - if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) { - block_size = BUFFER_BLOCK_SIZE[list_len]; - } else { - block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1]; - } - - /* Check max_length */ - if (buffer->max_length >= 0) { - /* If (rest == 0), should not grow the buffer. 
*/ - Py_ssize_t rest = buffer->max_length - buffer->allocated; - assert(rest > 0); - - /* block_size of the last block */ - if (block_size > rest) { - block_size = rest; - } - } - - /* Check buffer->allocated overflow */ - if (block_size > PY_SSIZE_T_MAX - buffer->allocated) { - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - return -1; - } - - /* Create the block */ - b = PyBytes_FromStringAndSize(NULL, block_size); - if (b == NULL) { - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - return -1; - } - - /* Append to list */ - append_ret = PyList_Append(buffer->list, b); - Py_DECREF(b); - if (append_ret < 0) { - return -1; - } - - /* Set variables */ - buffer->allocated += block_size; - - ob->dst = PyBytes_AS_STRING(b); - ob->size = (size_t) block_size; - ob->pos = 0; - return 0; -} - -/* Whether the output data has reached max_length. - The avail_out must be 0, please check it before calling. */ -static inline int -OutputBuffer_ReachedMaxLength(BlocksBuffer *buffer, ZSTD_outBuffer *ob) -{ - /* Ensure (data size == allocated size) */ - assert(ob->pos == ob->size); - - return buffer->allocated == buffer->max_length; -} - -/* Finish the buffer. - Return a bytes object on success - Return NULL on failure */ -static inline PyObject * -OutputBuffer_Finish(BlocksBuffer *buffer, ZSTD_outBuffer *ob) -{ - PyObject *result, *block; - const Py_ssize_t list_len = Py_SIZE(buffer->list); - - /* Fast path for single block */ - if (list_len == 1 || (list_len == 2 && ob->pos == 0)) { - /* Clear .list */ - block = PyList_GET_ITEM(buffer->list, 0); - Py_INCREF(block); - Py_CLEAR(buffer->list); - - /* Resize */ - if (list_len == 1) { - /* Resize. On failure, block is set to NULL. 
*/ - resize_bytes(&block, Py_SIZE(block), ob->pos, 0); - } - return block; - } - - /* Final bytes object */ - result = PyBytes_FromStringAndSize( - NULL, - buffer->allocated - (ob->size - ob->pos)); - if (result == NULL) { - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - return NULL; - } - - /* Memory copy */ - if (list_len > 0) { - char *posi = PyBytes_AS_STRING(result); - - /* Blocks except the last one */ - Py_ssize_t i = 0; - for (; i < list_len-1; i++) { - block = PyList_GET_ITEM(buffer->list, i); - memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block)); - posi += Py_SIZE(block); - } - /* The last block */ - block = PyList_GET_ITEM(buffer->list, i); - memcpy(posi, PyBytes_AS_STRING(block), ob->pos); - } else { - assert(Py_SIZE(result) == 0); - } - - Py_CLEAR(buffer->list); - return result; -} - -/* Clean up the buffer */ -static inline void -OutputBuffer_OnError(BlocksBuffer *buffer) -{ - Py_CLEAR(buffer->list); -} - -#else -#error "no output buffer code chosen" -#endif diff --git a/src/bin_ext/pyzstd.c b/src/bin_ext/pyzstd.c deleted file mode 100644 index ada3f90..0000000 --- a/src/bin_ext/pyzstd.c +++ /dev/null @@ -1,628 +0,0 @@ -#include "pyzstd.h" -#include "dict.c" -#include "compressor.c" -#include "decompressor.c" -#include "file.c" -#include "stream.c" - -/* -------------------------- - Module level functions - -------------------------- */ -PyDoc_STRVAR(_get_param_bounds_doc, -"Internal function, get CParameter/DParameter bounds."); - -static PyObject * -_get_param_bounds(PyObject *module, PyObject *args) -{ - int is_compress; - int parameter; - - ZSTD_bounds bound; - - if (!PyArg_ParseTuple(args, "ii:_get_param_bounds", &is_compress, &parameter)) { - return NULL; - } - - if (is_compress) { - bound = ZSTD_cParam_getBounds(parameter); - if (ZSTD_isError(bound.error)) { - STATE_FROM_MODULE(module); - set_zstd_error(MODULE_STATE, ERR_GET_C_BOUNDS, bound.error); - return NULL; - } - } else { - bound = ZSTD_dParam_getBounds(parameter); - if 
(ZSTD_isError(bound.error)) { - STATE_FROM_MODULE(module); - set_zstd_error(MODULE_STATE, ERR_GET_D_BOUNDS, bound.error); - return NULL; - } - } - - return Py_BuildValue("ii", bound.lowerBound, bound.upperBound); -} - -PyDoc_STRVAR(get_frame_size_doc, -"get_frame_size(frame_buffer)\n" -"----\n" -"Get the size of a zstd frame, including frame header and 4-byte checksum if it\n" -"has.\n\n" -"It will iterate all blocks' header within a frame, to accumulate the frame size.\n\n" -"Parameter\n" -"frame_buffer: A bytes-like object, it should starts from the beginning of a\n" -" frame, and contains at least one complete frame."); - -static PyObject * -get_frame_size(PyObject *module, PyObject *args) -{ - Py_buffer frame_buffer; - - size_t frame_size; - PyObject *ret; - - if (!PyArg_ParseTuple(args, "y*:get_frame_size", &frame_buffer)) { - return NULL; - } - - frame_size = ZSTD_findFrameCompressedSize(frame_buffer.buf, frame_buffer.len); - if (ZSTD_isError(frame_size)) { - STATE_FROM_MODULE(module); - PyErr_Format(MS_MEMBER(ZstdError), - "Error when finding the compressed size of a zstd frame. " - "Make sure the frame_buffer argument starts from the " - "beginning of a frame, and its length not less than this " - "complete frame. 
Zstd error message: %s.", - ZSTD_getErrorName(frame_size)); - goto error; - } - - ret = PyLong_FromSize_t(frame_size); - if (ret == NULL) { - goto error; - } - goto success; - -error: - ret = NULL; -success: - PyBuffer_Release(&frame_buffer); - return ret; -} - -PyDoc_STRVAR(_get_frame_info_doc, -"Internal function, get zstd frame information from a frame header."); - -static PyObject * -_get_frame_info(PyObject *module, PyObject *args) -{ - Py_buffer frame_buffer; - - uint64_t decompressed_size; - uint32_t dict_id; - PyObject *ret = NULL; - - if (!PyArg_ParseTuple(args, "y*:_get_frame_info", &frame_buffer)) { - return NULL; - } - - /* ZSTD_getFrameContentSize */ - decompressed_size = ZSTD_getFrameContentSize(frame_buffer.buf, - frame_buffer.len); - - /* #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) - #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) */ - if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) { - STATE_FROM_MODULE(module); - PyErr_SetString(MS_MEMBER(ZstdError), - "Error when getting information from the header of " - "a zstd frame. 
Make sure the frame_buffer argument " - "starts from the beginning of a frame, and its length " - "not less than the frame header (6~18 bytes)."); - goto error; - } - - /* ZSTD_getDictID_fromFrame */ - dict_id = ZSTD_getDictID_fromFrame(frame_buffer.buf, frame_buffer.len); - - /* Build tuple */ - if (decompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) { - ret = Py_BuildValue("OI", Py_None, dict_id); - } else { - ret = Py_BuildValue("KI", decompressed_size, dict_id); - } - - if (ret == NULL) { - goto error; - } - goto success; -error: - Py_CLEAR(ret); -success: - PyBuffer_Release(&frame_buffer); - return ret; -} - -PyDoc_STRVAR(_set_parameter_types_doc, -"Internal function, set CParameter/DParameter types for validity check."); - -static PyObject * -_set_parameter_types(PyObject *module, PyObject *args) -{ - PyObject *c_parameter_type; - PyObject *d_parameter_type; - STATE_FROM_MODULE(module); - - if (!PyArg_ParseTuple(args, "OO:_set_parameter_types", &c_parameter_type, &d_parameter_type)) { - return NULL; - } - - if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) { - PyErr_SetString(PyExc_ValueError, - "The two arguments should be CParameter and " - "DParameter types."); - return NULL; - } - - Py_XDECREF(MS_MEMBER(CParameter_type)); - Py_INCREF(c_parameter_type); - MS_MEMBER(CParameter_type) = (PyTypeObject*)c_parameter_type; - - Py_XDECREF(MS_MEMBER(DParameter_type)); - Py_INCREF(d_parameter_type); - MS_MEMBER(DParameter_type) = (PyTypeObject*)d_parameter_type; - - Py_RETURN_NONE; -} - -static PyMethodDef _zstd_methods[] = { - {"decompress", (PyCFunction)decompress, METH_VARARGS|METH_KEYWORDS, decompress_doc}, - {"_train_dict", (PyCFunction)_train_dict, METH_VARARGS, _train_dict_doc}, - {"_finalize_dict", (PyCFunction)_finalize_dict, METH_VARARGS, _finalize_dict_doc}, - {"_get_param_bounds", (PyCFunction)_get_param_bounds, METH_VARARGS, _get_param_bounds_doc}, - {"get_frame_size", (PyCFunction)get_frame_size, METH_VARARGS, get_frame_size_doc}, - 
{"_get_frame_info", (PyCFunction)_get_frame_info, METH_VARARGS, _get_frame_info_doc}, - {"compress_stream", (PyCFunction)compress_stream, METH_VARARGS|METH_KEYWORDS, compress_stream_doc}, - {"decompress_stream", (PyCFunction)decompress_stream, METH_VARARGS|METH_KEYWORDS, decompress_stream_doc}, - {"_set_parameter_types", (PyCFunction)_set_parameter_types, METH_VARARGS, _set_parameter_types_doc}, - {0} -}; - -/* -------------------- - Initialize code - -------------------- */ -#define ADD_INT_PREFIX_MACRO(module, macro) \ - do { \ - if (PyModule_AddIntConstant(module, "_" #macro, macro) < 0) { \ - return -1; \ - } \ - } while(0) - -static int -add_parameters(PyObject *module) -{ - /* If add new parameters, please also add to cp_list/dp_list above. */ - - /* Compression parameters */ - ADD_INT_PREFIX_MACRO(module, ZSTD_c_compressionLevel); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_windowLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_hashLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_chainLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_searchLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_minMatch); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_targetLength); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_strategy); -#if ZSTD_VERSION_NUMBER >= 10506 - ADD_INT_PREFIX_MACRO(module, ZSTD_c_targetCBlockSize); -#endif - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_enableLongDistanceMatching); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmMinMatch); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmBucketSizeLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashRateLog); - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_contentSizeFlag); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_checksumFlag); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_dictIDFlag); - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_nbWorkers); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_jobSize); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_overlapLog); - - /* Decompression parameters */ - ADD_INT_PREFIX_MACRO(module, ZSTD_d_windowLogMax); - - /* 
ZSTD_strategy enum */ - ADD_INT_PREFIX_MACRO(module, ZSTD_fast); - ADD_INT_PREFIX_MACRO(module, ZSTD_dfast); - ADD_INT_PREFIX_MACRO(module, ZSTD_greedy); - ADD_INT_PREFIX_MACRO(module, ZSTD_lazy); - ADD_INT_PREFIX_MACRO(module, ZSTD_lazy2); - ADD_INT_PREFIX_MACRO(module, ZSTD_btlazy2); - ADD_INT_PREFIX_MACRO(module, ZSTD_btopt); - ADD_INT_PREFIX_MACRO(module, ZSTD_btultra); - ADD_INT_PREFIX_MACRO(module, ZSTD_btultra2); - - return 0; -} - -static inline PyObject * -get_zstd_version_info(void) -{ - const uint32_t ver = ZSTD_versionNumber(); - uint32_t major, minor, release; - - major = ver / 10000; - minor = (ver / 100) % 100; - release = ver % 100; - - return Py_BuildValue("III", major, minor, release); -} - -static inline int -add_vars_to_module(PyObject *module) -{ - PyObject *obj; - - /* zstd_version, a str. */ - if (PyModule_AddStringConstant(module, "zstd_version", - ZSTD_versionString()) < 0) { - return -1; - } - - /* zstd_version_info, a tuple. */ - obj = get_zstd_version_info(); - if (PyModule_AddObject(module, "zstd_version_info", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - - /* Add zstd parameters */ - if (add_parameters(module) < 0) { - return -1; - } - - /* _compressionLevel_values: (default, min, max) - ZSTD_defaultCLevel() was added in zstd v1.5.0 */ - obj = Py_BuildValue("iii", -#if ZSTD_VERSION_NUMBER < 10500 - ZSTD_CLEVEL_DEFAULT, -#else - ZSTD_defaultCLevel(), -#endif - ZSTD_minCLevel(), - ZSTD_maxCLevel()); - if (PyModule_AddObject(module, - "_compressionLevel_values", - obj) < 0) { - Py_XDECREF(obj); - return -1; - } - - /* _ZSTD_CStreamSizes */ - obj = Py_BuildValue("II", - (uint32_t)ZSTD_CStreamInSize(), - (uint32_t)ZSTD_CStreamOutSize()); - if (PyModule_AddObject(module, "_ZSTD_CStreamSizes", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - - /* _ZSTD_DStreamSizes */ - obj = Py_BuildValue("II", - (uint32_t)ZSTD_DStreamInSize(), - (uint32_t)ZSTD_DStreamOutSize()); - if (PyModule_AddObject(module, "_ZSTD_DStreamSizes", obj) < 0) { - 
Py_XDECREF(obj); - return -1; - } - - /* PYZSTD_CONFIG */ - obj = Py_BuildValue("isOOO", 8*(int)sizeof(Py_ssize_t), "c", -/* Statically link to zstd lib */ -#ifdef PYZSTD_STATIC_LINK - Py_True, -#else - Py_False, -#endif -/* Use multi-phase initialization */ -#ifdef USE_MULTI_PHASE_INIT - Py_True, -#else - Py_False, -#endif -/* User mremap output buffer */ -#if defined(MREMAP_OUTPUT_BUFFER) - Py_True -#elif defined(BLOCKS_OUTPUT_BUFFER) - Py_False -#endif - ); - if (PyModule_AddObject(module, "PYZSTD_CONFIG", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - - return 0; -} - -static inline int -add_type_to_module(PyObject *module, const char *name, - PyType_Spec *type_spec, PyTypeObject **dest) -{ - PyObject *temp; - -#ifdef USE_MULTI_PHASE_INIT - temp = PyType_FromModuleAndSpec(module, type_spec, NULL); -#else - temp = PyType_FromSpec(type_spec); -#endif - - if (PyModule_AddObject(module, name, temp) < 0) { - Py_XDECREF(temp); - return -1; - } - - Py_INCREF(temp); - *dest = (PyTypeObject*) temp; - - return 0; -} - -static inline int -add_constant_to_type(PyTypeObject *type, const char *name, const long value) -{ - PyObject *temp; - - temp = PyLong_FromLong(value); - if (temp == NULL) { - return -1; - } - - if (PyObject_SetAttrString((PyObject*) type, name, temp) < 0) { - Py_DECREF(temp); - return -1; - } - Py_DECREF(temp); - - return 0; -} - -#define ADD_STR_TO_STATE_MACRO(STR) \ - do { \ - MS_MEMBER(str_##STR) = PyUnicode_FromString(#STR); \ - if (MS_MEMBER(str_##STR) == NULL) { \ - return -1; \ - } \ - } while(0) - -static int _zstd_exec(PyObject *module) { - STATE_FROM_MODULE(module); - - /* Reusable objects & variables */ - MS_MEMBER(empty_bytes) = PyBytes_FromStringAndSize(NULL, 0); - if (MS_MEMBER(empty_bytes) == NULL) { - return -1; - } - - MS_MEMBER(empty_readonly_memoryview) = - PyMemoryView_FromMemory((char*)MODULE_STATE, 0, PyBUF_READ); - if (MS_MEMBER(empty_readonly_memoryview) == NULL) { - return -1; - } - - /* Add str to module state */ - 
ADD_STR_TO_STATE_MACRO(read); - ADD_STR_TO_STATE_MACRO(readinto); - ADD_STR_TO_STATE_MACRO(write); - ADD_STR_TO_STATE_MACRO(flush); - - MS_MEMBER(CParameter_type) = NULL; - MS_MEMBER(DParameter_type) = NULL; - - /* Add variables to module */ - if (add_vars_to_module(module) < 0) { - return -1; - } - - /* ZstdError */ - MS_MEMBER(ZstdError) = PyErr_NewExceptionWithDoc( - "pyzstd.ZstdError", - "Call to the underlying zstd library failed.", - NULL, NULL); - if (MS_MEMBER(ZstdError) == NULL) { - return -1; - } - - Py_INCREF(MS_MEMBER(ZstdError)); - if (PyModule_AddObject(module, "ZstdError", MS_MEMBER(ZstdError)) < 0) { - Py_DECREF(MS_MEMBER(ZstdError)); - return -1; - } - - /* ZstdDict */ - if (add_type_to_module(module, - "ZstdDict", - &zstddict_type_spec, - &MS_MEMBER(ZstdDict_type)) < 0) { - return -1; - } - - /* ZstdCompressor */ - if (add_type_to_module(module, - "ZstdCompressor", - &zstdcompressor_type_spec, - &MS_MEMBER(ZstdCompressor_type)) < 0) { - return -1; - } - - /* Add EndDirective enum to ZstdCompressor */ - if (add_constant_to_type(MS_MEMBER(ZstdCompressor_type), - "CONTINUE", - ZSTD_e_continue) < 0) { - return -1; - } - - if (add_constant_to_type(MS_MEMBER(ZstdCompressor_type), - "FLUSH_BLOCK", - ZSTD_e_flush) < 0) { - return -1; - } - - if (add_constant_to_type(MS_MEMBER(ZstdCompressor_type), - "FLUSH_FRAME", - ZSTD_e_end) < 0) { - return -1; - } - - /* RichMemZstdCompressor */ - if (add_type_to_module(module, - "RichMemZstdCompressor", - &richmem_zstdcompressor_type_spec, - &MS_MEMBER(RichMemZstdCompressor_type)) < 0) { - return -1; - } - - /* ZstdDecompressor */ - if (add_type_to_module(module, - "ZstdDecompressor", - &ZstdDecompressor_type_spec, - &MS_MEMBER(ZstdDecompressor_type)) < 0) { - return -1; - } - - /* EndlessZstdDecompressor */ - if (add_type_to_module(module, - "EndlessZstdDecompressor", - &EndlessZstdDecompressor_type_spec, - &MS_MEMBER(EndlessZstdDecompressor_type)) < 0) { - return -1; - } - - /* ZstdFileReader */ - if 
(add_type_to_module(module, - "ZstdFileReader", - &ZstdFileReader_type_spec, - &MS_MEMBER(ZstdFileReader_type)) < 0) { - return -1; - } - - /* ZstdFileWriter */ - if (add_type_to_module(module, - "ZstdFileWriter", - &ZstdFileWriter_type_spec, - &MS_MEMBER(ZstdFileWriter_type)) < 0) { - return -1; - } - - return 0; -} - -static int -_zstd_traverse(PyObject *module, visitproc visit, void *arg) -{ - STATE_FROM_MODULE(module); - - Py_VISIT(MS_MEMBER(empty_bytes)); - Py_VISIT(MS_MEMBER(empty_readonly_memoryview)); - Py_VISIT(MS_MEMBER(str_read)); - Py_VISIT(MS_MEMBER(str_readinto)); - Py_VISIT(MS_MEMBER(str_write)); - Py_VISIT(MS_MEMBER(str_flush)); - - Py_VISIT(MS_MEMBER(ZstdDict_type)); - Py_VISIT(MS_MEMBER(ZstdCompressor_type)); - Py_VISIT(MS_MEMBER(RichMemZstdCompressor_type)); - Py_VISIT(MS_MEMBER(ZstdDecompressor_type)); - Py_VISIT(MS_MEMBER(EndlessZstdDecompressor_type)); - Py_VISIT(MS_MEMBER(ZstdFileReader_type)); - Py_VISIT(MS_MEMBER(ZstdFileWriter_type)); - Py_VISIT(MS_MEMBER(ZstdError)); - - Py_VISIT(MS_MEMBER(CParameter_type)); - Py_VISIT(MS_MEMBER(DParameter_type)); - return 0; -} - -static int -_zstd_clear(PyObject *module) -{ - STATE_FROM_MODULE(module); - - Py_CLEAR(MS_MEMBER(empty_bytes)); - Py_CLEAR(MS_MEMBER(empty_readonly_memoryview)); - Py_CLEAR(MS_MEMBER(str_read)); - Py_CLEAR(MS_MEMBER(str_readinto)); - Py_CLEAR(MS_MEMBER(str_write)); - Py_CLEAR(MS_MEMBER(str_flush)); - - Py_CLEAR(MS_MEMBER(ZstdDict_type)); - Py_CLEAR(MS_MEMBER(ZstdCompressor_type)); - Py_CLEAR(MS_MEMBER(RichMemZstdCompressor_type)); - Py_CLEAR(MS_MEMBER(ZstdDecompressor_type)); - Py_CLEAR(MS_MEMBER(EndlessZstdDecompressor_type)); - Py_CLEAR(MS_MEMBER(ZstdFileReader_type)); - Py_CLEAR(MS_MEMBER(ZstdFileWriter_type)); - Py_CLEAR(MS_MEMBER(ZstdError)); - - Py_CLEAR(MS_MEMBER(CParameter_type)); - Py_CLEAR(MS_MEMBER(DParameter_type)); - return 0; -} - -static void -_zstd_free(void *module) -{ - _zstd_clear((PyObject *)module); -} - -#ifdef USE_MULTI_PHASE_INIT -static PyModuleDef_Slot 
_zstd_slots[] = { - {Py_mod_exec, _zstd_exec}, - // note: we do NOT support multiple interpreters - // note: we do NOT support free-threading - {0} -}; -#endif - -static PyModuleDef _zstdmodule = { - PyModuleDef_HEAD_INIT, - .m_name = "_zstd", -#ifdef USE_MULTI_PHASE_INIT - .m_size = sizeof(_zstd_state), - .m_slots = _zstd_slots, -#else - .m_size = -1, -#endif - .m_methods = _zstd_methods, - .m_traverse = _zstd_traverse, - .m_clear = _zstd_clear, - .m_free = _zstd_free -}; - -#ifdef USE_MULTI_PHASE_INIT -/* For forward declaration of _zstdmodule */ -static inline PyModuleDef* _get_zstd_PyModuleDef() -{ - return &_zstdmodule; -} -#endif - -PyMODINIT_FUNC -PyInit__zstd(void) -{ -#ifdef USE_MULTI_PHASE_INIT - return PyModuleDef_Init(&_zstdmodule); -#else - PyObject *module; - module = PyModule_Create(&_zstdmodule); - if (module == NULL) { - return NULL; - } - if (_zstd_exec(module) != 0) { - Py_DECREF(module); - return NULL; - } - return module; -#endif -} diff --git a/src/bin_ext/pyzstd.h b/src/bin_ext/pyzstd.h deleted file mode 100644 index 62bcc53..0000000 --- a/src/bin_ext/pyzstd.h +++ /dev/null @@ -1,663 +0,0 @@ -/* https://github.com/Rogdham/pyzstd */ - -#ifndef PYZSTD_H_INCLUDED -#define PYZSTD_H_INCLUDED - -#include "stdint.h" /* For MSVC + Python 3.5 */ - -#include "Python.h" -#include "pythread.h" /* For Python 3.5 */ -#include "structmember.h" - -#include "zstd.h" -#include "zdict.h" - -#if ZSTD_VERSION_NUMBER < 10400 - #error "pyzstd module requires zstd v1.4.0+" -#endif - -/* Added in Python 3.7 */ -#ifndef Py_UNREACHABLE - #define Py_UNREACHABLE() assert(0) -#endif - -/* Added in Python 3.13 */ -#if PY_VERSION_HEX < 0x030D00A1 - #define PyLong_AsInt _PyLong_AsInt -#endif - -/* Multi-phase init (PEP-489) */ -#if PY_VERSION_HEX < 0x030B00B1 && defined(USE_MULTI_PHASE_INIT) - /* PyType_GetModuleByDef() function is available on CPython 3.11+. - 0x030B00B1 is 3.11 Beta 1. 
*/ - #undef USE_MULTI_PHASE_INIT -#elif PY_VERSION_HEX >= 0x030C00B1 && !defined(USE_MULTI_PHASE_INIT) - /* CPython 3.12+ have per-interpreter GIL, always enable to prevent - confusion. 0x030C00B1 is 3.12 Beta 1. */ - #define USE_MULTI_PHASE_INIT -#endif - -/* Force inlining. Same as zstd library. */ -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE static inline __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE static inline __forceinline -#else -# define FORCE_INLINE static inline -#endif - -/* Force no inlining. Same as zstd library. */ -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_NO_INLINE static __attribute__((__noinline__)) -#elif defined(_MSC_VER) -# define FORCE_NO_INLINE static __declspec(noinline) -#else -# define FORCE_NO_INLINE static -#endif - -#include "output_buffer.h" - -/* Forward declaration */ -typedef struct _zstd_state _zstd_state; - -typedef struct { - PyObject_HEAD - - /* Thread lock for generating ZSTD_CDict/ZSTD_DDict */ - PyThread_type_lock lock; - - /* Reusable compress/decompress dictionary, they are created once and - can be shared by multiple threads concurrently, since its usage is - read-only. - c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */ - ZSTD_DDict *d_dict; - PyObject *c_dicts; - - /* Content of the dictionary, bytes object. */ - PyObject *dict_content; - /* Dictionary id */ - uint32_t dict_id; - - /* __init__ has been called, 0 or 1. */ - int inited; - -#ifdef USE_MULTI_PHASE_INIT - _zstd_state *module_state; -#endif -} ZstdDict; - -typedef struct { - PyObject_HEAD - - /* Thread lock for compressing */ - PyThread_type_lock lock; - - /* Compression context */ - ZSTD_CCtx *cctx; - - /* ZstdDict object in use */ - PyObject *dict; - - /* Last mode, initialized to ZSTD_e_end */ - int last_mode; - - /* (nbWorker >= 1) ? 1 : 0 */ - int use_multithread; - - /* Compression level */ - int compression_level; - - /* __init__ has been called, 0 or 1. 
*/ - int inited; - -#ifdef USE_MULTI_PHASE_INIT - _zstd_state *module_state; -#endif -} ZstdCompressor; - -typedef struct { - PyObject_HEAD - - /* Thread lock for compressing */ - PyThread_type_lock lock; - - /* Decompression context */ - ZSTD_DCtx *dctx; - - /* ZstdDict object in use */ - PyObject *dict; - - /* Unconsumed input data */ - char *input_buffer; - size_t input_buffer_size; - size_t in_begin, in_end; - - /* Unused data */ - PyObject *unused_data; - - /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */ - char needs_input; - - /* For EndlessZstdDecompressor, 0 or 1. - 1 when both input and output streams are at a frame edge, means a - frame is completely decoded and fully flushed, or the decompressor - just be initialized. */ - char at_frame_edge; - - /* For ZstdDecompressor, 0 or 1. - 1 means the end of the first frame has been reached. */ - char eof; - - /* Used for fast reset above three variables */ - char _unused_char_for_align; - - /* __init__ has been called, 0 or 1. */ - int inited; - -#ifdef USE_MULTI_PHASE_INIT - _zstd_state *module_state; -#endif -} ZstdDecompressor; - -struct _zstd_state { - PyObject *empty_bytes; - PyObject *empty_readonly_memoryview; - PyObject *str_read; - PyObject *str_readinto; - PyObject *str_write; - PyObject *str_flush; - - PyTypeObject *ZstdDict_type; - PyTypeObject *ZstdCompressor_type; - PyTypeObject *RichMemZstdCompressor_type; - PyTypeObject *ZstdDecompressor_type; - PyTypeObject *EndlessZstdDecompressor_type; - PyTypeObject *ZstdFileReader_type; - PyTypeObject *ZstdFileWriter_type; - PyObject *ZstdError; - - PyTypeObject *CParameter_type; - PyTypeObject *DParameter_type; -}; - -#ifdef USE_MULTI_PHASE_INIT - /* For forward declaration of _zstdmodule */ - static inline PyModuleDef* _get_zstd_PyModuleDef(); - - /* Get module state from a class type, and set it to supported object. - Used in Py_tp_new or Py_tp_init. 
*/ - #define SET_STATE_TO_OBJ(type, obj) \ - do { \ - PyModuleDef* const module_def = _get_zstd_PyModuleDef(); \ - PyObject *module = PyType_GetModuleByDef(type, module_def); \ - if (module == NULL) { \ - goto error; \ - } \ - (obj)->module_state = (_zstd_state*)PyModule_GetState(module);\ - if ((obj)->module_state == NULL) { \ - goto error; \ - } \ - } while (0) - /* Get module state from module object */ - #define STATE_FROM_MODULE(module) \ - _zstd_state* const _module_state = (_zstd_state*)PyModule_GetState(module); \ - assert(_module_state != NULL); - /* Get module state from supported object */ - #define STATE_FROM_OBJ(obj) \ - _zstd_state* const _module_state = (obj)->module_state; \ - assert(_module_state != NULL); - /* Place as module state. Only as r-value. */ - #define MODULE_STATE (1 ? _module_state : NULL) - /* Access a member of module state. Can be l-value or r-value. */ - #define MS_MEMBER(member) (_module_state->member) -#else /* Don't use multi-phase init */ - static _zstd_state static_state; - - /* Get module state from a class type, and set it to supported object. - Used in Py_tp_new or Py_tp_init. */ - #define SET_STATE_TO_OBJ(type, obj) ; - /* Get module state from module object */ - #define STATE_FROM_MODULE(module) ; - /* Get module state from supported object */ - #define STATE_FROM_OBJ(obj) ; - /* Place as module state. Only as r-value. */ - #define MODULE_STATE (1 ? &static_state : NULL) - /* Access a member of module state. Can be l-value or r-value. 
*/ - #define MS_MEMBER(member) (static_state.member) -#endif - -/* ------------------ - Global macro - ------------------ */ -#define ACQUIRE_LOCK(obj) do { \ - if (!PyThread_acquire_lock((obj)->lock, 0)) { \ - Py_BEGIN_ALLOW_THREADS \ - PyThread_acquire_lock((obj)->lock, 1); \ - Py_END_ALLOW_THREADS \ - } } while (0) -#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock) - -/* ------------------------- - Parameters from zstd - ------------------------- */ -typedef struct { - const int parameter; - const char parameter_name[32]; -} ParameterInfo; - -static const ParameterInfo cp_list[] = -{ - {ZSTD_c_compressionLevel, "compressionLevel"}, - {ZSTD_c_windowLog, "windowLog"}, - {ZSTD_c_hashLog, "hashLog"}, - {ZSTD_c_chainLog, "chainLog"}, - {ZSTD_c_searchLog, "searchLog"}, - {ZSTD_c_minMatch, "minMatch"}, - {ZSTD_c_targetLength, "targetLength"}, - {ZSTD_c_strategy, "strategy"}, - -#if ZSTD_VERSION_NUMBER >= 10506 - {ZSTD_c_targetCBlockSize, "targetCBlockSize"}, -#endif - - {ZSTD_c_enableLongDistanceMatching, "enableLongDistanceMatching"}, - {ZSTD_c_ldmHashLog, "ldmHashLog"}, - {ZSTD_c_ldmMinMatch, "ldmMinMatch"}, - {ZSTD_c_ldmBucketSizeLog, "ldmBucketSizeLog"}, - {ZSTD_c_ldmHashRateLog, "ldmHashRateLog"}, - - {ZSTD_c_contentSizeFlag, "contentSizeFlag"}, - {ZSTD_c_checksumFlag, "checksumFlag"}, - {ZSTD_c_dictIDFlag, "dictIDFlag"}, - - {ZSTD_c_nbWorkers, "nbWorkers"}, - {ZSTD_c_jobSize, "jobSize"}, - {ZSTD_c_overlapLog, "overlapLog"} -}; - -static const ParameterInfo dp_list[] = -{ - {ZSTD_d_windowLogMax, "windowLogMax"} -}; - -/* Format an user friendly error message. 
*/ -FORCE_NO_INLINE void -set_parameter_error(const _zstd_state* const state, int is_compress, - int key_v, int value_v) -{ - ParameterInfo const *list; - int list_size; - char const *name; - char *type; - ZSTD_bounds bounds; - int i; - char pos_msg[128]; - - if (is_compress) { - list = cp_list; - list_size = Py_ARRAY_LENGTH(cp_list); - type = "compression"; - } else { - list = dp_list; - list_size = Py_ARRAY_LENGTH(dp_list); - type = "decompression"; - } - - /* Find parameter's name */ - name = NULL; - for (i = 0; i < list_size; i++) { - if (key_v == (list+i)->parameter) { - name = (list+i)->parameter_name; - break; - } - } - - /* Unknown parameter */ - if (name == NULL) { - PyOS_snprintf(pos_msg, sizeof(pos_msg), - "unknown parameter (key %d)", key_v); - name = pos_msg; - } - - /* Get parameter bounds */ - if (is_compress) { - bounds = ZSTD_cParam_getBounds(key_v); - } else { - bounds = ZSTD_dParam_getBounds(key_v); - } - if (ZSTD_isError(bounds.error)) { - PyErr_Format(state->ZstdError, - "Zstd %s parameter \"%s\" is invalid. (zstd v%s)", - type, name, ZSTD_versionString()); - return; - } - - /* Error message */ - PyErr_Format(state->ZstdError, - "Error when setting zstd %s parameter \"%s\", it " - "should %d <= value <= %d, provided value is %d. 
" - "(zstd v%s, %d-bit build)", - type, name, - bounds.lowerBound, bounds.upperBound, value_v, - ZSTD_versionString(), 8*(int)sizeof(Py_ssize_t)); -} - -/* -------------------------------------- - Global functions - - set parameters - - load dictionary - - reduce_cannot_pickle - -------------------------------------- */ -static const char init_twice_msg[] = "__init__ method is called twice."; - -FORCE_INLINE PyObject * -invoke_method_no_arg(PyObject *obj, PyObject *meth) -{ -#if PY_VERSION_HEX < 0x030900B1 - return PyObject_CallMethodObjArgs(obj, meth, NULL); -#else - return PyObject_CallMethodNoArgs(obj, meth); -#endif -} - -FORCE_INLINE PyObject * -invoke_method_one_arg(PyObject *obj, PyObject *meth, PyObject *arg) -{ -#if PY_VERSION_HEX < 0x030900B1 - return PyObject_CallMethodObjArgs(obj, meth, arg, NULL); -#else - return PyObject_CallMethodOneArg(obj, meth, arg); -#endif -} - -typedef enum { - ERR_DECOMPRESS, - ERR_COMPRESS, - ERR_SET_PLEDGED_INPUT_SIZE, - - ERR_LOAD_D_DICT, - ERR_LOAD_C_DICT, - - ERR_GET_C_BOUNDS, - ERR_GET_D_BOUNDS, - ERR_SET_C_LEVEL, - - ERR_TRAIN_DICT, - ERR_FINALIZE_DICT -} error_type; - -typedef enum { - DICT_TYPE_DIGESTED = 0, - DICT_TYPE_UNDIGESTED = 1, - DICT_TYPE_PREFIX = 2 -} dictionary_type; - -/* Format error message and set ZstdError. 
*/ -FORCE_NO_INLINE void -set_zstd_error(const _zstd_state* const state, - const error_type type, const size_t zstd_ret) -{ - char buf[128]; - char *msg; - assert(ZSTD_isError(zstd_ret)); - - switch (type) - { - case ERR_DECOMPRESS: - msg = "Unable to decompress zstd data: %s"; - break; - case ERR_COMPRESS: - msg = "Unable to compress zstd data: %s"; - break; - case ERR_SET_PLEDGED_INPUT_SIZE: - msg = "Unable to set pledged uncompressed content size: %s"; - break; - - case ERR_LOAD_D_DICT: - msg = "Unable to load zstd dictionary or prefix for decompression: %s"; - break; - case ERR_LOAD_C_DICT: - msg = "Unable to load zstd dictionary or prefix for compression: %s"; - break; - - case ERR_GET_C_BOUNDS: - msg = "Unable to get zstd compression parameter bounds: %s"; - break; - case ERR_GET_D_BOUNDS: - msg = "Unable to get zstd decompression parameter bounds: %s"; - break; - case ERR_SET_C_LEVEL: - msg = "Unable to set zstd compression level: %s"; - break; - - case ERR_TRAIN_DICT: - msg = "Unable to train zstd dictionary: %s"; - break; - case ERR_FINALIZE_DICT: - msg = "Unable to finalize zstd dictionary: %s"; - break; - - default: - Py_UNREACHABLE(); - } - PyOS_snprintf(buf, sizeof(buf), msg, ZSTD_getErrorName(zstd_ret)); - PyErr_SetString(state->ZstdError, buf); -} - -static void -capsule_free_cdict(PyObject *capsule) -{ - ZSTD_CDict *cdict = PyCapsule_GetPointer(capsule, NULL); - ZSTD_freeCDict(cdict); -} - -static inline ZSTD_CDict * -_get_CDict(ZstdDict *self, int compressionLevel) -{ - PyObject *level = NULL; - PyObject *capsule; - ZSTD_CDict *cdict; - - ACQUIRE_LOCK(self); - - /* int level object */ - level = PyLong_FromLong(compressionLevel); - if (level == NULL) { - goto error; - } - - /* Get PyCapsule object from self->c_dicts */ - capsule = PyDict_GetItemWithError(self->c_dicts, level); - if (capsule == NULL) { - if (PyErr_Occurred()) { - goto error; - } - - /* Create ZSTD_CDict instance */ - Py_BEGIN_ALLOW_THREADS - cdict = 
ZSTD_createCDict(PyBytes_AS_STRING(self->dict_content), - Py_SIZE(self->dict_content), compressionLevel); - Py_END_ALLOW_THREADS - - if (cdict == NULL) { - STATE_FROM_OBJ(self); - PyErr_SetString(MS_MEMBER(ZstdError), - "Failed to create ZSTD_CDict instance from zstd " - "dictionary content. Maybe the content is corrupted."); - goto error; - } - - /* Put ZSTD_CDict instance into PyCapsule object */ - capsule = PyCapsule_New(cdict, NULL, capsule_free_cdict); - if (capsule == NULL) { - ZSTD_freeCDict(cdict); - goto error; - } - - /* Add PyCapsule object to self->c_dicts */ - if (PyDict_SetItem(self->c_dicts, level, capsule) < 0) { - Py_DECREF(capsule); - goto error; - } - Py_DECREF(capsule); - } else { - /* ZSTD_CDict instance already exists */ - cdict = PyCapsule_GetPointer(capsule, NULL); - } - goto success; - -error: - cdict = NULL; -success: - Py_XDECREF(level); - RELEASE_LOCK(self); - return cdict; -} - -static inline ZSTD_DDict * -_get_DDict(ZstdDict *self) -{ - ZSTD_DDict *ret; - - /* Already created */ - if (self->d_dict != NULL) { - return self->d_dict; - } - - ACQUIRE_LOCK(self); - if (self->d_dict == NULL) { - /* Create ZSTD_DDict instance from dictionary content */ - Py_BEGIN_ALLOW_THREADS - self->d_dict = ZSTD_createDDict(PyBytes_AS_STRING(self->dict_content), - Py_SIZE(self->dict_content)); - Py_END_ALLOW_THREADS - - if (self->d_dict == NULL) { - STATE_FROM_OBJ(self); - PyErr_SetString(MS_MEMBER(ZstdError), - "Failed to create ZSTD_DDict instance from zstd " - "dictionary content. 
Maybe the content is corrupted."); - } - } - - /* Don't lose any exception */ - ret = self->d_dict; - RELEASE_LOCK(self); - - return ret; -} - -/* Generate 4 functions using macro: - 1, set_c_parameters(ZstdCompressor *self, PyObject *level_or_option) - 2, load_c_dict(ZstdCompressor *self, PyObject *dict) - 3, set_d_parameters(ZstdDecompressor *self, PyObject *option) - 4, load_d_dict(ZstdDecompressor *self, PyObject *dict) */ -#undef PYZSTD_C_CLASS -#define PYZSTD_C_CLASS ZstdCompressor -#undef PYZSTD_D_CLASS -#define PYZSTD_D_CLASS ZstdDecompressor -#undef PYZSTD_FUN_PREFIX -#define PYZSTD_FUN_PREFIX(F) F -#include "macro_functions.h" - -/* In multi-thread compression + .CONTINUE mode: If input buffer exhausted, - there may be a lot of data in internal buffer that can be outputted. - This conditional expression output as much as possible. */ -FORCE_INLINE int -mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) { - return in->size == in->pos && out->size != out->pos; -} - -/* Get Py_ssize_t value from the returned object of .readinto()/.write() - methods, and Py_DECREF() the object. - If fp_ret is NULL, or not an integer, or (v < lower || v > upper), set - an error and return -1. */ -FORCE_INLINE Py_ssize_t -check_and_get_fp_ret(char *func_name, PyObject *fp_ret, - Py_ssize_t lower, Py_ssize_t upper) -{ - Py_ssize_t ret_value; - - /* .readinto()/.write() return value should >= 0. - This function returns -1 for failure. 
*/ - assert(lower >= 0); - - /* .readinto()/.write() failed */ - if (fp_ret == NULL) { - return -1; - } - - /* Get Py_ssize_t value */ - ret_value = PyLong_AsSsize_t(fp_ret); - Py_DECREF(fp_ret); - - /* Check bounds */ - assert(lower >= 0); - if (ret_value < lower || ret_value > upper) { - /* Check PyLong_AsSsize_t() failed */ - if (ret_value == -1 && PyErr_Occurred()) { - PyErr_Format(PyExc_TypeError, - "%s return value should be int type", - func_name); - return -1; - } - - PyErr_Format(PyExc_ValueError, - "%s returned invalid length %zd " - "(should be %zd <= value <= %zd)", - func_name, ret_value, - lower, upper); - return -1; - } - return ret_value; -} - -/* Write output data to fp. - If (out->pos == 0), do nothing. */ -FORCE_INLINE int -write_to_fp(const _zstd_state* const state, - char *func_name, - PyObject *fp, ZSTD_outBuffer *out) -{ - PyObject *mv; - PyObject *write_ret; - - /* Data length is 0 */ - if (out->pos == 0) { - return 0; - } - - /* memoryview object */ - mv = PyMemoryView_FromMemory((char*)out->dst, out->pos, PyBUF_READ); - if (mv == NULL) { - goto error; - } - - /* Write */ - write_ret = invoke_method_one_arg(fp, state->str_write, mv); - Py_DECREF(mv); - - /* Check .write() return value */ - if (check_and_get_fp_ret(func_name, write_ret, - out->pos, out->pos) < 0) { - goto error; - } - - return 0; -error: - return -1; -} - -PyDoc_STRVAR(reduce_cannot_pickle_doc, -"Intentionally not supporting pickle."); - -static PyObject * -reduce_cannot_pickle(PyObject *self) -{ - PyErr_Format(PyExc_TypeError, - "Cannot pickle %s object.", - Py_TYPE(self)->tp_name); - return NULL; -} - -#endif /* PYZSTD_H_INCLUDED */ diff --git a/src/bin_ext/stream.c b/src/bin_ext/stream.c deleted file mode 100644 index 7295a2d..0000000 --- a/src/bin_ext/stream.c +++ /dev/null @@ -1,600 +0,0 @@ -#include "pyzstd.h" - -/* Invoke callback function */ -FORCE_INLINE int -invoke_callback(const _zstd_state* const state, PyObject *callback, - ZSTD_inBuffer *in, size_t 
*callback_read_pos, - ZSTD_outBuffer *out, - const uint64_t total_input_size, - const uint64_t total_output_size) -{ - PyObject *in_memoryview; - PyObject *out_memoryview; - PyObject *cb_args; - PyObject *cb_ret; - - /* Only yield input data once */ - const size_t in_size = in->size - *callback_read_pos; - *callback_read_pos = in->size; - - /* Don't yield empty data */ - if (in_size == 0 && out->pos == 0) { - return 0; - } - - /* Input memoryview */ - if (in_size != 0) { - in_memoryview = PyMemoryView_FromMemory((char*) in->src, in_size, PyBUF_READ); - if (in_memoryview == NULL) { - goto error; - } - } else { - in_memoryview = state->empty_readonly_memoryview; - Py_INCREF(in_memoryview); - } - - /* Output memoryview */ - if (out->pos != 0) { - out_memoryview = PyMemoryView_FromMemory(out->dst, out->pos, PyBUF_READ); - if (out_memoryview == NULL) { - Py_DECREF(in_memoryview); - goto error; - } - } else { - out_memoryview = state->empty_readonly_memoryview; - Py_INCREF(out_memoryview); - } - - /* callback function arguments */ - cb_args = Py_BuildValue("KKOO", - total_input_size, total_output_size, - in_memoryview, out_memoryview); - if (cb_args == NULL) { - Py_DECREF(in_memoryview); - Py_DECREF(out_memoryview); - goto error; - } - - /* Callback */ - cb_ret = PyObject_CallObject(callback, cb_args); - Py_DECREF(cb_args); - Py_DECREF(in_memoryview); - Py_DECREF(out_memoryview); - - if (cb_ret == NULL) { - goto error; - } - Py_DECREF(cb_ret); - - return 0; -error: - return -1; -} - -PyDoc_STRVAR(compress_stream_doc, -"compress_stream(input_stream, output_stream, *,\n" -" level_or_option=None, zstd_dict=None,\n" -" pledged_input_size=None,\n" -" read_size=131072, write_size=131591,\n" -" callback=None)\n" -"----\n" -"Compresses input_stream and writes the compressed data to output_stream, it\n" -"doesn't close the streams.\n\n" -"----\n" -"DEPRECATION NOTICE\n" -"The (de)compress_stream are deprecated and will be removed in a future version.\n" -"See 
https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives\n" -"----\n\n" -"If input stream is b'', nothing will be written to output stream.\n\n" -"Return a tuple, (total_input, total_output), the items are int objects.\n\n" -"Parameters\n" -"input_stream: Input stream that has a .readinto(b) method.\n" -"output_stream: Output stream that has a .write(b) method. If use callback\n" -" function, this parameter can be None.\n" -"level_or_option: When it's an int object, it represents the compression\n" -" level. When it's a dict object, it contains advanced compression\n" -" parameters.\n" -"zstd_dict: A ZstdDict object, pre-trained zstd dictionary.\n" -"pledged_input_size: If set this parameter to the size of input data, the\n" -" size will be written into the frame header. If the actual input data\n" -" doesn't match it, a ZstdError will be raised.\n" -"read_size: Input buffer size, in bytes.\n" -"write_size: Output buffer size, in bytes.\n" -"callback: A callback function that accepts four parameters:\n" -" (total_input, total_output, read_data, write_data), the first two are\n" -" int objects, the last two are readonly memoryview objects." -); - -static PyObject * -compress_stream(PyObject *module, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"input_stream", "output_stream", - "level_or_option", "zstd_dict", - "pledged_input_size", "read_size", "write_size", - "callback", NULL}; - PyObject *input_stream; - PyObject *output_stream; - PyObject *level_or_option = Py_None; - PyObject *zstd_dict = Py_None; - PyObject *pledged_input_size = Py_None; - Py_ssize_t read_size = ZSTD_CStreamInSize(); - Py_ssize_t write_size = ZSTD_CStreamOutSize(); - PyObject *callback = Py_None; - - /* If fails, modify value in __init__.pyi and doc. 
*/ - assert(read_size == 131072); - assert(write_size == 131591); - - size_t zstd_ret; - PyObject *temp; - ZstdCompressor self = {0}; - uint64_t pledged_size_value = ZSTD_CONTENTSIZE_UNKNOWN; - ZSTD_inBuffer in = {.src = NULL}; - ZSTD_outBuffer out = {.dst = NULL}; - PyObject *in_memoryview = NULL; - uint64_t total_input_size = 0; - uint64_t total_output_size = 0; - STATE_FROM_MODULE(module); - PyObject *ret = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "OO|$OOOnnO:compress_stream", kwlist, - &input_stream, &output_stream, - &level_or_option, &zstd_dict, - &pledged_input_size, &read_size, &write_size, - &callback)) { - return NULL; - } - - /* Check arguments */ - if (!PyObject_HasAttr(input_stream, MS_MEMBER(str_readinto))) { - PyErr_SetString(PyExc_TypeError, - "input_stream argument should have a .readinto(b) method."); - return NULL; - } - - if (output_stream != Py_None) { - if (!PyObject_HasAttr(output_stream, MS_MEMBER(str_write))) { - PyErr_SetString(PyExc_TypeError, - "output_stream argument should have a .write(b) method."); - return NULL; - } - } else { - if (callback == Py_None) { - PyErr_SetString(PyExc_TypeError, - "At least one of output_stream argument and " - "callback argument should be non-None."); - return NULL; - } - } - - if (pledged_input_size != Py_None) { - pledged_size_value = PyLong_AsUnsignedLongLong(pledged_input_size); - if (pledged_size_value == (uint64_t)-1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "pledged_input_size argument should be 64-bit " - "unsigned integer value."); - return NULL; - } - } - - if (read_size <= 0 || write_size <= 0) { - PyErr_SetString(PyExc_ValueError, - "read_size argument and write_size argument should " - "be positive numbers."); - return NULL; - } - - /* Initialize & set ZstdCompressor */ - self.cctx = ZSTD_createCCtx(); - if (self.cctx == NULL) { - PyErr_SetString(MS_MEMBER(ZstdError), - "Unable to create ZSTD_CCtx instance."); - goto error; - } -#ifdef 
USE_MULTI_PHASE_INIT - self.module_state = MODULE_STATE; -#endif - - if (level_or_option != Py_None) { - if (set_c_parameters(&self, level_or_option) < 0) { - goto error; - } - } - - if (zstd_dict != Py_None) { - if (load_c_dict(&self, zstd_dict) < 0) { - goto error; - } - } - - if (pledged_size_value != ZSTD_CONTENTSIZE_UNKNOWN) { - zstd_ret = ZSTD_CCtx_setPledgedSrcSize(self.cctx, pledged_size_value); - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(MODULE_STATE, ERR_COMPRESS, zstd_ret); - goto error; - } - } - - /* Input buffer, in.size and in.pos will be set later. */ - in.src = PyMem_Malloc(read_size); - if (in.src == NULL) { - PyErr_NoMemory(); - goto error; - } - in_memoryview = PyMemoryView_FromMemory((char*) in.src, read_size, PyBUF_WRITE); - if (in_memoryview == NULL) { - goto error; - } - - /* Output buffer, out.pos will be set later. */ - out.dst = PyMem_Malloc(write_size); - if (out.dst == NULL) { - PyErr_NoMemory(); - goto error; - } - out.size = write_size; - - /* Read */ - while (1) { - Py_ssize_t read_bytes; - size_t callback_read_pos; - ZSTD_EndDirective end_directive; - - /* Invoke .readinto() method */ - temp = invoke_method_one_arg(input_stream, - MS_MEMBER(str_readinto), - in_memoryview); - read_bytes = check_and_get_fp_ret("input_stream.readinto()", - temp, 0, read_size); - if (read_bytes < 0) { - goto error; - } - - /* Don't generate empty frame */ - if (read_bytes == 0 && total_input_size == 0) { - break; - } - total_input_size += (size_t) read_bytes; - - in.size = (size_t) read_bytes; - in.pos = 0; - callback_read_pos = 0; - end_directive = (read_bytes == 0) ? 
ZSTD_e_end : ZSTD_e_continue; - - /* Compress & write */ - while (1) { - /* Output position */ - out.pos = 0; - - /* Compress */ - Py_BEGIN_ALLOW_THREADS - if (self.use_multithread && end_directive == ZSTD_e_continue) { - do { - zstd_ret = ZSTD_compressStream2(self.cctx, &out, &in, ZSTD_e_continue); - } while (out.pos != out.size && in.pos != in.size && !ZSTD_isError(zstd_ret)); - } else { - zstd_ret = ZSTD_compressStream2(self.cctx, &out, &in, end_directive); - } - Py_END_ALLOW_THREADS - - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(MODULE_STATE, ERR_COMPRESS, zstd_ret); - goto error; - } - - /* Accumulate output bytes */ - total_output_size += out.pos; - - /* Write all output to output_stream */ - if (output_stream != Py_None) { - if (write_to_fp(MODULE_STATE, "output_stream.write()", - output_stream, &out) < 0) { - goto error; - } - } - - /* Invoke callback */ - if (callback != Py_None) { - if (invoke_callback(MODULE_STATE, callback, &in, &callback_read_pos, - &out, total_input_size, total_output_size) < 0) { - goto error; - } - } - - /* Finished */ - if (self.use_multithread && end_directive == ZSTD_e_continue) { - if (mt_continue_should_break(&in, &out)) { - break; - } - } else { - if (zstd_ret == 0) { - break; - } - } - } /* Compress & write loop */ - - /* Input stream ended */ - if (read_bytes == 0) { - break; - } - } /* Read loop */ - - /* Return value */ - ret = Py_BuildValue("KK", total_input_size, total_output_size); - if (ret == NULL) { - goto error; - } - - goto success; - -error: - Py_CLEAR(ret); - -success: - ZSTD_freeCCtx(self.cctx); - - Py_XDECREF(in_memoryview); - PyMem_Free((void*) in.src); - PyMem_Free(out.dst); - - return ret; -} - -PyDoc_STRVAR(decompress_stream_doc, -"decompress_stream(input_stream, output_stream, *,\n" -" zstd_dict=None, option=None,\n" -" read_size=131075, write_size=131072,\n" -" callback=None)\n" -"----\n" -"Decompresses input_stream and writes the decompressed data to output_stream,\n" -"it doesn't close the 
streams.\n\n" -"----\n" -"DEPRECATION NOTICE\n" -"The (de)compress_stream are deprecated and will be removed in a future version.\n" -"See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives\n" -"----\n\n" -"Supports multiple concatenated frames.\n\n" -"Return a tuple, (total_input, total_output), the items are int objects.\n\n" -"Parameters\n" -"input_stream: Input stream that has a .readinto(b) method.\n" -"output_stream: Output stream that has a .write(b) method. If use callback\n" -" function, this parameter can be None.\n" -"zstd_dict: A ZstdDict object, pre-trained zstd dictionary.\n" -"option: A dict object, contains advanced decompression parameters.\n" -"read_size: Input buffer size, in bytes.\n" -"write_size: Output buffer size, in bytes.\n" -"callback: A callback function that accepts four parameters:\n" -" (total_input, total_output, read_data, write_data), the first two are\n" -" int objects, the last two are readonly memoryview objects." -); - -static PyObject * -decompress_stream(PyObject *module, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"input_stream", "output_stream", - "zstd_dict", "option", - "read_size", "write_size", - "callback", NULL}; - PyObject *input_stream; - PyObject *output_stream; - PyObject *zstd_dict = Py_None; - PyObject *option = Py_None; - Py_ssize_t read_size = ZSTD_DStreamInSize(); - Py_ssize_t write_size = ZSTD_DStreamOutSize(); - PyObject *callback = Py_None; - - /* If fails, modify value in __init__.pyi and doc. 
*/ - assert(read_size == 131075); - assert(write_size == 131072); - - size_t zstd_ret; - PyObject *temp; - ZstdDecompressor self = {0}; - ZSTD_inBuffer in = {.src = NULL}; - ZSTD_outBuffer out = {.dst = NULL}; - PyObject *in_memoryview = NULL; - uint64_t total_input_size = 0; - uint64_t total_output_size = 0; - STATE_FROM_MODULE(module); - PyObject *ret = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "OO|$OOnnO:decompress_stream", kwlist, - &input_stream, &output_stream, - &zstd_dict, &option, - &read_size, &write_size, - &callback)) { - return NULL; - } - - /* Check arguments */ - if (!PyObject_HasAttr(input_stream, MS_MEMBER(str_readinto))) { - PyErr_SetString(PyExc_TypeError, - "input_stream argument should have a .readinto(b) method."); - return NULL; - } - - if (output_stream != Py_None) { - if (!PyObject_HasAttr(output_stream, MS_MEMBER(str_write))) { - PyErr_SetString(PyExc_TypeError, - "output_stream argument should have a .write(b) method."); - return NULL; - } - } else { - if (callback == Py_None) { - PyErr_SetString(PyExc_TypeError, - "At least one of output_stream argument and " - "callback argument should be non-None."); - return NULL; - } - } - - if (read_size <= 0 || write_size <= 0) { - PyErr_SetString(PyExc_ValueError, - "read_size argument and write_size argument should " - "be positive numbers."); - return NULL; - } - - /* Initialize & set ZstdDecompressor */ - self.dctx = ZSTD_createDCtx(); - if (self.dctx == NULL) { - PyErr_SetString(MS_MEMBER(ZstdError), - "Unable to create ZSTD_DCtx instance."); - goto error; - } - self.at_frame_edge = 1; -#ifdef USE_MULTI_PHASE_INIT - self.module_state = MODULE_STATE; -#endif - - if (zstd_dict != Py_None) { - if (load_d_dict(&self, zstd_dict) < 0) { - goto error; - } - } - - if (option != Py_None) { - if (set_d_parameters(&self, option) < 0) { - goto error; - } - } - - /* Input buffer, in.size and in.pos will be set later. 
*/ - in.src = PyMem_Malloc(read_size); - if (in.src == NULL) { - PyErr_NoMemory(); - goto error; - } - in_memoryview = PyMemoryView_FromMemory((char*) in.src, read_size, PyBUF_WRITE); - if (in_memoryview == NULL) { - goto error; - } - - /* Output buffer, out.pos will be set later. */ - out.dst = PyMem_Malloc(write_size); - if (out.dst == NULL) { - PyErr_NoMemory(); - goto error; - } - out.size = write_size; - - /* Read */ - while (1) { - Py_ssize_t read_bytes; - size_t callback_read_pos; - - /* Invoke .readinto() method */ - temp = invoke_method_one_arg(input_stream, - MS_MEMBER(str_readinto), - in_memoryview); - read_bytes = check_and_get_fp_ret("input_stream.readinto()", - temp, 0, read_size); - if (read_bytes < 0) { - goto error; - } - - total_input_size += (size_t) read_bytes; - - in.size = (size_t) read_bytes; - in.pos = 0; - callback_read_pos = 0; - - /* Decompress & write */ - while (1) { - /* AFE check for setting .at_frame_edge flag, search "AFE check" in - this file to see details. */ - if (self.at_frame_edge && in.pos == in.size) { - break; - } - - /* Output position */ - out.pos = 0; - - /* Decompress */ - Py_BEGIN_ALLOW_THREADS - zstd_ret = ZSTD_decompressStream(self.dctx, &out, &in); - Py_END_ALLOW_THREADS - - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(MODULE_STATE, ERR_DECOMPRESS, zstd_ret); - goto error; - } - - /* Set .af_frame_edge flag */ - self.at_frame_edge = (zstd_ret == 0) ? 1 : 0; - - /* Accumulate output bytes */ - total_output_size += out.pos; - - /* Write all output to output_stream */ - if (output_stream != Py_None) { - if (write_to_fp(MODULE_STATE, "output_stream.write()", - output_stream, &out) < 0) { - goto error; - } - } - - /* Invoke callback */ - if (callback != Py_None) { - if (invoke_callback(MODULE_STATE, callback, &in, &callback_read_pos, - &out, total_input_size, total_output_size) < 0) { - goto error; - } - } - - /* Finished. 
When a frame is fully decoded, but not fully flushed, - the last byte is kept as hostage, it will be released when all - output is flushed. */ - if (in.pos == in.size) { - /* If input stream ends in an incomplete frame, output as much - as possible. */ - if (read_bytes == 0 && - self.at_frame_edge == 0 && - out.pos == out.size) - { - continue; - } - - break; - } - } /* Decompress & write loop */ - - /* Input stream ended */ - if (read_bytes == 0) { - /* Check data integrity. at_frame_edge flag is 1 when both the - input and output streams are at a frame edge. */ - if (self.at_frame_edge == 0) { - PyErr_Format(MS_MEMBER(ZstdError), - "Decompression failed: zstd data ends in an " - "incomplete frame, maybe the input data was " - "truncated. Total input %llu bytes, total output " - "%llu bytes.", - total_input_size, total_output_size); - goto error; - } - break; - } - } /* Read loop */ - - /* Return value */ - ret = Py_BuildValue("KK", total_input_size, total_output_size); - if (ret == NULL) { - goto error; - } - - goto success; - -error: - Py_CLEAR(ret); - -success: - ZSTD_freeDCtx(self.dctx); - - Py_XDECREF(in_memoryview); - PyMem_Free((void*) in.src); - PyMem_Free(out.dst); - - return ret; -} diff --git a/src/pyzstd/__init__.py b/src/pyzstd/__init__.py new file mode 100644 index 0000000..d59cfa9 --- /dev/null +++ b/src/pyzstd/__init__.py @@ -0,0 +1,709 @@ +import sys +import warnings + +from collections import namedtuple +from enum import IntEnum + +if sys.version_info < (3, 14): + from backports import zstd +else: + from compression import zstd + +try: + from warnings import deprecated +except ImportError: + from typing_extensions import deprecated + +from pyzstd._version import __version__ + + +__doc__ = """\ +Python bindings to Zstandard (zstd) compression library, the API style is +similar to Python's bz2/lzma/zlib modules. 
+ +Command line interface of this module: python -m pyzstd --help + +Documentation: https://pyzstd.readthedocs.io +GitHub: https://github.com/Rogdham/pyzstd +PyPI: https://pypi.org/project/pyzstd""" + +__all__ = ( + "ZstdCompressor", + "RichMemZstdCompressor", + "ZstdDecompressor", + "EndlessZstdDecompressor", + "CParameter", + "DParameter", + "Strategy", + "ZstdError", + "compress", + "richmem_compress", + "decompress", + "compress_stream", + "decompress_stream", + "ZstdDict", + "train_dict", + "finalize_dict", + "get_frame_info", + "get_frame_size", + "ZstdFile", + "open", + "zstd_version", + "zstd_version_info", + "zstd_support_multithread", + "compressionLevel_values", + "SeekableZstdFile", + "SeekableFormatError", +) + + +class _DeprecatedPlaceholder: + def __repr__(self): + return "" + + +_DEPRECATED_PLACEHOLDER = _DeprecatedPlaceholder() + + +class CParameter(IntEnum): + """Compression parameters""" + + compressionLevel = zstd.CompressionParameter.compression_level + windowLog = zstd.CompressionParameter.window_log + hashLog = zstd.CompressionParameter.hash_log + chainLog = zstd.CompressionParameter.chain_log + searchLog = zstd.CompressionParameter.search_log + minMatch = zstd.CompressionParameter.min_match + targetLength = zstd.CompressionParameter.target_length + strategy = zstd.CompressionParameter.strategy + targetCBlockSize = 130 # not part of PEP-784 + + enableLongDistanceMatching = zstd.CompressionParameter.enable_long_distance_matching + ldmHashLog = zstd.CompressionParameter.ldm_hash_log + ldmMinMatch = zstd.CompressionParameter.ldm_min_match + ldmBucketSizeLog = zstd.CompressionParameter.ldm_bucket_size_log + ldmHashRateLog = zstd.CompressionParameter.ldm_hash_rate_log + + contentSizeFlag = zstd.CompressionParameter.content_size_flag + checksumFlag = zstd.CompressionParameter.checksum_flag + dictIDFlag = zstd.CompressionParameter.dict_id_flag + + nbWorkers = zstd.CompressionParameter.nb_workers + jobSize = zstd.CompressionParameter.job_size + 
overlapLog = zstd.CompressionParameter.overlap_log + + def bounds(self): + """Return lower and upper bounds of a compression parameter, both inclusive.""" + return zstd.CompressionParameter(self).bounds() + + +class DParameter(IntEnum): + """Decompression parameters""" + + windowLogMax = zstd.DecompressionParameter.window_log_max + + def bounds(self): + """Return lower and upper bounds of a decompression parameter, both inclusive.""" + return zstd.DecompressionParameter(self).bounds() + + +def _convert_level_or_option(level_or_option, mode): + """Transform pyzstd params into PEP-784 `options` param""" + if not isinstance(mode, str): + raise ValueError(f"Invalid mode type: {mode}") + read_mode = mode.startswith("r") + if isinstance(level_or_option, int): + if read_mode: + raise TypeError( + ( + "In read mode (decompression), level_or_option argument " + "should be a dict object, that represents decompression " + "option. It doesn't support int type compression level " + "in this case." + ) + ) + return { + CParameter.compressionLevel: level_or_option, + } + if level_or_option is not None: + invalid_class = CParameter if read_mode else DParameter + for key in level_or_option: + if isinstance(key, invalid_class): + raise TypeError( + "Key of compression option dict should " + f"NOT be {invalid_class.__name__}." + ) + return level_or_option + + +class ZstdCompressor: + """A streaming compressor. Thread-safe at method level.""" + + CONTINUE = zstd.ZstdCompressor.CONTINUE + """Used for mode parameter in .compress() method. + + Collect more data, encoder decides when to output compressed result, for optimal + compression ratio. Usually used for traditional streaming compression. + """ + + FLUSH_BLOCK = zstd.ZstdCompressor.FLUSH_BLOCK + """Used for mode parameter in .compress(), .flush() methods. + + Flush any remaining data, but don't close the current frame. Usually used for + communication scenarios. 
+ + If there is data, it creates at least one new block, that can be decoded + immediately on reception. If no remaining data, no block is created, return b''. + + Note: Abuse of this mode will reduce compression ratio. Use it only when + necessary. + """ + + FLUSH_FRAME = zstd.ZstdCompressor.FLUSH_FRAME + """Used for mode parameter in .compress(), .flush() methods. + + Flush any remaining data, and close the current frame. Usually used for + traditional flush. + + Since zstd data consists of one or more independent frames, data can still be + provided after a frame is closed. + + Note: Abuse of this mode will reduce compression ratio, and some programs can + only decompress single frame data. Use it only when necessary. + """ + + def __init__(self, level_or_option=None, zstd_dict=None): + """Initialize a ZstdCompressor object. + + Parameters + level_or_option: When it's an int object, it represents the compression level. + When it's a dict object, it contains advanced compression + parameters. + zstd_dict: A ZstdDict object, pre-trained zstd dictionary. + """ + self._compressor = zstd.ZstdCompressor( + options=_convert_level_or_option(level_or_option, "w"), zstd_dict=zstd_dict + ) + + def compress(self, data, mode=zstd.ZstdCompressor.CONTINUE): + """Provide data to the compressor object. + Return a chunk of compressed data if possible, or b'' otherwise. + + Parameters + data: A bytes-like object, data to be compressed. + mode: Can be these 3 values .CONTINUE, .FLUSH_BLOCK, .FLUSH_FRAME. + """ + return self._compressor.compress(data, mode) + + def flush(self, mode=zstd.ZstdCompressor.FLUSH_FRAME): + """Flush any remaining data in internal buffer. + + Since zstd data consists of one or more independent frames, the compressor + object can still be used after this method is called. + + Parameter + mode: Can be these 2 values .FLUSH_FRAME, .FLUSH_BLOCK. 
+ """ + return self._compressor.flush(mode) + + def _set_pledged_input_size(self, size): + """*This is an undocumented method, because it may be used incorrectly.* + + Set uncompressed content size of a frame, the size will be written into the + frame header. + 1, If called when (.last_mode != .FLUSH_FRAME), a RuntimeError will be raised. + 2, If the actual size doesn't match the value, a ZstdError will be raised, and + the last compressed chunk is likely to be lost. + 3, The size is only valid for one frame, then it restores to "unknown size". + + Parameter + size: Uncompressed content size of a frame, None means "unknown size". + """ + return self._compressor.set_pledged_input_size(size) + + @property + def last_mode(self): + """The last mode used to this compressor object, its value can be .CONTINUE, + .FLUSH_BLOCK, .FLUSH_FRAME. Initialized to .FLUSH_FRAME. + + It can be used to get the current state of a compressor, such as, data flushed, + a frame ended. + """ + return self._compressor.last_mode + + def __reduce__(self): + raise TypeError(f"Cannot pickle {type(self)} object.") + + +class ZstdDecompressor: + """A streaming decompressor, it stops after a frame is decompressed. + Thread-safe at method level.""" + + def __init__(self, zstd_dict=None, option=None): + """Initialize a ZstdDecompressor object. + + Parameters + zstd_dict: A ZstdDict object, pre-trained zstd dictionary. + option: A dict object that contains advanced decompression parameters. + """ + self._decompressor = zstd.ZstdDecompressor( + zstd_dict=zstd_dict, options=_convert_level_or_option(option, "r") + ) + + def decompress(self, data, max_length=-1): + """Decompress data, return a chunk of decompressed data if possible, or b'' + otherwise. + + It stops after a frame is decompressed. + + Parameters + data: A bytes-like object, zstd data to be decompressed. + max_length: Maximum size of returned data. When it is negative, the size of + output buffer is unlimited. 
When it is nonnegative, returns at + most max_length bytes of decompressed data. + """ + return self._decompressor.decompress(data, max_length) + + @property + def eof(self): + """True means the end of the first frame has been reached. If decompress data + after that, an EOFError exception will be raised.""" + return self._decompressor.eof + + @property + def needs_input(self): + """If the max_length output limit in .decompress() method has been reached, and + the decompressor has (or may has) unconsumed input data, it will be set to + False. In this case, pass b'' to .decompress() method may output further data. + """ + return self._decompressor.needs_input + + @property + def unused_data(self): + """A bytes object. When ZstdDecompressor object stops after a frame is + decompressed, unused input data after the frame. Otherwise this will be b''.""" + return self._decompressor.unused_data + + def __reduce__(self): + raise TypeError(f"Cannot pickle {type(self)} object.") + + +class EndlessZstdDecompressor: + """A streaming decompressor, accepts multiple concatenated frames. + Thread-safe at method level.""" + + def __init__(self, zstd_dict=None, option=None): + """Initialize an EndlessZstdDecompressor object. + + Parameters + zstd_dict: A ZstdDict object, pre-trained zstd dictionary. + option: A dict object that contains advanced decompression parameters. + """ + self._zstd_dict = zstd_dict + self._options = _convert_level_or_option(option, "r") + self._reset() + + def _reset(self, data=b""): + self._decompressor = zstd.ZstdDecompressor( + zstd_dict=self._zstd_dict, options=self._options + ) + self._buffer = data + self._at_frame_edge = not data + + def decompress(self, data, max_length=-1): + """Decompress data, return a chunk of decompressed data if possible, or b'' + otherwise. + + Parameters + data: A bytes-like object, zstd data to be decompressed. + max_length: Maximum size of returned data. When it is negative, the size of + output buffer is unlimited. 
When it is nonnegative, returns at + most max_length bytes of decompressed data. + """ + if not isinstance(data, bytes) or not isinstance(max_length, int): + raise TypeError + self._buffer += data + self._at_frame_edge &= not self._buffer + out = b"" + while True: + try: + out += self._decompressor.decompress(self._buffer, max_length) + except ZstdError: + self._reset() + raise + if self._decompressor.eof: + self._reset(self._decompressor.unused_data) + max_length -= len(out) + else: + self._buffer = b"" + break + return out + + @property + def at_frame_edge(self): + """True when both the input and output streams are at a frame edge, means a + frame is completely decoded and fully flushed, or the decompressor just be + initialized. + + This flag could be used to check data integrity in some cases. + """ + return self._at_frame_edge + + @property + def needs_input(self): + """If the max_length output limit in .decompress() method has been reached, and + the decompressor has (or may has) unconsumed input data, it will be set to + False. In this case, pass b'' to .decompress() method may output further data. + """ + return not self._buffer and ( + self._at_frame_edge or self._decompressor.needs_input + ) + + def __reduce__(self): + raise TypeError(f"Cannot pickle {type(self)} object.") + + +def compress(data, level_or_option=None, zstd_dict=None): + """Compress a block of data, return a bytes object. + + Compressing b'' will get an empty content frame (9 bytes or more). + + Parameters + data: A bytes-like object, data to be compressed. + level_or_option: When it's an int object, it represents compression level. + When it's a dict object, it contains advanced compression + parameters. + zstd_dict: A ZstdDict object, pre-trained dictionary for compression. 
+ """ + return zstd.compress( + data, + options=_convert_level_or_option(level_or_option, "w"), + zstd_dict=zstd_dict, + ) + + +def decompress(data, zstd_dict=None, option=None): + """Decompress a zstd data, return a bytes object. + + Support multiple concatenated frames. + + Parameters + data: A bytes-like object, compressed zstd data. + zstd_dict: A ZstdDict object, pre-trained zstd dictionary. + option: A dict object, contains advanced decompression parameters. + """ + return zstd.decompress( + data, options=_convert_level_or_option(option, "r"), zstd_dict=zstd_dict + ) + + +@deprecated( + "See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives to pyzstd.RichMemZstdCompressor" +) +class RichMemZstdCompressor: + def __init__(self, level_or_option=None, zstd_dict=None): + self._compress_kwargs = { + "options": _convert_level_or_option(level_or_option, "w"), + "zstd_dict": zstd_dict, + } + + def compress(self, data): + return zstd.compress(data, **self._compress_kwargs) + + def __reduce__(self): + raise TypeError(f"Cannot pickle {type(self)} object.") + + +class ZstdFile(zstd.ZstdFile): + """A file object providing transparent zstd (de)compression. + + A ZstdFile can act as a wrapper for an existing file object, or refer + directly to a named file on disk. + + Note that ZstdFile provides a *binary* file interface - data read is + returned as bytes, and data to be written should be an object that + supports the Buffer Protocol. + """ + + FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK + FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME + + def __init__( + self, + filename, + mode="r", + *, + level_or_option=None, + zstd_dict=None, + read_size=_DEPRECATED_PLACEHOLDER, + write_size=_DEPRECATED_PLACEHOLDER, + ): + """Open a zstd compressed file in binary mode. + + filename can be either an actual file name (given as a str, bytes, or + PathLike object), in which case the named file is opened, or it can be + an existing file object to read from or write to. 
+ + mode can be "r" for reading (default), "w" for (over)writing, "x" for + creating exclusively, or "a" for appending. These can equivalently be + given as "rb", "wb", "xb" and "ab" respectively. + + Parameters + level_or_option: When it's an int object, it represents compression + level. When it's a dict object, it contains advanced compression + parameters. Note, in read mode (decompression), it can only be a + dict object, that represents decompression option. It doesn't + support int type compression level in this case. + zstd_dict: A ZstdDict object, pre-trained dictionary for compression / + decompression. + """ + if read_size != _DEPRECATED_PLACEHOLDER: + warnings.warn( + "pyzstd.ZstdFile()'s read_size parameter is deprecated", + DeprecationWarning, + stacklevel=2, + ) + if write_size != _DEPRECATED_PLACEHOLDER: + warnings.warn( + "pyzstd.ZstdFile()'s write_size parameter is deprecated", + DeprecationWarning, + stacklevel=2, + ) + super().__init__( + filename, + mode, + options=_convert_level_or_option(level_or_option, mode), + zstd_dict=zstd_dict, + ) + + +def open( + filename, + mode="rb", + *, + level_or_option=None, + zstd_dict=None, + encoding=None, + errors=None, + newline=None, +): + """Open a zstd compressed file in binary or text mode. + + filename can be either an actual file name (given as a str, bytes, or + PathLike object), in which case the named file is opened, or it can be an + existing file object to read from or write to. + + The mode parameter can be "r", "rb" (default), "w", "wb", "x", "xb", "a", + "ab" for binary mode, or "rt", "wt", "xt", "at" for text mode. + + The level_or_option and zstd_dict parameters specify the settings, as for + ZstdCompressor, ZstdDecompressor and ZstdFile. + + When using read mode (decompression), the level_or_option parameter can + only be a dict object, that represents decompression option. It doesn't + support int type compression level in this case. 
+ + For binary mode, this function is equivalent to the ZstdFile constructor: + ZstdFile(filename, mode, ...). In this case, the encoding, errors and + newline parameters must not be provided. + + For text mode, an ZstdFile object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error handling + behavior, and line ending(s). + """ + return zstd.open( + filename, + mode, + options=_convert_level_or_option(level_or_option, mode), + zstd_dict=zstd_dict, + encoding=encoding, + errors=errors, + newline=newline, + ) + + +def _create_callback(output_stream, callback): + if output_stream is None: + if callback is None: + raise TypeError( + "At least one of output_stream argument and callback argument should be non-None." + ) + + def cb(total_input, total_output, data_in, data_out): + callback( + total_input, total_output, memoryview(data_in), memoryview(data_out) + ) + + elif callback is None: + + def cb(total_input, total_output, data_in, data_out): + output_stream.write(data_out) + + else: + + def cb(total_input, total_output, data_in, data_out): + output_stream.write(data_out) + callback( + total_input, total_output, memoryview(data_in), memoryview(data_out) + ) + + return cb + + +@deprecated( + "See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives to pyzstd.compress_stream" +) +def compress_stream( + input_stream, + output_stream, + *, + level_or_option=None, + zstd_dict=None, + pledged_input_size=None, + read_size=131_072, + write_size=_DEPRECATED_PLACEHOLDER, + callback=None, +): + """Compresses input_stream and writes the compressed data to output_stream, it + doesn't close the streams. + + ---- + DEPRECATION NOTICE + The (de)compress_stream are deprecated and will be removed in a future version. + See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives + ---- + + If input stream is b'', nothing will be written to output stream. 
+ + Return a tuple, (total_input, total_output), the items are int objects. + + Parameters + input_stream: Input stream that has a .readinto(b) method. + output_stream: Output stream that has a .write(b) method. If use callback + function, this parameter can be None. + level_or_option: When it's an int object, it represents the compression + level. When it's a dict object, it contains advanced compression + parameters. + zstd_dict: A ZstdDict object, pre-trained zstd dictionary. + pledged_input_size: If set this parameter to the size of input data, the + size will be written into the frame header. If the actual input data + doesn't match it, a ZstdError will be raised. + read_size: Input buffer size, in bytes. + callback: A callback function that accepts four parameters: + (total_input, total_output, read_data, write_data), the first two are + int objects, the last two are readonly memoryview objects. + """ + if not hasattr(input_stream, "read"): + raise TypeError("input_stream argument should have a .read() method.") + if output_stream is not None and not hasattr(output_stream, "write"): + raise TypeError("output_stream argument should have a .write() method.") + if read_size < 1: + raise ValueError("read_size argument should be a positive number.") + callback = _create_callback(output_stream, callback) + total_input = 0 + total_output = 0 + compressor = ZstdCompressor(level_or_option, zstd_dict) + if pledged_input_size is not None and pledged_input_size != 2**64 - 1: + compressor._set_pledged_input_size(pledged_input_size) + while data_in := input_stream.read(read_size): + total_input += len(data_in) + data_out = compressor.compress(data_in) + total_output += len(data_out) + callback(total_input, total_output, data_in, data_out) + if not total_input: + return total_input, total_output + data_out = compressor.flush() + total_output += len(data_out) + callback(total_input, total_output, b"", data_out) + return total_input, total_output + + +@deprecated( + "See 
https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives to pyzstd.decompress_stream" +) +def decompress_stream( + input_stream, + output_stream, + *, + zstd_dict=None, + option=None, + read_size=131_075, + write_size=131_072, + callback=None, +): + """Decompresses input_stream and writes the decompressed data to output_stream, + it doesn't close the streams. + + ---- + DEPRECATION NOTICE + The (de)compress_stream are deprecated and will be removed in a future version. + See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives + ---- + + Supports multiple concatenated frames. + + Return a tuple, (total_input, total_output), the items are int objects. + + Parameters + input_stream: Input stream that has a .readinto(b) method. + output_stream: Output stream that has a .write(b) method. If use callback + function, this parameter can be None. + zstd_dict: A ZstdDict object, pre-trained zstd dictionary. + option: A dict object, contains advanced decompression parameters. + read_size: Input buffer size, in bytes. + write_size: Output buffer size, in bytes. + callback: A callback function that accepts four parameters: + (total_input, total_output, read_data, write_data), the first two are + int objects, the last two are readonly memoryview objects. + """ + if not hasattr(input_stream, "read"): + raise TypeError("input_stream argument should have a .read() method.") + if output_stream is not None and not hasattr(output_stream, "write"): + raise TypeError("output_stream argument should have a .write() method.") + if read_size < 1 or write_size < 1: + raise ValueError( + "read_size argument and write_size argument should be positive numbers." 
+ ) + callback = _create_callback(output_stream, callback) + total_input = 0 + total_output = 0 + decompressor = EndlessZstdDecompressor(zstd_dict, option) + while True: + if decompressor.needs_input: + data_in = input_stream.read(read_size) + if not data_in: + break + else: + data_in = b"" + total_input += len(data_in) + data_out = decompressor.decompress(data_in, write_size) + total_output += len(data_out) + callback(total_input, total_output, data_in, data_out) + if not decompressor.at_frame_edge: + raise ZstdError( + "Decompression failed: zstd data ends in an incomplete frame," + " maybe the input data was truncated." + f" Total input {total_input} bytes, total output {total_output} bytes." + ) + return total_input, total_output + + +Strategy = zstd.Strategy +ZstdError = zstd.ZstdError +richmem_compress = deprecated( + "See https://pyzstd.readthedocs.io/en/stable/deprecated.html for alternatives to pyzstd.richmem_compress" +)(compress) +ZstdDict = zstd.ZstdDict +train_dict = zstd.train_dict +finalize_dict = zstd.finalize_dict +get_frame_info = zstd.get_frame_info +get_frame_size = zstd.get_frame_size +zstd_version = zstd.zstd_version +zstd_version_info = zstd.zstd_version_info +zstd_support_multithread = CParameter.nbWorkers.bounds() != (0, 0) +compressionLevel_values = namedtuple("values", ["default", "min", "max"])( + zstd.COMPRESSION_LEVEL_DEFAULT, *CParameter.compressionLevel.bounds() +) + +# import here to avoid circular dependency issues +from ._seekable_zstdfile import SeekableFormatError, SeekableZstdFile diff --git a/src/__init__.pyi b/src/pyzstd/__init__.pyi similarity index 99% rename from src/__init__.pyi rename to src/pyzstd/__init__.pyi index dbdb47b..9f9c523 100644 --- a/src/__init__.pyi +++ b/src/pyzstd/__init__.pyi @@ -121,8 +121,6 @@ class ZstdDecompressor: data: ByteString, max_length: int = -1) -> bytes: ... - def _reset_session(self) -> None: ... 
- class EndlessZstdDecompressor: needs_input: bool at_frame_edge: bool @@ -135,8 +133,6 @@ class EndlessZstdDecompressor: data: ByteString, max_length: int = -1) -> bytes: ... - def _reset_session(self) -> None: ... - class ZstdError(Exception): ... diff --git a/src/__main__.py b/src/pyzstd/__main__.py similarity index 97% rename from src/__main__.py rename to src/pyzstd/__main__.py index 7c46f97..2fcbed0 100644 --- a/src/__main__.py +++ b/src/pyzstd/__main__.py @@ -9,7 +9,7 @@ CParameter, DParameter, \ train_dict, ZstdDict, ZstdFile, \ compressionLevel_values, zstd_version, \ - __version__ as pyzstd_version, PYZSTD_CONFIG + __version__ as pyzstd_version # buffer sizes recommended by zstd C_READ_BUFFER = 131072 @@ -343,16 +343,10 @@ def __call__(self, parser, args, values, option_string=None): # check range if not (min <= v <= max): - # 32/64 bits message - if bits_msg: - bits = 'in {}-bit build, '.format(PYZSTD_CONFIG[0]) - else: - bits = '' - # message - msg = ('{}{} value should: {} <= v <= {}. ' + msg = ('{} value should: {} <= v <= {}. ' 'provided value is {}.').format( - bits, option_string, min, max, v) + option_string, min, max, v) raise ValueError(msg) setattr(args, self.dest, v) @@ -463,7 +457,7 @@ def parse_arg(): args.dict.close() # Magic_Number: 4 bytes, value 0xEC30A437, little-endian format. 
is_raw = zd_content[:4] != b'\x37\xA4\x30\xEC' - args.zd = ZstdDict(zd_content, is_raw) + args.zd = ZstdDict(zd_content, is_raw=is_raw) else: args.zd = None diff --git a/src/_seekable_zstdfile.py b/src/pyzstd/_seekable_zstdfile.py similarity index 99% rename from src/_seekable_zstdfile.py rename to src/pyzstd/_seekable_zstdfile.py index 7998c96..3ef7e8d 100644 --- a/src/_seekable_zstdfile.py +++ b/src/pyzstd/_seekable_zstdfile.py @@ -6,13 +6,10 @@ from struct import Struct import warnings -from pyzstd import _ZSTD_DStreamSizes, ZstdCompressor, ZstdDecompressor -from pyzstd._zstdfile import _DEPRECATED_PLACEHOLDER +from pyzstd import ZstdCompressor, ZstdDecompressor, _DEPRECATED_PLACEHOLDER __all__ = ('SeekableFormatError', 'SeekableZstdFile') -_ZSTD_DStreamOutSize = _ZSTD_DStreamSizes[1] - _MODE_CLOSED = 0 _MODE_READ = 1 _MODE_WRITE = 2 @@ -590,7 +587,7 @@ def __init__(self, filename, mode="r", *, zstd_dict=zstd_dict, option=level_or_option, read_size=read_size) - self._buffer = io.BufferedReader(raw, _ZSTD_DStreamOutSize) + self._buffer = io.BufferedReader(raw) elif mode == "a": if self._fp.seekable(): @@ -758,7 +755,7 @@ def read1(self, size=-1): """ self._check_can_read() if size < 0: - size = _ZSTD_DStreamOutSize + size = io.DEFAULT_BUFFER_SIZE return self._buffer.read1(size) def readinto(self, b): diff --git a/src/py.typed b/src/pyzstd/py.typed similarity index 100% rename from src/py.typed rename to src/pyzstd/py.typed diff --git a/tests/test_seekable.py b/tests/test_seekable.py index d3eb070..126d21c 100644 --- a/tests/test_seekable.py +++ b/tests/test_seekable.py @@ -27,7 +27,6 @@ ZstdError, ZstdFile ) -from pyzstd import PYZSTD_CONFIG # type: ignore from pyzstd._seekable_zstdfile import _SeekTable @contextmanager @@ -45,7 +44,6 @@ def _check_deprecated(testcase): ] ) -BIT_BUILD = PYZSTD_CONFIG[0] DECOMPRESSED = b'1234567890' assert len(DECOMPRESSED) == 10 COMPRESSED = compress(DECOMPRESSED) @@ -541,7 +539,6 @@ def test_load_bad2(self): 'cumulated 
compressed size'): t.load_seek_table(b, seek_to_0=True) - @unittest.skipIf(BIT_BUILD == 32, 'skip in 32-bit build') def test_write_table(self): class MockError(Exception): pass @@ -745,19 +742,19 @@ def test_init_bad_check(self): with self.assertRaises(TypeError): SeekableZstdFile(BytesIO(), "w", level_or_option='asd') # CHECK_UNKNOWN and anything above CHECK_ID_MAX should be invalid. - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): SeekableZstdFile(BytesIO(), "w", level_or_option={999:9999}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): SeekableZstdFile(BytesIO(), "w", level_or_option={CParameter.windowLog:99}) with self.assertRaises(TypeError): SeekableZstdFile(BytesIO(self.two_frames), "r", level_or_option=33) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): SeekableZstdFile(BytesIO(self.two_frames), level_or_option={DParameter.windowLogMax:2**31}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): SeekableZstdFile(BytesIO(self.two_frames), level_or_option={444:333}) diff --git a/tests/test_zstd.py b/tests/test_zstd.py index 7990b3f..2931642 100644 --- a/tests/test_zstd.py +++ b/tests/test_zstd.py @@ -10,7 +10,6 @@ import array import pathlib import pickle -import platform import random import subprocess import tempfile @@ -25,45 +24,7 @@ ZstdDict, train_dict, finalize_dict, \ zstd_version, zstd_version_info, zstd_support_multithread, \ compressionLevel_values, get_frame_info, get_frame_size, \ - ZstdFile, open, __version__ as pyzstd_version, PYZSTD_CONFIG, \ - _train_dict, _finalize_dict, \ - _ZstdFileReader, _ZstdFileWriter - -if PYZSTD_CONFIG[1] == 'c': - from pyzstd._c import _zstd # type: ignore - -build_info = ('Pyzstd build information:\n' - ' - Environment:\n' - ' * Machine type: {}\n' - ' * OS: {}\n' - ' * Python: {} {}, {}-bit build ({})\n' - ' - Pyzstd:\n' - ' * Pyzstd version: {}\n' - ' * Implementation: {}\n' - ' * Enable multi-phase init: 
{}\n' - ' * Use mremap output buffer: {}\n' - ' * Link to zstd library: {}\n' - ' - Zstd:\n' - ' * Zstd version: {}\n' - ' * Enable multi-threaded compression: {}\n').format( - platform.machine(), # Environment - platform.system(), - platform.python_implementation(), - platform.python_version(), - PYZSTD_CONFIG[0], - platform.python_compiler(), - pyzstd_version, # Pyzstd - PYZSTD_CONFIG[1].upper(), - 'Not for CFFI implementation' \ - if PYZSTD_CONFIG[1] == 'cffi' \ - else PYZSTD_CONFIG[3], - 'Not for CFFI implementation' \ - if PYZSTD_CONFIG[1] == 'cffi' \ - else PYZSTD_CONFIG[4], - 'Statically link' if PYZSTD_CONFIG[2] else 'Dynamically link', - zstd_version, # Zstd - zstd_support_multithread) -print(build_info, flush=True) + ZstdFile, open DAT_130K_D = None DAT_130K_C = None @@ -409,7 +370,6 @@ class SubClass(EndlessZstdDecompressor): pass def test_ZstdDict(self): - ZstdDict(b'12345678', True) zd = ZstdDict(b'12345678', is_raw=True) self.assertEqual(type(zd.dict_content), bytes) @@ -422,17 +382,11 @@ def test_ZstdDict(self): self.assertIn('.ZstdDict', str(type(zd))) # doesn't support pickle - with self.assertRaisesRegex(TypeError, - r'save \.dict_content attribute'): + with self.assertRaisesRegex((TypeError, pickle.PicklingError), 'pickle'): pickle.dumps(zd) - with self.assertRaisesRegex(TypeError, - r'save \.dict_content attribute'): + with self.assertRaisesRegex((TypeError, pickle.PicklingError), 'pickle'): pickle.dumps(zd.as_prefix) - # supports subclass - class SubClass(ZstdDict): - pass - def test_Strategy(self): # class attributes Strategy.fast @@ -454,11 +408,7 @@ def test_CParameter(self): CParameter.minMatch CParameter.targetLength CParameter.strategy - if zstd_version_info >= (1, 5, 6): - CParameter.targetCBlockSize - else: - with self.assertRaises(NotImplementedError): - CParameter.targetCBlockSize + CParameter.targetCBlockSize CParameter.enableLongDistanceMatching CParameter.ldmHashLog @@ -497,46 +447,6 @@ def test_zstderror_pickle(self): else: 
self.assertFalse(True, 'unreachable code path') - def test_pyzstd_config(self): - self.assertEqual(len(PYZSTD_CONFIG), 5) - if sys.maxsize > 2**32: - self.assertEqual(PYZSTD_CONFIG[0], 64) - else: - self.assertEqual(PYZSTD_CONFIG[0], 32) - self.assertIn(PYZSTD_CONFIG[1], ('c', 'cffi')) - self.assertEqual(type(PYZSTD_CONFIG[2]), bool) - self.assertEqual(type(PYZSTD_CONFIG[3]), bool) - self.assertEqual(type(PYZSTD_CONFIG[4]), bool) - - def test_ZstdFile_extend(self): - # These classes and variables can be used to extend ZstdFile, - # such as SeekableZstdFile(ZstdFile), so pin them down. - self.assertTrue(issubclass(ZstdFile, io.BufferedIOBase)) - - # file object - bio = BytesIO() - with ZstdFile(bio, 'r') as f: - self.assertTrue(hasattr(f, '_fp')) - self.assertTrue(hasattr(f, '_mode')) - self.assertTrue(hasattr(f, '_buffer')) - with ZstdFile(bio, 'w') as f: - self.assertTrue(hasattr(f, '_fp')) - self.assertTrue(hasattr(f, '_mode')) - self.assertTrue(hasattr(f, '_writer')) - - # file - with tempfile.NamedTemporaryFile(delete=False) as tmp_f: - PATH = tmp_f.name - with ZstdFile(PATH, 'r') as f: - self.assertTrue(hasattr(f, '_fp')) - self.assertTrue(hasattr(f, '_mode')) - self.assertTrue(hasattr(f, '_buffer')) - with ZstdFile(PATH, 'w') as f: - self.assertTrue(hasattr(f, '_fp')) - self.assertTrue(hasattr(f, '_mode')) - self.assertTrue(hasattr(f, '_writer')) - os.remove(PATH) - class CompressorDecompressorTestCase(unittest.TestCase): def test_simple_bad_args(self): @@ -551,14 +461,14 @@ def test_simple_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1:2, 3:4}) self.assertRaises(TypeError, ZstdCompressor, rich_mem=True) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): ZstdCompressor(2**31) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): ZstdCompressor({2**31 : 100}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor({CParameter.windowLog:100}) - with 
self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor({3333 : 100}) # EndlessZstdDecompressor @@ -572,12 +482,12 @@ def test_simple_bad_args(self): self.assertRaises(TypeError, EndlessZstdDecompressor, option=b'abc') self.assertRaises(TypeError, EndlessZstdDecompressor, rich_mem=True) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): EndlessZstdDecompressor(option={2**31 : 100}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): EndlessZstdDecompressor(option={DParameter.windowLogMax:100}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): EndlessZstdDecompressor(option={3333 : 100}) # Method bad arguments @@ -636,17 +546,17 @@ def test_compress_parameters(self): d[CParameter.targetCBlockSize] = 150 ZstdCompressor(level_or_option=d) - # larger than signed int, ValueError + # larger than signed int d1 = d.copy() d1[CParameter.ldmBucketSizeLog] = 2**31 - self.assertRaises(ValueError, ZstdCompressor, d1) + self.assertRaises(OverflowError, ZstdCompressor, d1) # clamp compressionLevel - compress(b'', compressionLevel_values.max+1) - compress(b'', compressionLevel_values.min-1) + self.assertRaises(ValueError, compress, b'', compressionLevel_values.max+1) + self.assertRaises(ValueError, compress, b'', compressionLevel_values.min-1) - compress(b'', {CParameter.compressionLevel:compressionLevel_values.max+1}) - compress(b'', {CParameter.compressionLevel:compressionLevel_values.min-1}) + self.assertRaises(ValueError, compress, b'', {CParameter.compressionLevel:compressionLevel_values.max+1}) + self.assertRaises(ValueError, compress, b'', {CParameter.compressionLevel:compressionLevel_values.min-1}) # zstd lib doesn't support MT compression if not zstd_support_multithread: @@ -659,45 +569,43 @@ def test_compress_parameters(self): # out of bounds error msg option = {CParameter.windowLog:100} - with self.assertRaisesRegex(ZstdError, - (r'Error when setting zstd compression 
parameter "windowLog", ' - r'it should \d+ <= value <= \d+, provided value is 100\. ' - r'\(zstd v\d\.\d\.\d, (?:32|64)-bit build\)')): + with self.assertRaisesRegex(ValueError, + (r"compression parameter 'window_log' " + r'received an illegal value 100; the valid range is')): compress(b'', option) def test_decompress_parameters(self): d = {DParameter.windowLogMax : 15} EndlessZstdDecompressor(option=d) - # larger than signed int, ValueError + # larger than signed int d1 = d.copy() d1[DParameter.windowLogMax] = 2**31 - self.assertRaises(ValueError, EndlessZstdDecompressor, None, d1) + self.assertRaises(OverflowError, EndlessZstdDecompressor, None, d1) # out of bounds error msg option = {DParameter.windowLogMax:100} - with self.assertRaisesRegex(ZstdError, - (r'Error when setting zstd decompression parameter "windowLogMax", ' - r'it should \d+ <= value <= \d+, provided value is 100\. ' - r'\(zstd v\d\.\d\.\d, (?:32|64)-bit build\)')): + with self.assertRaisesRegex(ValueError, + (r"decompression parameter 'window_log_max' " + r'received an illegal value 100; the valid range is')): decompress(b'', option=option) def test_unknown_compression_parameter(self): KEY = 100001234 option = {CParameter.compressionLevel: 10, KEY: 200000000} - pattern = r'Zstd compression parameter.*?"unknown parameter \(key %d\)"' \ + pattern = r"invalid compression parameter 'unknown parameter \(key %d\)'" \ % KEY - with self.assertRaisesRegex(ZstdError, pattern): + with self.assertRaisesRegex(ValueError, pattern): ZstdCompressor(option) def test_unknown_decompression_parameter(self): KEY = 100001234 option = {DParameter.windowLogMax: DParameter.windowLogMax.bounds()[1], KEY: 200000000} - pattern = r'Zstd decompression parameter.*?"unknown parameter \(key %d\)"' \ + pattern = r"invalid decompression parameter 'unknown parameter \(key %d\)'" \ % KEY - with self.assertRaisesRegex(ZstdError, pattern): + with self.assertRaisesRegex(ValueError, pattern): ZstdDecompressor(option=option) 
@unittest.skipIf(not zstd_support_multithread, @@ -715,7 +623,7 @@ def test_zstd_multithread_compress(self): self.assertEqual(dat2, b) # richmem_compress() - with self.assertWarns(ResourceWarning): + with _check_deprecated(self): dat1 = richmem_compress(b, option) dat2 = decompress(dat1) self.assertEqual(dat2, b) @@ -746,8 +654,7 @@ def test_rich_mem_compress(self): def test_rich_mem_compress_warn(self): b = THIS_FILE_BYTES[:len(THIS_FILE_BYTES)//3] - # warning when multi-threading compression - with self.assertWarns(ResourceWarning): + with _check_deprecated(self): dat1 = richmem_compress(b, {CParameter.nbWorkers:2}) dat2 = decompress(dat1) @@ -759,14 +666,14 @@ def test_set_pledged_input_size(self): # wrong value c = ZstdCompressor() - with self.assertRaisesRegex(ValueError, r'64-bit unsigned integer'): + with self.assertRaisesRegex(ValueError, r'positive int less than'): c._set_pledged_input_size(-300) # wrong mode c = ZstdCompressor(1) c.compress(b'123456') self.assertEqual(c.last_mode, c.CONTINUE) - with self.assertRaisesRegex(RuntimeError, r'\.last_mode == \.FLUSH_FRAME'): + with self.assertRaisesRegex(ValueError, r'last_mode == FLUSH_FRAME'): c._set_pledged_input_size(300) # None value @@ -1105,7 +1012,7 @@ def test_compress_flushframe(self): self.assertEqual(nt.decompressed_size, len(THIS_FILE_BYTES)) def test_decompressor_arg(self): - zd = ZstdDict(b'12345678', True) + zd = ZstdDict(b'12345678', is_raw=True) with self.assertRaises(TypeError): d = ZstdDecompressor(zstd_dict={}) @@ -1344,13 +1251,13 @@ def test_function_decompress(self): self.assertEqual(decompress(self.UNKNOWN_FRAME_42), self.DECOMPRESSED_42) - with self.assertRaisesRegex(ZstdError, "incomplete frame"): + with self.assertRaisesRegex(ZstdError, "Compressed data ended before the end-of-stream marker was reached"): decompress(self.FRAME_42[:1]) - with self.assertRaisesRegex(ZstdError, "incomplete frame"): + with self.assertRaisesRegex(ZstdError, "Compressed data ended before the end-of-stream 
marker was reached"): decompress(self.FRAME_42[:-4]) - with self.assertRaisesRegex(ZstdError, "incomplete frame"): + with self.assertRaisesRegex(ZstdError, "Compressed data ended before the end-of-stream marker was reached"): decompress(self.FRAME_42[:-1]) # 2 frames @@ -1364,19 +1271,19 @@ def test_function_decompress(self): self.assertEqual(decompress(self.UNKNOWN_FRAME_42 + self.FRAME_60), self.DECOMPRESSED_42_60) - with self.assertRaisesRegex(ZstdError, "incomplete frame"): + with self.assertRaisesRegex(ZstdError, "Compressed data ended before the end-of-stream marker was reached"): decompress(self.FRAME_42_60[:-4]) - with self.assertRaisesRegex(ZstdError, "incomplete frame"): + with self.assertRaisesRegex(ZstdError, "Compressed data ended before the end-of-stream marker was reached"): decompress(self.UNKNOWN_FRAME_42_60[:-1]) # 130KB self.assertEqual(decompress(DAT_130K_C), DAT_130K_D) - with self.assertRaisesRegex(ZstdError, "incomplete frame"): + with self.assertRaisesRegex(ZstdError, "Compressed data ended before the end-of-stream marker was reached"): decompress(DAT_130K_C[:-4]) - with self.assertRaisesRegex(ZstdError, "incomplete frame"): + with self.assertRaisesRegex(ZstdError, "Compressed data ended before the end-of-stream marker was reached"): decompress(DAT_130K_C[:-1]) # Unknown frame descriptor @@ -1847,57 +1754,6 @@ def decompress(self, data): with self.assertRaises(ZstdError): d.decompress(b'123456789') - def test_reset_session(self): - D_DAT = SAMPLES[0] - C_DAT = compress(D_DAT, zstd_dict=TRAINED_DICT) - C_2DAT = C_DAT * 2 - TAIL = b'1234' - - # ZstdDecompressor - d = ZstdDecompressor(zstd_dict=TRAINED_DICT) - # part data - dat = d.decompress(C_DAT+TAIL, 10) - self.assertEqual(dat, D_DAT[:10]) - self.assertFalse(d.eof) - self.assertFalse(d.needs_input) - self.assertEqual(d.unused_data, b'') - - # reset - self.assertIsNone(d._reset_session()) - self.assertFalse(d.eof) - self.assertTrue(d.needs_input) - self.assertEqual(d.unused_data, b'') - - # 
full - self.assertEqual(d.decompress(C_DAT+TAIL), D_DAT) - self.assertTrue(d.eof) - self.assertFalse(d.needs_input) - self.assertEqual(d.unused_data, TAIL) - - # reset - self.assertIsNone(d._reset_session()) - self.assertFalse(d.eof) - self.assertTrue(d.needs_input) - self.assertEqual(d.unused_data, b'') - - # full - self.assertEqual(d.decompress(C_2DAT), D_DAT) - self.assertTrue(d.eof) - self.assertFalse(d.needs_input) - self.assertEqual(d.unused_data, C_DAT) - - # EndlessZstdDecompressor - d = EndlessZstdDecompressor(zstd_dict=TRAINED_DICT) - dat = d.decompress(C_2DAT, 10) - self.assertEqual(dat, D_DAT[:10]) - self.assertFalse(d.at_frame_edge) - self.assertFalse(d.needs_input) - - self.assertIsNone(d._reset_session()) # reset - self.assertTrue(d.at_frame_edge) - self.assertTrue(d.needs_input) - self.assertEqual(d.decompress(C_2DAT), D_DAT*2) - class ZstdDictTestCase(unittest.TestCase): def test_is_raw(self): @@ -1948,28 +1804,28 @@ def test_invalid_dict(self): # corrupted zd = ZstdDict(dict_content, is_raw=False) - with self.assertRaisesRegex(ZstdError, r'ZSTD_CDict.*?corrupted'): + with self.assertRaisesRegex(ZstdError, r'Failed to create a ZSTD_CDict instance'): ZstdCompressor(zstd_dict=zd.as_digested_dict) - with self.assertRaisesRegex(ZstdError, r'ZSTD_DDict.*?corrupted'): + with self.assertRaisesRegex(ZstdError, r'Failed to create a ZSTD_DDict instance'): ZstdDecompressor(zd) # wrong type - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdCompressor(zstd_dict=(zd, b'123')) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdCompressor(zstd_dict=(zd, 1, 2)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdCompressor(zstd_dict=(zd, -1)) - with 
self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdCompressor(zstd_dict=(zd, 3)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdDecompressor(zstd_dict=(zd, b'123')) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdDecompressor((zd, 1, 2)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdDecompressor((zd, -1)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdDecompressor((zd, 3)) def test_train_dict(self): @@ -1977,7 +1833,7 @@ def test_train_dict(self): global TRAINED_DICT TRAINED_DICT = train_dict(SAMPLES, DICT_SIZE1) - ZstdDict(TRAINED_DICT.dict_content, False) + ZstdDict(TRAINED_DICT.dict_content, is_raw=False) self.assertNotEqual(TRAINED_DICT.dict_id, 0) self.assertGreater(len(TRAINED_DICT.dict_content), 0) @@ -1997,17 +1853,10 @@ def test_train_dict(self): self.assertEqual(sample, dat2) def test_finalize_dict(self): - if zstd_version_info < (1, 4, 5): - return - DICT_SIZE2 = 200*1024 C_LEVEL = 6 - try: - dic2 = finalize_dict(TRAINED_DICT, SAMPLES, DICT_SIZE2, C_LEVEL) - except NotImplementedError: - # < v1.4.5 at compile-time, >= v.1.4.5 at run-time - return + dic2 = finalize_dict(TRAINED_DICT, SAMPLES, DICT_SIZE2, C_LEVEL) self.assertNotEqual(dic2.dict_id, 0) self.assertGreater(len(dic2.dict_content), 0) @@ -2043,16 +1892,7 @@ def test_train_dict_arguments(self): train_dict(SAMPLES, 0) def test_finalize_dict_arguments(self): - if zstd_version_info < (1, 4, 5): - with self.assertRaises(NotImplementedError): - finalize_dict({1:2}, [b'aaa', b'bbb'], 100*KB, 2) - return - - 
try: - finalize_dict(TRAINED_DICT, SAMPLES, 1*MB, 2) - except NotImplementedError: - # < v1.4.5 at compile-time, >= v.1.4.5 at run-time - return + finalize_dict(TRAINED_DICT, SAMPLES, 1*MB, 2) with self.assertRaises(ValueError): finalize_dict(TRAINED_DICT, [], 100*KB, 2) @@ -2063,100 +1903,10 @@ def test_finalize_dict_arguments(self): with self.assertRaises(ValueError): finalize_dict(TRAINED_DICT, SAMPLES, 0, 2) - @unittest.skipIf(PYZSTD_CONFIG[1] == 'cffi', 'cffi implementation') - def test_train_dict_c(self): - # argument wrong type - with self.assertRaises(TypeError): - _zstd._train_dict({}, [], 100) - with self.assertRaises(TypeError): - _zstd._train_dict(b'', 99, 100) - with self.assertRaises(TypeError): - _zstd._train_dict(b'', [], 100.1) - - # size > size_t - with self.assertRaises(ValueError): - _zstd._train_dict(b'', [2**64+1], 100) - - # dict_size <= 0 - with self.assertRaises(ValueError): - _zstd._train_dict(b'', [], 0) - - @unittest.skipIf(PYZSTD_CONFIG[1] == 'cffi', 'cffi implementation') - def test_finalize_dict_c(self): - if zstd_version_info < (1, 4, 5): - with self.assertRaises(NotImplementedError): - _zstd._finalize_dict(1, 2, 3, 4, 5) - return - - try: - _zstd._finalize_dict(TRAINED_DICT.dict_content, b'123', [3,], 1*MB, 5) - except NotImplementedError: - # < v1.4.5 at compile-time, >= v.1.4.5 at run-time - return - - # argument wrong type - with self.assertRaises(TypeError): - _zstd._finalize_dict({}, b'', [], 100, 5) - with self.assertRaises(TypeError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, {}, [], 100, 5) - with self.assertRaises(TypeError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, b'', 99, 100, 5) - with self.assertRaises(TypeError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, b'', [], 100.1, 5) - with self.assertRaises(TypeError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, b'', [], 100, 5.1) - - # size > size_t - with self.assertRaises(ValueError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, b'', 
[2**64+1], 100, 5) - - # dict_size <= 0 - with self.assertRaises(ValueError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, b'', [], 0, 5) - - def test_train_buffer_protocol_samples(self): - def _nbytes(dat): - if isinstance(dat, (bytes, bytearray)): - return len(dat) - return memoryview(dat).nbytes - - # prepare samples - chunk_lst = [] - wrong_size_lst = [] - correct_size_lst = [] - for _ in range(300): - arr = array.array('Q', [random.randint(0, 20) for i in range(20)]) - chunk_lst.append(arr) - correct_size_lst.append(_nbytes(arr)) - wrong_size_lst.append(len(arr)) - concatenation = b''.join(chunk_lst) - - # wrong size list - with self.assertRaisesRegex(ValueError, - "The samples size list doesn't match the concatenation's size"): - _train_dict(concatenation, wrong_size_lst, 100*1024) - - # correct size list - _train_dict(concatenation, correct_size_lst, 3*1024) - - # test _finalize_dict - if zstd_version_info < (1, 4, 5): - return - - # wrong size list - with self.assertRaisesRegex(ValueError, - "The samples size list doesn't match the concatenation's size"): - _finalize_dict(TRAINED_DICT.dict_content, - concatenation, wrong_size_lst, 300*1024, 5) - - # correct size list - _finalize_dict(TRAINED_DICT.dict_content, - concatenation, correct_size_lst, 300*1024, 5) - def test_as_prefix(self): # V1 V1 = THIS_FILE_BYTES - zd = ZstdDict(V1, True) + zd = ZstdDict(V1, is_raw=True) # V2 mid = len(V1) // 2 @@ -2173,7 +1923,7 @@ def test_as_prefix(self): self.assertEqual(decompress(dat, zd.as_prefix), V2) # use wrong prefix - zd2 = ZstdDict(SAMPLES[0], True) + zd2 = ZstdDict(SAMPLES[0], is_raw=True) try: decompressed = decompress(dat, zd2.as_prefix) except ZstdError: # expected @@ -2580,19 +2330,19 @@ def test_init_bad_check(self): with self.assertRaises(TypeError): ZstdFile(BytesIO(), "w", level_or_option='asd') # CHECK_UNKNOWN and anything above CHECK_ID_MAX should be invalid. 
- with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(BytesIO(), "w", level_or_option={999:9999}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(BytesIO(), "w", level_or_option={CParameter.windowLog:99}) with self.assertRaises(TypeError): ZstdFile(BytesIO(COMPRESSED_100_PLUS_32KB), "r", level_or_option=33) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): ZstdFile(BytesIO(COMPRESSED_100_PLUS_32KB), level_or_option={DParameter.windowLogMax:2**31}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(BytesIO(COMPRESSED_100_PLUS_32KB), level_or_option={444:333}) @@ -2607,33 +2357,25 @@ def test_init_sizes_arg(self): with ZstdFile(BytesIO(), 'r', read_size=1): pass with _check_deprecated(self): - with self.assertRaises(ValueError): - ZstdFile(BytesIO(), 'r', read_size=0) + ZstdFile(BytesIO(), 'r', read_size=0) with _check_deprecated(self): - with self.assertRaises(ValueError): - ZstdFile(BytesIO(), 'r', read_size=-1) + ZstdFile(BytesIO(), 'r', read_size=-1) with _check_deprecated(self): - with self.assertRaises(TypeError): - ZstdFile(BytesIO(), 'r', read_size=(10,)) + ZstdFile(BytesIO(), 'r', read_size=(10,)) with _check_deprecated(self): - with self.assertRaisesRegex(ValueError, 'read_size'): - ZstdFile(BytesIO(), 'w', read_size=10) + ZstdFile(BytesIO(), 'w', read_size=10) with _check_deprecated(self): with ZstdFile(BytesIO(), 'w', write_size=1): pass - with self.assertRaises(ValueError): - with _check_deprecated(self): - ZstdFile(BytesIO(), 'w', write_size=0) - with self.assertRaises(ValueError): - with _check_deprecated(self): - ZstdFile(BytesIO(), 'w', write_size=-1) - with self.assertRaises(TypeError): - with _check_deprecated(self): - ZstdFile(BytesIO(), 'w', write_size=(10,)) - with self.assertRaisesRegex(ValueError, 'write_size'): - with _check_deprecated(self): - ZstdFile(BytesIO(), 'r', write_size=10) + with _check_deprecated(self): + 
ZstdFile(BytesIO(), 'w', write_size=0) + with _check_deprecated(self): + ZstdFile(BytesIO(), 'w', write_size=-1) + with _check_deprecated(self): + ZstdFile(BytesIO(), 'w', write_size=(10,)) + with _check_deprecated(self): + ZstdFile(BytesIO(), 'r', write_size=10) def test_init_close_fp(self): # get a temp file name @@ -2641,7 +2383,7 @@ def test_init_close_fp(self): tmp_f.write(DAT_130K_C) filename = tmp_f.name - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): ZstdFile(filename, level_or_option={'a':'b'}) # for PyPy @@ -2824,102 +2566,14 @@ def test_writable(self): f.close() self.assertRaises(ValueError, f.writable) - def test_ZstdFileWriter(self): - bo = BytesIO() - - # wrong arg - with self.assertRaisesRegex(TypeError, 'level_or_option'): - _ZstdFileWriter( - fp=bo, - level_or_option=TRAINED_DICT, - zstd_dict=None, - write_size=131591) - with self.assertRaisesRegex(TypeError, 'zstd_dict'): - _ZstdFileWriter( - fp=bo, - level_or_option=3, - zstd_dict={1:2}, - write_size=131591) - with self.assertRaisesRegex(ValueError, 'write_size'): - _ZstdFileWriter( - fp=bo, - level_or_option=3, - zstd_dict=TRAINED_DICT, - write_size=0) - - w = _ZstdFileWriter( - fp=bo, - level_or_option=None, - zstd_dict=None, - write_size=131591) - # write - ret = w.write(DAT_130K_D) - self.assertEqual(ret[0], len(DAT_130K_D)) - self.assertGreater(ret[1], 0) - # flush block - ret = w.flush(ZstdCompressor.FLUSH_BLOCK) - self.assertEqual(ret[0], 0) - self.assertGreaterEqual(ret[1], 0) - # flush frame - ret = w.flush(ZstdCompressor.FLUSH_FRAME) - self.assertEqual(ret[0], 0) - self.assertGreaterEqual(ret[1], 0) - # flush .CONTINUE - with self.assertRaisesRegex(ValueError, - 'mode argument wrong value'): - w.flush(ZstdCompressor.CONTINUE) - - self.assertEqual(decompress(bo.getvalue()), DAT_130K_D) - - def test_ZstdFileReader(self): - # wrong arg - with self.assertRaisesRegex(TypeError, 'zstd_dict'): - _ZstdFileReader( - fp=BytesIO(self.FRAME_42), - zstd_dict={1:2}, 
option=None, - read_size=131075) - with self.assertRaisesRegex(TypeError, 'option'): - _ZstdFileReader( - fp=BytesIO(self.FRAME_42), - zstd_dict=TRAINED_DICT, option=3, - read_size=131075) - with self.assertRaisesRegex(ValueError, 'read_size'): - _ZstdFileReader( - fp=BytesIO(self.FRAME_42), - zstd_dict=TRAINED_DICT, option=3, - read_size=0) - - r = _ZstdFileReader( - fp=BytesIO(self.FRAME_42), - zstd_dict=None, option=None, - read_size=131075) - ba = bytearray(100) - mv = memoryview(ba) - - # cffi implementation can't distinguish read-only buffer - if PYZSTD_CONFIG[1] != 'cffi': - with self.assertRaisesRegex(BufferError, 'not writable'): - r.readinto(b'123') - - self.assertEqual(r.readinto(mv[0:0]), 0) - self.assertEqual(r.readinto(mv[:42]), 42) - self.assertEqual(mv[:42], self.DECOMPRESSED_42) - self.assertFalse(r.eof) - self.assertEqual(r.readinto(mv[:10]), 0) - self.assertTrue(r.eof) - def test_read(self): with ZstdFile(BytesIO(self.FRAME_42)) as f: self.assertEqual(f.read(), self.DECOMPRESSED_42) - self.assertTrue(f._buffer.raw._decomp.eof) self.assertEqual(f.read(), b"") - self.assertTrue(f._buffer.raw._decomp.eof) with ZstdFile(BytesIO(COMPRESSED_100_PLUS_32KB)) as f: self.assertEqual(f.read(), DECOMPRESSED_100_PLUS_32KB) - self.assertTrue(f._buffer.raw._decomp.eof) self.assertEqual(f.read(), b"") - self.assertTrue(f._buffer.raw._decomp.eof) with _check_deprecated(self): with ZstdFile(BytesIO(DAT_130K_C), @@ -3420,10 +3074,8 @@ class T: def read(self, size): return b'a' * size - with self.assertRaises(AttributeError): # on close - with ZstdFile(T(), 'w') as f: - with self.assertRaises(AttributeError): # on write - f.write(b'1234') + with self.assertRaises(TypeError): + ZstdFile(T(), 'w') # 3 with ZstdFile(BytesIO(), 'w') as f: @@ -3782,7 +3434,7 @@ def test_compress_stream(self): with self.assertRaisesRegex(TypeError, r'output_stream'): with _check_deprecated(self): compress_stream(b1, 123) - with self.assertRaisesRegex(TypeError, r'level_or_option'): + with 
self.assertRaisesRegex(TypeError, r'options'): with _check_deprecated(self): compress_stream(b1, b2, level_or_option='3') with self.assertRaisesRegex(TypeError, r'zstd_dict'): @@ -3791,18 +3443,17 @@ def test_compress_stream(self): with self.assertRaisesRegex(TypeError, r'zstd_dict'): with _check_deprecated(self): compress_stream(b1, b2, zstd_dict=b'1234567890') - with self.assertRaisesRegex(ValueError, r'pledged_input_size'): + with self.assertRaisesRegex(ValueError, r'size argument'): with _check_deprecated(self): compress_stream(b1, b2, pledged_input_size=-1) - with self.assertRaisesRegex(ValueError, r'pledged_input_size'): + with self.assertRaisesRegex(ValueError, r'size argument'): with _check_deprecated(self): compress_stream(b1, b2, pledged_input_size=2**64+1) with self.assertRaisesRegex(ValueError, r'read_size'): with _check_deprecated(self): compress_stream(b1, b2, read_size=-1) - with self.assertRaises(OverflowError): - with _check_deprecated(self): - compress_stream(b1, b2, write_size=2**64+1) + with _check_deprecated(self): + compress_stream(b1, b2, write_size=2**64+1) with self.assertRaisesRegex(TypeError, r'callback'): with _check_deprecated(self): compress_stream(b1, None, callback=None) @@ -3898,9 +3549,8 @@ def test_decompress_stream(self): with self.assertRaisesRegex(ValueError, r'read_size'): with _check_deprecated(self): decompress_stream(b1, b2, read_size=-1) - with self.assertRaises(OverflowError): - with _check_deprecated(self): - decompress_stream(b1, b2, write_size=2**64+1) + with _check_deprecated(self): + decompress_stream(b1, b2, write_size=2**64+1) with self.assertRaisesRegex(TypeError, r'callback'): with _check_deprecated(self): decompress_stream(b1, None, callback=None) @@ -3967,12 +3617,6 @@ def write(self, b): with self.assertRaises(TypeError): with _check_deprecated(self): decompress_stream(M(), BytesIO()) - with self.assertRaises(TypeError): - with _check_deprecated(self): - compress_stream(BytesIO(THIS_FILE_BYTES), M()) - with 
self.assertRaises(TypeError): - with _check_deprecated(self): - decompress_stream(BytesIO(COMPRESSED_100_PLUS_32KB), M()) # wrong value class N: @@ -3984,32 +3628,20 @@ def write(self, b): return self.ret_value # < 0 - with self.assertRaisesRegex(ValueError, r'input_stream.readinto.*?<= \d+'): + with self.assertRaises(TypeError): with _check_deprecated(self): compress_stream(N(-1), BytesIO()) - with self.assertRaisesRegex(ValueError, r'input_stream.readinto.*?<= \d+'): + with self.assertRaises(TypeError): with _check_deprecated(self): decompress_stream(N(-2), BytesIO()) - with self.assertRaisesRegex(ValueError, r'output_stream.write.*?<= \d+'): - with _check_deprecated(self): - compress_stream(BytesIO(THIS_FILE_BYTES), N(-2)) - with self.assertRaisesRegex(ValueError, r'output_stream.write.*?<= \d+'): - with _check_deprecated(self): - decompress_stream(BytesIO(COMPRESSED_100_PLUS_32KB), N(-1)) # should > upper bound (~128 KiB) - with self.assertRaisesRegex(ValueError, r'input_stream.readinto.*?<= \d+'): + with self.assertRaises(TypeError): with _check_deprecated(self): compress_stream(N(10000000), BytesIO()) - with self.assertRaisesRegex(ValueError, r'input_stream.readinto.*?<= \d+'): + with self.assertRaises(TypeError): with _check_deprecated(self): decompress_stream(N(10000000), BytesIO()) - with self.assertRaisesRegex(ValueError, r'output_stream.write.*?<= \d+'): - with _check_deprecated(self): - compress_stream(BytesIO(THIS_FILE_BYTES), N(10000000)) - with self.assertRaisesRegex(ValueError, r'output_stream.write.*?<= \d+'): - with _check_deprecated(self): - decompress_stream(BytesIO(COMPRESSED_100_PLUS_32KB), N(10000000)) def test_empty_input_no_callback(self): def cb(a,b,c,d): @@ -4022,7 +3654,7 @@ def cb(a,b,c,d): decompress_stream(io.BytesIO(b''), io.BytesIO(), callback=cb) def test_stream_dict(self): - zd = ZstdDict(THIS_FILE_BYTES, True) + zd = ZstdDict(THIS_FILE_BYTES, is_raw=True) # default with BytesIO(THIS_FILE_BYTES) as bi, BytesIO() as bo: @@ -4053,7 
+3685,7 @@ def test_stream_dict(self): self.assertEqual(decompressed, THIS_FILE_BYTES) def test_stream_prefix(self): - zd = ZstdDict(THIS_FILE_BYTES, True) + zd = ZstdDict(THIS_FILE_BYTES, is_raw=True) with BytesIO(THIS_FILE_BYTES) as bi, BytesIO() as bo: with _check_deprecated(self): @@ -4183,7 +3815,7 @@ def test_long_range(self): stdout=subprocess.PIPE, stderr=subprocess.PIPE) self.assertRegex(result.stderr, - rb'(32|64)-bit build, --long value should:') + rb'--long value should:') def test_dictID_range(self): OUTPUT_FILE = os.path.join(self.dir_name, 'dictid_range') diff --git a/zstd b/zstd deleted file mode 160000 index f8745da..0000000 --- a/zstd +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f8745da6ff1ad1e7bab384bd1f9d742439278e99