From 760b81eb03f8372661aaed85bfd30cab84ae0977 Mon Sep 17 00:00:00 2001 From: houfu Date: Mon, 19 May 2025 00:32:32 +0800 Subject: [PATCH 01/14] Fix: Add character-level diffing and single token refinement Introduce a `character_level_diffing` option to refine single token replacements by analyzing prefix and suffix changes. This enables more precise diffing for minor text variations and improves the granularity of redline outputs. Signed-off-by: houfu --- redlines/processor.py | 153 +++++++++++++++++++++++++++++++++++++++++- redlines/redlines.py | 11 ++- 2 files changed, 161 insertions(+), 3 deletions(-) diff --git a/redlines/processor.py b/redlines/processor.py index 93e547d..3e90102 100644 --- a/redlines/processor.py +++ b/redlines/processor.py @@ -124,6 +124,9 @@ class WholeDocumentProcessor(RedlinesProcessor): source: str test: str + def __init__(self, character_level_diffing: bool = True): + self.character_level_diffing = character_level_diffing + def process( self, source: Union[Document, str], test: Union[Document, str] ) -> List[Redline]: @@ -143,7 +146,7 @@ def process( matcher = SequenceMatcher(None, seq_source, seq_test) - return [ + redlines = [ Redline( source_chunk=Chunk(text=seq_source, chunk_location=None), test_chunk=Chunk(text=seq_test, chunk_location=None), @@ -151,3 +154,151 @@ def process( ) for opcode in matcher.get_opcodes() ] + + if self.character_level_diffing: + return self._refine_single_token_replacements(redlines) + return redlines + + def _refine_single_token_replacements( + self, redlines: List[Redline] + ) -> List[Redline]: + refined_redlines = [] + + for redline in redlines: + if redline.opcodes[0] == "replace": + tag, i1, i2, j1, j2 = redline.opcodes + + # Check if this is a single token replacement + if i2 - i1 == 1 and j2 - j1 == 1: + source_token = redline.source_chunk.text[i1].strip() + test_token = redline.test_chunk.text[j1].strip() + + # Skip tokens containing paragraph markers + if "¶" in source_token or "¶" in test_token: + refined_redlines.append(redline) + continue + + # Now check for prefix/suffix changes only + refined_opcodes = self._character_level_diff_if_prefix_suffix( + source_token, + test_token, + i1, + i2, + j1, + j2, + redline.source_chunk, + redline.test_chunk, + ) + + if refined_opcodes: + # Replace the original redline with the refined one(s) + refined_redlines.extend(refined_opcodes) + continue + + # If we didn't refine this redline, keep the original + refined_redlines.append(redline) + + return refined_redlines + + def _character_level_diff_if_prefix_suffix( + self, + source_token: str, + test_token: str, + i1: int, + i2: int, + j1: int, + j2: int, + source_chunk: Chunk, + test_chunk: Chunk, + ) -> List[Redline]: + # If tokens are identical, no need for character diffing + if source_token == test_token: + return [] + + # Find the longest common prefix + prefix_len = 0 + for i in range(min(len(source_token), len(test_token))): + if source_token[i] != test_token[i]: + break + prefix_len = i + 1 + + # Find the longest common suffix + suffix_len = 0 + for i in range( + 1, min(len(source_token) - prefix_len, len(test_token) - prefix_len) + 1 + ): + if source_token[-i] != test_token[-i]: + break + suffix_len = i + + # If either prefix or suffix is different (but not both), use character-level diffing + is_prefix_change = prefix_len == 0 and suffix_len > 0 + is_suffix_change = prefix_len > 0 and suffix_len == 0 + is_prefix_and_suffix_same = prefix_len > 0 and suffix_len > 0 + + if is_prefix_change or is_suffix_change or is_prefix_and_suffix_same: + # Create new redlines with character-level opcodes + result = [] + + # For prefix changes + if prefix_len > 0: + # Add the common prefix as 'equal' + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=("equal", i1, i1 + prefix_len, j1, j1 + prefix_len), + ) + ) + + # Add the different middle part + if len(source_token) - prefix_len - suffix_len > 0: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "delete", + i1 + prefix_len, + i1 + len(source_token) - suffix_len, + j1 + prefix_len, + j1 + prefix_len, + ), + ) + ) + + if len(test_token) - prefix_len - suffix_len > 0: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "insert", + i1 + prefix_len, + i1 + prefix_len, + j1 + prefix_len, + j1 + len(test_token) - suffix_len, + ), + ) + ) + + # For suffix changes, add the common suffix as 'equal' + if suffix_len > 0: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "equal", + i1 + len(source_token) - suffix_len, + i1 + len(source_token), + j1 + len(test_token) - suffix_len, + j1 + len(test_token), + ), + ) + ) + + return result + + # If not a prefix/suffix change only, return empty list to indicate no refinement + return [] diff --git a/redlines/redlines.py b/redlines/redlines.py index 0b2a3e6..5aca55f 100644 --- a/redlines/redlines.py +++ b/redlines/redlines.py @@ -41,7 +41,11 @@ def test(self, value): self._seq2 = tokenize_text(concatenate_paragraphs_and_add_chr_182(value)) def __init__( - self, source: str | Document, test: str | Document | None = None, **options + self, + source: str | Document, + test: str | Document | None = None, + character_level_diffing: bool = True, + **options, ): """ Redline is a class used to compare text, and producing human-readable differences or deltas @@ -85,7 +89,10 @@ def __init__( if test: self.test = test.text if isinstance(test, Document) else test # self.compare() - self.processor = WholeDocumentProcessor() + self.character_level_diffing = character_level_diffing + self.processor = WholeDocumentProcessor( + character_level_diffing=character_level_diffing + ) @property def opcodes(self) -> list[tuple[str, int, int, int, int]]: From 5394aa32df613c5d7ed3d4d419609aac85f1059e Mon Sep 17 00:00:00 2001 From: houfu Date: Mon, 19 May 2025 07:25:17 +0800 Subject: [PATCH 02/14] =?UTF-8?q?skip=20character-level=20refinements=20wh?= =?UTF-8?q?en=20they=E2=80=99re=20only=20inserting=20or=20deleting=20trail?= =?UTF-8?q?ing=20punctuation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: houfu --- redlines/processor.py | 66 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/redlines/processor.py b/redlines/processor.py index 3e90102..82b1bf2 100644 --- a/redlines/processor.py +++ b/redlines/processor.py @@ -191,9 +191,15 @@ def _refine_single_token_replacements( ) if refined_opcodes: - # Replace the original redline with the refined one(s) - refined_redlines.extend(refined_opcodes) - continue + # Skip refinements that only insert or delete trailing punctuation, + # so that tokens like "weekend"->"weekend." are handled as full-token replacements + if not ( + len(refined_opcodes) == 2 + and refined_opcodes[0].opcodes[0] == "equal" + and refined_opcodes[1].opcodes[0] in ("insert", "delete") + ): + refined_redlines.extend(refined_opcodes) + continue # If we didn't refine this redline, keep the original refined_redlines.append(redline) @@ -231,6 +237,60 @@ def _character_level_diff_if_prefix_suffix( break suffix_len = i + # Special case for trailing punctuation differences + # Handle case where one token ends with punctuation and the other doesn't + if source_token.rstrip(".!?,;:") == test_token.rstrip(".!?,;:") and ( + source_token[-1] in ".!?,;:" or test_token[-1] in ".!?,;:" + ): + # Create special handling for this case + base_token = source_token.rstrip(".!?,;:") + result = [] + # Add the common part + if base_token: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "equal", + i1, + i1 + len(base_token), + j1, + j1 + len(base_token), + ), + ) + ) + # Handle the punctuation differences + if source_token != base_token: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "delete", + i1 + len(base_token), + i1 + len(source_token), + j1 + len(base_token), + j1 + len(base_token), + ), + ) + ) + if test_token != base_token: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "insert", + i1 + len(base_token), + i1 + len(base_token), + j1 + len(base_token), + j1 + len(test_token), + ), + ) + ) + return result + # If either prefix or suffix is different (but not both), use character-level diffing is_prefix_change = prefix_len == 0 and suffix_len > 0 is_suffix_change = prefix_len > 0 and suffix_len == 0 From 510951a5cc31e43c6d280722ad08a8bccc21d693 Mon Sep 17 00:00:00 2001 From: Houfu Ang Date: Mon, 19 May 2025 23:26:42 +0800 Subject: [PATCH 03/14] Project maintenance (#49) * Update email addresses, remove Matrix references, and revise license Updated contact emails to use a standardized format with "OUTLOOK dot sg". Removed all references to the Matrix chat platform for communication. Updated license to include contributors and extend copyright to 2025. Signed-off-by: houfu * Update workflows and dependencies to use uv and upgrade libraries Replaced Poetry with uv in GitHub workflows for dependency management and streamlined project setup. Updated Python dependencies, libraries, and specified versions in `uv.lock` for improved stability and compatibility. Signed-off-by: houfu --------- Signed-off-by: houfu --- .github/workflows/docs.yml | 15 ++- .github/workflows/python-package.yml | 21 ++-- .github/workflows/python-publish.yml | 10 +- CONTRIBUTING.md | 15 +-- Code_of_Conduct.md | 2 +- LICENSE.txt | 2 +- README.md | 1 - pyproject.toml | 57 ++++------- uv.lock | 139 +++++++++++++++++---------- 9 files changed, 132 insertions(+), 130 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 6ff05b8..6f3e3d7 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -19,18 +19,17 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.11' - - name: Install Poetry - uses: snok/install-poetry@v1 + - name: Install uv + uses: astral-sh/setup-uv@v5 with: - virtualenvs-create: false - virtualenvs-in-project: true - installer-parallel: true - - name: Install project - run: poetry install --no-interaction + enable-cache: true + cache-dependency-glob: "uv.lock" + - name: Install the project + run: uv sync --all-extras --dev - run: pdoc -o docs/ redlines - uses: actions/upload-pages-artifact@v2 with: diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 8e21fd4..936a47f 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -19,22 +19,19 @@ jobs: python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install Poetry - uses: snok/install-poetry@v1 + - name: Install uv + uses: astral-sh/setup-uv@v5 with: - virtualenvs-create: true - virtualenvs-in-project: true - installer-parallel: true - - name: Install dependencies - run: poetry install --no-interaction --no-root - - name: Install project - run: poetry install --no-interaction + enable-cache: true + cache-dependency-glob: "uv.lock" + - name: Install the project + run: uv sync --all-extras --dev - name: Test with pytest run: | source .venv/bin/activate - pytest + uv run pytest diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index bdaab28..3fad564 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -21,17 +21,17 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install build + uv python -m pip install --upgrade pip + uv pip install build - name: Build package - run: python -m build + run: uv python -m build - name: Publish package uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 with: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 55078d0..c798ce6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -32,7 +32,7 @@ community looks forward to your contributions. 🎉 This project and everyone participating in it is governed by the [CONTRIBUTING.md Code of Conduct](Code_of_Conduct.md). By participating, you are expected to uphold this code. Please report unacceptable behavior -to . +to . ## I Have a Question @@ -45,7 +45,7 @@ search the internet for answers first. If you then still feel the need to ask a question and need clarification, we recommend the following: -- Open an [Issue](/issues/new) or join the [Matrix Chat](https://matrix.to/#/#redlines:matrix.esq.social). +- Open an [Issue](/issues/new). - Provide as much context as you can about what you're running into. - Provide project and platform versions (python version, platform like streamlit, colab etc), depending on what seems relevant. @@ -81,13 +81,11 @@ following steps in advance to help us fix any potential bug as fast as possible. - Possibly your input and the output - Can you reliably reproduce the issue? And can you also reproduce it with older versions? -You can join the project's [matrix room](https://chat.matrix.esq.social/#/room/#redlines:matrix.esq.social) to ask about -anything or get guidance. #### How Do I Submit a Good Bug Report? > You must never report security related issues, vulnerabilities or bugs including sensitive information to the issue -> tracker, or elsewhere in public. Instead sensitive bugs must be sent by email to . +> tracker, or elsewhere in public. Instead sensitive bugs must be sent by email to . We use GitHub issues to track bugs and errors. If you run into an issue with the project: @@ -144,8 +142,7 @@ Enhancement suggestions are tracked as [GitHub issues](/issues). ### Your First Code Contribution -Be relaxed and open to comments. Join the [Matrix Chat](https://matrix.to/#/#redlines:matrix.esq.social) to discuss or -get guidance. +Be relaxed and open to comments. We are all here to learn and grow together. ### Improving The Documentation @@ -170,10 +167,8 @@ It's helpful to use a "verb" (like add, fix etc) and a short description of what ## Join The Project Team -Please [email me](mailto:houfu@lovelawrobots.com) or file an [issue](/issues). +Please [email me](mailto:houfu@OUTLOOK dot sg) or file an [issue](/issues). -You can test the waters by joining the [Matrix Chat](https://matrix.to/#/#redlines:matrix.esq.social) and letting us -know what you think. ## Attribution diff --git a/Code_of_Conduct.md b/Code_of_Conduct.md index 298c514..cc88605 100644 --- a/Code_of_Conduct.md +++ b/Code_of_Conduct.md @@ -41,7 +41,7 @@ Examples of unacceptable behaviour by participants include: ## Enforcement Violations of the Code of Conduct may be reported by sending an email -to [houfu@lovelawrobots.com](mailto:houfu@lovelawrobots.com). +to [houfu@OUTLOOK dot sg](mailto:houfu@outlook.sg). All reports will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. Further details of specific enforcement policies may be posted separately. diff --git a/LICENSE.txt b/LICENSE.txt index ee088ec..f61d10f 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 Ang Hou Fu +Copyright (c) 2022-2025 Ang Hou Fu and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 2f82e1f..64b1e44 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ ![GitHub Release Date - Published_At](https://img.shields.io/github/release-date/houfu/redlines) ![GitHub last commit (by committer)](https://img.shields.io/github/last-commit/houfu/redlines) ![PyPI - License](https://img.shields.io/pypi/l/redlines) -[![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/#redlines:matrix.esq.social) `Redlines` produces a text showing the differences between two strings/text. The changes are represented with strike-throughs and underlines, which looks similar to Microsoft Word's track changes. This method of showing changes is diff --git a/pyproject.toml b/pyproject.toml index 48539b5..f60009a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,51 +3,30 @@ name = "redlines" version = "0.5.1" description = "Compare text, and produce human-readable differences or deltas which look like track changes in Microsoft Word." readme = "README.md" -dependencies = [ - "click>=8.1.7", - "rich-click>=1.8.3", - "rich>=13.9.1", +authors = [ + { name = "houfu", email = "houfu@outlook.sg" } ] +license = { text = "MIT" } requires-python = ">=3.8" - -[tool.uv] -dev-dependencies = [ - "pdoc>=14.7.0", - "pytest>=8.3.3", - "pytest-cov>=5.0.0", - "pytest-sugar>=1.0.0", - "tox>=4.21.0", +dependencies = [ + "click>=8.1.7", + "rich-click>=1.6.1", + "rich>=13.3.5", ] +urls = { homepage = "https://houfu.github.io/redlines/", repository = "https://github.com/houfu/redlines" } [project.scripts] redlines = 'redlines.cli:cli' -[tool.poetry] -name = "redlines" -version = "0.5.1" -description = "Compare text, and produce human-readable differences or deltas which look like track changes in Microsoft Word." -authors = ["houfu "] -license = "MIT" -readme = "README.md" -homepage = "https://houfu.github.io/redlines/" -repository = "https://github.com/houfu/redlines" - -[tool.poetry.dependencies] -python = "<4.0,>=3.8" -rich = "^13.3.5" -click = "^8.1.3" -rich-click = "^1.6.1" - -[tool.poetry.dev-dependencies] -pytest = "^7.0.1" -pytest-sugar = "^0.9.4" -pytest-cov = "^3.0.0" -tox = "^3.24.5" -pdoc = "^14.1.0" +[tool.uv] +dev-dependencies = [ + "pdoc>=14.1.0", + "pytest>=7.4.2", + "pytest-cov>=3.0.0", + "pytest-sugar>=0.9.7", + "tox>=3.28.0", +] [build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" - -[tool.poetry.scripts] -redlines = 'redlines.cli:cli' +requires = ["hatchling>=1.18.0"] +build-backend = "hatchling.build" \ No newline at end of file diff --git a/uv.lock b/uv.lock index 1eef6f3..2580a3b 100644 --- a/uv.lock +++ b/uv.lock @@ -16,11 +16,11 @@ wheels = [ [[package]] name = "cachetools" -version = "5.5.0" +version = "5.5.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c3/38/a0f315319737ecf45b4319a8cd1f3a908e29d9277b46942263292115eee7/cachetools-5.5.0.tar.gz", hash = "sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a", size = 27661 } +sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/07/14f8ad37f2d12a5ce41206c21820d8cb6561b728e51fad4530dff0552a67/cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292", size = 9524 }, + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080 }, ] [[package]] @@ -34,14 +34,14 @@ wheels = [ [[package]] name = "click" -version = "8.1.7" +version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "platform_system == 'Windows'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 } +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } wheels = [ - { url = "https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", size = 97941 }, + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 }, ] [[package]] @@ -139,20 +139,23 @@ toml = [ [[package]] name = "distlib" -version = "0.3.8" +version = "0.3.9" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c4/91/e2df406fb4efacdf46871c25cde65d3c6ee5e173b7e5a4547a47bae91920/distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64", size = 609931 } +sdist = { url = "https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923 } wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/41/9307e4f5f9976bc8b7fea0b66367734e8faf3ec84bc0d412d8cfabbb66cd/distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784", size = 468850 }, + { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973 }, ] [[package]] name = "exceptiongroup" -version = "1.2.2" +version = "1.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/09/35/2495c4ac46b980e4ca1f6ad6db102322ef3ad2410b79fdde159a4b0f3b92/exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc", size = 28883 } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749 } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 }, + { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674 }, ] [[package]] @@ -166,23 +169,23 @@ wheels = [ [[package]] name = "iniconfig" -version = "2.0.0" +version = "2.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 }, ] [[package]] name = "jinja2" -version = "3.1.4" +version = "3.1.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markupsafe" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ed/55/39036716d19cab0747a5020fc7e907f362fbf48c984b14e62127f7e68e5d/jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369", size = 240245 } +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 } wheels = [ - { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 }, + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 }, ] [[package]] @@ -266,11 +269,11 @@ wheels = [ [[package]] name = "packaging" -version = "24.1" +version = "25.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/51/65/50db4dda066951078f0a96cf12f4b9ada6e4b811516bf0262c0f4f7064d4/packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", size = 148788 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } wheels = [ - { url = "https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124", size = 53985 }, + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, ] [[package]] @@ -308,11 +311,11 @@ wheels = [ [[package]] name = "pygments" -version = "2.18.0" +version = "2.19.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8e/62/8336eff65bcbc8e4cb5d05b55faf041285951b6e80f33e2bff2024788f31/pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199", size = 4891905 } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a", size = 1205513 }, + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] [[package]] @@ -330,7 +333,7 @@ wheels = [ [[package]] name = "pytest" -version = "8.3.3" +version = "8.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -340,9 +343,9 @@ dependencies = [ { name = "pluggy" }, { name = "tomli", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8b/6c/62bbd536103af674e227c41a8f3dcd022d591f6eed5facb5a0f31ee33bbc/pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181", size = 1442487 } +sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 } wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/77/7440a06a8ead44c7757a64362dd22df5760f9b12dc5f11b6188cd2fc27a0/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2", size = 342341 }, + { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 }, ] [[package]] @@ -409,39 +412,39 @@ dev = [ [[package]] name = "rich" -version = "13.9.1" +version = "14.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b3/78/87d00a1df7c457ad9aa0139f01b8a11c67209f27f927c503b0109bf2ed6c/rich-13.9.1.tar.gz", hash = "sha256:097cffdf85db1babe30cc7deba5ab3a29e1b9885047dab24c57e9a7f8a9c1466", size = 222907 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/71/cd9549551f1aa11cf7e5f92bae5817979e8b3a19e31e8810c15f3f45c311/rich-13.9.1-py3-none-any.whl", hash = "sha256:b340e739f30aa58921dc477b8adaa9ecdb7cecc217be01d93730ee1bc8aa83be", size = 242147 }, + { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, ] [[package]] name = "rich-click" -version = "1.8.3" +version = "1.8.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "rich" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3a/a9/a1f1af87e83832d794342fbc09c96cc7cd6798b8dfb8adfbe6ccbef8d70c/rich_click-1.8.3.tar.gz", hash = "sha256:6d75bdfa7aa9ed2c467789a0688bc6da23fbe3a143e19aa6ad3f8bac113d2ab3", size = 38209 } +sdist = { url = "https://files.pythonhosted.org/packages/a6/7a/4b78c5997f2a799a8c5c07f3b2145bbcda40115c4d35c76fbadd418a3c89/rich_click-1.8.8.tar.gz", hash = "sha256:547c618dea916620af05d4a6456da797fbde904c97901f44d2f32f89d85d6c84", size = 39066 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/ea/5a0c5a8e6532e971983d1b0fc99268eb66a10f489da35d9022ce01044191/rich_click-1.8.3-py3-none-any.whl", hash = "sha256:636d9c040d31c5eee242201b5bf4f2d358bfae4db14bb22ec1cafa717cfd02cd", size = 35032 }, + { url = "https://files.pythonhosted.org/packages/fa/69/963f0bf44a654f6465bdb66fb5a91051b0d7af9f742b5bd7202607165036/rich_click-1.8.8-py3-none-any.whl", hash = "sha256:205aabd5a98e64ab2c105dee9e368be27480ba004c7dfa2accd0ed44f9f1550e", size = 35747 }, ] [[package]] name = "six" -version = "1.16.0" +version = "1.17.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/71/39/171f1c67cd00715f190ba0b100d606d440a28c93c7714febeca8b79af85e/six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", size = 34041 } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254", size = 11053 }, + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, ] [[package]] @@ -455,16 +458,46 @@ wheels = [ [[package]] name = "tomli" -version = "2.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c0/3f/d7af728f075fb08564c5949a9c95e44352e23dee646869fa104a3b2060a3/tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f", size = 15164 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", size = 12757 }, +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077 }, + { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429 }, + { url = "https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067 }, + { url = "https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030 }, + { url = "https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898 }, + { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894 }, + { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319 }, + { url = "https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273 }, + { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 98310 }, + { url = "https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309 }, + { url = "https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762 }, + { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453 }, + { url = "https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486 }, + { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349 }, + { url = "https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159 }, + { url = "https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243 }, + { url = "https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645 }, + { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584 }, + { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875 }, + { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418 }, + { url = "https://files.pythonhosted.org/packages/04/90/2ee5f2e0362cb8a0b6499dc44f4d7d48f8fff06d28ba46e6f1eaa61a1388/tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7", size = 132708 }, + { url = "https://files.pythonhosted.org/packages/c0/ec/46b4108816de6b385141f082ba99e315501ccd0a2ea23db4a100dd3990ea/tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", size = 123582 }, + { url = "https://files.pythonhosted.org/packages/a0/bd/b470466d0137b37b68d24556c38a0cc819e8febe392d5b199dcd7f578365/tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", size = 232543 }, + { url = "https://files.pythonhosted.org/packages/d9/e5/82e80ff3b751373f7cead2815bcbe2d51c895b3c990686741a8e56ec42ab/tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", size = 241691 }, + { url = "https://files.pythonhosted.org/packages/05/7e/2a110bc2713557d6a1bfb06af23dd01e7dde52b6ee7dadc589868f9abfac/tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", size = 251170 }, + { url = "https://files.pythonhosted.org/packages/64/7b/22d713946efe00e0adbcdfd6d1aa119ae03fd0b60ebed51ebb3fa9f5a2e5/tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", size = 236530 }, + { url = "https://files.pythonhosted.org/packages/38/31/3a76f67da4b0cf37b742ca76beaf819dca0ebef26d78fc794a576e08accf/tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", size = 258666 }, + { url = "https://files.pythonhosted.org/packages/07/10/5af1293da642aded87e8a988753945d0cf7e00a9452d3911dd3bb354c9e2/tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", size = 243954 }, + { url = "https://files.pythonhosted.org/packages/5b/b9/1ed31d167be802da0fc95020d04cd27b7d7065cc6fbefdd2f9186f60d7bd/tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", size = 98724 }, + { url = "https://files.pythonhosted.org/packages/c7/32/b0963458706accd9afcfeb867c0f9175a741bf7b19cd424230714d722198/tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", size = 109383 }, + { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257 }, ] [[package]] name = "tox" -version = "4.21.0" +version = "4.25.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cachetools" }, @@ -479,39 +512,39 @@ dependencies = [ { name = "typing-extensions", marker = "python_full_version < '3.11'" }, { name = "virtualenv" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3b/f0/6865d4131e78583fe5956655ab6fd85491aa80d3a7314fdbb8d87f1c7b25/tox-4.21.0.tar.gz", hash = "sha256:e64dd9847ff3a7ec90368be412d7efe61a39caf043222ffbe9ad638ea435f6f6", size = 187638 } +sdist = { url = "https://files.pythonhosted.org/packages/fe/87/692478f0a194f1cad64803692642bd88c12c5b64eee16bf178e4a32e979c/tox-4.25.0.tar.gz", hash = "sha256:dd67f030317b80722cf52b246ff42aafd3ed27ddf331c415612d084304cf5e52", size = 196255 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ff/8a/e353b35c80323a84c432f1611a8bb965e956119e2b0917ab8b4d28038172/tox-4.21.0-py3-none-any.whl", hash = "sha256:693ac51378255d34ad7aab6dd2ce9ab6a1cf1924eb930183fde850ad503b681d", size = 165223 }, + { url = "https://files.pythonhosted.org/packages/f9/38/33348de6fc4b1afb3d76d8485c8aecbdabcfb3af8da53d40c792332e2b37/tox-4.25.0-py3-none-any.whl", hash = "sha256:4dfdc7ba2cc6fdc6688dde1b21e7b46ff6c41795fb54586c91a3533317b5255c", size = 172420 }, ] [[package]] name = "typing-extensions" -version = "4.12.2" +version = "4.13.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 } +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967 } wheels = [ - { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806 }, ] [[package]] name = "virtualenv" -version = "20.26.6" +version = "20.31.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "distlib" }, { name = "filelock" }, { name = "platformdirs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3f/40/abc5a766da6b0b2457f819feab8e9203cbeae29327bd241359f866a3da9d/virtualenv-20.26.6.tar.gz", hash = "sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48", size = 9372482 } +sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316 } wheels = [ - { url = "https://files.pythonhosted.org/packages/59/90/57b8ac0c8a231545adc7698c64c5a36fa7cd8e376c691b9bde877269f2eb/virtualenv-20.26.6-py3-none-any.whl", hash = "sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2", size = 5999862 }, + { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982 }, ] [[package]] name = "wheel" -version = "0.44.0" +version = "0.45.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b7/a0/95e9e962c5fd9da11c1e28aa4c0d8210ab277b1ada951d2aee336b505813/wheel-0.44.0.tar.gz", hash = "sha256:a29c3f2817e95ab89aa4660681ad547c0e9547f20e75b0562fe7723c9a2a9d49", size = 100733 } +sdist = { url = "https://files.pythonhosted.org/packages/8a/98/2d9906746cdc6a6ef809ae6338005b3f21bb568bea3165cfc6a243fdc25c/wheel-0.45.1.tar.gz", hash = "sha256:661e1abd9198507b1409a20c02106d9670b2576e916d58f520316666abca6729", size = 107545 } wheels = [ - { url = "https://files.pythonhosted.org/packages/1b/d1/9babe2ccaecff775992753d8686970b1e2755d21c8a63be73aba7a4e7d77/wheel-0.44.0-py3-none-any.whl", hash = "sha256:2376a90c98cc337d18623527a97c31797bd02bad0033d41547043a1cbfbe448f", size = 67059 }, + { url = "https://files.pythonhosted.org/packages/0b/2c/87f3254fd8ffd29e4c02732eee68a83a1d3c346ae39bc6822dcbcb697f2b/wheel-0.45.1-py3-none-any.whl", hash = "sha256:708e7481cc80179af0e556bbf0cc00b8444c7321e2700b8d8580231d13017248", size = 72494 }, ] From 615799a8e55ca4878b12ab24c0e630a58c028ac7 Mon Sep 17 00:00:00 2001 From: houfu Date: Mon, 19 May 2025 23:43:11 +0800 Subject: [PATCH 04/14] Enable manual workflow dispatch and fix indentation issues Added support for manual workflow dispatch in python-publish.yml with input for version control. Also corrected inconsistent indentation in both workflows for better readability and proper YAML formatting. Signed-off-by: houfu --- .github/workflows/python-package.yml | 22 ++++++++--------- .github/workflows/python-publish.yml | 36 ++++++++++++++++------------ 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 936a47f..d69a103 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -20,18 +20,18 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} - name: Install uv uses: astral-sh/setup-uv@v5 - with: - enable-cache: true - cache-dependency-glob: "uv.lock" + with: + enable-cache: true + cache-dependency-glob: "uv.lock" - name: Install the project run: uv sync --all-extras --dev - - name: Test with pytest - run: | - source .venv/bin/activate - uv run pytest + - name: Test with pytest + run: | + source .venv/bin/activate + uv run pytest diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 3fad564..e73fcaa 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -11,6 +11,12 @@ name: Upload Python Package on: release: types: [published] + workflow_dispatch: + inputs: + version: + description: 'Version to publish' + required: true + default: 'latest' permissions: contents: read @@ -22,18 +28,18 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.x' - - name: Install dependencies - run: | - uv python -m pip install --upgrade pip - uv pip install build - - name: Build package - run: uv python -m build - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install dependencies + run: | + uv python -m pip install --upgrade pip + uv pip install build + - name: Build package + run: uv python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} From 98eaaaa0dd15ff442d35c2d94f711adcbd6c32d8 Mon Sep 17 00:00:00 2001 From: Houfu Ang Date: Tue, 20 May 2025 00:10:16 +0800 Subject: [PATCH 05/14] Update docs.yml Outdated actions. --- .github/workflows/docs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 6f3e3d7..4402bb7 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -31,7 +31,7 @@ jobs: - name: Install the project run: uv sync --all-extras --dev - run: pdoc -o docs/ redlines - - uses: actions/upload-pages-artifact@v2 + - uses: actions/upload-pages-artifact@v3 with: path: docs/ @@ -48,4 +48,4 @@ jobs: url: ${{ steps.deployment.outputs.page_url }} steps: - id: deployment - uses: actions/deploy-pages@v2 \ No newline at end of file + uses: actions/deploy-pages@v4 From 8b16ad329772360a8aa593a7a5554bdde9653d39 Mon Sep 17 00:00:00 2001 From: Houfu Ang Date: Tue, 20 May 2025 00:12:12 +0800 Subject: [PATCH 06/14] Update docs.yml --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 4402bb7..b27df12 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -30,7 +30,7 @@ jobs: cache-dependency-glob: "uv.lock" - name: Install the project run: uv sync --all-extras --dev - - run: pdoc -o docs/ redlines + - run: uv run pdoc -o docs/ redlines - uses: actions/upload-pages-artifact@v3 with: path: docs/ From a2166f9596277ccc94465c3d2a9485dc996d2716 Mon Sep 17 00:00:00 2001 From: Richard G Date: Tue, 20 May 2025 10:27:50 +0200 Subject: [PATCH 07/14] Normalize tokens by stripping whitespace for comparison (#45) * fix: match using normalized tokens, then apply the result to the original ones * doc: add explanatory comment Co-authored-by: Houfu Ang --- redlines/processor.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/redlines/processor.py b/redlines/processor.py index 93e547d..1e73fac 100644 --- a/redlines/processor.py +++ b/redlines/processor.py @@ -138,10 +138,15 @@ def process( seq_source = tokenize_text(concatenate_paragraphs_and_add_chr_182(self.source)) seq_test = tokenize_text(concatenate_paragraphs_and_add_chr_182(self.test)) + # Normalize tokens by stripping whitespace for comparison + # This allows the matcher to focus on content differences rather than whitespace variations + # while still preserving the original tokens (including whitespace) for display in the output + seq_source_normalized = [token.strip() for token in seq_source] + seq_test_normalized = [token.strip() for token in seq_test] from difflib import SequenceMatcher - matcher = SequenceMatcher(None, seq_source, seq_test) + matcher = SequenceMatcher(None, seq_source_normalized, seq_test_normalized) return [ Redline( From e5341153eb74a784864aa6a8b5c02bd87e8c74a3 Mon Sep 17 00:00:00 2001 From: houfu Date: Mon, 19 May 2025 00:32:32 +0800 Subject: [PATCH 08/14] Fix: Add character-level diffing and single token refinement Introduce a `character_level_diffing` option to refine single token replacements by analyzing prefix and suffix changes. This enables more precise diffing for minor text variations and improves the granularity of redline outputs. Signed-off-by: houfu --- redlines/processor.py | 153 +++++++++++++++++++++++++++++++++++++++++- redlines/redlines.py | 11 ++- 2 files changed, 161 insertions(+), 3 deletions(-) diff --git a/redlines/processor.py b/redlines/processor.py index 1e73fac..71190b9 100644 --- a/redlines/processor.py +++ b/redlines/processor.py @@ -124,6 +124,9 @@ class WholeDocumentProcessor(RedlinesProcessor): source: str test: str + def __init__(self, character_level_diffing: bool = True): + self.character_level_diffing = character_level_diffing + def process( self, source: Union[Document, str], test: Union[Document, str] ) -> List[Redline]: @@ -148,7 +151,7 @@ def process( matcher = SequenceMatcher(None, seq_source_normalized, seq_test_normalized) - return [ + redlines = [ Redline( source_chunk=Chunk(text=seq_source, chunk_location=None), test_chunk=Chunk(text=seq_test, chunk_location=None), @@ -156,3 +159,151 @@ def process( ) for opcode in matcher.get_opcodes() ] + + if self.character_level_diffing: + return self._refine_single_token_replacements(redlines) + return redlines + + def _refine_single_token_replacements( + self, redlines: List[Redline] + ) -> List[Redline]: + refined_redlines = [] + + for redline in redlines: + if redline.opcodes[0] == "replace": + tag, i1, i2, j1, j2 = redline.opcodes + + # Check if this is a single token replacement + if i2 - i1 == 1 and j2 - j1 == 1: + source_token = redline.source_chunk.text[i1].strip() + test_token = redline.test_chunk.text[j1].strip() + + # Skip tokens containing paragraph markers + if "¶" in source_token or "¶" in test_token: + refined_redlines.append(redline) + continue + + # Now check for prefix/suffix changes only + refined_opcodes = self._character_level_diff_if_prefix_suffix( + source_token, + test_token, + i1, + i2, + j1, + j2, + redline.source_chunk, + redline.test_chunk, + ) + + if refined_opcodes: + # Replace the original redline with the refined one(s) + refined_redlines.extend(refined_opcodes) + continue + + # If we didn't refine this redline, keep the original + refined_redlines.append(redline) + + return refined_redlines + + def _character_level_diff_if_prefix_suffix( + self, + source_token: str, + test_token: str, + i1: int, + i2: int, + j1: int, + j2: int, + source_chunk: Chunk, + test_chunk: Chunk, + ) -> List[Redline]: + # If tokens are identical, no need for character diffing + if source_token == test_token: + return [] + + # Find the longest common prefix + prefix_len = 0 + for i in range(min(len(source_token), len(test_token))): + if source_token[i] != test_token[i]: + break + prefix_len = i + 1 + + # Find the longest common suffix + suffix_len = 0 + for i in range( + 1, min(len(source_token) - prefix_len, len(test_token) - prefix_len) + 1 + ): + if source_token[-i] != test_token[-i]: + break + suffix_len = i + + # If either prefix or suffix is different (but not both), use character-level diffing + is_prefix_change = prefix_len == 0 and suffix_len > 0 + is_suffix_change = prefix_len > 0 and suffix_len == 0 + is_prefix_and_suffix_same = prefix_len > 0 and suffix_len > 0 + + if is_prefix_change or is_suffix_change or is_prefix_and_suffix_same: + # Create new redlines with character-level opcodes + result = [] + + # For prefix changes + if prefix_len > 0: + # Add the common prefix as 'equal' + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=("equal", i1, i1 + prefix_len, j1, j1 + prefix_len), + ) + ) + + # Add the different middle part + if len(source_token) - prefix_len - suffix_len > 0: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "delete", + i1 + prefix_len, + i1 + len(source_token) - suffix_len, + j1 + prefix_len, + j1 + prefix_len, + ), + ) + ) + + if len(test_token) - prefix_len - suffix_len > 0: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "insert", + i1 + prefix_len, + i1 + prefix_len, + j1 + prefix_len, + j1 + len(test_token) - suffix_len, + ), + ) + ) + + # For suffix changes, add the common suffix as 'equal' + if suffix_len > 0: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "equal", + i1 + len(source_token) - suffix_len, + i1 + len(source_token), + j1 + len(test_token) - suffix_len, + j1 + len(test_token), + ), + ) + ) + + return result + + # If not a prefix/suffix change only, return empty list to indicate no refinement + return [] diff --git a/redlines/redlines.py b/redlines/redlines.py index 0b2a3e6..5aca55f 100644 --- a/redlines/redlines.py +++ b/redlines/redlines.py @@ -41,7 +41,11 @@ def test(self, value): self._seq2 = tokenize_text(concatenate_paragraphs_and_add_chr_182(value)) def __init__( - self, source: str | Document, test: str | Document | None = None, **options + self, + source: str | Document, + test: str | Document | None = None, + character_level_diffing: bool = True, + **options, ): """ Redline is a class used to compare text, and producing human-readable differences or deltas @@ -85,7 +89,10 @@ def __init__( if test: self.test = test.text if isinstance(test, Document) else test # self.compare() - self.processor = WholeDocumentProcessor() + self.character_level_diffing = character_level_diffing + self.processor = WholeDocumentProcessor( + character_level_diffing=character_level_diffing + ) @property def opcodes(self) -> list[tuple[str, int, int, int, int]]: From a142c4e60b7157f64457cdaab012eb30ebfa5b55 Mon Sep 17 00:00:00 2001 From: houfu Date: Mon, 19 May 2025 07:25:17 +0800 Subject: [PATCH 09/14] =?UTF-8?q?skip=20character-level=20refinements=20wh?= =?UTF-8?q?en=20they=E2=80=99re=20only=20inserting=20or=20deleting=20trail?= =?UTF-8?q?ing=20punctuation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: houfu --- redlines/processor.py | 66 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/redlines/processor.py b/redlines/processor.py index 71190b9..e4fba79 100644 --- a/redlines/processor.py +++ b/redlines/processor.py @@ -196,9 +196,15 @@ def _refine_single_token_replacements( ) if refined_opcodes: - # Replace the original redline with the refined one(s) - refined_redlines.extend(refined_opcodes) - continue + # Skip refinements that only insert or delete trailing punctuation, + # so that tokens like "weekend"->"weekend." are handled as full-token replacements + if not ( + len(refined_opcodes) == 2 + and refined_opcodes[0].opcodes[0] == "equal" + and refined_opcodes[1].opcodes[0] in ("insert", "delete") + ): + refined_redlines.extend(refined_opcodes) + continue # If we didn't refine this redline, keep the original refined_redlines.append(redline) @@ -236,6 +242,60 @@ def _character_level_diff_if_prefix_suffix( break suffix_len = i + # Special case for trailing punctuation differences + # Handle case where one token ends with punctuation and the other doesn't + if source_token.rstrip(".!?,;:") == test_token.rstrip(".!?,;:") and ( + source_token[-1] in ".!?,;:" or test_token[-1] in ".!?,;:" + ): + # Create special handling for this case + base_token = source_token.rstrip(".!?,;:") + result = [] + # Add the common part + if base_token: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "equal", + i1, + i1 + len(base_token), + j1, + j1 + len(base_token), + ), + ) + ) + # Handle the punctuation differences + if source_token != base_token: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "delete", + i1 + len(base_token), + i1 + len(source_token), + j1 + len(base_token), + j1 + len(base_token), + ), + ) + ) + if test_token != base_token: + result.append( + Redline( + source_chunk=source_chunk, + test_chunk=test_chunk, + opcodes=( + "insert", + i1 + len(base_token), + i1 + len(base_token), + j1 + len(base_token), + j1 + len(test_token), + ), + ) + ) + return result + # If either prefix or suffix is different (but not both), use character-level diffing is_prefix_change = prefix_len == 0 and suffix_len > 0 is_suffix_change = prefix_len > 0 and suffix_len == 0 From 01a624f1b9e9807930a35517333495e611fb0470 Mon Sep 17 00:00:00 2001 From: houfu Date: Tue, 20 May 2025 20:25:14 +0800 Subject: [PATCH 10/14] Refactor redlines processing for improved clarity and flexibility Refactored `Redlines` and `WholeDocumentProcessor` to streamline text processing logic. Introduced handling for `Redline` objects and moved tokenization and processing into a more cohesive structure. This improves the clarity, flexibility, and maintainability of change-tracking functionality. Signed-off-by: houfu --- redlines/processor.py | 62 ++++++++++----------------- redlines/redlines.py | 98 +++++++++++++++++++++++++++---------------- 2 files changed, 84 insertions(+), 76 deletions(-) diff --git a/redlines/processor.py b/redlines/processor.py index e4fba79..f787366 100644 --- a/redlines/processor.py +++ b/redlines/processor.py @@ -6,33 +6,8 @@ from redlines.document import Document tokenizer = re.compile(r"((?:[^()\s]+|[().?!-])\s*)") -""" -This regular expression matches a group of characters that can include any character except for parentheses -and whitespace characters (which include spaces, tabs, and line breaks) or any character -that is a parenthesis or punctuation mark (.?!-). -The group can also include any whitespace characters that follow these characters. - -Breaking it down further: - -* `(` and `)` indicate a capturing group -* `(?: )` is a non-capturing group, meaning it matches the pattern but doesn't capture the matched text -* `[^()\s]+` matches one or more characters that are not parentheses or whitespace characters -* `|` indicates an alternative pattern -* `[().?!-]` matches any character that is a parenthesis or punctuation mark `(.?!-)` -* `\s*` matches zero or more whitespace characters (spaces, tabs, or line breaks) that follow the previous pattern. -""" -# This pattern matches one or more newline characters `\n`, and any spaces between them. - paragraph_pattern = re.compile(r"((?:\n *)+)") -""" -It is used to split the text into paragraphs. - -* `(?:\\n *)` is a non-capturing group that must start with a `\\n` and be followed by zero or more spaces. -* `((?:\\n *)+)` is the previous non-capturing group repeated one or more times. -""" - space_pattern = re.compile(r"(\s+)") -"""It is used to detect space.""" def tokenize_text(text: str) -> List[str]: @@ -121,11 +96,13 @@ class WholeDocumentProcessor(RedlinesProcessor): A redlines processor that compares two documents. It compares the entire documents as a single chunk. """ - source: str - test: str - def __init__(self, character_level_diffing: bool = True): self.character_level_diffing = character_level_diffing + self.source_text = None + self.test_text = None + self.source_tokens = None + self.test_tokens = None + self._redlines = None def process( self, source: Union[Document, str], test: Union[Document, str] @@ -136,33 +113,40 @@ def process( :param test: The test document to compare. :return: A list of `Redline` that describe the differences between the two documents. """ - self.source = source.text if isinstance(source, Document) else source - self.test = test.text if isinstance(test, Document) else test + # Extract text from documents if needed + self.source_text = source.text if isinstance(source, Document) else source + self.test_text = test.text if isinstance(test, Document) else test + + # Tokenize the texts + self.source_tokens = tokenize_text( + concatenate_paragraphs_and_add_chr_182(self.source_text) + ) + self.test_tokens = tokenize_text( + concatenate_paragraphs_and_add_chr_182(self.test_text) + ) - seq_source = tokenize_text(concatenate_paragraphs_and_add_chr_182(self.source)) - seq_test = tokenize_text(concatenate_paragraphs_and_add_chr_182(self.test)) # Normalize tokens by stripping whitespace for comparison # This allows the matcher to focus on content differences rather than whitespace variations # while still preserving the original tokens (including whitespace) for display in the output - seq_source_normalized = [token.strip() for token in seq_source] - seq_test_normalized = [token.strip() for token in seq_test] + seq_source_normalized = [token.strip() for token in self.source_tokens] + seq_test_normalized = [token.strip() for token in self.test_tokens] from difflib import SequenceMatcher matcher = SequenceMatcher(None, seq_source_normalized, seq_test_normalized) - redlines = [ + self._redlines = [ Redline( - source_chunk=Chunk(text=seq_source, chunk_location=None), - test_chunk=Chunk(text=seq_test, chunk_location=None), + source_chunk=Chunk(text=self.source_tokens, chunk_location=None), + test_chunk=Chunk(text=self.test_tokens, chunk_location=None), opcodes=opcode, ) for opcode in matcher.get_opcodes() ] if self.character_level_diffing: - return self._refine_single_token_replacements(redlines) - return redlines + self._redlines = self._refine_single_token_replacements(self._redlines) + return self._redlines def _refine_single_token_replacements( self, redlines: List[Redline] diff --git a/redlines/redlines.py b/redlines/redlines.py index 5aca55f..80e9794 100644 --- a/redlines/redlines.py +++ b/redlines/redlines.py @@ -5,11 +5,7 @@ from rich.text import Text from redlines.document import Document -from redlines.processor import ( - tokenize_text, - concatenate_paragraphs_and_add_chr_182, - WholeDocumentProcessor, -) +from redlines.processor import WholeDocumentProcessor, Redline class Redlines: @@ -27,8 +23,11 @@ def source(self) -> str: @source.setter def source(self, value): - self._source = value - self._seq1 = tokenize_text(concatenate_paragraphs_and_add_chr_182(value)) + self._source = value.text if isinstance(value, Document) else value + + # If test is already set, process the new source against it + if self._test is not None: + self._redlines = self.processor.process(self._source, self._test) @property def test(self): @@ -37,8 +36,24 @@ def test(self): @test.setter def test(self, value): - self._test = value - self._seq2 = tokenize_text(concatenate_paragraphs_and_add_chr_182(value)) + self._test = value.text if isinstance(value, Document) else value + + # Process the text against the source + if self._source is not None and self._test is not None: + self._redlines = self.processor.process(self._source, self._test) + + @property + def redlines(self) -> list[Redline]: + """ + Return the list of Redline objects representing the changes from source to test. + + :return: List of Redline objects + """ + if self._redlines is None: + raise ValueError( + "No test string was provided when the function was called, or during initialisation." + ) + return self._redlines def __init__( self, @@ -84,15 +99,20 @@ def __init__( :param source: The source text to be used as a basis for comparison. :param test: Optional test text to compare with the source. """ + self.processor = WholeDocumentProcessor( + character_level_diffing=character_level_diffing + ) self.source = source.text if isinstance(source, Document) else source self.options = options + self._redlines = None if test: self.test = test.text if isinstance(test, Document) else test # self.compare() self.character_level_diffing = character_level_diffing - self.processor = WholeDocumentProcessor( - character_level_diffing=character_level_diffing - ) + + # Process immediately if both source and test are provided + if self._test is not None: + self._redlines = self.processor.process(self._source, self._test) @property def opcodes(self) -> list[tuple[str, int, int, int, int]]: @@ -108,13 +128,7 @@ def opcodes(self) -> list[tuple[str, int, int, int, int]]: [('equal', 0, 4, 0, 4), ('replace', 4, 6, 4, 6), ('equal', 6, 9, 6, 9)] ``` """ - if self._seq2 is None: - raise ValueError( - "No test string was provided when the function was called, or during initialisation." - ) - - redlines = self.processor.process(self._source, self._test) - return [redline.opcodes for redline in redlines] + return [redline.opcodes for redline in self.redlines] @property def output_markdown(self) -> str: @@ -254,15 +268,19 @@ def output_markdown(self) -> str: elif style == "streamlit": md_styles = {"ins": ("**:green[", "]** "), "del": ("~~:red[", "]~~ ")} - for tag, i1, i2, j1, j2 in self.opcodes: + for redline in self.redlines: + tag, i1, i2, j1, j2 = redline.opcodes + source_tokens = redline.source_chunk.text + test_tokens = redline.test_chunk.text + if tag == "equal": - temp_str = "".join(self._seq1[i1:i2]) + temp_str = "".join(source_tokens[i1:i2]) temp_str = re.sub("¶ ", "\n\n", temp_str) # here we use '¶ ' instead of ' ¶ ', because the leading space will be included in the previous token, # according to tokenizer = re.compile(r"((?:[^()\s]+|[().?!-])\s*)") result.append(temp_str) elif tag == "insert": - temp_str = "".join(self._seq2[j1:j2]) + temp_str = "".join(test_tokens[j1:j2]) splits = re.split("¶ ", temp_str) for split in splits: result.append(f"{md_styles['ins'][0]}{split}{md_styles['ins'][1]}") @@ -271,15 +289,15 @@ def output_markdown(self) -> str: result.pop() elif tag == "delete": result.append( - f"{md_styles['del'][0]}{''.join(self._seq1[i1:i2])}{md_styles['del'][1]}" + f"{md_styles['del'][0]}{''.join(source_tokens[i1:i2])}{md_styles['del'][1]}" ) # for 'delete', we make no change, because otherwise there will be two times '\n\n' than the original # text. elif tag == "replace": result.append( - f"{md_styles['del'][0]}{''.join(self._seq1[i1:i2])}{md_styles['del'][1]}" + f"{md_styles['del'][0]}{''.join(source_tokens[i1:i2])}{md_styles['del'][1]}" ) - temp_str = "".join(self._seq2[j1:j2]) + temp_str = "".join(test_tokens[j1:j2]) splits = re.split("¶ ", temp_str) for split in splits: result.append(f"{md_styles['ins'][0]}{split}{md_styles['ins'][1]}") @@ -294,21 +312,25 @@ def output_rich(self) -> Text: """Returns the delta in text with colors/style for the console.""" console_text = Text() - for tag, i1, i2, j1, j2 in self.opcodes: + for redline in self.redlines: + tag, i1, i2, j1, j2 = redline.opcodes + source_tokens = redline.source_chunk.text + test_tokens = redline.test_chunk.text + if tag == "equal": - temp_str = "".join(self._seq1[i1:i2]) + temp_str = "".join(source_tokens[i1:i2]) temp_str = re.sub("¶ ", "\n\n", temp_str) console_text.append(temp_str) elif tag == "insert": - temp_str = "".join(self._seq2[j1:j2]) + temp_str = "".join(test_tokens[j1:j2]) splits = re.split("¶ ", temp_str) for split in splits: console_text.append(split, "green") elif tag == "delete": - console_text.append("".join(self._seq1[i1:i2]), "strike red") + console_text.append("".join(source_tokens[i1:i2]), "strike red") elif tag == "replace": - console_text.append("".join(self._seq1[i1:i2]), "strike red") - temp_str = "".join(self._seq2[j1:j2]) + console_text.append("".join(source_tokens[i1:i2]), "strike red") + temp_str = "".join(test_tokens[j1:j2]) splits = re.split("¶ ", temp_str) for split in splits: console_text.append(split, "green") @@ -323,20 +345,22 @@ def compare(self, test: str | None = None, output: str = "markdown", **options): :param output: The format which the delta should be produced. Currently, "markdown" and "rich" are supported. Defaults to "markdown". :return: The delta in the format specified by `output`. """ + if options: + self.options = options + if test: - if self.test and test == self.test: - return self.output_markdown + if self._test and test == self._test: + # If we've already processed this test string, no need to reprocess + pass else: self.test = test - elif self.test is None: + elif self._test is None: raise ValueError( "No test string was provided when the function was called, or during initialisation." ) - if options: - self.options = options - if output == "markdown": return self.output_markdown elif output == "rich": return self.output_rich + return self.output_markdown From c3e0cf612e51c6a7ccb9debb2a5506cf753b5a8d Mon Sep 17 00:00:00 2001 From: houfu Date: Tue, 20 May 2025 20:45:38 +0800 Subject: [PATCH 11/14] Simplify diff processing and clean up redundant attributes. Removed unused attributes from the processor class and updated token handling to use local variables, improving clarity and reducing state complexity. Minor adjustments were also made to standardize formatting and remove unnecessary string prefixes. Signed-off-by: houfu --- redlines/processor.py | 34 +++++++++++++--------------------- redlines/redlines.py | 12 ++++++------ 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/redlines/processor.py b/redlines/processor.py index f787366..bdfea59 100644 --- a/redlines/processor.py +++ b/redlines/processor.py @@ -1,6 +1,7 @@ import re from abc import ABC, abstractmethod from dataclasses import dataclass +from difflib import SequenceMatcher from typing import Tuple, List, Optional, Union from redlines.document import Document @@ -98,11 +99,6 @@ class WholeDocumentProcessor(RedlinesProcessor): def __init__(self, character_level_diffing: bool = True): self.character_level_diffing = character_level_diffing - self.source_text = None - self.test_text = None - self.source_tokens = None - self.test_tokens = None - self._redlines = None def process( self, source: Union[Document, str], test: Union[Document, str] @@ -114,39 +110,35 @@ def process( :return: A list of `Redline` that describe the differences between the two documents. """ # Extract text from documents if needed - self.source_text = source.text if isinstance(source, Document) else source - self.test_text = test.text if isinstance(test, Document) else test + source_text = source.text if isinstance(source, Document) else source + test_text = test.text if isinstance(test, Document) else test # Tokenize the texts - self.source_tokens = tokenize_text( - concatenate_paragraphs_and_add_chr_182(self.source_text) - ) - self.test_tokens = tokenize_text( - concatenate_paragraphs_and_add_chr_182(self.test_text) + source_tokens = tokenize_text( + concatenate_paragraphs_and_add_chr_182(source_text) ) + test_tokens = tokenize_text(concatenate_paragraphs_and_add_chr_182(test_text)) # Normalize tokens by stripping whitespace for comparison # This allows the matcher to focus on content differences rather than whitespace variations # while still preserving the original tokens (including whitespace) for display in the output - seq_source_normalized = [token.strip() for token in self.source_tokens] - seq_test_normalized = [token.strip() for token in self.test_tokens] - - from difflib import SequenceMatcher + seq_source_normalized = [token.strip() for token in source_tokens] + seq_test_normalized = [token.strip() for token in test_tokens] matcher = SequenceMatcher(None, seq_source_normalized, seq_test_normalized) - self._redlines = [ + redlines = [ Redline( - source_chunk=Chunk(text=self.source_tokens, chunk_location=None), - test_chunk=Chunk(text=self.test_tokens, chunk_location=None), + source_chunk=Chunk(text=source_tokens, chunk_location=None), + test_chunk=Chunk(text=test_tokens, chunk_location=None), opcodes=opcode, ) for opcode in matcher.get_opcodes() ] if self.character_level_diffing: - self._redlines = self._refine_single_token_replacements(self._redlines) - return self._redlines + redlines = self._refine_single_token_replacements(redlines) + return redlines def _refine_single_token_replacements( self, redlines: List[Redline] diff --git a/redlines/redlines.py b/redlines/redlines.py index 80e9794..75dd069 100644 --- a/redlines/redlines.py +++ b/redlines/redlines.py @@ -30,7 +30,7 @@ def source(self, value): self._redlines = self.processor.process(self._source, self._test) @property - def test(self): + def test(self) -> str: """:return: The text to be compared with the source.""" return self._test @@ -145,7 +145,7 @@ def output_markdown(self) -> str: test = Redlines( "The quick brown fox jumps over the lazy dog.", "The quick brown fox walks past the lazy dog.", - markdown_style="red" # This option specifies the style as red + markdown_style="red" # This option specifies the style as red ) test.compare(markdown_style="none") # This option specifies the style as none @@ -206,11 +206,11 @@ def output_markdown(self) -> str: md_styles = { "ins": ( - f"", + "", "", ), "del": ( - f"", + "", "", ), } @@ -223,11 +223,11 @@ def output_markdown(self) -> str: elif style == "red": md_styles = { "ins": ( - f"", + "", "", ), "del": ( - f"", + "", "", ), } From 99b6341dcd9e61162bb7ede9ac934885a0578fa7 Mon Sep 17 00:00:00 2001 From: houfu Date: Tue, 20 May 2025 20:50:36 +0800 Subject: [PATCH 12/14] Add docs on character level diffing Signed-off-by: houfu --- redlines/redlines.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/redlines/redlines.py b/redlines/redlines.py index 75dd069..690135d 100644 --- a/redlines/redlines.py +++ b/redlines/redlines.py @@ -94,6 +94,34 @@ def __init__( == "The quick brown fox jumps over walks past the lazy dog." ) + ``` + ## Character-level diffing + + By default, Redlines employs character-level diffing to provide more precise and readable differences + when only parts of a word have changed. This is especially useful for: + + - Words with minor spelling corrections (e.g., "recieve" → "receive") + - Words with added/removed suffixes (e.g., "weekend" → "weekend.") + - Words with changes in capitalization or formatting + + For example, with character-level diffing enabled: + ```python + # With character-level diffing (default) + test = Redlines( + "The dog ran quickly.", + "The dogs ran quickly." + ) + ``` + Would show only the 's' as added instead of replacing the whole word. + + You can disable this feature if you prefer whole-word replacements: + ```python + # Disable character-level diffing + test = Redlines( + "The dog ran quickly.", + "The dogs ran quickly.", + character_level_diffing=False + ) ``` :param source: The source text to be used as a basis for comparison. From 48cbc771f9e117f3369045305027fa4f7a6fc909 Mon Sep 17 00:00:00 2001 From: houfu Date: Tue, 20 May 2025 23:50:24 +0800 Subject: [PATCH 13/14] Update workflow trigger and enhance redlines documentation Change docs workflow trigger from push to release for better control. Add character-level diffing parameter to enhance functionality, and improve clarity in redlines documentation examples. Signed-off-by: houfu --- .github/workflows/docs.yml | 2 +- redlines/redlines.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index b27df12..b197c0a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -2,7 +2,7 @@ name: website # build the documentation whenever there are new commits on main on: - push: + release: branches: - main # Alternative: only build for tags. diff --git a/redlines/redlines.py b/redlines/redlines.py index 690135d..aedcaa2 100644 --- a/redlines/redlines.py +++ b/redlines/redlines.py @@ -126,6 +126,7 @@ def __init__( :param source: The source text to be used as a basis for comparison. :param test: Optional test text to compare with the source. + :param character_level_diffing: If True, uses character-level diffing for more precise differences. """ self.processor = WholeDocumentProcessor( character_level_diffing=character_level_diffing @@ -216,7 +217,7 @@ def output_markdown(self) -> str: Try this: - * If streamlit version is >= 1.16.0, consider the markdown style "streamlit" + * If streamlit version is >= 1.16.0, consider the markdown style `streamlit` * If streamlit version is < 1.16.0, consider the markdown style `ghfm` * Enable parsing of HTML. In Streamlit, you need to set the `unsafe_allow_html` argument in `st.write` or `st.markdown` to `True`. From e9f16b1a7020a3bf0ce2c6800fbcbddcee8db09e Mon Sep 17 00:00:00 2001 From: houfu Date: Tue, 20 May 2025 23:50:39 +0800 Subject: [PATCH 14/14] Add tests for character-level diffing and punctuation handling Introduced parameterized tests to validate character-level diffing for various scenarios, such as word changes, capitalization, and punctuation. Also added dedicated tests to handle special cases like trailing and multiple punctuation. Ensures the Redlines library behaves correctly with and without character-level diffing enabled. Signed-off-by: houfu --- tests/test_redline.py | 89 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tests/test_redline.py b/tests/test_redline.py index eb62a66..18db502 100644 --- a/tests/test_redline.py +++ b/tests/test_redline.py @@ -257,3 +257,92 @@ def test_different_number_of_paragraphs(): ) test = Redlines(test_string_1, test_string_2, markdown_style="none") assert test.output_markdown == expected_md + + +@pytest.mark.parametrize( + "source, test, expected_md, expected_diffing_disabled", + [ + # Test 1: Adding a single character (suffix) + ( + "The dog ran quickly.", + "The dogs ran quickly.", + "The dogs ran quickly.", + "The dogdogs ran quickly.", + ), + # Test 2: Adding punctuation (suffix) + ( + "The quick brown fox jumps over the lazy dog", + "The quick brown fox jumps over the lazy dog.", + "The quick brown fox jumps over the lazy dog.", + "The quick brown fox jumps over the lazy dogdog.", + ), + # Test 3: Changing middle characters + ( + "The quick brown fox recieved the award.", + "The quick brown fox received the award.", + "The quick brown fox recieeived the award.", + "The quick brown fox recievedreceived the award.", + ), + # Test 4: Prefix changes + ( + "The dog is unhappy.", + "The dog is happy.", + "The dog is unhappy.", + "The dog is unhappyhappy.", + ), + # Test 5: Suffix and prefix preserved, middle changed + ( + "The implementation is fast.", + "The implementation is slow.", + "The implementation is fslastow.", + "The implementation is fastslow.", + ), + # Test 6: Multiple word changes that should be kept separate + ( + "The cats plays outside. They are happy.", + "The cat plays outside. It is happy.", + "The catscat plays outside. TheyIt areis happy.", + "The catscat plays outside. TheyIt areis happy.", + ), + # Test 7: Capitalization changes + ( + "The quick brown fox.", + "The Quick brown fox.", + "The qQuick brown fox.", + "The quickQuick brown fox.", + ), + ], +) +def test_character_level_diffing(source, test, expected_md, expected_diffing_disabled): + """Test character-level diffing with various types of changes.""" + # Test with character-level diffing enabled (default) + redlines_enabled = Redlines(source, test, markdown_style="none") + assert redlines_enabled.output_markdown == expected_md + + # Test with character-level diffing disabled + redlines_disabled = Redlines( + source, test, character_level_diffing=False, markdown_style="none" + ) + assert redlines_disabled.output_markdown == expected_diffing_disabled + + +def test_special_punctuation_handling(): + """Test special handling of trailing punctuation.""" + source = "Thank you" + test = "Thank you." + + # With character diffing enabled + redlines = Redlines(source, test, markdown_style="none") + assert redlines.output_markdown == "Thank you." + + # With character diffing disabled + redlines_disabled = Redlines( + source, test, character_level_diffing=False, markdown_style="none" + ) + assert redlines_disabled.output_markdown == "Thank youyou." + + # Test multiple punctuation characters + source = "Wow" + test = "Wow!!!" + redlines = Redlines(source, test, markdown_style="none") + assert redlines.output_markdown == "Wow!!!"