From 5fa5ebefdb581632de4d7de9c24963d1b6cb9051 Mon Sep 17 00:00:00 2001 From: James Braza Date: Wed, 8 Oct 2025 18:03:22 -0700 Subject: [PATCH 1/2] Loosened input types on Encoding's encode functions --- tiktoken/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tiktoken/core.py b/tiktoken/core.py index 225fffb..3557a79 100644 --- a/tiktoken/core.py +++ b/tiktoken/core.py @@ -158,7 +158,7 @@ def encode_to_numpy( buffer = self._core_bpe.encode_to_tiktoken_buffer(text, allowed_special) return np.frombuffer(buffer, dtype=np.uint32) - def encode_ordinary_batch(self, text: list[str], *, num_threads: int = 8) -> list[list[int]]: + def encode_ordinary_batch(self, text: Sequence[str], *, num_threads: int = 8) -> list[list[int]]: """Encodes a list of strings into tokens, in parallel, ignoring special tokens. This is equivalent to `encode_batch(text, disallowed_special=())` (but slightly faster). @@ -174,7 +174,7 @@ def encode_ordinary_batch(self, text: list[str], *, num_threads: int = 8) -> lis def encode_batch( self, - text: list[str], + text: Sequence[str], *, num_threads: int = 8, allowed_special: Literal["all"] | AbstractSet[str] = set(), # noqa: B006 From f9dca60e639e9d2458d277796d77eee9dc035889 Mon Sep 17 00:00:00 2001 From: James Braza Date: Wed, 8 Oct 2025 18:05:59 -0700 Subject: [PATCH 2/2] Created CHANGELOG entry --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 068c509..b2bc888 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ This is the changelog for the open source version of tiktoken. +## [v0.13.0] + +- Loosened `Encoding`'s batch encoding methods' typing to allow for `Sequence` + ## [v0.12.0] - Build wheels for Python 3.14 - Build musllinux aarch64 wheels