Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
07a5c45
Add ColModernVBERT to LateInteractionMultimodalEmbedding registry
kacperlukawski Dec 4, 2025
203ca31
Implement image processing based on Idefics3ImageProcessor logic
kacperlukawski Dec 5, 2025
8c45088
Fix padding support
kacperlukawski Dec 5, 2025
5b56a77
Implement ColModernVBERT logic
kacperlukawski Dec 5, 2025
e637a7f
Remove TODOs
kacperlukawski Dec 5, 2025
74f5c3e
Handle empty pixel values with proper image_size
kacperlukawski Dec 5, 2025
9e2929e
Add ColModernVBERT tests
kacperlukawski Dec 5, 2025
aa93a52
Run pre-commit
kacperlukawski Dec 8, 2025
6470e35
mypy fixes
kacperlukawski Dec 8, 2025
bf89317
mypy fixes
kacperlukawski Dec 8, 2025
39c7211
mypy fixes
kacperlukawski Dec 8, 2025
889a95b
mypy fixes
kacperlukawski Dec 8, 2025
144d867
Fix typo in the class name
kacperlukawski Dec 9, 2025
23768f1
Add processor_config.json to additional files
kacperlukawski Dec 9, 2025
7bea532
Fix mypy errors
kacperlukawski Dec 9, 2025
96eb50b
Refactor onnx_embed_image
kacperlukawski Dec 9, 2025
cf4100c
Fix mypy errors
kacperlukawski Dec 9, 2025
5a6d884
fix: colmodernvbert tests and query processing
kacperlukawski Jan 2, 2026
2dc7de8
fix: remove Union references
kacperlukawski Jan 2, 2026
46453a2
fix: fix exit stack, update tests, implement token count
joein Jan 8, 2026
8f6f057
fix: uncomment colpali in tests
joein Jan 9, 2026
01965c9
fix: lowercase models to cache
joein Jan 9, 2026
ef9c496
fix: fix models to cache
joein Jan 9, 2026
8b9f50c
refactor: move colmodernvbert related onnx embed to its class
joein Jan 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions fastembed/common/onnx_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class OnnxOutputContext:
model_output: NumpyArray
attention_mask: NDArray[np.int64] | None = None
input_ids: NDArray[np.int64] | None = None
metadata: dict[str, Any] | None = None


class OnnxModel(Generic[T]):
Expand Down
7 changes: 4 additions & 3 deletions fastembed/common/preprocessor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ def load_tokenizer(model_dir: Path) -> tuple[Tokenizer, dict[str, int]]:

tokenizer = Tokenizer.from_file(str(tokenizer_path))
tokenizer.enable_truncation(max_length=max_context)
tokenizer.enable_padding(
pad_id=config.get("pad_token_id", 0), pad_token=tokenizer_config["pad_token"]
)
if not tokenizer.padding:
tokenizer.enable_padding(
pad_id=config.get("pad_token_id", 0), pad_token=tokenizer_config["pad_token"]
)

for token in tokens_map.values():
if isinstance(token, str):
Expand Down
6 changes: 4 additions & 2 deletions fastembed/image/onnx_image_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,11 @@ def _build_onnx_input(self, encoded: NumpyArray) -> dict[str, NumpyArray]:
return {input_name: encoded}

def onnx_embed(self, images: list[ImageInput], **kwargs: Any) -> OnnxOutputContext:
with contextlib.ExitStack():
with contextlib.ExitStack() as stack:
image_files = [
Image.open(image) if not isinstance(image, Image.Image) else image
stack.enter_context(Image.open(image))
if not isinstance(image, Image.Image)
else image
for image in images
]
assert self.processor is not None, "Processor is not initialized"
Expand Down
74 changes: 74 additions & 0 deletions fastembed/image/transform/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,77 @@ def pad2square(
new_image = Image.new(mode="RGB", size=(size, size), color=fill_color)
new_image.paste(image.crop((left, top, right, bottom)) if crop_required else image)
return new_image


def resize_longest_edge(
    image: Image.Image,
    max_size: int,
    resample: int | Image.Resampling = Image.Resampling.LANCZOS,
) -> Image.Image:
    """Resize ``image`` so that its longest edge is ``max_size``, keeping the aspect ratio.

    The shorter edge is scaled proportionally and truncated to an integer.
    Both target dimensions are then rounded up to the next even number, so the
    longest edge ends up as ``max_size + 1`` when ``max_size`` is odd.

    Args:
        image: Source PIL image.
        max_size: Target length of the longest edge, in pixels.
        resample: Resampling filter forwarded to ``Image.resize``.

    Returns:
        A new PIL image with the computed dimensions.
    """
    width, height = image.width, image.height
    aspect_ratio = width / height

    # For very elongated images the truncation can produce 0 for the shorter
    # edge, which ``Image.resize`` rejects; keep at least one pixel there.
    # Only the derived edge is clamped so an invalid ``max_size`` still fails.
    if width >= height:
        # Width is the longest edge
        new_width = max_size
        new_height = max(int(new_width / aspect_ratio), 1)
    else:
        # Height is the longest edge
        new_height = max_size
        new_width = max(int(new_height * aspect_ratio), 1)

    # Ensure even dimensions (odd values are bumped up by one)
    new_height += new_height % 2
    new_width += new_width % 2

    return image.resize((new_width, new_height), resample)


def crop_ndarray(
    image: NumpyArray,
    x1: int,
    y1: int,
    x2: int,
    y2: int,
    channel_first: bool = True,
) -> NumpyArray:
    """Cut the rectangle ``[x1, x2) x [y1, y2)`` out of an image array.

    Args:
        image: Image array, laid out as (C, H, W) when ``channel_first`` is
            true and as (H, W, C) otherwise.
        x1: Left column (inclusive).
        y1: Top row (inclusive).
        x2: Right column (exclusive).
        y2: Bottom row (exclusive).
        channel_first: Selects which axes hold rows and columns.

    Returns:
        The sliced region, in the same axis layout as the input.
    """
    rows = slice(y1, y2)
    cols = slice(x1, x2)
    every_channel = slice(None)

    if channel_first:
        region = (every_channel, rows, cols)
    else:
        region = (rows, cols, every_channel)
    return image[region]


def resize_ndarray(
    image: NumpyArray,
    size: tuple[int, int],
    resample: int | Image.Resampling = Image.Resampling.LANCZOS,
    channel_first: bool = True,
) -> NumpyArray:
    """Resize an RGB image array by round-tripping it through PIL.

    Args:
        image: Image array, laid out as (C, H, W) when ``channel_first`` is
            true and as (H, W, C) otherwise.
        size: Target size forwarded unchanged to ``Image.resize``.
        resample: Resampling filter forwarded to ``Image.resize``.
        channel_first: Axis layout of ``image``; the result uses the same layout.

    Returns:
        The resized array. Floating-point input is returned as ``float32``
        scaled back by 1/255; any other input is returned as ``uint8``.
    """
    # Convert to PIL-friendly format (H, W, C)
    img_hwc = image.transpose((1, 2, 0)) if channel_first else image

    # Any float dtype (float16 included) is assumed to be normalized to 0-1.
    is_float = np.issubdtype(img_hwc.dtype, np.floating)
    if is_float:
        # Clip before the cast: out-of-range floats would otherwise wrap
        # around (or be undefined) when converted to uint8.
        pixels = np.clip(img_hwc * 255, 0, 255).astype(np.uint8)
    else:
        # uint8 or similar
        pixels = img_hwc.astype(np.uint8)

    pil_img = Image.fromarray(pixels, mode="RGB")
    result = np.array(pil_img.resize(size, resample))

    if is_float:
        # Undo the 0-255 scaling applied above
        result = result.astype(np.float32) / 255.0

    # Convert back to original format
    if channel_first:
        result = result.transpose((2, 0, 1))

    return result
Loading