From d7e8869f4adae8915b80d03e4dfb229ef04879b2 Mon Sep 17 00:00:00 2001 From: goodnight Date: Sat, 20 Dec 2025 16:30:03 +0100 Subject: [PATCH 1/2] embed accuracy plot and standardize framework names --- CONTRIBUTING.md | 8 +- README.md | 10 +- RELEASE.md | 2 +- .../Binary Quantization from Scratch.ipynb | 105 +++++++++++++++++- docs/index.md | 10 +- 5 files changed, 117 insertions(+), 18 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c3a3e1132..37808766d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -32,7 +32,7 @@ Bugs are tracked as [GitHub issues](https://guides.github.com/features/issues/). Explain the problem and include additional details to help maintainers reproduce the problem: * **Use a clear and descriptive title** for the issue to identify the problem. -* **Describe the exact steps which reproduce the problem** in as many details as possible. For example, start by explaining how you are using FastEmbed, e.g. with Langchain, Qdrant Client, Llama Index and which command exactly you used. When listing steps, **don't just say what you did, but explain how you did it**. +* **Describe the exact steps which reproduce the problem** in as many details as possible. For example, start by explaining how you are using FastEmbed, e.g. with LangChain, Qdrant Client, LlamaIndex and which command exactly you used. When listing steps, **don't just say what you did, but explain how you did it**. * **Provide specific examples to demonstrate the steps**. Include links to files or GitHub projects, or copy/pasteable snippets, which you use in those examples. If you're providing snippets in the issue, use [Markdown code blocks](https://help.github.com/articles/markdown-basics/#multiple-lines). * **Describe the behavior you observed after following the steps** and point out what exactly is the problem with that behavior. * **Explain which behavior you expected to see instead and why.** @@ -46,9 +46,9 @@ Include details about your configuration and environment: ### Your First Code Contribution -Unsure where to begin contributing to FastEmbed? You can start by looking through these `good-first-issue`issues: +Unsure where to begin contributing to FastEmbed? You can start by looking through these `good-first-issue` issues: -* [Good First Issue](https://github.com/qdrant/fastembed/labels/good%20first%20issue) - issues which should only require a few lines of code, and a test or two. These are a great way to get started with FastEmbed. This includes adding new models which are already tested and ready on Huggingface Hub. +* [Good First Issue](https://github.com/qdrant/fastembed/labels/good%20first%20issue) - issues which should only require a few lines of code, and a test or two. These are a great way to get started with FastEmbed. This includes adding new models which are already tested and ready on Hugging Face Hub. ## Pull Requests @@ -66,7 +66,7 @@ Even documentation improvements and tests are most welcome: 1. Open Requests for New Models are [here](https://github.com/qdrant/fastembed/labels/model%20request). 2. There are quite a few pull requests that were merged for this purpose and you can use them as a reference. Here is an example: https://github.com/qdrant/fastembed/pull/129 3. 
Make sure to add tests for the new model - - The CANONICAL_VECTOR values must come from a reference implementation usually from Huggingface Transformers or Sentence Transformers + - The CANONICAL_VECTOR values must come from a reference implementation usually from Hugging Face Transformers or Sentence Transformers - Here is a reference [Colab Notebook](https://colab.research.google.com/drive/1tNdV3DsiwsJzu2AXnUnoeF5av1Hp8HF1?usp=sharing) for how we will evaluate whether your VECTOR values in the test are correct or not. ## Styleguides diff --git a/README.md b/README.md index d4c882615..ebaddf1f0 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ from fastembed import TextEmbedding # Example list of documents documents: list[str] = [ "This is built to be faster and lighter than other embedding libraries e.g. Transformers, Sentence-Transformers, etc.", - "fastembed is supported by and maintained by Qdrant.", + "FastEmbed is supported by and maintained by Qdrant.", ] # This will trigger the model download and initialization @@ -46,7 +46,7 @@ embeddings_list = list(embedding_model.embed(documents)) len(embeddings_list[0]) # Vector of 384 dimensions ``` -Fastembed supports a variety of models for different tasks and modalities. +FastEmbed supports a variety of models for different tasks and modalities. The list of all the available models can be found [here](https://qdrant.github.io/fastembed/examples/Supported_Models/) ### 🎒 Dense text embeddings @@ -182,7 +182,7 @@ from fastembed.rerank.cross_encoder import TextCrossEncoder query = "Who is maintaining Qdrant?" documents: list[str] = [ "This is built to be faster and lighter than other embedding libraries e.g. Transformers, Sentence-Transformers, etc.", - "fastembed is supported by and maintained by Qdrant.", + "FastEmbed is supported by and maintained by Qdrant.", ] encoder = TextCrossEncoder(model_name="Xenova/ms-marco-MiniLM-L-6-v2") scores = list(encoder.rerank(query, documents)) @@ -254,8 +254,8 @@ client = QdrantClient("localhost", port=6333) # For production model_name = "sentence-transformers/all-MiniLM-L6-v2" payload = [ - {"document": "Qdrant has Langchain integrations", "source": "Langchain-docs", }, - {"document": "Qdrant also has Llama Index integrations", "source": "LlamaIndex-docs"}, + {"document": "Qdrant has LangChain integrations", "source": "LangChain-docs", }, + {"document": "Qdrant also has LlamaIndex integrations", "source": "LlamaIndex-docs"}, ] docs = [models.Document(text=data["document"], model=model_name) for data in payload] ids = [42, 2] diff --git a/RELEASE.md b/RELEASE.md index 10c4865e1..1f2eb074d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,6 @@ # Releasing FastEmbed -This is a guide how to release `fastembed` and `fastembed-gpu` packages. +This is a guide on how to release `fastembed` and `fastembed-gpu` packages. ## How to diff --git a/docs/experimental/Binary Quantization from Scratch.ipynb b/docs/experimental/Binary Quantization from Scratch.ipynb index 1463c4969..7165f2db0 100644 --- a/docs/experimental/Binary Quantization from Scratch.ipynb +++ b/docs/experimental/Binary Quantization from Scratch.ipynb @@ -112,7 +112,9 @@ "outputs": [ { "data": { - "text/plain": "3072" + "text/plain": [ + "3072" + ] }, "execution_count": 5, "metadata": {}, @@ -358,8 +360,94 @@ "outputs": [ { "data": { - "text/html": "
<div>…(pandas DataFrame HTML table markup elided; it renders the same results as the text/plain output below)…</div>",
-    "text/plain": "   sampling_rate  limit  mean_acc\n0              1      3      0.90\n1              1     10      0.83\n2              2      3      1.00\n3              2     10      0.97\n4              3      3      1.00\n5              3     10      0.98\n6              5      3      1.00\n7              5     10      0.99"
+    "text/html": [
+     "<div>…(pandas DataFrame HTML table markup elided; same results as the text/plain output below)…</div>"
" + ], + "text/plain": [ + " sampling_rate limit mean_acc\n", + "0 1 3 0.90\n", + "1 1 10 0.83\n", + "2 2 3 1.00\n", + "3 2 10 0.97\n", + "4 3 3 1.00\n", + "5 3 10 0.98\n", + "6 5 3 1.00\n", + "7 5 10 0.99" + ] }, "execution_count": 8, "metadata": {}, @@ -371,6 +459,17 @@ "results" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Accuracy vs. Sampling Rate\n", + "\n", + "![Accuracy vs Sampling Rate](Accuracy_vs_SamplingRate.png)\n", + "\n", + "Higher oversampling keeps the top-k matches closer to the floating-point baseline, so accuracy climbs quickly after a sampling rate of 3." + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/docs/index.md b/docs/index.md index 15ea646a3..b4008b96b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ # ⚡️ What is FastEmbed? -FastEmbed is a lightweight, fast, Python library built for embedding generation. We [support popular text models](https://qdrant.github.io/fastembed/examples/Supported_Models/). Please [open a Github issue](https://github.com/qdrant/fastembed/issues/new) if you want us to add a new model. +FastEmbed is a lightweight, fast, Python library built for embedding generation. We [support popular text models](https://qdrant.github.io/fastembed/examples/Supported_Models/). Please [open a GitHub issue](https://github.com/qdrant/fastembed/issues/new) if you want us to add a new model. 1. Light & Fast - Quantized model weights @@ -30,7 +30,7 @@ documents: list[str] = [ "passage: Hello, World!", "query: Hello, World!", "passage: This is an example passage.", - "fastembed is supported by and maintained by Qdrant." + "FastEmbed is supported by and maintained by Qdrant." ] embedding_model = TextEmbedding() embeddings: list[np.ndarray] = embedding_model.embed(documents) @@ -53,10 +53,10 @@ from qdrant_client import QdrantClient client = QdrantClient(":memory:") # Using an in-process Qdrant # Prepare your documents, metadata, and IDs -docs = ["Qdrant has Langchain integrations", "Qdrant also has Llama Index integrations"] +docs = ["Qdrant has LangChain integrations", "Qdrant also has LlamaIndex integrations"] metadata = [ - {"source": "Langchain-docs"}, - {"source": "Llama-index-docs"}, + {"source": "LangChain-docs"}, + {"source": "LlamaIndex-docs"}, ] ids = [42, 2] From 0da3530560c96145142389e69815f9d89b005135 Mon Sep 17 00:00:00 2001 From: goodnight Date: Sat, 20 Dec 2025 16:35:15 +0100 Subject: [PATCH 2/2] embed accuracy plot and standardize framework names --- docs/experimental/Binary Quantization from Scratch.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/experimental/Binary Quantization from Scratch.ipynb b/docs/experimental/Binary Quantization from Scratch.ipynb index 7165f2db0..25630054e 100644 --- a/docs/experimental/Binary Quantization from Scratch.ipynb +++ b/docs/experimental/Binary Quantization from Scratch.ipynb @@ -467,7 +467,7 @@ "\n", "![Accuracy vs Sampling Rate](Accuracy_vs_SamplingRate.png)\n", "\n", - "Higher oversampling keeps the top-k matches closer to the floating-point baseline, so accuracy climbs quickly after a sampling rate of 3." + "Higher oversampling rapidly improves accuracy, with most of the gains occurring between sampling rates of 2 and 4." ] }, {