From d7e8869f4adae8915b80d03e4dfb229ef04879b2 Mon Sep 17 00:00:00 2001 From: goodnight Date: Sat, 20 Dec 2025 16:30:03 +0100 Subject: [PATCH 1/2] embed accuracy plot and standardize framework names --- CONTRIBUTING.md | 8 +- README.md | 10 +- RELEASE.md | 2 +- .../Binary Quantization from Scratch.ipynb | 105 +++++++++++++++++- docs/index.md | 10 +- 5 files changed, 117 insertions(+), 18 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c3a3e1132..37808766d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -32,7 +32,7 @@ Bugs are tracked as [GitHub issues](https://guides.github.com/features/issues/). Explain the problem and include additional details to help maintainers reproduce the problem: * **Use a clear and descriptive title** for the issue to identify the problem. -* **Describe the exact steps which reproduce the problem** in as many details as possible. For example, start by explaining how you are using FastEmbed, e.g. with Langchain, Qdrant Client, Llama Index and which command exactly you used. When listing steps, **don't just say what you did, but explain how you did it**. +* **Describe the exact steps which reproduce the problem** in as many details as possible. For example, start by explaining how you are using FastEmbed, e.g. with LangChain, Qdrant Client, LlamaIndex and which command exactly you used. When listing steps, **don't just say what you did, but explain how you did it**. * **Provide specific examples to demonstrate the steps**. Include links to files or GitHub projects, or copy/pasteable snippets, which you use in those examples. If you're providing snippets in the issue, use [Markdown code blocks](https://help.github.com/articles/markdown-basics/#multiple-lines). * **Describe the behavior you observed after following the steps** and point out what exactly is the problem with that behavior. * **Explain which behavior you expected to see instead and why.** @@ -46,9 +46,9 @@ Include details about your configuration and environment: ### Your First Code Contribution -Unsure where to begin contributing to FastEmbed? You can start by looking through these `good-first-issue`issues: +Unsure where to begin contributing to FastEmbed? You can start by looking through these `good-first-issue` issues: -* [Good First Issue](https://github.com/qdrant/fastembed/labels/good%20first%20issue) - issues which should only require a few lines of code, and a test or two. These are a great way to get started with FastEmbed. This includes adding new models which are already tested and ready on Huggingface Hub. +* [Good First Issue](https://github.com/qdrant/fastembed/labels/good%20first%20issue) - issues which should only require a few lines of code, and a test or two. These are a great way to get started with FastEmbed. This includes adding new models which are already tested and ready on Hugging Face Hub. ## Pull Requests @@ -66,7 +66,7 @@ Even documentation improvements and tests are most welcome: 1. Open Requests for New Models are [here](https://github.com/qdrant/fastembed/labels/model%20request). 2. There are quite a few pull requests that were merged for this purpose and you can use them as a reference. Here is an example: https://github.com/qdrant/fastembed/pull/129 3. 
Make sure to add tests for the new model - - The CANONICAL_VECTOR values must come from a reference implementation usually from Huggingface Transformers or Sentence Transformers + - The CANONICAL_VECTOR values must come from a reference implementation usually from Hugging Face Transformers or Sentence Transformers - Here is a reference [Colab Notebook](https://colab.research.google.com/drive/1tNdV3DsiwsJzu2AXnUnoeF5av1Hp8HF1?usp=sharing) for how we will evaluate whether your VECTOR values in the test are correct or not. ## Styleguides diff --git a/README.md b/README.md index d4c882615..ebaddf1f0 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ from fastembed import TextEmbedding # Example list of documents documents: list[str] = [ "This is built to be faster and lighter than other embedding libraries e.g. Transformers, Sentence-Transformers, etc.", - "fastembed is supported by and maintained by Qdrant.", + "FastEmbed is supported by and maintained by Qdrant.", ] # This will trigger the model download and initialization @@ -46,7 +46,7 @@ embeddings_list = list(embedding_model.embed(documents)) len(embeddings_list[0]) # Vector of 384 dimensions ``` -Fastembed supports a variety of models for different tasks and modalities. +FastEmbed supports a variety of models for different tasks and modalities. The list of all the available models can be found [here](https://qdrant.github.io/fastembed/examples/Supported_Models/) ### 🎒 Dense text embeddings @@ -182,7 +182,7 @@ from fastembed.rerank.cross_encoder import TextCrossEncoder query = "Who is maintaining Qdrant?" documents: list[str] = [ "This is built to be faster and lighter than other embedding libraries e.g. Transformers, Sentence-Transformers, etc.", - "fastembed is supported by and maintained by Qdrant.", + "FastEmbed is supported by and maintained by Qdrant.", ] encoder = TextCrossEncoder(model_name="Xenova/ms-marco-MiniLM-L-6-v2") scores = list(encoder.rerank(query, documents)) @@ -254,8 +254,8 @@ client = QdrantClient("localhost", port=6333) # For production model_name = "sentence-transformers/all-MiniLM-L6-v2" payload = [ - {"document": "Qdrant has Langchain integrations", "source": "Langchain-docs", }, - {"document": "Qdrant also has Llama Index integrations", "source": "LlamaIndex-docs"}, + {"document": "Qdrant has LangChain integrations", "source": "LangChain-docs", }, + {"document": "Qdrant also has LlamaIndex integrations", "source": "LlamaIndex-docs"}, ] docs = [models.Document(text=data["document"], model=model_name) for data in payload] ids = [42, 2] diff --git a/RELEASE.md b/RELEASE.md index 10c4865e1..1f2eb074d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,6 @@ # Releasing FastEmbed -This is a guide how to release `fastembed` and `fastembed-gpu` packages. +This is a guide on how to release `fastembed` and `fastembed-gpu` packages. ## How to diff --git a/docs/experimental/Binary Quantization from Scratch.ipynb b/docs/experimental/Binary Quantization from Scratch.ipynb index 1463c4969..7165f2db0 100644 --- a/docs/experimental/Binary Quantization from Scratch.ipynb +++ b/docs/experimental/Binary Quantization from Scratch.ipynb @@ -112,7 +112,9 @@ "outputs": [ { "data": { - "text/plain": "3072" + "text/plain": [ + "3072" + ] }, "execution_count": 5, "metadata": {}, @@ -358,8 +360,94 @@ "outputs": [ { "data": { - "text/html": "
<div>…(pandas DataFrame HTML table markup elided; it renders the same results as the text/plain output below)…</div>",
-    "text/plain": "   sampling_rate  limit  mean_acc\n0              1      3      0.90\n1              1     10      0.83\n2              2      3      1.00\n3              2     10      0.97\n4              3      3      1.00\n5              3     10      0.98\n6              5      3      1.00\n7              5     10      0.99"
+    "text/html": [
+     "<div>…(pandas DataFrame HTML table markup elided; same results as the text/plain output below)…</div>"
" + ], + "text/plain": [ + " sampling_rate limit mean_acc\n", + "0 1 3 0.90\n", + "1 1 10 0.83\n", + "2 2 3 1.00\n", + "3 2 10 0.97\n", + "4 3 3 1.00\n", + "5 3 10 0.98\n", + "6 5 3 1.00\n", + "7 5 10 0.99" + ] }, "execution_count": 8, "metadata": {}, @@ -371,6 +459,17 @@ "results" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Accuracy vs. Sampling Rate\n", + "\n", + "![Accuracy vs Sampling Rate](Accuracy_vs_SamplingRate.png)\n", + "\n", + "Higher oversampling keeps the top-k matches closer to the floating-point baseline, so accuracy climbs quickly after a sampling rate of 3." + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/docs/index.md b/docs/index.md index 15ea646a3..b4008b96b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ # ⚡️ What is FastEmbed? -FastEmbed is a lightweight, fast, Python library built for embedding generation. We [support popular text models](https://qdrant.github.io/fastembed/examples/Supported_Models/). Please [open a Github issue](https://github.com/qdrant/fastembed/issues/new) if you want us to add a new model. +FastEmbed is a lightweight, fast, Python library built for embedding generation. We [support popular text models](https://qdrant.github.io/fastembed/examples/Supported_Models/). Please [open a GitHub issue](https://github.com/qdrant/fastembed/issues/new) if you want us to add a new model. 1. Light & Fast - Quantized model weights @@ -30,7 +30,7 @@ documents: list[str] = [ "passage: Hello, World!", "query: Hello, World!", "passage: This is an example passage.", - "fastembed is supported by and maintained by Qdrant." + "FastEmbed is supported by and maintained by Qdrant." ] embedding_model = TextEmbedding() embeddings: list[np.ndarray] = embedding_model.embed(documents) @@ -53,10 +53,10 @@ from qdrant_client import QdrantClient client = QdrantClient(":memory:") # Using an in-process Qdrant # Prepare your documents, metadata, and IDs -docs = ["Qdrant has Langchain integrations", "Qdrant also has Llama Index integrations"] +docs = ["Qdrant has LangChain integrations", "Qdrant also has LlamaIndex integrations"] metadata = [ - {"source": "Langchain-docs"}, - {"source": "Llama-index-docs"}, + {"source": "LangChain-docs"}, + {"source": "LlamaIndex-docs"}, ] ids = [42, 2] From 0da3530560c96145142389e69815f9d89b005135 Mon Sep 17 00:00:00 2001 From: goodnight Date: Sat, 20 Dec 2025 16:35:15 +0100 Subject: [PATCH 2/2] embed accuracy plot and standardize framework names --- docs/experimental/Binary Quantization from Scratch.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/experimental/Binary Quantization from Scratch.ipynb b/docs/experimental/Binary Quantization from Scratch.ipynb index 7165f2db0..25630054e 100644 --- a/docs/experimental/Binary Quantization from Scratch.ipynb +++ b/docs/experimental/Binary Quantization from Scratch.ipynb @@ -467,7 +467,7 @@ "\n", "![Accuracy vs Sampling Rate](Accuracy_vs_SamplingRate.png)\n", "\n", - "Higher oversampling keeps the top-k matches closer to the floating-point baseline, so accuracy climbs quickly after a sampling rate of 3." + "Higher oversampling rapidly improves accuracy, with most of the gains occurring between sampling rates of 2 and 4." ] }, {