Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .tool-versions
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ruby 3.2.2
ruby 3.4.7
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,43 +9,60 @@
- [DOCS]: Documentation changes. No changes to the library's behavior.
- [SECURITY]: A change which fixes a security vulnerability.

## [0.1.13] - 2025-12-10

- [FEATURE]: Add `score_threshold` parameter to `similarity_search`, `similarity_search_by_vector`, and `ask` methods to filter out irrelevant results based on similarity score
- [FEATURE]: Improve test coverage with comprehensive tests for the new `score_threshold` functionality and `ask` method

## [0.1.12] - 2024-09-20

- Adding `rails g langchainrb_rails:assistant --llm=...` generator
- Adding `rails g langchainrb_rails:prompt` generator

## [0.1.11] - 2024-06-16

- Add destroy_from_vectorsearch hook

## [0.1.10] - 2024-05-20

## [0.1.9] - 2024-04-19

- Bump langchainrb gem to include v0.11.x
- Remove pg_vector Overriding Operator Constants

## [0.1.8] - 2024-03-16

- Bump langchainrb gem

## [0.1.7] - 2024-01-29

- Fix Pgvector#ask method

## [0.1.6] - 2024-01-25

- Fix bug when multiple ActiveRecord models use vectorsearch
- Bump langchainrb version
- Avoid extra query when Pgvector is used

## [0.1.5] - 2023-11-30

- Qdrant vectorsearch generator

## [0.1.4] - 2023-11-20

- Bugfix AR integration when using vectorsearch other than Pgvector

## [0.1.3] - 2023-11-01

- Pgvector vectorsearch generator

## [0.1.2] - 2023-10-27

- Pinecone vectorsearch generator

## [0.1.1] - 2023-10-23

## [0.1.0] - 2023-10-22

- Initial release

11 changes: 3 additions & 8 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
langchainrb_rails (0.1.12)
langchainrb_rails (0.1.13)
langchainrb (>= 0.19)

GEM
Expand Down Expand Up @@ -141,7 +141,6 @@ GEM
matrix (0.4.2)
method_source (1.0.0)
mini_mime (1.1.5)
mini_portile2 (2.8.8)
minitest (5.25.4)
mutex_m (0.3.0)
net-imap (0.4.18)
Expand All @@ -154,12 +153,9 @@ GEM
net-smtp (0.5.0)
net-protocol
nio4r (2.7.4)
nokogiri (1.17.2)
mini_portile2 (~> 2.8.2)
nokogiri (1.18.10-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.17.2-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.17.2-x86_64-linux)
nokogiri (1.18.10-x86_64-linux-gnu)
racc (~> 1.4)
parallel (1.26.3)
parser (3.3.6.0)
Expand Down Expand Up @@ -279,7 +275,6 @@ GEM
zeitwerk (2.6.18)

PLATFORMS
ruby
x86_64-darwin-19
x86_64-darwin-22
x86_64-linux
Expand Down
Binary file added langchainrb_rails-0.1.13.gem
Binary file not shown.
2 changes: 1 addition & 1 deletion langchainrb_rails.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
spec.files = Dir.chdir(__dir__) do
`git ls-files -z`.split("\x0").reject do |f|
(f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
(f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)}) || f.match?(/\.gem$/)
end
end
spec.bindir = "exe"
Expand Down
28 changes: 20 additions & 8 deletions lib/langchainrb_overrides/vectorsearch/pgvector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,38 +79,50 @@ def destroy_default_schema
# Search for similar texts in the index
# @param query [String] The text to search for
# @param k [Integer] The number of top results to return
# @param score_threshold [Float] The minimum similarity score (lower distance) to include in results
# @return [Array<Hash>] The results of the search
# TODO - drop the named "query:" param so it is the same interface as #ask?
def similarity_search(query:, k: 4)
# Embed the query text with the configured LLM, then delegate to the
# vector-based search.
#
# @param query [String] the text to search for
# @param k [Integer] the number of top results to return
# @param score_threshold [Float, nil] maximum neighbor distance for a result
#   to be included; nil disables filtering
# @return [Array<Hash>] the results of the search
def similarity_search(query:, k: 4, score_threshold: nil)
  query_vector = llm.embed(text: query).embedding

  similarity_search_by_vector(
    embedding: query_vector,
    k: k,
    score_threshold: score_threshold
  )
end

# Search for similar texts in the index by the passed in vector.
# You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
# @param embedding [Array<Float>] The vector to search for
# @param k [Integer] The number of top results to return
# @param score_threshold [Float] The minimum similarity score (lower distance) to include in results
# @return [Array<Hash>] The results of the search
# TODO - drop the named "embedding:" param so it is the same interface as #ask?
def similarity_search_by_vector(embedding:, k: 4)
model
.nearest_neighbors(:embedding, embedding, distance: operator)
.limit(k)
# Search for similar texts in the index by a precomputed embedding vector.
# You must generate the vector with the same LLM that produced the stored
# embeddings.
#
# @param embedding [Array<Float>] the vector to search for
# @param k [Integer] the number of top results to return
# @param score_threshold [Float, nil] maximum neighbor distance for a result
#   to be included; nil disables filtering
# @return [ActiveRecord::Relation] the matching records, closest first
def similarity_search_by_vector(embedding:, k: 4, score_threshold: nil)
  query = model.nearest_neighbors(:embedding, embedding, distance: operator)
  return query.limit(k) unless score_threshold

  # Over-fetch a small buffer and filter by distance in Ruby to avoid
  # depending on the virtual neighbor_distance column in SQL.
  # NOTE(review): a fixed +5 buffer can still miss qualifying rows that rank
  # beyond it — confirm this trade-off is acceptable.
  candidates = query.limit(k + 5)
  filtered = candidates.select { |record| record.neighbor_distance <= score_threshold }.first(k)
  ids = filtered.map(&:id)

  # Bug fix: an untyped empty array literal (ARRAY[]) raises
  # "cannot determine type of empty array" in Postgres, so short-circuit
  # with a null relation when nothing survives the threshold.
  return model.none if ids.empty?

  # Re-query by id, preserving nearest-first order via array_position.
  # NOTE(review): ids.inspect assumes integer primary keys; string/UUID ids
  # would render with double quotes, which Postgres rejects — verify.
  model.where(id: ids).order(Arel.sql("array_position(ARRAY#{ids.inspect}, id)"))
end

# Ask a question and return the answer
# @param question [String] The question to ask
# @param k [Integer] The number of results to have in context
# @param score_threshold [Float] The minimum similarity score to include in results
# @yield [String] Stream responses back one String at a time
# @return [String] The answer to the question
def ask(question:, k: 4, &block)
def ask(question:, k: 4, score_threshold: nil, &block)
# Noisy as the embedding column has a lot of data
ActiveRecord::Base.logger.silence do
search_results = similarity_search(query: question, k: k)
search_results = similarity_search(query: question, k: k, score_threshold: score_threshold)

context = search_results.map do |result|
result.as_vector
Expand Down
10 changes: 7 additions & 3 deletions lib/langchainrb_rails/active_record/hooks.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,13 @@ def embed!
#
# @param query [String] The query to search for
# @param k [Integer] The number of results to return
# @param score_threshold [Float] The minimum similarity score to include in results
# @return [ActiveRecord::Relation] The ActiveRecord relation
def similarity_search(query, k: 1)
def similarity_search(query, k: 1, score_threshold: nil)
records = class_variable_get(:@@provider).similarity_search(
query: query,
k: k
k: k,
score_threshold: score_threshold
)

return records if LangchainrbRails.config.vectorsearch.is_a?(Langchain::Vectorsearch::Pgvector)
Expand All @@ -123,13 +125,15 @@ def similarity_search(query, k: 1)
#
# @param question [String] The question to ask
# @param k [Integer] The number of results to have in context
# @param score_threshold [Float] The minimum similarity score to include in results
# @yield [String] Stream responses back one String at a time
# @return [String] The answer to the question
# standard:disable Style/ArgumentsForwarding
def ask(question, k: 4, &block)
# Ask a question against this model's vectorsearch provider and return the
# final answer text.
#
# @param question [String] the question to ask
# @param k [Integer] the number of results to include as context
# @param score_threshold [Float, nil] minimum similarity score for context results
# @yield [String] stream responses back one String at a time
# @return [String] the answer to the question
# standard:disable Style/ArgumentsForwarding
def ask(question, k: 4, score_threshold: nil, &block)
  provider = class_variable_get(:@@provider)
  response = provider.ask(
    question: question,
    k: k,
    score_threshold: score_threshold,
    &block
  )
  response.chat_completion
end
Expand Down
2 changes: 1 addition & 1 deletion lib/langchainrb_rails/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module LangchainrbRails
VERSION = "0.1.12"
VERSION = "0.1.13"
end
88 changes: 78 additions & 10 deletions spec/langchainrb_overrides/vectorsearch/pgvector_spec.rb
Original file line number Diff line number Diff line change
@@ -1,27 +1,95 @@
# frozen_string_literal: true

RSpec.describe Langchain::Vectorsearch::Pgvector do
let(:llm) { Langchain::LLM::OpenAI.new(api_key: "123") }
let(:llm) { double("LLM") }
let(:model) { double("Model") }
subject { described_class.new(llm: llm) }

describe "#add_texts" do
before do
subject.model = model
end

describe "#update_text" do
end

describe "#create_default_schema" do
end
describe "#similarity_search" do
it "passes score_threshold to similarity_search_by_vector" do
allow(llm).to receive(:embed).and_return(double(embedding: [0.1, 0.2]))
allow(model).to receive(:nearest_neighbors).and_return(double(limit: []))
allow(model).to receive(:where).and_return([])

describe "#destroy_default_schema" do
end
expect(subject).to receive(:similarity_search_by_vector).with(embedding: [0.1, 0.2], k: 4, score_threshold: 0.5)

describe "#similarity_search" do
subject.similarity_search(query: "test", k: 4, score_threshold: 0.5)
end
end

describe "#similarity_search_by_vector" do
let(:query) { double("Query") }
let(:candidates) { double("Candidates") }
let(:filtered) { [double(id: 1, neighbor_distance: 0.3), double(id: 2, neighbor_distance: 0.4)] }

before do
allow(model).to receive(:nearest_neighbors).and_return(query)
end

context "without score_threshold" do
it "returns query.limit(k)" do
allow(query).to receive(:limit).with(4).and_return(:result)

result = subject.similarity_search_by_vector(embedding: [0.1, 0.2], k: 4)

expect(result).to eq(:result)
end
end

context "with score_threshold" do
it "filters candidates and returns ordered results" do
allow(query).to receive(:limit).with(9).and_return(candidates) # k + 5 = 9
allow(candidates).to receive(:select).and_return(filtered)
allow(filtered).to receive(:first).with(4).and_return(filtered)
allow(model).to receive(:where).with(id: [1, 2]).and_return(double(order: :ordered_result))

result = subject.similarity_search_by_vector(embedding: [0.1, 0.2], k: 4, score_threshold: 0.5)

expect(result).to eq(:ordered_result)
end
end
end

describe "#ask" do
it "passes score_threshold to similarity_search and processes results" do
# Mock embedding
allow(llm).to receive(:embed).and_return(double(embedding: [0.1, 0.2]))

# Mock nearest_neighbors and query chain
query = double("Query")
allow(model).to receive(:nearest_neighbors).and_return(query)
allow(query).to receive(:limit).and_return([])

# Mock search results
record1 = double("Record1", as_vector: "Vector 1")
record2 = double("Record2", as_vector: "Vector 2")
search_results = [record1, record2]

# Mock similarity_search to return the results
allow(subject).to receive(:similarity_search).and_return(search_results)

# Mock logger silence
logger = double("Logger")
allow(ActiveRecord::Base).to receive(:logger).and_return(logger)
allow(logger).to receive(:silence).and_yield

# Mock generate_rag_prompt
allow(subject).to receive(:generate_rag_prompt).and_return("Mocked prompt")

# Mock llm.chat
chat_response = double("ChatResponse", chat_completion: "Mocked answer")
allow(llm).to receive(:chat).and_return(chat_response)

result = subject.ask(question: "question", k: 4, score_threshold: 0.5)

expect(subject).to have_received(:similarity_search).with(query: "question", k: 4, score_threshold: 0.5)
expect(subject).to have_received(:generate_rag_prompt).with(question: "question", context: "Vector 1\n---\nVector 2")
expect(llm).to have_received(:chat).with(messages: [{role: "user", content: "Mocked prompt"}])
expect(result.chat_completion).to eq("Mocked answer")
end
end
end
Loading