Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .tool-versions
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ruby 3.2.2
ruby 3.4.7
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,43 +9,60 @@
- [DOCS]: Documentation changes. No changes to the library's behavior.
- [SECURITY]: A change which fixes a security vulnerability.

## [0.1.13] - 2025-12-10

- [FEATURE]: Add `score_threshold` parameter to `similarity_search`, `similarity_search_by_vector`, and `ask` methods to filter out irrelevant results based on similarity score
- [FEATURE]: Improve test coverage with comprehensive tests for the new `score_threshold` functionality and `ask` method

## [0.1.12] - 2024-09-20

- Adding `rails g langchainrb_rails:assistant --llm=...` generator
- Adding `rails g langchainrb_rails:prompt` generator

## [0.1.11] - 2024-06-16

- Add destroy_from_vectorsearch hook

## [0.1.10] - 2024-05-20

## [0.1.9] - 2024-04-19

- Bump langchainrb gem to include v0.11.x
- Remove pg_vector Overriding Operator Constants

## [0.1.8] - 2024-03-16

- Bump langchainrb gem

## [0.1.7] - 2024-01-29

- Fix Pgvector#ask method

## [0.1.6] - 2024-01-25

- Fix bug when multiple ActiveRecord models use vectorsearch
- Bump langchainrb version
- Avoid extra query when Pgvector is used

## [0.1.5] - 2023-11-30

- Qdrant vectorsearch generator

## [0.1.4] - 2023-11-20

- Bugfix AR integration when using vectorsearch other than Pgvector

## [0.1.3] - 2023-11-01

- Pgvector vectorsearch generator

## [0.1.2] - 2023-10-27

- Pinecone vectorsearch generator

## [0.1.1] - 2023-10-23

## [0.1.0] - 2023-10-22

- Initial release

11 changes: 3 additions & 8 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
langchainrb_rails (0.1.12)
langchainrb_rails (0.1.13)
langchainrb (>= 0.19)

GEM
Expand Down Expand Up @@ -141,7 +141,6 @@ GEM
matrix (0.4.2)
method_source (1.0.0)
mini_mime (1.1.5)
mini_portile2 (2.8.8)
minitest (5.25.4)
mutex_m (0.3.0)
net-imap (0.4.18)
Expand All @@ -154,12 +153,9 @@ GEM
net-smtp (0.5.0)
net-protocol
nio4r (2.7.4)
nokogiri (1.17.2)
mini_portile2 (~> 2.8.2)
nokogiri (1.18.10-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.17.2-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.17.2-x86_64-linux)
nokogiri (1.18.10-x86_64-linux-gnu)
racc (~> 1.4)
parallel (1.26.3)
parser (3.3.6.0)
Expand Down Expand Up @@ -279,7 +275,6 @@ GEM
zeitwerk (2.6.18)

PLATFORMS
ruby
x86_64-darwin-19
x86_64-darwin-22
x86_64-linux
Expand Down
Binary file added langchainrb_rails-0.1.13.gem
Binary file not shown.
2 changes: 1 addition & 1 deletion langchainrb_rails.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
spec.files = Dir.chdir(__dir__) do
`git ls-files -z`.split("\x0").reject do |f|
(f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
(f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)}) || f.match?(/\.gem$/)
end
end
spec.bindir = "exe"
Expand Down
28 changes: 20 additions & 8 deletions lib/langchainrb_overrides/vectorsearch/pgvector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,38 +79,50 @@ def destroy_default_schema
# Search for similar texts in the index
# @param query [String] The text to search for
# @param k [Integer] The number of top results to return
# @param score_threshold [Float] The minimum similarity score (lower distance) to include in results
# @return [Array<Hash>] The results of the search
# TODO - drop the named "query:" param so it is the same interface as #ask?
def similarity_search(query:, k: 4)
# Embed the query text with the configured LLM, then delegate to the
# vector-based search.
#
# @param query [String] the text to search for
# @param k [Integer] the number of top results to return
# @param score_threshold [Float, nil] maximum neighbor distance for a result
#   to be included; nil disables filtering
# @return [Array<Hash>] the results of the search
def similarity_search(query:, k: 4, score_threshold: nil)
  query_vector = llm.embed(text: query).embedding

  similarity_search_by_vector(
    embedding: query_vector,
    k: k,
    score_threshold: score_threshold
  )
end

# Search for similar texts in the index by the passed in vector.
# You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
# @param embedding [Array<Float>] The vector to search for
# @param k [Integer] The number of top results to return
# @param score_threshold [Float] The minimum similarity score (lower distance) to include in results
# @return [Array<Hash>] The results of the search
# TODO - drop the named "embedding:" param so it is the same interface as #ask?
def similarity_search_by_vector(embedding:, k: 4)
model
.nearest_neighbors(:embedding, embedding, distance: operator)
.limit(k)
# Search for similar texts in the index by a precomputed embedding vector.
# You must generate the vector with the same LLM that produced the stored
# embeddings.
#
# @param embedding [Array<Float>] the vector to search for
# @param k [Integer] the number of top results to return
# @param score_threshold [Float, nil] maximum neighbor distance for a result
#   to be included; nil disables filtering
# @return [ActiveRecord::Relation] the matching records, closest first
def similarity_search_by_vector(embedding:, k: 4, score_threshold: nil)
  query = model.nearest_neighbors(:embedding, embedding, distance: operator)
  return query.limit(k) unless score_threshold

  # Over-fetch a small buffer and filter by distance in Ruby to avoid
  # depending on the virtual neighbor_distance column in SQL.
  # NOTE(review): a fixed +5 buffer can still miss qualifying rows that rank
  # beyond it — confirm this trade-off is acceptable.
  candidates = query.limit(k + 5)
  filtered = candidates.select { |record| record.neighbor_distance <= score_threshold }.first(k)
  ids = filtered.map(&:id)

  # Bug fix: an untyped empty array literal (ARRAY[]) raises
  # "cannot determine type of empty array" in Postgres, so short-circuit
  # with a null relation when nothing survives the threshold.
  return model.none if ids.empty?

  # Re-query by id, preserving nearest-first order via array_position.
  # NOTE(review): ids.inspect assumes integer primary keys; string/UUID ids
  # would render with double quotes, which Postgres rejects — verify.
  model.where(id: ids).order(Arel.sql("array_position(ARRAY#{ids.inspect}, id)"))
end

# Ask a question and return the answer
# @param question [String] The question to ask
# @param k [Integer] The number of results to have in context
# @param score_threshold [Float] The minimum similarity score to include in results
# @yield [String] Stream responses back one String at a time
# @return [String] The answer to the question
def ask(question:, k: 4, &block)
def ask(question:, k: 4, score_threshold: nil, &block)
# Noisy as the embedding column has a lot of data
ActiveRecord::Base.logger.silence do
search_results = similarity_search(query: question, k: k)
search_results = similarity_search(query: question, k: k, score_threshold: score_threshold)

context = search_results.map do |result|
result.as_vector
Expand Down
10 changes: 7 additions & 3 deletions lib/langchainrb_rails/active_record/hooks.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,13 @@ def embed!
#
# @param query [String] The query to search for
# @param k [Integer] The number of results to return
# @param score_threshold [Float] The minimum similarity score to include in results
# @return [ActiveRecord::Relation] The ActiveRecord relation
def similarity_search(query, k: 1)
def similarity_search(query, k: 1, score_threshold: nil)
records = class_variable_get(:@@provider).similarity_search(
query: query,
k: k
k: k,
score_threshold: score_threshold
)

return records if LangchainrbRails.config.vectorsearch.is_a?(Langchain::Vectorsearch::Pgvector)
Expand All @@ -123,13 +125,15 @@ def similarity_search(query, k: 1)
#
# @param question [String] The question to ask
# @param k [Integer] The number of results to have in context
# @param score_threshold [Float] The minimum similarity score to include in results
# @yield [String] Stream responses back one String at a time
# @return [String] The answer to the question
# standard:disable Style/ArgumentsForwarding
def ask(question, k: 4, &block)
# Ask a question against this model's vectorsearch provider and return the
# final answer text.
#
# @param question [String] the question to ask
# @param k [Integer] the number of results to include as context
# @param score_threshold [Float, nil] minimum similarity score for context results
# @yield [String] stream responses back one String at a time
# @return [String] the answer to the question
# standard:disable Style/ArgumentsForwarding
def ask(question, k: 4, score_threshold: nil, &block)
  provider = class_variable_get(:@@provider)
  response = provider.ask(
    question: question,
    k: k,
    score_threshold: score_threshold,
    &block
  )
  response.chat_completion
end
Expand Down
2 changes: 1 addition & 1 deletion lib/langchainrb_rails/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module LangchainrbRails
VERSION = "0.1.12"
VERSION = "0.1.13"
end
88 changes: 78 additions & 10 deletions spec/langchainrb_overrides/vectorsearch/pgvector_spec.rb
Original file line number Diff line number Diff line change
@@ -1,27 +1,95 @@
# frozen_string_literal: true

RSpec.describe Langchain::Vectorsearch::Pgvector do
let(:llm) { Langchain::LLM::OpenAI.new(api_key: "123") }
let(:llm) { double("LLM") }
let(:model) { double("Model") }
subject { described_class.new(llm: llm) }

describe "#add_texts" do
before do
subject.model = model
end

describe "#update_text" do
end

describe "#create_default_schema" do
end
describe "#similarity_search" do
it "passes score_threshold to similarity_search_by_vector" do
allow(llm).to receive(:embed).and_return(double(embedding: [0.1, 0.2]))
allow(model).to receive(:nearest_neighbors).and_return(double(limit: []))
allow(model).to receive(:where).and_return([])

describe "#destroy_default_schema" do
end
expect(subject).to receive(:similarity_search_by_vector).with(embedding: [0.1, 0.2], k: 4, score_threshold: 0.5)

describe "#similarity_search" do
subject.similarity_search(query: "test", k: 4, score_threshold: 0.5)
end
end

describe "#similarity_search_by_vector" do
let(:query) { double("Query") }
let(:candidates) { double("Candidates") }
let(:filtered) { [double(id: 1, neighbor_distance: 0.3), double(id: 2, neighbor_distance: 0.4)] }

before do
allow(model).to receive(:nearest_neighbors).and_return(query)
end

context "without score_threshold" do
it "returns query.limit(k)" do
allow(query).to receive(:limit).with(4).and_return(:result)

result = subject.similarity_search_by_vector(embedding: [0.1, 0.2], k: 4)

expect(result).to eq(:result)
end
end

context "with score_threshold" do
it "filters candidates and returns ordered results" do
allow(query).to receive(:limit).with(9).and_return(candidates) # k + 5 = 9
allow(candidates).to receive(:select).and_return(filtered)
allow(filtered).to receive(:first).with(4).and_return(filtered)
allow(model).to receive(:where).with(id: [1, 2]).and_return(double(order: :ordered_result))

result = subject.similarity_search_by_vector(embedding: [0.1, 0.2], k: 4, score_threshold: 0.5)

expect(result).to eq(:ordered_result)
end
end
end

describe "#ask" do
it "passes score_threshold to similarity_search and processes results" do
# Mock embedding
allow(llm).to receive(:embed).and_return(double(embedding: [0.1, 0.2]))

# Mock nearest_neighbors and query chain
query = double("Query")
allow(model).to receive(:nearest_neighbors).and_return(query)
allow(query).to receive(:limit).and_return([])

# Mock search results
record1 = double("Record1", as_vector: "Vector 1")
record2 = double("Record2", as_vector: "Vector 2")
search_results = [record1, record2]

# Mock similarity_search to return the results
allow(subject).to receive(:similarity_search).and_return(search_results)

# Mock logger silence
logger = double("Logger")
allow(ActiveRecord::Base).to receive(:logger).and_return(logger)
allow(logger).to receive(:silence).and_yield

# Mock generate_rag_prompt
allow(subject).to receive(:generate_rag_prompt).and_return("Mocked prompt")

# Mock llm.chat
chat_response = double("ChatResponse", chat_completion: "Mocked answer")
allow(llm).to receive(:chat).and_return(chat_response)

result = subject.ask(question: "question", k: 4, score_threshold: 0.5)

expect(subject).to have_received(:similarity_search).with(query: "question", k: 4, score_threshold: 0.5)
expect(subject).to have_received(:generate_rag_prompt).with(question: "question", context: "Vector 1\n---\nVector 2")
expect(llm).to have_received(:chat).with(messages: [{role: "user", content: "Mocked prompt"}])
expect(result.chat_completion).to eq("Mocked answer")
end
end
end
Loading