From c9e89137cbbd8650744726f5186fc9839f062a29 Mon Sep 17 00:00:00 2001 From: Duccio Giovannelli Date: Wed, 22 Jul 2020 09:21:03 +0200 Subject: [PATCH 01/10] Indexer sunspot_type --- sunspot/lib/sunspot/indexer.rb | 196 +++++++++++++++++---------------- 1 file changed, 103 insertions(+), 93 deletions(-) diff --git a/sunspot/lib/sunspot/indexer.rb b/sunspot/lib/sunspot/indexer.rb index 40c0570e3..6f2f02243 100644 --- a/sunspot/lib/sunspot/indexer.rb +++ b/sunspot/lib/sunspot/indexer.rb @@ -1,7 +1,7 @@ require 'sunspot/batcher' module Sunspot - # + # # This class presents a service for adding, updating, and removing data # from the Solr index. An Indexer instance is associated with a particular # setup, and thus is capable of indexing instances of a certain class (and its @@ -12,7 +12,7 @@ def initialize(connection) @connection = connection end - # + # # Construct a representation of the model for indexing and send it to the # connection for indexing # @@ -35,7 +35,7 @@ def add(model) # updates:: hash of updates where keys are model ids # and values are hash with property name/values to be updated # - def add_atomic_update(clazz, updates={}) + def add_atomic_update(clazz, updates = {}) documents = updates.map { |id, m| prepare_atomic_update(clazz, id, m) } add_batch_documents(documents) end @@ -93,109 +93,119 @@ def flush_batch private - def batcher - @batcher ||= Batcher.new - end - - # - # Convert documents into hash of indexed properties - # - def prepare_full_update(model) - document = document_for_full_update(model) - setup = setup_for_object(model) - if boost = setup.document_boost_for(model) - document.attrs[:boost] = boost - end - setup.all_field_factories.each do |field_factory| - field_factory.populate_document(document, model) + def batcher + @batcher ||= Batcher.new end - unless setup.child_field_factory.nil? - setup.child_field_factory.populate_document( - document, - model, - adapter: ->(child_model) { prepare_full_update(child_model) } - ) - end - document - end - def prepare_atomic_update(clazz, id, updates = {}) - document = document_for_atomic_update(clazz, id) - setup = setup_for_class(clazz) - # Child documents must be re-indexed with parent at each update, - # otherwise Solr would discard them. - unless setup.child_field_factory.nil? - raise 'Objects with child documents can\'t perform atomic updates' - end - setup.all_field_factories.each do |field_factory| - if updates.has_key?(field_factory.name) - field_factory.populate_document(document, nil, value: updates[field_factory.name], update: :set) + # + # Convert documents into hash of indexed properties + # + def prepare_full_update(model) + document = document_for_full_update(model) + setup = setup_for_object(model) + if boost = setup.document_boost_for(model) + document.attrs[:boost] = boost + end + setup.all_field_factories.each do |field_factory| + field_factory.populate_document(document, model) + end + unless setup.child_field_factory.nil? + setup.child_field_factory.populate_document( + document, + model, + adapter: ->(child_model) { prepare_full_update(child_model) } + ) end + document end - document - end - def add_documents(documents) - @connection.add(documents) - end + def prepare_atomic_update(clazz, id, updates = {}) + document = document_for_atomic_update(clazz, id) + setup = setup_for_class(clazz) + # Child documents must be re-indexed with parent at each update, + # otherwise Solr would discard them. + unless setup.child_field_factory.nil? + raise 'Objects with child documents can\'t perform atomic updates' + end + setup.all_field_factories.each do |field_factory| + if updates.has_key?(field_factory.name) + field_factory.populate_document(document, nil, value: updates[field_factory.name], update: :set) + end + end + document + end - def add_batch_documents(documents) - if batcher.batching? - batcher.concat(documents) - else - add_documents(documents) + def add_documents(documents) + @connection.add(documents) end - end - # - # All indexed documents index and store the +id+ and +type+ fields. - # These methods construct the document hash containing those key-value - # pairs. - # - def document_for_full_update(model) - RSolr::Xml::Document.new( - id: Adapters::InstanceAdapter.adapt(model).index_id, - type: Util.superclasses_for(model.class).map(&:name) - ) - end + def add_batch_documents(documents) + if batcher.batching? + batcher.concat(documents) + else + add_documents(documents) + end + end - def document_for_atomic_update(clazz, id) - if Adapters::InstanceAdapter.for(clazz) + # + # All indexed documents index and store the +id+ and +type+ fields. + # These methods construct the document hash containing those key-value + # pairs. + # + def document_for_full_update(model) RSolr::Xml::Document.new( - id: Adapters::InstanceAdapter.index_id_for(clazz.name, id), - type: Util.superclasses_for(clazz).map(&:name) + id: Adapters::InstanceAdapter.adapt(model).index_id, + type: sunspot_type(model) ) end - end - # - # Get the Setup object for the given object's class. - # - # ==== Parameters - # - # object:: The object whose setup is to be retrieved - # - # ==== Returns - # - # Sunspot::Setup:: The setup for the object's class - # - def setup_for_object(object) - setup_for_class(object.class) - end + def document_for_atomic_update(clazz, id) + if Adapters::InstanceAdapter.for(clazz) + RSolr::Xml::Document.new( + id: Adapters::InstanceAdapter.index_id_for(clazz.name, id), + type: sunspot_type(model) + ) + end + end - # - # Get the Setup object for the given class. - # - # ==== Parameters - # - # clazz:: The class whose setup is to be retrieved - # - # ==== Returns - # - # Sunspot::Setup:: The setup for the class - # - def setup_for_class(clazz) - Setup.for(clazz) || raise(NoSetupError, "Sunspot is not configured for #{clazz.inspect}") - end + def sunspot_type(element) + clazz = element.is_a?(Class) ? element : element.class + if clazz.respond_to?(:sunspot_type) + raise StandardError.new('sunspot_type must be an array of strings') unless clazz.sunspot_type.is_a?(Array) + clazz.sunspot_type + else + Util.superclasses_for(clazz).map(&:name) + end + end + + # + # Get the Setup object for the given object's class. + # + # ==== Parameters + # + # object:: The object whose setup is to be retrieved + # + # ==== Returns + # + # Sunspot::Setup:: The setup for the object's class + # + def setup_for_object(object) + setup_for_class(object.class) + end + + # + # Get the Setup object for the given class. + # + # ==== Parameters + # + # clazz:: The class whose setup is to be retrieved + # + # ==== Returns + # + # Sunspot::Setup:: The setup for the class + # + def setup_for_class(clazz) + Setup.for(clazz) || raise(NoSetupError, "Sunspot is not configured for #{clazz.inspect}") + end end end From c7e41ebc045dcc9622b8844f5c1c8283a329bff0 Mon Sep 17 00:00:00 2001 From: Duccio Giovannelli Date: Wed, 22 Jul 2020 09:26:38 +0200 Subject: [PATCH 02/10] Sunspot typ --- sunspot/lib/sunspot/indexer.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sunspot/lib/sunspot/indexer.rb b/sunspot/lib/sunspot/indexer.rb index 6f2f02243..6e597a0b8 100644 --- a/sunspot/lib/sunspot/indexer.rb +++ b/sunspot/lib/sunspot/indexer.rb @@ -163,11 +163,18 @@ def document_for_atomic_update(clazz, id) if Adapters::InstanceAdapter.for(clazz) RSolr::Xml::Document.new( id: Adapters::InstanceAdapter.index_id_for(clazz.name, id), - type: sunspot_type(model) + type: sunspot_type(clazz) ) end end + # + # Store on type field the class name we want to search for + # + # @param [Class/Object] element + # + # @return [Array[Strings]] + # def sunspot_type(element) clazz = element.is_a?(Class) ? element : element.class if clazz.respond_to?(:sunspot_type) From c682d21b2b8b868c6a6393d683af0da2235a4ae7 Mon Sep 17 00:00:00 2001 From: Duccio Giovannelli Date: Wed, 22 Jul 2020 09:35:15 +0200 Subject: [PATCH 03/10] Use sunspot_type on superclasses_for if defined --- sunspot/lib/sunspot/indexer.rb | 21 +------ sunspot/lib/sunspot/util.rb | 109 +++++++++++++++++---------------- 2 files changed, 59 insertions(+), 71 deletions(-) diff --git a/sunspot/lib/sunspot/indexer.rb b/sunspot/lib/sunspot/indexer.rb index 6e597a0b8..21ed31365 100644 --- a/sunspot/lib/sunspot/indexer.rb +++ b/sunspot/lib/sunspot/indexer.rb @@ -155,7 +155,7 @@ def add_batch_documents(documents) def document_for_full_update(model) RSolr::Xml::Document.new( id: Adapters::InstanceAdapter.adapt(model).index_id, - type: sunspot_type(model) + type: Util.superclasses_for(model.class).map(&:name) ) end @@ -163,28 +163,11 @@ def document_for_atomic_update(clazz, id) if Adapters::InstanceAdapter.for(clazz) RSolr::Xml::Document.new( id: Adapters::InstanceAdapter.index_id_for(clazz.name, id), - type: sunspot_type(clazz) + type: Util.superclasses_for(clazz).map(&:name) ) end end - # - # Store on type field the class name we want to search for - # - # @param [Class/Object] element - # - # @return [Array[Strings]] - # - def sunspot_type(element) - clazz = element.is_a?(Class) ? element : element.class - if clazz.respond_to?(:sunspot_type) - raise StandardError.new('sunspot_type must be an array of strings') unless clazz.sunspot_type.is_a?(Array) - clazz.sunspot_type - else - Util.superclasses_for(clazz).map(&:name) - end - end - # # Get the Setup object for the given object's class. # diff --git a/sunspot/lib/sunspot/util.rb b/sunspot/lib/sunspot/util.rb index 2a6c8ff6c..5bcf6a455 100644 --- a/sunspot/lib/sunspot/util.rb +++ b/sunspot/lib/sunspot/util.rb @@ -27,9 +27,14 @@ def child_documents_supported? # Array:: Collection containing class and its superclasses # def superclasses_for(clazz) - superclasses = [clazz] - superclasses << (clazz = clazz.superclass) while clazz.superclass != Object - superclasses + if clazz.respond_to?(:sunspot_type) + raise StandardError.new('sunspot_type must be an array of strings') unless clazz.sunspot_type.is_a?(Array) + clazz.sunspot_type + else + superclasses = [clazz] + superclasses << (clazz = clazz.superclass) while clazz.superclass != Object + superclasses + end end # @@ -209,7 +214,7 @@ def parse_json_facet(field_name, options, setup) end Sunspot::Query::DateFieldJsonFacet.new(field, options, setup) elsif options[:range] - unless [Sunspot::Type::TimeType, Sunspot::Type::FloatType, Sunspot::Type::IntegerType ].find{|type| field.type.is_a?(type)} + unless [Sunspot::Type::TimeType, Sunspot::Type::FloatType, Sunspot::Type::IntegerType ].find { |type| field.type.is_a?(type) } raise( ArgumentError, ':range can only be specified for date or numeric fields' @@ -223,55 +228,55 @@ def parse_json_facet(field_name, options, setup) private - def parse_block_join_json_facet(field_name, options, setup) - facet_op = options[:block_join] - inner_setup = Sunspot::Setup.for(facet_op[:type]) - field = inner_setup.field(field_name) - Sunspot::Query::BlockJoin::JsonFacet.new( - field, - { op: facet_op[:op] }.merge!(options), - inner_setup, - facet_op[:query] - ) - end + def parse_block_join_json_facet(field_name, options, setup) + facet_op = options[:block_join] + inner_setup = Sunspot::Setup.for(facet_op[:type]) + field = inner_setup.field(field_name) + Sunspot::Query::BlockJoin::JsonFacet.new( + field, + { op: facet_op[:op] }.merge!(options), + inner_setup, + facet_op[:query] + ) + end - # - # Deep merge two hashes into a third hash, using rules that produce nice - # merged parameter hashes. The rules are as follows, for a given key: - # - # * If only one hash has a value, or if both hashes have the same value, - # just use the value. - # * If either of the values is not a hash, create arrays out of both - # values and concatenate them. - # * Otherwise, deep merge the two values (which are both hashes) - # - # ==== Parameters - # - # destination:: Hash into which to perform the merge - # left:: One hash to merge - # right:: The other hash to merge - # - # ==== Returns - # - # Hash:: destination - # - def deep_merge_into(destination, left, right) - left.each_pair do |name, left_value| - right_value = right[name] if right - destination[name] = - if right_value.nil? || left_value == right_value - left_value - elsif !left_value.respond_to?(:each_pair) || !right_value.respond_to?(:each_pair) - Array(left_value) + Array(right_value) - else - merged_value = {} - deep_merge_into(merged_value, left_value, right_value) - end + # + # Deep merge two hashes into a third hash, using rules that produce nice + # merged parameter hashes. The rules are as follows, for a given key: + # + # * If only one hash has a value, or if both hashes have the same value, + # just use the value. + # * If either of the values is not a hash, create arrays out of both + # values and concatenate them. + # * Otherwise, deep merge the two values (which are both hashes) + # + # ==== Parameters + # + # destination:: Hash into which to perform the merge + # left:: One hash to merge + # right:: The other hash to merge + # + # ==== Returns + # + # Hash:: destination + # + def deep_merge_into(destination, left, right) + left.each_pair do |name, left_value| + right_value = right[name] if right + destination[name] = + if right_value.nil? || left_value == right_value + left_value + elsif !left_value.respond_to?(:each_pair) || !right_value.respond_to?(:each_pair) + Array(left_value) + Array(right_value) + else + merged_value = {} + deep_merge_into(merged_value, left_value, right_value) + end + end + left_keys = Set.new(left.keys) + destination.merge!(right.reject { |k, v| left_keys.include?(k) }) + destination end - left_keys = Set.new(left.keys) - destination.merge!(right.reject { |k, v| left_keys.include?(k) }) - destination - end end Coordinates = Struct.new(:lat, :lng) @@ -280,7 +285,7 @@ class ContextBoundDelegate class < Date: Wed, 22 Jul 2020 15:20:39 +0200 Subject: [PATCH 04/10] Test with_sunspot_type --- sunspot/lib/sunspot/util.rb | 2 +- sunspot/spec/integration/indexing_spec.rb | 129 +++++++++++++------ sunspot/spec/mocks/post_with_sunspot_type.rb | 105 +++++++++++++++ 3 files changed, 199 insertions(+), 37 deletions(-) create mode 100644 sunspot/spec/mocks/post_with_sunspot_type.rb diff --git a/sunspot/lib/sunspot/util.rb b/sunspot/lib/sunspot/util.rb index 5bcf6a455..d5f5367b1 100644 --- a/sunspot/lib/sunspot/util.rb +++ b/sunspot/lib/sunspot/util.rb @@ -16,7 +16,7 @@ def child_documents_supported? # # Get all of the superclasses for a given class, including the class - # itself. + # itself, if sunspot_type defined returns it. # # ==== Parameters # diff --git a/sunspot/spec/integration/indexing_spec.rb b/sunspot/spec/integration/indexing_spec.rb index 24a1143aa..cd031528a 100644 --- a/sunspot/spec/integration/indexing_spec.rb +++ b/sunspot/spec/integration/indexing_spec.rb @@ -1,54 +1,111 @@ require File.expand_path('../spec_helper', File.dirname(__FILE__)) -describe 'indexing' do - it 'should index non-multivalued field with newlines' do - expect do - Sunspot.index!(Post.new(:title => "A\nTitle")) - end.not_to raise_error - end +contenxt 'indexing' do - it 'should correctly remove by model instance' do - post = Post.new(:title => 'test post') - Sunspot.index!(post) - Sunspot.remove!(post) - expect(Sunspot.search(Post) { with(:title, 'test post') }.results).to be_empty - end + describe 'without sunspot_type' do + it 'should index non-multivalued field with newlines' do + expect do + Sunspot.index!(Post.new(title: "A\nTitle")) + end.not_to raise_error + end - it 'should correctly delete by ID' do - post = Post.new(:title => 'test post') - Sunspot.index!(post) - Sunspot.remove_by_id!(Post, post.id) - expect(Sunspot.search(Post) { with(:title, 'test post') }.results).to be_empty - end + it 'should correctly remove by model instance' do + post = Post.new(title: 'test post') + Sunspot.index!(post) + Sunspot.remove!(post) + expect(Sunspot.search(Post) { with(:title, 'test post') }.results).to be_empty + end + + it 'should correctly delete by ID' do + post = Post.new(title: 'test post') + Sunspot.index!(post) + Sunspot.remove_by_id!(Post, post.id) + expect(Sunspot.search(Post) { with(:title, 'test post') }.results).to be_empty + end + + it 'removes documents by query' do + Sunspot.remove_all! + posts = [Post.new(title: 'birds'), Post.new(title: 'monkeys')] + Sunspot.index!(posts) - it 'removes documents by query' do - Sunspot.remove_all! - posts = [Post.new(:title => 'birds'), Post.new(:title => 'monkeys')] - Sunspot.index!(posts) + Sunspot.remove!(Post) do + with(:title, 'birds') + end + expect(Sunspot.search(Post).results.size).to eq(1) + end + + describe 'in batches' do + let(:post_1) { Post.new title: 'A tittle' } + let(:post_2) { Post.new title: 'Another title' } + + describe 'nested' do + let(:a_nested_batch) do + Sunspot.batch do + Sunspot.index post_1 + + Sunspot.batch do + Sunspot.index post_2 + end + end + end - Sunspot.remove!(Post) do - with(:title, 'birds') + it 'does not fail' do + expect { a_nested_batch }.to_not raise_error + end + end end - expect(Sunspot.search(Post).results.size).to eq(1) end - describe 'in batches' do - let(:post_1) { Post.new :title => 'A tittle' } - let(:post_2) { Post.new :title => 'Another title' } + describe 'with sunspot_type' do + it 'should index non-multivalued field with newlines' do + expect do + Sunspot.index!(PostWithSunspotType.new(title: "A\nTitle")) + end.not_to raise_error + end + + it 'should correctly remove by model instance' do + post = PostWithSunspotType.new(title: 'test post') + Sunspot.index!(post) + Sunspot.remove!(post) + expect(Sunspot.search(PostWithSunspotType) { with(:title, 'test post') }.results).to be_empty + end + + it 'should correctly delete by ID' do + post = PostWithSunspotType.new(title: 'test post') + Sunspot.index!(post) + Sunspot.remove_by_id!(PostWithSunspotType, post.id) + expect(Sunspot.search(PostWithSunspotType) { with(:title, 'test post') }.results).to be_empty + end - describe 'nested' do - let(:a_nested_batch) do - Sunspot.batch do - Sunspot.index post_1 + it 'removes documents by query' do + Sunspot.remove_all! + posts = [PostWithSunspotType.new(title: 'birds'), PostWithSunspotType.new(title: 'monkeys')] + Sunspot.index!(posts) + Sunspot.remove!(PostWithSunspotType) do + with(:title, 'birds') + end + expect(Sunspot.search(PostWithSunspotType).results.size).to eq(1) + end + + describe 'in batches' do + let(:post_1) { PostWithSunspotType.new title: 'A tittle' } + let(:post_2) { PostWithSunspotType.new title: 'Another title' } + + describe 'nested' do + let(:a_nested_batch) do Sunspot.batch do - Sunspot.index post_2 + Sunspot.index post_1 + + Sunspot.batch do + Sunspot.index post_2 + end end end - end - it 'does not fail' do - expect { a_nested_batch }.to_not raise_error + it 'does not fail' do + expect { a_nested_batch }.to_not raise_error + end end end end diff --git a/sunspot/spec/mocks/post_with_sunspot_type.rb b/sunspot/spec/mocks/post_with_sunspot_type.rb new file mode 100644 index 000000000..80a82d741 --- /dev/null +++ b/sunspot/spec/mocks/post_with_sunspot_type.rb @@ -0,0 +1,105 @@ +require File.join(File.dirname(__FILE__), 'blog') +require File.join(File.dirname(__FILE__), 'super_class') + +class PostWithSunspotType < SuperClass + attr_accessor :title, :body, :blog_id, :published_at, :ratings_average, + :author_name, :featured, :expire_date, :coordinates, :tags, + :featured_for + alias_method :featured?, :featured + + def self.sunspot_type + ['PostWithSunspotType'] + end + + def category_ids + @category_ids ||= [] + end + + def custom_string + @custom_string ||= {} + end + + def custom_underscored_string + @custom_underscored_string ||= {} + end + + def custom_fl + @custom_fl ||= {} + end + + def custom_time + @custom_time ||= {} + end + + def custom_boolean + @custom_boolean ||= {} + end + + private + + attr_writer :category_ids, :custom_string, :custom_underscored_string, :custom_fl, :custom_time, :custom_boolean +end + +Sunspot.setup(Post) do + text :title, :boost => 2 + text :text_array, :boost => 3 do + [title, title] + end + text :body, :stored => true, :more_like_this => true + text :backwards_title do + title.reverse if title + end + text :tags, :more_like_this => true + string :title, :stored => true + string :author_name + integer :blog_id, :references => Blog + integer :category_ids, :multiple => true + float :average_rating, :using => :ratings_average, :trie => true + time :published_at, :trie => true + date :expire_date + date_range :featured_for + boolean :featured, :using => :featured?, :stored => true + string :sort_title do + title.downcase.sub(/^(a|an|the)\W+/, '') if title + end + integer :primary_category_id do |post| + post.category_ids.first + end + time :last_indexed_at, :stored => true do + Time.now + end + location :coordinates + latlon(:coordinates_new) { coordinates } + + dynamic_string :custom_string, :stored => true + dynamic_string :custom_underscored_string, separator: '__' + dynamic_float :custom_float, :multiple => true, :using => :custom_fl + dynamic_integer :custom_integer do + category_ids.inject({}) do |hash, category_id| + hash.merge(category_id => 1) + end + end + dynamic_time :custom_time + dynamic_boolean :custom_boolean + + boost do + if ratings_average + 1 + (ratings_average - 3.0) / 4.0 + end + end + + string :legacy, :as => :legacy_field_s do + "legacy #{title}" + end + + string :legacy_array, :as => :legacy_array_field_sm, :multiple => true do + ['first string', 'second string'] + end + + string :tag_list, :multiple => true, :stored => true do + tags + end +end + +class PhotoPost < Post +end From a95b0b5bd0fccb3b9465f8211cb0d72c05fded50 Mon Sep 17 00:00:00 2001 From: Duccio Giovannelli Date: Wed, 22 Jul 2020 15:23:30 +0200 Subject: [PATCH 05/10] Fix typo --- sunspot/spec/integration/indexing_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sunspot/spec/integration/indexing_spec.rb b/sunspot/spec/integration/indexing_spec.rb index cd031528a..dc7322679 100644 --- a/sunspot/spec/integration/indexing_spec.rb +++ b/sunspot/spec/integration/indexing_spec.rb @@ -1,6 +1,6 @@ require File.expand_path('../spec_helper', File.dirname(__FILE__)) -contenxt 'indexing' do +context 'indexing' do describe 'without sunspot_type' do it 'should index non-multivalued field with newlines' do From 092f1ca5f9ae87f6cb134a6650f14fa2b41b3b57 Mon Sep 17 00:00:00 2001 From: Duccio Giovannelli Date: Wed, 22 Jul 2020 16:01:12 +0200 Subject: [PATCH 06/10] Fix Util.superclasses_for --- sunspot/lib/sunspot/util.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sunspot/lib/sunspot/util.rb b/sunspot/lib/sunspot/util.rb index d5f5367b1..c34b64257 100644 --- a/sunspot/lib/sunspot/util.rb +++ b/sunspot/lib/sunspot/util.rb @@ -29,7 +29,7 @@ def child_documents_supported? def superclasses_for(clazz) if clazz.respond_to?(:sunspot_type) raise StandardError.new('sunspot_type must be an array of strings') unless clazz.sunspot_type.is_a?(Array) - clazz.sunspot_type + Struct.new(:name).new(clazz.sunspot_type) else superclasses = [clazz] superclasses << (clazz = clazz.superclass) while clazz.superclass != Object From 9366332b4549c6adfe24a3fa15f77d07502687c7 Mon Sep 17 00:00:00 2001 From: Duccio Giovannelli Date: Wed, 22 Jul 2020 16:06:02 +0200 Subject: [PATCH 07/10] Fix Util.superclasses_for --- sunspot/lib/sunspot/util.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sunspot/lib/sunspot/util.rb b/sunspot/lib/sunspot/util.rb index c34b64257..4c5a06f43 100644 --- a/sunspot/lib/sunspot/util.rb +++ b/sunspot/lib/sunspot/util.rb @@ -29,7 +29,7 @@ def child_documents_supported? def superclasses_for(clazz) if clazz.respond_to?(:sunspot_type) raise StandardError.new('sunspot_type must be an array of strings') unless clazz.sunspot_type.is_a?(Array) - Struct.new(:name).new(clazz.sunspot_type) + clazz.sunspot_type.map { |t| Struct.new(:name).new(t) } else superclasses = [clazz] superclasses << (clazz = clazz.superclass) while clazz.superclass != Object From adf2ae9488bbe23e766ba6b7dc127385c7b0ee71 Mon Sep 17 00:00:00 2001 From: Duccio Giovannelli Date: Wed, 22 Jul 2020 16:10:24 +0200 Subject: [PATCH 08/10] Sunspot is not configured for PostWithSunspotType --- sunspot/spec/mocks/post_with_sunspot_type.rb | 36 ++++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/sunspot/spec/mocks/post_with_sunspot_type.rb b/sunspot/spec/mocks/post_with_sunspot_type.rb index 80a82d741..66e45c38b 100644 --- a/sunspot/spec/mocks/post_with_sunspot_type.rb +++ b/sunspot/spec/mocks/post_with_sunspot_type.rb @@ -37,43 +37,43 @@ def custom_boolean private - attr_writer :category_ids, :custom_string, :custom_underscored_string, :custom_fl, :custom_time, :custom_boolean + attr_writer :category_ids, :custom_string, :custom_underscored_string, :custom_fl, :custom_time, :custom_boolean end -Sunspot.setup(Post) do - text :title, :boost => 2 - text :text_array, :boost => 3 do +Sunspot.setup(PostWithSunspotType) do + text :title, boost: 2 + text :text_array, boost: 3 do [title, title] end - text :body, :stored => true, :more_like_this => true + text :body, stored: true, more_like_this: true text :backwards_title do title.reverse if title end - text :tags, :more_like_this => true - string :title, :stored => true + text :tags, more_like_this: true + string :title, stored: true string :author_name - integer :blog_id, :references => Blog - integer :category_ids, :multiple => true - float :average_rating, :using => :ratings_average, :trie => true - time :published_at, :trie => true + integer :blog_id, references: Blog + integer :category_ids, multiple: true + float :average_rating, using: :ratings_average, trie: true + time :published_at, trie: true date :expire_date date_range :featured_for - boolean :featured, :using => :featured?, :stored => true + boolean :featured, using: :featured?, stored: true string :sort_title do title.downcase.sub(/^(a|an|the)\W+/, '') if title end integer :primary_category_id do |post| post.category_ids.first end - time :last_indexed_at, :stored => true do + time :last_indexed_at, stored: true do Time.now end location :coordinates latlon(:coordinates_new) { coordinates } - dynamic_string :custom_string, :stored => true + dynamic_string :custom_string, stored: true dynamic_string :custom_underscored_string, separator: '__' - dynamic_float :custom_float, :multiple => true, :using => :custom_fl + dynamic_float :custom_float, multiple: true, using: :custom_fl dynamic_integer :custom_integer do category_ids.inject({}) do |hash, category_id| hash.merge(category_id => 1) @@ -88,15 +88,15 @@ def custom_boolean end end - string :legacy, :as => :legacy_field_s do + string :legacy, as: :legacy_field_s do "legacy #{title}" end - string :legacy_array, :as => :legacy_array_field_sm, :multiple => true do + string :legacy_array, as: :legacy_array_field_sm, multiple: true do ['first string', 'second string'] end - string :tag_list, :multiple => true, :stored => true do + string :tag_list, multiple: true, stored: true do tags end end From 4d5a695f00fa3168f30842cfa2d740caca69eda1 Mon Sep 17 00:00:00 2001 From: Duccio Giovannelli Date: Wed, 22 Jul 2020 19:00:30 +0200 Subject: [PATCH 09/10] Add disable ancestors to be able to store only self class name and not ancestors on type field --- README.md | 28 ++--- sunspot/lib/sunspot/util.rb | 8 +- sunspot/spec/integration/indexing_spec.rb | 26 ++--- .../sunspot_disable_ancestors_spec.rb | 35 ++++++ ...type.rb => post_with_disable_ancestors.rb} | 13 ++- sunspot/spec/spec_helper.rb | 1 + .../solr/configsets/sunspot/conf/schema.xml | 10 +- .../configsets/sunspot/conf/solrconfig.xml | 110 +++++++++--------- 8 files changed, 135 insertions(+), 96 deletions(-) create mode 100644 sunspot/spec/integration/sunspot_disable_ancestors_spec.rb rename sunspot/spec/mocks/{post_with_sunspot_type.rb => post_with_disable_ancestors.rb} (90%) diff --git a/README.md b/README.md index 212f1ea84..88c029b5f 100644 --- a/README.md +++ b/README.md @@ -695,10 +695,10 @@ in the query. ```ruby # Search for all books with specified title and facets # on the timestamp of reviews. -# An additional filter on children can be specified inside the on_child operator. +# An additional filter on children can be specified inside the on_child operator. Sunspot.search(Book) do fulltext 'awesome book title', fields: [:title] - + json_facet :review_date, block_join: (on_child(Review) do with(:review_date).greater_than(DateTime.parse('2015-01-01T00:00:00Z')) end) @@ -715,9 +715,9 @@ Faceting is performed on parents of the children found in the query. # Perform faceting on the book category. Sunspot.search(Review) do with :author, 'yonik' - + # An empty block means no additional filters: takes all parents - # of the selected children. + # of the selected children. json_facet :category, block_join: on_parent(Book) {} end ``` @@ -1114,12 +1114,12 @@ class Post < ActiveRecord::Base end ``` -As a result, all `Blogs` and `Posts` will be stored on a single shard. But +As a result, all `Blogs` and `Posts` will be stored on a single shard. But since other `Blogs` will generate other prefixes Solr will distribute them evenly across the available shards. -If you have large collections that you want to use joins with and still want to -utilize sharding instead of storing everything on a single shard, it's also +If you have large collections that you want to use joins with and still want to +utilize sharding instead of storing everything on a single shard, it's also possible to only ensure a single `Blog` and its associated `Posts` stored on a signle shard, while the whole collections could still be distributed across multiple shards. The thing is that Solr **can** do distributed joins across @@ -1150,15 +1150,15 @@ class Post < ActiveRecord::Base end ``` -This way a single `Blog` and its `Ports` have the same ID prefix and will go +This way a single `Blog` and its `Ports` have the same ID prefix and will go to a single Shard. *NOTE:* Solr developers also recommend adjusting replication factor so every shard node contains replicas of all shards in the cluster. If you have 4 shards on separate nodes each of these nodes should have 4 replicas (one replica of each shard). -More information and usage examples could be found here: -https://lucene.apache.org/solr/guide/6_6/shards-and-indexing-data-in-solrcloud.html +More information and usage examples could be found here: +https://lucene.apache.org/solr/guide/6_6/shards-and-indexing-data-in-solrcloud.html ### Highlighting @@ -1268,7 +1268,7 @@ from 1984. ```ruby search = Sunspot.search(Book) do with(:pub_year).greater_than(1983) - + # The :on parameter is needed here! # It must match the type specified in :block_join stats :stars, sort: :avg, on: Review do @@ -1282,7 +1282,7 @@ end Solr will execute the query, selecting all `Book`s with `pub_year` from 1984. Then, facets on the `author_name` values present in the `Review` documents -that are children of the `Book`s found. +that are children of the `Book`s found. In this case, we'll have just one facet. At last, executes statistics on the generated facet. @@ -1485,7 +1485,7 @@ contents in Solr. Stored fields allow data to be retrieved without also hitting the underlying database (usually an SQL server). -The store option using DocValues as stored is not like having the value really stored in the index, if you want to use +The store option using DocValues as stored is not like having the value really stored in the index, if you want to use highlighting and more like this queries and atomic updates, remember to change the schema.xml according to this. Stored fields (stored="true" in the schema) come at some performance cost in the Solr index, so use @@ -1598,7 +1598,7 @@ Sunspot.session = Sunspot::SessionProxy::ThreadLocalSessionProxy.new Within a Rails app, to ensure your `config/sunspot.yml` settings are properly setup in this session you can use [Sunspot::Rails.build_session](http://sunspot.github.io/sunspot/docs/Sunspot/Rails.html#build_session-class_method) to mirror the normal Sunspot setup process: ```ruby - session = Sunspot::Rails.build_session Sunspot::Rails::Configuration.new + session = Sunspot::Rails.build_session Sunspot::Rails::Configuration.new Sunspot.session = session ``` diff --git a/sunspot/lib/sunspot/util.rb b/sunspot/lib/sunspot/util.rb index 4c5a06f43..ee56cbbd0 100644 --- a/sunspot/lib/sunspot/util.rb +++ b/sunspot/lib/sunspot/util.rb @@ -16,7 +16,7 @@ def child_documents_supported? # # Get all of the superclasses for a given class, including the class - # itself, if sunspot_type defined returns it. + # itself, if sunspot_disable_ancestors defined and true we use the class without ancestors. # # ==== Parameters # @@ -27,9 +27,9 @@ def child_documents_supported? # Array:: Collection containing class and its superclasses # def superclasses_for(clazz) - if clazz.respond_to?(:sunspot_type) - raise StandardError.new('sunspot_type must be an array of strings') unless clazz.sunspot_type.is_a?(Array) - clazz.sunspot_type.map { |t| Struct.new(:name).new(t) } + if clazz.respond_to?(:sunspot_disable_ancestors) + raise StandardError.new('sunspot_disable_ancestors must be true') unless clazz.sunspot_disable_ancestors.is_a?(TrueClass) + superclasses = [clazz] else superclasses = [clazz] superclasses << (clazz = clazz.superclass) while clazz.superclass != Object diff --git a/sunspot/spec/integration/indexing_spec.rb b/sunspot/spec/integration/indexing_spec.rb index dc7322679..de10edf3f 100644 --- a/sunspot/spec/integration/indexing_spec.rb +++ b/sunspot/spec/integration/indexing_spec.rb @@ -2,7 +2,7 @@ context 'indexing' do - describe 'without sunspot_type' do + describe 'without sunspot_disable_ancestors' do it 'should index non-multivalued field with newlines' do expect do Sunspot.index!(Post.new(title: "A\nTitle")) @@ -56,41 +56,41 @@ end end - describe 'with sunspot_type' do + describe 'with sunspot_disable_ancestors' do it 'should index non-multivalued field with newlines' do expect do - Sunspot.index!(PostWithSunspotType.new(title: "A\nTitle")) + Sunspot.index!(PostWithDisableAncestors.new(title: "A\nTitle")) end.not_to raise_error end it 'should correctly remove by model instance' do - post = PostWithSunspotType.new(title: 'test post') + post = PostWithDisableAncestors.new(title: 'test post') Sunspot.index!(post) Sunspot.remove!(post) - expect(Sunspot.search(PostWithSunspotType) { with(:title, 'test post') }.results).to be_empty + expect(Sunspot.search(PostWithDisableAncestors) { with(:title, 'test post') }.results).to be_empty end it 'should correctly delete by ID' do - post = PostWithSunspotType.new(title: 'test post') + post = PostWithDisableAncestors.new(title: 'test post') Sunspot.index!(post) - Sunspot.remove_by_id!(PostWithSunspotType, post.id) - expect(Sunspot.search(PostWithSunspotType) { with(:title, 'test post') }.results).to be_empty + Sunspot.remove_by_id!(PostWithDisableAncestors, post.id) + expect(Sunspot.search(PostWithDisableAncestors) { with(:title, 'test post') }.results).to be_empty end it 'removes documents by query' do Sunspot.remove_all! - posts = [PostWithSunspotType.new(title: 'birds'), PostWithSunspotType.new(title: 'monkeys')] + posts = [PostWithDisableAncestors.new(title: 'birds'), PostWithDisableAncestors.new(title: 'monkeys')] Sunspot.index!(posts) - Sunspot.remove!(PostWithSunspotType) do + Sunspot.remove!(PostWithDisableAncestors) do with(:title, 'birds') end - expect(Sunspot.search(PostWithSunspotType).results.size).to eq(1) + expect(Sunspot.search(PostWithDisableAncestors).results.size).to eq(1) end describe 'in batches' do - let(:post_1) { PostWithSunspotType.new title: 'A tittle' } - let(:post_2) { PostWithSunspotType.new title: 'Another title' } + let(:post_1) { PostWithDisableAncestors.new title: 'A tittle' } + let(:post_2) { PostWithDisableAncestors.new title: 'Another title' } describe 'nested' do let(:a_nested_batch) do diff --git a/sunspot/spec/integration/sunspot_disable_ancestors_spec.rb b/sunspot/spec/integration/sunspot_disable_ancestors_spec.rb new file mode 100644 index 000000000..ef40ff270 --- /dev/null +++ b/sunspot/spec/integration/sunspot_disable_ancestors_spec.rb @@ -0,0 +1,35 @@ +require File.expand_path('../spec_helper', File.dirname(__FILE__)) + +describe 'sunspot_disable_ancestors' do + before :each do + Sunspot.remove_all + @posts = PostWithDisableAncestors.new(ratings_average: 4.0, author_name: 'caio', blog_id: 1), + PhotoPostWithDisableAncestors.new(ratings_average: 4.0, author_name: 'caio', blog_id: 1) + Sunspot.index!(@posts) + end + + it 'PostWithDisableAncestors returns returns an array with only its class name' do + expect(Sunspot.search(PostWithDisableAncestors).send(:solr_response)["docs"][0]["type"]).to eq(["PostWithDisableAncestors"]) + end + + it 'PhotoPostWithDisableAncestors returns an array with only its class name' do + expect(Sunspot.search(PhotoPostWithDisableAncestors).send(:solr_response)["docs"][0]["type"]).to eq(["PhotoPostWithDisableAncestors"]) + end +end + +describe 'without sunspot_disable_ancestors' do + before :each do + Sunspot.remove_all + @posts = Post.new(ratings_average: 4.0, author_name: 'caio', blog_id: 1), + PhotoPost.new(ratings_average: 4.0, author_name: 'caio', blog_id: 1) + Sunspot.index!(@posts) + end + + it 'Post returns Posts and ancestors' do + expect(Sunspot.search(Post).send(:solr_response)["docs"][0]["type"]).to eq( ["Post", "SuperClass", "MockRecord"]) + end + + it 'PhotoPost returns PhotoPost and ancestors' do + expect(Sunspot.search(Post).send(:solr_response)["docs"][1]["type"]).to eq(["PhotoPost", "Post", "SuperClass", "MockRecord"]) + end +end diff --git a/sunspot/spec/mocks/post_with_sunspot_type.rb b/sunspot/spec/mocks/post_with_disable_ancestors.rb similarity index 90% rename from sunspot/spec/mocks/post_with_sunspot_type.rb rename to sunspot/spec/mocks/post_with_disable_ancestors.rb index 66e45c38b..8755b2448 100644 --- a/sunspot/spec/mocks/post_with_sunspot_type.rb +++ b/sunspot/spec/mocks/post_with_disable_ancestors.rb @@ -1,14 +1,14 @@ require File.join(File.dirname(__FILE__), 'blog') require File.join(File.dirname(__FILE__), 'super_class') -class PostWithSunspotType < SuperClass +class PostWithDisableAncestors < SuperClass attr_accessor :title, :body, :blog_id, :published_at, :ratings_average, :author_name, :featured, :expire_date, :coordinates, :tags, :featured_for alias_method :featured?, :featured - def self.sunspot_type - ['PostWithSunspotType'] + def self.sunspot_disable_ancestors + true end def category_ids @@ -40,7 +40,7 @@ def custom_boolean attr_writer :category_ids, :custom_string, :custom_underscored_string, :custom_fl, :custom_time, :custom_boolean end -Sunspot.setup(PostWithSunspotType) do +Sunspot.setup(PostWithDisableAncestors) do text :title, boost: 2 text :text_array, boost: 3 do [title, title] @@ -101,5 +101,8 @@ def custom_boolean end end -class PhotoPost < Post +class PhotoPostWithDisableAncestors < PostWithDisableAncestors + def self.sunspot_disable_ancestors + true + end end diff --git a/sunspot/spec/spec_helper.rb b/sunspot/spec/spec_helper.rb index a32204c31..5add6a22e 100644 --- a/sunspot/spec/spec_helper.rb +++ b/sunspot/spec/spec_helper.rb @@ -2,6 +2,7 @@ require 'ostruct' require 'sunspot' +require 'byebug' require File.join(File.dirname(__FILE__), 'mocks', 'mock_record.rb') Dir.glob(File.join(File.dirname(__FILE__), 'mocks', '**', '*.rb')).each do |file| diff --git a/sunspot_solr/solr/solr/configsets/sunspot/conf/schema.xml b/sunspot_solr/solr/solr/configsets/sunspot/conf/schema.xml index d77fbf416..5c82868e2 100644 --- a/sunspot_solr/solr/solr/configsets/sunspot/conf/schema.xml +++ b/sunspot_solr/solr/solr/configsets/sunspot/conf/schema.xml @@ -15,10 +15,10 @@ See the License for the specific language governing permissions and limitations under the License. --> - - + @@ -255,11 +255,11 @@ - + - + diff --git a/sunspot_solr/solr/solr/configsets/sunspot/conf/solrconfig.xml b/sunspot_solr/solr/solr/configsets/sunspot/conf/solrconfig.xml index b139fea75..f6ea25851 100644 --- a/sunspot_solr/solr/solr/configsets/sunspot/conf/solrconfig.xml +++ b/sunspot_solr/solr/solr/configsets/sunspot/conf/solrconfig.xml @@ -16,9 +16,9 @@ limitations under the License. --> - - 6.6.0 + 7.7.2 - - + - @@ -147,16 +147,16 @@ uncommitted changes to the index, so use of a hard autoCommit is recommended (see below). "dir" - the target directory for transaction logs, defaults to the - solr data directory. --> + solr data directory. --> ${solr.ulog.dir:} - + - - ${solr.autoCommit.maxTime:15000} - false + + ${solr.autoCommit.maxTime:15000} + false - - ${solr.autoSoftCommit.maxTime:-1} + + ${solr.autoSoftCommit.maxTime:-1} - + @@ -199,12 +199,12 @@ is thrown if exceeded. ** WARNING ** - + This option actually modifies a global Lucene property that will affect all SolrCores. If multiple solrconfig.xml files disagree on this property, the value at any given moment will be based on the last SolrCore to be initialized. - + --> 1024 @@ -213,7 +213,7 @@ There are two implementations of cache available for Solr, LRUCache, based on a synchronized LinkedHashMap, and - FastLRUCache, based on a ConcurrentHashMap. + FastLRUCache, based on a ConcurrentHashMap. FastLRUCache has faster gets and slower puts in single threaded operation and thus is generally faster than LRUCache @@ -238,7 +238,7 @@ initialSize - the initial capacity (number of entries) of the cache. (see java.util.HashMap) autowarmCount - the number of entries to prepopulate from - and old cache. + and old cache. --> - + - - + + 20 200 @@ -310,7 +310,7 @@ false - + @@ -389,7 +389,7 @@ - - @@ -516,20 +516,20 @@ request parameter that holds the query text to be analyzed. It also supports the "analysis.showmatch" parameter which when set to true, all field tokens that match the query tokens will be marked - as a "match". + as a "match". --> - - explicit + explicit true - + textSpell @@ -583,18 +583,18 @@