class_methods.rb
11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
require File.dirname(__FILE__) + '/common_methods'
require File.dirname(__FILE__) + '/parser_methods'
module ActsAsSolr #:nodoc:
module ClassMethods
include CommonMethods
include ParserMethods
# Finds instances of a model. Terms are ANDed by default, can be overwritten
# by using OR between terms
#
# Here's a sample (untested) code for your controller:
#
# def search
# results = Book.find_by_solr params[:query]
# end
#
# You can also search for specific fields by searching for 'field:value'
#
# ====options:
# offset:: - The first document to be retrieved (offset)
# limit:: - The number of rows per page
# order:: - Orders (sort by) the result set using a given criteria:
#
# Book.find_by_solr 'ruby', :order => 'description asc'
#
# field_types:: This option is deprecated and will be obsolete by version 1.0.
# There's no need to specify the :field_types anymore when doing a
# search in a model that specifies a field type for a field. The field
# types are automatically traced back when they're included.
#
# class Electronic < ActiveRecord::Base
# acts_as_solr :fields => [{:price => :range_float}]
# end
#
# facets:: This option argument accepts the following arguments:
# fields:: The fields to be included in the faceted search (Solr's facet.field)
# query:: The queries to be included in the faceted search (Solr's facet.query)
# zeros:: Display facets with count of zero. (true|false)
# sort:: Sorts the faceted resuls by highest to lowest count. (true|false)
# browse:: This is where the 'drill-down' of the facets work. Accepts an array of
# fields in the format "facet_field:term"
# mincount:: Replacement for zeros (it has been deprecated in Solr). Specifies the
# minimum count necessary for a facet field to be returned. (Solr's
# facet.mincount) Overrides :zeros if it is specified. Default is 0.
#
# dates:: Run date faceted queries using the following arguments:
# fields:: The fields to be included in the faceted date search (Solr's facet.date).
# It may be either a String/Symbol or Hash. If it's a hash the options are the
# same as date_facets minus the fields option (i.e., :start:, :end, :gap, :other,
# :between). These options if provided will override the base options.
# (Solr's f.<field_name>.date.<key>=<value>).
# start:: The lower bound for the first date range for all Date Faceting. Required if
# :fields is present
# end:: The upper bound for the last date range for all Date Faceting. Required if
# :fields is prsent
# gap:: The size of each date range expressed as an interval to be added to the lower
# bound using the DateMathParser syntax. Required if :fields is prsent
# hardend:: A Boolean parameter instructing Solr what do do in the event that
# facet.date.gap does not divide evenly between facet.date.start and facet.date.end.
# other:: This param indicates that in addition to the counts for each date range
# constraint between facet.date.start and facet.date.end, other counds should be
# calculated. May specify more then one in an Array. The possible options are:
# before:: - all records with lower bound less than start
# after:: - all records with upper bound greater than end
# between:: - all records with field values between start and end
# none:: - compute no other bounds (useful in per field assignment)
# all:: - shortcut for before, after, and between
# filter:: Similar to :query option provided by :facets, in that accepts an array of
# of date queries to limit results. Can not be used as a part of a :field hash.
# This is the only option that can be used if :fields is not present.
#
# Example:
#
# Electronic.find_by_solr "memory", :facets => {:zeros => false, :sort => true,
# :query => ["price:[* TO 200]",
# "price:[200 TO 500]",
# "price:[500 TO *]"],
# :fields => [:category, :manufacturer],
# :browse => ["category:Memory","manufacturer:Someone"]}
#
#
# Examples of date faceting:
#
# basic:
# Electronic.find_by_solr "memory", :facets => {:dates => {:fields => [:updated_at, :created_at],
# :start => 'NOW-10YEARS/DAY', :end => 'NOW/DAY', :gap => '+2YEARS', :other => :before}}
#
# advanced:
# Electronic.find_by_solr "memory", :facets => {:dates => {:fields => [:updated_at,
# {:created_at => {:start => 'NOW-20YEARS/DAY', :end => 'NOW-10YEARS/DAY', :other => [:before, :after]}
# }], :start => 'NOW-10YEARS/DAY', :end => 'NOW/DAY', :other => :before, :filter =>
# ["created_at:[NOW-10YEARS/DAY TO NOW/DAY]", "updated_at:[NOW-1YEAR/DAY TO NOW/DAY]"]}}
#
# filter only:
# Electronic.find_by_solr "memory", :facets => {:dates => {:filter => "updated_at:[NOW-1YEAR/DAY TO NOW/DAY]"}}
#
#
#
# scores:: If set to true this will return the score as a 'solr_score' attribute
# for each one of the instances found. Does not currently work with find_id_by_solr
#
# books = Book.find_by_solr 'ruby OR splinter', :scores => true
# books.records.first.solr_score
# => 1.21321397
# books.records.last.solr_score
# => 0.12321548
#
# lazy:: If set to true the search will return objects that will touch the database when you ask for one
# of their attributes for the first time. Useful when you're using fragment caching based solely on
# types and ids.
#
def find_by_solr(query, options={})
data = parse_query(query, options)
return parse_results(data, options) if data
end
# Finds instances of a model and returns an array with the ids:
# Book.find_id_by_solr "rails" => [1,4,7]
# The options accepted are the same as find_by_solr
#
def find_id_by_solr(query, options={})
data = parse_query(query, options)
return parse_results(data, {:format => :ids}) if data
end
# This method can be used to execute a search across multiple models:
# Book.multi_solr_search "Napoleon OR Tom", :models => [Movie]
#
# ====options:
# Accepts the same options as find_by_solr plus:
# models:: The additional models you'd like to include in the search
# results_format:: Specify the format of the results found
# :objects :: Will return an array with the results being objects (default). Example:
# Book.multi_solr_search "Napoleon OR Tom", :models => [Movie], :results_format => :objects
# :ids :: Will return an array with the ids of each entry found. Example:
# Book.multi_solr_search "Napoleon OR Tom", :models => [Movie], :results_format => :ids
# => [{"id" => "Movie:1"},{"id" => Book:1}]
# Where the value of each array is as Model:instance_id
# scores:: If set to true this will return the score as a 'solr_score' attribute
# for each one of the instances found. Does not currently work with find_id_by_solr
#
# books = Book.multi_solr_search 'ruby OR splinter', :scores => true
# books.records.first.solr_score
# => 1.21321397
# books.records.last.solr_score
# => 0.12321548
#
def multi_solr_search(query, options = {})
models = multi_model_suffix(options)
options.update(:results_format => :objects) unless options[:results_format]
data = parse_query(query, options, models)
if data.nil? or data.total_hits == 0
return SearchResults.new(:docs => [], :total => 0)
end
result = find_multi_search_objects(data, options)
if options[:scores] and options[:results_format] == :objects
add_scores(result, data)
end
SearchResults.new :docs => result, :total => data.total_hits
end
def find_multi_search_objects(data, options)
result = []
if options[:results_format] == :objects
data.hits.each do |doc|
k = doc.fetch('id').first.to_s.split(':')
result << k[0].constantize.find_by_id(k[1])
end
elsif options[:results_format] == :ids
data.hits.each{|doc| result << {"id" => doc.values.pop.to_s}}
end
result
end
def multi_model_suffix(options)
models = "AND (#{solr_configuration[:type_field]}:#{self.name}"
models << " OR " + options[:models].collect {|m| "#{solr_configuration[:type_field]}:" + m.to_s}.join(" OR ") if options[:models].is_a?(Array)
models << ")"
end
# returns the total number of documents found in the query specified:
# Book.count_by_solr 'rails' => 3
#
def count_by_solr(query, options = {})
data = parse_query(query, options)
data.total_hits
end
# It's used to rebuild the Solr index for a specific model.
# Book.rebuild_solr_index
#
# If batch_size is greater than 0, adds will be done in batches.
# NOTE: If using sqlserver, be sure to use a finder with an explicit order.
# Non-edge versions of rails do not handle pagination correctly for sqlserver
# without an order clause.
#
# If a finder block is given, it will be called to retrieve the items to index.
# This can be very useful for things such as updating based on conditions or
# using eager loading for indexed associations.
def rebuild_solr_index(batch_size=100, &finder)
finder ||= lambda { |ar, options| ar.find(:all, options.merge({:order => self.primary_key})) }
start_time = Time.now
if batch_size > 0
items_processed = 0
limit = batch_size
offset = 0
begin
iteration_start = Time.now
items = finder.call(self, {:limit => limit, :offset => offset})
add_batch = items.collect { |content| content.to_solr_doc }
if items.size > 0
solr_add add_batch
solr_commit
end
items_processed += items.size
last_id = items.last.id if items.last
time_so_far = Time.now - start_time
iteration_time = Time.now - iteration_start
logger.info "#{Process.pid}: #{items_processed} items for #{self.name} have been batch added to index in #{'%.3f' % time_so_far}s at #{'%.3f' % (items_processed / time_so_far)} items/sec (#{'%.3f' % (items.size / iteration_time)} items/sec for the last batch). Last id: #{last_id}"
offset += items.size
end while items.nil? || items.size > 0
else
items = finder.call(self, {})
items.each { |content| content.solr_save }
items_processed = items.size
end
solr_optimize
logger.info items_processed > 0 ? "Index for #{self.name} has been rebuilt" : "Nothing to index for #{self.name}"
end
end
end