local_index.rb
8.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
module ActsAsFerret
class LocalIndex < AbstractIndex
include MoreLikeThis::IndexMethods
# Creates the index wrapper and makes sure an index exists on disk.
# aaf_configuration is forwarded unchanged (bare super) to the next
# initialize in the ancestor chain.
def initialize(aaf_configuration)
super
# rebuilds the index right away when no segments file is present
ensure_index_exists
end
# Closes the current Ferret index instance (if any) and opens a fresh one.
#
# Closing is delegated to #close, so an index that has already been closed
# underneath us does not make the reopen fail.
#
# Returns the newly opened index as provided by #ferret_index.
def reopen!
  close
  logger.debug "reopening index at #{aaf_configuration[:ferret][:path]}"
  ferret_index
end
# The 'real' Ferret Index instance.
#
# Makes sure an index exists on disk, lazily creates the
# Ferret::Index::Index from aaf_configuration[:ferret] and memoizes it in
# @ferret_index. Batch size and logger are (re)applied on every call, as
# before.
#
# Written without ActiveSupport's `returning` helper so it does not depend
# on that deprecated/removed API.
def ferret_index
  ensure_index_exists
  @ferret_index ||= Ferret::Index::Index.new(aaf_configuration[:ferret])
  @ferret_index.batch_size = aaf_configuration[:reindex_batch_size]
  @ferret_index.logger = logger
  @ferret_index
end
# Looks for a file named 'segments' inside aaf_configuration[:index_dir].
# When it is missing, ActsAsFerret::ensure_directory is called for that
# directory, any open index instance is closed and the index is rebuilt.
#
# Returns nil when the segments file is present, otherwise the result of
# rebuild_index.
def ensure_index_exists
  index_dir = aaf_configuration[:index_dir]
  logger.debug "LocalIndex: ensure_index_exists at #{index_dir}"
  return if File.file?("#{index_dir}/segments")

  ActsAsFerret::ensure_directory(index_dir)
  close
  rebuild_index
end
# Closes the underlying index instance, if there is one, and always forgets
# it afterwards. A StandardError raised while closing is swallowed.
#
# Returns whatever the index's close returned, or nil when nothing was open
# or closing failed.
def close
  begin
    @ferret_index.close if @ferret_index
  rescue StandardError
    # is raised when index already closed
    nil
  ensure
    @ferret_index = nil
  end
end
# Rebuilds the index from the model class this index belongs to.
#
# models - optional class names (for shared index scenarios) naming further
#          model classes to include; nested arrays are flattened and
#          duplicates removed. The configured :class_name is always part of
#          the list.
#
# A new Ferret index is created with :create => true and :auto_flush => false
# on top of a copy of aaf_configuration[:ferret], then index_models is called
# with the constantized model classes. Returns the result of index_models.
def rebuild_index(*models)
  own_class_name = aaf_configuration[:class_name]
  models << own_class_name unless models.include?(own_class_name)
  model_classes = models.flatten.uniq.map { |class_name| class_name.constantize }
  logger.debug "rebuild index: #{model_classes.inspect}"

  index_options = aaf_configuration[:ferret].dup.update(
    :auto_flush  => false,
    :field_infos => ActsAsFerret::field_infos(model_classes),
    :create      => true
  )
  rebuilt = Ferret::Index::Index.new(index_options)
  rebuilt.batch_size = aaf_configuration[:reindex_batch_size]
  rebuilt.logger = logger
  rebuilt.index_models model_classes
end
# Parses the given query string into a Ferret Query object.
#
# Works around a ferret bug in Index#process_query (it doesn't ensure the
# reader is open) by opening the reader first, inside the index's
# synchronize block.
#
# The index is resolved once per call instead of three times, and the
# unused local the result used to be assigned to is gone.
#
# Returns the value of the index's process_query.
def process_query(query)
  index = ferret_index
  index.synchronize do
    index.send(:ensure_reader_open)
    index.process_query(query)
  end
end
# Total number of hits for the given query.
#
# To count the results of a multi_search query, specify an array of
# class names with the :models option. All other options are passed on to
# the index's search call.
#
# The caller's options hash is left untouched; :models is stripped from a
# copy before searching.
def total_hits(query, options = {})
  search_options = options.dup
  models = search_options.delete(:models)
  index = models ? multi_index(models) : ferret_index
  index.search(query, search_options).total_hits
end
# Works out which stored fields should be fetched for lazy loading.
#
# options[:lazy] may be
# * nil/false - returned as is,
# * an Array  - returned as is (explicit field list),
# * any other truthy value - replaced by the names of all configured
#   :ferret_fields whose :store setting is :yes.
#
# The outcome is written to the debug log and returned.
def determine_lazy_fields(options = {})
  lazy = options[:lazy]
  fields =
    if lazy && !(Array === lazy)
      storable = aaf_configuration[:ferret_fields].select { |_name, config| config[:store] == :yes }
      storable.map { |name, _config| name }
    else
      lazy
    end
  logger.debug "stored_fields: #{fields}"
  fields
end
# Queries the Ferret index to retrieve model class, id, score and the
# values of any fields stored in the index for each hit.
#
# query   - handed to process_query (for the debug log) and to the index's
#           search_each as given
# options - passed through to search_each; options[:lazy] additionally
#           determines which stored fields are fetched per hit (see
#           determine_lazy_fields)
#
# The model is the document's :class_name when :store_class_name is
# configured, otherwise the configured :class_name.
#
# If a block is given, model, id, score and data are yielded for each hit
# and the number of total hits is returned. Otherwise
# [total_hits, result_array] is returned, each result being a Hash with the
# keys :model, :id, :score and :data.
def find_id_by_contents(query, options = {})
  result = []
  index = ferret_index
  # Parse through our own process_query wrapper instead of calling the index
  # directly: the raw call doesn't ensure the reader is open.
  # TODO only enable this for debugging purposes
  logger.debug "query: #{process_query(query)}"
  lazy_fields = determine_lazy_fields options
  total_hits = index.search_each(query, options) do |hit, score|
    doc = index[hit]
    model = aaf_configuration[:store_class_name] ? doc[:class_name] : aaf_configuration[:class_name]
    # fetch stored fields if lazy loading
    data = {}
    lazy_fields.each { |field| data[field] = doc[field] } if lazy_fields
    if block_given?
      yield model, doc[:id], score, data
    else
      result << { :model => model, :id => doc[:id], :score => score, :data => data }
    end
  end
  block_given? ? total_hits : [total_hits, result]
end
# Queries multiple Ferret indexes to retrieve class name, id and score for
# each hit. Use the models parameter to give the list of models to search.
#
# options are passed through to search_each; options[:lazy] additionally
# determines which stored fields are collected per hit.
#
# If a block is given, class name, id, score, the document itself and the
# collected field data are yielded for each hit, and the number of total
# hits is returned. Otherwise [total_hits, result_array] is returned, each
# result being a Hash with the keys :model, :id, :score and :data.
#
# Raises a RuntimeError when a hit has no stored :class_name.
#
# NOTE(review): the block receives the document as an extra 4th value,
# unlike find_id_by_contents - confirm callers expect five values.
def id_multi_search(query, models, options = {})
  index = multi_index(models)
  lazy_fields = determine_lazy_fields options
  result = []
  total_hits = index.search_each(query, options) do |hit, score|
    doc = index[hit]
    # fetch stored fields if lazy loading
    data = {}
    if lazy_fields
      lazy_fields.each { |field| data[field] = doc[field] }
    end
    if doc[:class_name].blank?
      raise "':store_class_name => true' required for multi_search to work"
    end
    if block_given?
      yield doc[:class_name], doc[:id], score, doc, data
    else
      result << { :model => doc[:class_name], :id => doc[:id], :score => score, :data => data }
    end
  end
  if block_given?
    total_hits
  else
    [ total_hits, result ]
  end
end
######################################
# methods working on a single record
# called from instance_methods, here to simplify interfacing with the
# remote ferret server
# TODO having to pass id and class_name around like this isn't nice
######################################
# Adds a record to the index.
# record may be the full AR object, a Ferret document instance or a Hash;
# anything that is neither a Hash nor a Ferret::Document is converted with
# its to_doc method first.
# Returns the result of appending to the index. Also available as <<.
def add(record)
  doc = case record
        when Hash, Ferret::Document then record
        else record.to_doc
        end
  ferret_index << doc
end
alias << add
# Deletes the record with the given id from the index.
# id         - id of the record; matched against the :id field
# class_name - forwarded to query_for_record, which does not use it in
#              this class
# Returns whatever the index's query_delete returns.
def remove(id, class_name)
ferret_index.query_delete query_for_record(id, class_name)
end
# Highlights search terms for the record with the given id.
#
# id, class_name - identify the record; used to look up its document number
# query          - the query whose terms should be highlighted
# options        - forwarded to the index's highlight call. Defaults:
#                  :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>'.
#                  With :field only that field is highlighted; without it,
#                  every configured ferret field is, except those with
#                  :store => :no or :highlight => :no.
#
# The caller's options hash is not modified.
#
# Returns an Array with at most :num_excerpts excerpts (empty for 0).
# Raises whatever document_number raises when the record cannot be found.
def highlight(id, class_name, query, options = {})
  # Fill in defaults on a copy so the caller's hash (whatever Hash flavour it
  # is) stays untouched.
  options = options.dup
  { :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>' }.each do |key, value|
    options[key] = value unless options.key?(key)
  end

  highlights = []
  ferret_index.synchronize do
    doc_num = document_number(id, class_name)
    if options[:field]
      highlights << ferret_index.highlight(query, doc_num, options)
    else
      parsed_query = process_query(query) # process only once
      aaf_configuration[:ferret_fields].each_pair do |field, config|
        next if config[:store] == :no || config[:highlight] == :no
        highlights << ferret_index.highlight(parsed_query, doc_num, options.merge(:field => field))
      end
    end
  end
  # first(n) instead of [0..n-1]: the range form returns everything for n == 0
  highlights.compact.flatten.first(options[:num_excerpts])
end
# Retrieves the ferret document number of the record with the given id.
#
# Searches the index with the query built by query_for_record and returns
# the document number of the only hit.
# Raises a RuntimeError unless the search yields exactly one hit.
def document_number(id, class_name)
  found = ferret_index.search(query_for_record(id, class_name))
  unless found.total_hits == 1
    raise "cannot determine document number from primary key: #{id}"
  end
  found.hits.first.doc
end
# Builds a ferret term query matching only the record with the given id.
# id         - converted with to_s and matched against the :id field
# class_name - accepted but not used by this implementation; only relevant
#              for shared index configurations
def query_for_record(id, class_name = nil)
Ferret::Search::TermQuery.new(:id, id.to_s)
end
protected
# Returns a MultiIndex instance operating on a MultiReader.
#
# model_classes - list of model classes, or of class names (detected by
#                 looking at the first element), to search across
#
# Instances are cached in ActsAsFerret::multi_indexes under a key made of
# the sorted, concatenated class names. The given list is not modified, and
# the configuration copy is only built when a new MultiIndex has to be
# created.
def multi_index(model_classes)
  classes = String === model_classes.first ? model_classes.map(&:constantize) : model_classes
  classes = classes.sort { |a, b| a.name <=> b.name }
  key = classes.inject("") { |s, clazz| s + clazz.name }
  ActsAsFerret::multi_indexes[key] ||= begin
    multi_config = aaf_configuration[:ferret].dup
    multi_config.delete :default_field # we don't want the default field list of *this* class for multi_searching
    MultiIndex.new(classes, multi_config)
  end
end
end
end