local_index.rb
7.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
module ActsAsFerret
class LocalIndex < AbstractIndex
include MoreLikeThis::IndexMethods
# Sets up the index through the AbstractIndex initializer and then runs
# #ensure_index_exists so the index is checked (and rebuilt if its
# segments file is missing) right away.
#
# index_name - handed unchanged to the superclass initializer.
def initialize(index_name)
  super(index_name)
  ensure_index_exists
end
# Closes the current Ferret index handle and obtains it again through
# #ferret_index. Returns whatever #ferret_index returns.
def reopen!
  index_path = index_definition[:ferret][:path]
  logger.debug "reopening index at #{index_path}"
  close
  ferret_index
end
# The 'real' Ferret Index instance.
#
# Runs #ensure_index_exists, lazily creates the Ferret index from
# index_definition[:ferret] (memoized in @ferret_index until #close resets
# it), and applies the configured batch size and logger on every access
# before returning the instance.
#
# Deliberately written without ActiveSupport's Object#returning, which was
# removed from later ActiveSupport releases.
def ferret_index
  ensure_index_exists
  @ferret_index ||= Ferret::Index::Index.new(index_definition[:ferret])
  @ferret_index.batch_size = index_definition[:reindex_batch_size]
  @ferret_index.logger = logger
  @ferret_index
end
# Makes sure an index is present on disk.
#
# The index counts as present when a 'segments' file exists inside the
# configured :index_dir. If it does not, the directory is passed to
# ActsAsFerret::ensure_directory and the index is rebuilt.
def ensure_index_exists
  #logger.debug "LocalIndex: ensure_index_exists at #{index_definition[:index_dir]}"
  index_dir = index_definition[:index_dir]
  return if File.file?("#{index_dir}/segments")

  ActsAsFerret::ensure_directory(index_dir)
  rebuild_index
end
# Closes the underlying index instance, if one is currently held, and
# always drops the reference afterwards.
def close
  begin
    if @ferret_index
      @ferret_index.close
    end
  rescue StandardError
    # raised when the index has already been closed - nothing left to do
  ensure
    @ferret_index = nil
  end
end
# Rebuilds the index from all records of the model classes registered
# with this index.
#
# The current handle is closed, a new Ferret index is created with
# :create => true and :auto_flush => false on top of the configured Ferret
# options, the registered models are indexed into it, and finally the
# regular index handle is reopened.
def rebuild_index
  registered = index_definition[:registered_models]
  logger.debug "rebuild index with models: #{registered.inspect}"
  close
  build_options = index_definition[:ferret].merge(
    :auto_flush  => false,
    :field_infos => ActsAsFerret::field_infos(index_definition),
    :create      => true
  )
  fresh_index = Ferret::Index::Index.new(build_options)
  fresh_index.batch_size = index_definition[:reindex_batch_size]
  fresh_index.logger = logger
  fresh_index.index_models(registered)
  reopen!
end
# Hands a bulk indexing request to the underlying Ferret index.
#
# class_name - name of the model class; turned into the class via
#              #constantize.
# ids        - passed through to the index's bulk_index unchanged.
# options    - passed through to the index's bulk_index unchanged.
def bulk_index(class_name, ids, options)
  index = ferret_index
  model_class = class_name.constantize
  index.bulk_index(model_class, ids, options)
end
# Parses the given query string into a Ferret Query object.
#
# query   - anything that is not a String is returned untouched.
# options - with :analyzer set, a dedicated Ferret::QueryParser is built
#           from the index's own options merged with these options;
#           its fields default to the reader's fields unless :all_fields
#           or :fields is given, and its tokenized fields default to the
#           reader's unless :tokenized_fields is given.
#           Without :analyzer the index's own #process_query is used.
#
# All parsing happens inside the index's synchronize block.
def process_query(query, options = {})
  return query unless String === query

  ferret_index.synchronize do
    unless options[:analyzer]
      # work around ferret bug in #process_query (doesn't ensure the
      # reader is open)
      ferret_index.send(:ensure_reader_open)
      return ferret_index.process_query(query)
    end

    # use per-query analyzer if present
    parser_options = ferret_index.instance_variable_get('@options').merge(options)
    parser = Ferret::QueryParser.new(parser_options)
    index_reader = ferret_index.reader
    unless options[:all_fields] || options[:fields]
      parser.fields = index_reader.fields
    end
    unless options[:tokenized_fields]
      parser.tokenized_fields = index_reader.tokenized_fields
    end
    return parser.parse(query)
  end
end
# Total number of hits for the given query.
#
# Runs the search on the Ferret index with the given options and returns
# the total_hits value of the result.
def total_hits(query, options = {})
  result = ferret_index.search(query, options)
  result.total_hits
end
# The object searches are run against: for a local index this is simply
# the Ferret index returned by #ferret_index.
def searcher
  ferret_index
end
######################################
# methods working on a single record
# called from instance_methods, here to simplify interfacing with the
# remote ferret server
# TODO having to pass id and class_name around like this isn't nice
######################################
# Adds a record to the index (also available as #<<).
#
# record   - the full AR object, a Ferret document instance or a Hash.
#            Hashes and Ferret documents are indexed as given; any other
#            object is asked for its #ferret_analyzer and converted with
#            #to_doc first.
# analyzer - analyzer handed to add_document for Hash / Ferret::Document
#            records; for other records it is replaced by the record's
#            own ferret_analyzer.
def add(record, analyzer = nil)
  case record
  when Hash, Ferret::Document
    # already in a form Ferret can index directly
  else
    analyzer = record.ferret_analyzer
    record = record.to_doc
  end
  ferret_index.add_document(record, analyzer)
end
alias << add
# Deletes a record from the index.
#
# The record is addressed with the query built by #query_for_record for
# the given id and class name, which is passed to the index's
# query_delete.
def remove(id, class_name)
  index = ferret_index
  index.query_delete(query_for_record(id, class_name))
end
# Highlights search terms for the record with the given id.
#
# id, class_name - identify the record; resolved to a Ferret document
#                  number via #document_number.
# query          - the query whose terms get highlighted. When no :field
#                  option is given it is run through #process_query once
#                  before being used for every field.
# options        - handed to the Ferret index's highlight call.
#                  :num_excerpts, :pre_tag and :post_tag default to 2,
#                  '<em>' and '</em>'. With :field set only that field is
#                  highlighted; otherwise every entry of
#                  index_definition[:ferret_fields] is used except those
#                  configured with :store => :no or :highlight => :no.
#
# The caller's options hash is never modified: defaults and the per-field
# :field entry are applied to private copies.
#
# Returns an Array holding at most options[:num_excerpts] excerpts.
def highlight(id, class_name, query, options = {})
  logger.debug("highlight: #{class_name} / #{id} query: #{query}")
  # work on a copy so neither the defaults nor a :field entry leak back
  # into the hash owned by the caller
  options = options.dup
  options.reverse_merge! :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>'
  highlights = []
  ferret_index.synchronize do
    doc_num = document_number(id, class_name)
    if options[:field]
      highlights << ferret_index.highlight(query, doc_num, options)
    else
      query = process_query(query) # process only once
      index_definition[:ferret_fields].each_pair do |field, config|
        next if config[:store] == :no || config[:highlight] == :no
        highlights << ferret_index.highlight(query, doc_num, options.merge(:field => field))
      end
    end
  end
  highlights.compact.flatten[0..options[:num_excerpts]-1]
end
# Retrieves the ferret document number of the record with the given id.
#
# Searches the index with the query from #query_for_record and raises a
# RuntimeError unless that search reports exactly one hit.
def document_number(id, class_name)
  result = ferret_index.search(query_for_record(id, class_name))
  unless result.total_hits == 1
    raise "cannot determine document number for class #{class_name} / primary key: #{id}\nresult was: #{result.inspect}"
  end
  result.hits.first.doc
end
# Builds a ferret query matching only the record with the given id.
#
# id         - primary key of the record; matched as a String against the
#              :id field.
# class_name - only needs to be given in case of a shared index
#              configuration, where it is matched against the :class_name
#              field in addition to the id.
#
# Returns a TermQuery for a non-shared index, or a BooleanQuery requiring
# both terms for a shared one. Raises when the index is shared and no
# class_name was given.
#
# Written without ActiveSupport's Object#returning, which was removed
# from later ActiveSupport releases.
def query_for_record(id, class_name = nil)
  return Ferret::Search::TermQuery.new(:id, id.to_s) unless shared?

  # NOTE(review): InvalidArgumentError is not defined in this file -
  # confirm it is declared elsewhere in the project.
  raise InvalidArgumentError.new("shared index needs class_name argument") if class_name.nil?
  query = Ferret::Search::BooleanQuery.new
  query.add_query(Ferret::Search::TermQuery.new(:id, id.to_s), :must)
  query.add_query(Ferret::Search::TermQuery.new(:class_name, class_name), :must)
  query
end
# Works out which stored fields are to be loaded from the index, based on
# options[:lazy]:
#
# * nil / false - returned as is
# * an Array    - returned as is
# * anything else truthy - the names of all fields in
#   index_definition[:ferret_fields] configured with :store => :yes
#
# The result is also written to the debug log.
def determine_stored_fields(options = {})
  requested = options[:lazy]
  stored_fields =
    if !requested || Array === requested
      requested
    else
      names = []
      index_definition[:ferret_fields].each_pair do |name, config|
        names << name if config[:store] == :yes
      end
      names
    end
  logger.debug "stored_fields: #{stored_fields.inspect}"
  return stored_fields
end
# Loads data for fields declared as :lazy from the Ferret document.
#
# doc           - the document to read from; accessed with [] using the
#                 field names as well as :class_name and :id (for logging).
# stored_fields - list of field names to extract; may be nil / false, in
#                 which case nothing is extracted.
#
# Returns a Hash mapping each known field's :via setting to the value
# found in the document. Names without an entry in
# index_definition[:ferret_fields] are skipped.
def extract_stored_fields(doc, stored_fields)
  field_configs = index_definition[:ferret_fields]
  extracted = {}
  logger.debug "extracting stored fields #{stored_fields.inspect} from document #{doc[:class_name]} / #{doc[:id]}"
  if stored_fields
    stored_fields.each do |field|
      config = field_configs[field]
      next unless config
      extracted[config[:via]] = doc[field]
    end
  end
  logger.debug "done: #{extracted.inspect}"
  return extracted
end
protected
# returns a MultiIndex instance operating on a MultiReader
#def multi_index(model_classes)
# model_classes.map!(&:constantize) if String === model_classes.first
# model_classes.sort! { |a, b| a.name <=> b.name }
# key = model_classes.inject("") { |s, clazz| s + clazz.name }
# multi_config = index_definition[:ferret].dup
# multi_config.delete :default_field # we don't want the default field list of *this* class for multi_searching
# ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config)
#end
end
end