class_methods.rb
11.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
module ActsAsFerret
        
  module ClassMethods
    # Disables ferret index updates for this model by setting the +:enabled+
    # flag in +aaf_configuration+ to false.
    #
    # When a block is given, it is executed with Ferret disabled and Ferret is
    # switched back on through +enable_ferret+ afterwards. Re-enabling happens
    # in an +ensure+ clause, so an exception raised inside the block does not
    # leave index updates switched off. Note that Ferret is always enabled
    # after the block, regardless of the state it was in before the call.
    #
    # Returns nil when called without a block, otherwise the result of
    # +enable_ferret+.
    def disable_ferret
      aaf_configuration[:enabled] = false
      return unless block_given?

      reenabled = nil
      begin
        yield
      ensure
        # runs on normal completion as well as when the block raises
        reenabled = enable_ferret
      end
      reenabled
    end
    # Enables ferret index updates for this model by setting the +:enabled+
    # flag in +aaf_configuration+ to true.
    def enable_ferret
      aaf_configuration[:enabled] = true
    end
    # Returns the current value of the +:enabled+ flag stored in
    # +aaf_configuration+ (the flag toggled by +disable_ferret+ and
    # +enable_ferret+).
    def ferret_enabled?
      aaf_configuration[:enabled]
    end
    # Rebuilds the index from all data stored for this model, and for any other
    # model classes associated with the same index.
    # This is called automatically when no index exists yet.
    #
    # Delegates to +rebuild_index+ on the index object returned by +aaf_index+.
    def rebuild_index
      aaf_index.rebuild_index
    end
    # Re-indexes the records identified by the given ids. Intended for large
    # indexing jobs, e.g. after modifying a lot of records with Ferret disabled.
    #
    # The ids may be passed as individual arguments or as one Enumerable. A
    # trailing Hash is treated as an options hash and handed on to the index
    # together with this class's name and the ids.
    #
    # Please note that the state of Ferret (enabled or disabled at class or
    # record level) is not checked by this method; if that matters to you
    # (e.g. because of a custom ferret_enabled? implementation), you have to
    # check it yourself.
    def bulk_index(*ids)
      options = ids.last.is_a?(Hash) ? ids.pop : {}
      if ids.size == 1
        sole_argument = ids.first
        # a single collection argument is unwrapped and used as the id list
        ids = sole_argument if sole_argument.is_a?(Enumerable)
      end
      aaf_index.bulk_index(self.name, ids, options)
    end
    # Tells whether the MySQL fast batch hack can be used for this model (see
    # http://weblog.jamisbuck.org/2007/4/6/faking-cursors-in-activerecord).
    #
    # Returns true, after logging an info message, when the connection's class
    # name contains "Mysql", the primary key is 'id' and the
    # +:mysql_fast_batches+ configuration entry is set. Returns nil otherwise.
    def use_fast_batches?
      mysql_connection = connection.class.name =~ /Mysql/
      return unless mysql_connection && primary_key == 'id' && aaf_configuration[:mysql_fast_batches]

      logger.info "using mysql specific batched find :all. Turn off with  :mysql_fast_batches => false if you encounter problems (i.e. because of non-integer UUIDs in the id column)"
      true
    end
    # Returns all records modified or created at or after the specified time.
    # Used by the rake rebuild task to find models that need to be updated in
    # the index after the rebuild finished because they changed while the
    # rebuild was running.
    #
    # A condition is built for each of the +updated_at+ / +created_at+ columns
    # the model actually has. The conditions are combined with OR: a record
    # that was created before +time+ but updated afterwards must be returned as
    # well, which an AND combination would exclude.
    #
    # When the model has neither column, a warning is logged and an empty array
    # is returned. Override if your models don't stick to the
    # created_at/updated_at convention.
    def records_modified_since(time)
      timestamp_columns = %w(updated_at created_at).select { |col| column_names.include?(col) }
      if timestamp_columns.empty?
        logger.warn "#{self.name}: Override records_modified_since(time) to keep the index up to date with records changed during rebuild."
        return []
      end

      clauses = timestamp_columns.map { |col| "#{col} >= ?" }
      # one bind value per placeholder
      find :all, :conditions => [ clauses.join(' OR '), *([time] * clauses.size) ]
    end
    # Runs across all records of this model inside a transaction, yielding them
    # in batches of at most +batch_size+ so they can be indexed during a
    # rebuild. The block receives two arguments: the batch and an offset.
    #
    # * When +use_fast_batches?+ is true, each batch is fetched with an
    #   "id greater than the last seen id" condition and the yielded offset is
    #   the id of the last record in the batch. The query is explicitly ordered
    #   by id: without an ORDER BY the database is free to return rows in any
    #   order, in which case the last row of a batch would not necessarily
    #   carry the highest id and records could be skipped or fetched twice.
    #   This path starts at id 0, i.e. it assumes positive numeric ids.
    # * Otherwise batches are fetched with LIMIT/OFFSET ordered by primary key,
    #   and the yielded offset is the row offset of the batch. Because the
    #   stepping includes the record count itself, a final empty batch is
    #   requested when the count is a multiple of +batch_size+.
    def records_for_rebuild(batch_size = 1000)
      transaction do
        if use_fast_batches?
          id_order = "#{table_name}.id ASC"
          offset = 0
          while (rows = find(:all, :conditions => [ "#{table_name}.id > ?", offset ], :limit => batch_size, :order => id_order)).any?
            offset = rows.last.id
            yield rows, offset
          end
        else
          order = "#{primary_key} ASC" # fixes #212
          0.step(self.count, batch_size) do |offset|
            yield find( :all, :limit => batch_size, :offset => offset, :order => order ), offset
          end
        end
      end
    end
    # Yields the records with the given ids, in batches of +batch_size+, inside
    # a transaction.
    #
    # +ids+ is cut into slices of +batch_size+; for every slice the matching
    # records are loaded with a single query and yielded together with an
    # offset, which starts at 0 and grows by +batch_size+ per slice.
    def records_for_bulk_index(ids, batch_size = 1000)
      transaction do
        offset = 0
        ids.each_slice(batch_size) do |id_slice|
          # query once per slice; the result is handed straight to the block
          yield find( :all, :conditions => ["id in (?)", id_slice] ), offset
          offset += batch_size
        end
      end
    end
    # Returns the index instance for this model class. This can either be a
    # LocalIndex, or a RemoteIndex instance.
    #
    # The index is looked up once through ActsAsFerret::get_index, using the
    # +:name+ entry of +aaf_configuration+, and then kept in +@index+ for
    # subsequent calls.
    def aaf_index
      return @index if @index

      @index = ActsAsFerret.get_index(aaf_configuration[:name])
    end
    
    # Finds instances by searching the Ferret index. Terms are ANDed by default, use
    # OR between terms for ORed queries. Or specify +:or_default => true+ in the
    # +:ferret+ options hash of acts_as_ferret.
    #
    # You may either use the +offset+ and +limit+ options to implement your own
    # pagination logic, or use the +page+ and +per_page+ options to use the
    # built in pagination support which is compatible with will_paginate's view
    # helpers. If +page+ and +per_page+ are given, +offset+ and +limit+ will be
    # ignored.
    #
    # == options:
    # page::        page of search results to retrieve
    # per_page::    number of search results that are displayed per page
    # offset::      first hit to retrieve (useful for paging)
    # limit::       number of hits to retrieve, or :all to retrieve
    #               all results
    # lazy::        Array of field names whose contents should be read directly
    #               from the index. Those fields have to be marked
    #               +:store => :yes+ in their field options. Give true to get all
    #               stored fields. (Note that if you have a shared index, you have
    #               to explicitly state the fields you want to fetch, true won't
    #               work here.)
    #
    # +find_options+ is a hash passed on to active_record's find when
    # retrieving the data from db, useful to e.g. prefetch relationships with
    # :include or to specify additional filter criteria with :conditions.
    #
    # Neither +options+ nor +find_options+ is modified; the method works on
    # shallow copies of both hashes.
    #
    # This method returns a +SearchResults+ instance, which really is an Array that has
    # been decorated with a total_hits attribute holding the total number of hits.
    # Additionally, SearchResults is compatible with the pagination helper
    # methods of the will_paginate plugin.
    #
    # Please keep in mind that the number of results delivered might be less than
    # +limit+ if you specify any active record conditions that further limit
    # the result. Use +limit+ and +offset+ as AR find_options instead.
    # +page+ and +per_page+ are supposed to work regardless of any
    # +conditions+ present in +find_options+.
    def find_with_ferret(q, options = {}, find_options = {})
      # Work on copies: the paging logic below rewrites both hashes, and a
      # search must not change the caller's hashes as a side effect.
      options      = options.dup
      find_options = find_options.dup

      # Merge conditions of an active scope into the find conditions.
      # NOTE(review): the merge interpolates both sides into a SQL string, so it
      # presumably expects String conditions on both sides - confirm how Array
      # or Hash conditions should be handled here.
      if respond_to?(:scope) && scope(:find, :conditions)
        if find_options[:conditions]
          find_options[:conditions] = "(#{find_options[:conditions]}) AND (#{scope(:find, :conditions)})"
        else
          find_options[:conditions] = scope(:find, :conditions)
        end
      end

      if options[:per_page]
        options[:page] = options[:page] ? options[:page].to_i : 1
        limit = options[:per_page]
        offset = (options[:page] - 1) * limit
        if find_options[:conditions]
          # conditions filter the hits further, so fetch all hits from ferret
          # and let the find options do the paging
          find_options[:limit] = limit
          find_options[:offset] = offset
          options[:limit] = :all
          options.delete :offset
        else
          # do pagination with ferret
          options[:limit] = limit
          options[:offset] = offset
        end
      elsif find_options[:conditions]
        # no paging requested: move limit/offset from the ferret options over
        # to the find options and retrieve all hits from ferret
        find_options[:limit] ||= options.delete(:limit) unless options[:limit] == :all
        find_options[:offset] ||= options.delete(:offset)
        options[:limit] = :all
      end

      total_hits, result = aaf_index.find_records q, options.merge(:models => [self]), find_options
      logger.debug "Query: #{q}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
      SearchResults.new(result, total_hits, options[:page], options[:per_page])
    end
    # Returns the total number of hits for the given query.
    #
    # The query +q+ and +options+ are handed unchanged to +total_hits+ on the
    # index object returned by +aaf_index+.
    #
    # Note that since we don't query the database here, this method won't deliver
    # the expected results when used on an AR association.
    #
    def total_hits(q, options={})
      aaf_index.total_hits(q, options)
    end
    # Finds model names, ids and scores of the records matching the query.
    # Useful e.g. if you want to search across models or do not want to fetch
    # all result records (yet).
    #
    # Options are the same as for find_by_contents.
    #
    # A block can be given too, it will be executed with every result:
    #
    #   find_ids_with_ferret(q, options) do |model, id, score|
    #     id_array << id
    #     scores_by_id[id] = score
    #   end
    #
    # NOTE: in case a block is given, only the total_hits value will be returned
    # instead of the [total_hits, results] array!
    #
    # The query, options and block are passed straight on to +find_ids+ on the
    # index object returned by +aaf_index+.
    def find_ids_with_ferret(q, options = {}, &block)
      aaf_index.find_ids(q, options, &block)
    end
    
    protected
#    def find_records_lazy_or_not(q, options = {}, find_options = {})
#      if options[:lazy]
#        logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
#        lazy_find_by_contents q, options
#      else
#        ar_find_by_contents q, options, find_options
#      end
#    end
#
#    def ar_find_by_contents(q, options = {}, find_options = {})
#      result_ids = {}
#      total_hits = find_ids_with_ferret(q, options) do |model, id, score, data|
#        # stores ids, index and score of each hit for later ordering of
#        # results
#        result_ids[id] = [ result_ids.size + 1, score ]
#      end
#
#      result = ActsAsFerret::retrieve_records( { self.name => result_ids }, find_options )
#      
#      # count total_hits via sql when using conditions or when we're called
#      # from an ActiveRecord association.
#      if find_options[:conditions] or caller.find{ |call| call =~ %r{active_record/associations} }
#        # chances are the ferret result count is not our total_hits value, so
#        # we correct this here.
#        if options[:limit] != :all || options[:page] || options[:offset] || find_options[:limit] || find_options[:offset]
#          # our ferret result has been limited, so we need to re-run that
#          # search to get the full result set from ferret.
#          result_ids = {}
#          find_ids_with_ferret(q, options.update(:limit => :all, :offset => 0)) do |model, id, score, data|
#            result_ids[id] = [ result_ids.size + 1, score ]
#          end
#          # Now ask the database for the total size of the final result set.
#          total_hits = count_records( { self.name => result_ids }, find_options )
#        else
#          # what we got from the database is our full result set, so take
#          # it's size
#          total_hits = result.length
#        end
#      end
#
#      [ total_hits, result ]
#    end
#
#    def lazy_find_by_contents(q, options = {})
#      logger.debug "lazy_find_by_contents: #{q}"
#      result = []
#      rank   = 0
#      total_hits = find_ids_with_ferret(q, options) do |model, id, score, data|
#        logger.debug "model: #{model}, id: #{id}, data: #{data}"
#        result << FerretResult.new(model, id, score, rank += 1, data)
#      end
#      [ total_hits, result ]
#    end
    # Looks up a record of another model class: +model+ is turned into a class
    # with +constantize+, and +id+ and +find_options+ are passed to that
    # class's +find+. Returns whatever that +find+ call returns.
    def model_find(model, id, find_options = {})
      model.constantize.find(id, find_options)
    end
#    def count_records(id_arrays, find_options = {})
#      count_options = find_options.dup
#      count_options.delete :limit
#      count_options.delete :offset
#      count = 0
#      id_arrays.each do |model, id_array|
#        next if id_array.empty?
#        model = model.constantize
#        # merge conditions
#        conditions = ActsAsFerret::combine_conditions([ "#{model.table_name}.#{model.primary_key} in (?)", id_array.keys ], 
#                                        find_options[:conditions])
#        opts = find_options.merge :conditions => conditions
#        opts.delete :limit; opts.delete :offset
#        count += model.count opts
#      end
#      count
#    end
  end
  
end