# class_methods.rb (12.3 KB)
module ActsAsFerret
        
  module ClassMethods

    # rebuild the index from all data stored for this model.
    # This is called automatically when no index exists yet.
    #
    # When calling this method manually, you can give any additional 
    # model classes that should also go into this index as parameters. 
    # Useful when using the :single_index option.
    # Note that attributes named the same in different models will share
    # the same field options in the shared index.
    def rebuild_index(*models)
      # always include this class in the list of models to index
      models << self unless models.include?(self)
      aaf_index.rebuild_index models.map(&:to_s)
      # The explicit receiver is required here: a bare `index_dir = ...` would
      # only create an unused local variable and never invoke the index_dir=
      # writer, leaving this class pointed at the old index.
      # Remote setups are skipped, as in the original condition.
      self.index_dir = find_last_index_version(aaf_configuration[:index_base_dir]) unless aaf_configuration[:remote]
    end

    # runs across all records yielding those to be indexed when the index is rebuilt
    # Yields [ records, offset ] for every batch of at most +batch_size+
    # records. In the MySQL branch +offset+ is the id of the last record of
    # the batch, in the generic branch it is the row offset of the batch.
    def records_for_rebuild(batch_size = 1000)
      transaction do
        if connection.class.name =~ /Mysql/ && primary_key == 'id'
          logger.info "using mysql specific batched find :all"
          # Keyset pagination: each query fetches the next batch of rows whose
          # id is greater than the last id seen. That only visits every row
          # exactly once if rows come back in ascending id order, so the
          # ordering is requested explicitly instead of being left to the
          # server's choice of access path.
          offset = 0
          while (rows = find(:all, :conditions => ["id > ?", offset],
                                   :order => "id ASC",
                                   :limit => batch_size)).any?
            offset = rows.last.id
            yield rows, offset
          end
        else
          # sql server adapter won't batch correctly without defined ordering
          order = "#{primary_key} ASC" if connection.class.name =~ /SQLServer/
          0.step(self.count, batch_size) do |offset|
            yield find( :all, :limit => batch_size, :offset => offset, :order => order ), offset
          end
        end
      end
    end

    # Switches this class to a new index located in dir.
    # Used by the DRb server when switching to a new index version.
    def index_dir=(dir)
      logger.debug "changing index dir to #{dir}"
      # both the ferret path and the acts_as_ferret index_dir entry are set,
      # and only then is the index reopened
      aaf_configuration[:ferret][:path] = dir
      aaf_configuration[:index_dir] = dir
      aaf_index.reopen!
      logger.debug "index dir is now #{dir}"
    end
    
    # Retrieve the index instance for this model class. This can either be a
    # LocalIndex, or a RemoteIndex instance.
    # 
    # Index instances are stored in a hash, using the index directory
    # as the key. So model classes sharing a single index will share their
    # Index object, too.
    def aaf_index
      registry  = ActsAsFerret.ferret_indexes
      cache_key = aaf_configuration[:index_dir]
      # build the index instance only when none is registered for this directory
      registry[cache_key] ||= create_index_instance
    end
    
    # Finds instances by searching the Ferret index. Terms are ANDed by default, use 
    # OR between terms for ORed queries. Or specify +:or_default => true+ in the
    # +:ferret+ options hash of acts_as_ferret.
    #
    # == options:
    # offset::      first hit to retrieve (useful for paging)
    # limit::       number of hits to retrieve, or :all to retrieve
    #               all results
    # lazy::        Array of field names whose contents should be read directly
    #               from the index. Those fields have to be marked 
    #               +:store => :yes+ in their field options. Give true to get all
    #               stored fields. (Note that if you have a shared index, you have
    #               to explicitly state the fields you want to fetch; true won't
    #               work here.)
    # models::      only for single_index scenarios: an Array of other Model classes to 
    #               include in this search. Use :all to query all models.
    #
    # +find_options+ is a hash passed on to active_record's find when
    # retrieving the data from db, useful e.g. to prefetch relationships with
    # :include or to specify additional filter criteria with :conditions.
    #
    # This method returns a +SearchResults+ instance, which really is an Array that has 
    # been decorated with a total_hits attribute holding the total number of hits.
    #
    # Please keep in mind that the number of total hits might be wrong if you specify 
    # both ferret options and active record find_options that somehow limit the result 
    # set (e.g. +:limit+ and some +:conditions+).
    def find_with_ferret(q, options = {}, find_options = {})
      hit_count, records = find_records_lazy_or_not(q, options, find_options)
      logger.debug "Query: #{q}\ntotal hits: #{hit_count}, results delivered: #{records.size}"
      # wrap the records together with the overall hit count
      SearchResults.new(records, hit_count)
    end
    alias find_by_contents find_with_ferret

   

    # Returns the total number of hits for the given query 
    # To count the results of a multi_search query, specify an array of 
    # class names with the :models option.
    def total_hits(q, options={})
      requested_models = options[:models]
      if requested_models
        # the :models entry is replaced in place by the list of class names,
        # with this class added
        with_self = add_self_to_model_list_if_necessary(requested_models)
        options[:models] = with_self.map { |model| model.to_s }
      end
      aaf_index.total_hits(q, options)
    end

    # Finds instance model name, ids and scores by contents. 
    # Useful e.g. if you want to search across models or do not want to fetch
    # all result records (yet).
    #
    # Options are the same as for find_by_contents
    #
    # A block can be given too, it will be executed with every result:
    # find_id_by_contents(q, options) do |model, id, score|
    #    id_array << id
    #    scores_by_id[id] = score 
    # end
    # NOTE: in case a block is given, only the total_hits value will be returned
    # instead of the [total_hits, results] array!
    # 
    def find_id_by_contents(q, options = {}, &block)
      # map deprecated option names onto the current ones (in place) first
      deprecated_options_support options
      index = aaf_index
      index.find_id_by_contents(q, options, &block)
    end

    # requires the store_class_name option of acts_as_ferret to be true
    # for all models queried this way.
    def multi_search(query, additional_models = [], options = {}, find_options = {})
      if options[:lazy]
        # lazy results are built from index data only, so find_options cannot apply
        unless find_options.empty?
          logger.warn "find_options #{find_options} are ignored because :lazy => true"
        end
        hits = []
        hit_count = id_multi_search(query, additional_models, options) do |model, id, score, data|
          hits.push(FerretResult.new(model, id, score, data))
        end
      else
        # per model: id => [ position in the overall ferret result, score ]
        ids_by_model = {}
        position = 0
        hit_count = id_multi_search(query, additional_models, options) do |model, id, score, data|
          ids_for_model = (ids_by_model[model] ||= {})
          position += 1
          ids_for_model[id] = [ position, score ]
        end
        hits = retrieve_records(ids_by_model, find_options)
      end

      SearchResults.new(hits, hit_count)
    end
    
    # returns an array of hashes, each containing :class_name,
    # :id and :score for a hit.
    #
    # if a block is given, class_name, id and score of each hit will 
    # be yielded, and the total number of hits is returned.
    def id_multi_search(query, additional_models = [], options = {}, &proc)
      deprecated_options_support options
      # this class is added to the list; the index receives class names
      searched_models = add_self_to_model_list_if_necessary(additional_models)
      index = aaf_index
      index.id_multi_search(query, searched_models.map { |model| model.to_s }, options, &proc)
    end
    

    protected

    # Returns an Array holding the given model(s) plus this class.
    #
    # +models+ may be a single model or an Array of models. The caller's
    # Array is left untouched; a new Array is always returned.
    def add_self_to_model_list_if_necessary(models)
      # copy, so a list owned by the caller is not modified
      models = models.is_a?(Array) ? models.dup : [ models ]
      models << self unless models.include?(self)
      # Return the list explicitly: the `unless` modifier above evaluates to
      # nil when self is already included, which made callers fail on nil.map.
      models
    end

    # Dispatches to the lazy (index data only) or the ActiveRecord backed
    # finder, depending on options[:lazy], and returns that finder's result.
    def find_records_lazy_or_not(q, options = {}, find_options = {})
      return ar_find_by_contents(q, options, find_options) unless options[:lazy]

      unless find_options.empty?
        logger.warn "find_options #{find_options} are ignored because :lazy => true"
      end
      lazy_find_by_contents(q, options)
    end

    # Runs the ferret query and loads the matching records via
    # retrieve_records. Returns [ total_hits, records ].
    #
    # When find_options[:conditions] is given, the hit count reported by
    # ferret does not account for those conditions, so the total is
    # recomputed from the complete, unpaged ferret result via count_records.
    #
    # The +options+ hash passed in is not modified by this method itself.
    def ar_find_by_contents(q, options = {}, find_options = {})
      result_ids = {}
      total_hits = find_id_by_contents(q, options) do |_model, id, score, _data|
        # stores ids, index of each id for later ordering of
        # results, and score
        result_ids[id] = [ result_ids.size + 1, score ]
      end

      result = retrieve_records( { self.name => result_ids }, find_options )

      if find_options[:conditions]
        if options[:limit] != :all || options[:offset].to_i > 0
          # Correct the total if the user specified conditions: repeat the
          # ferret query without paging and count the matching rows in the db.
          # merge (not update) keeps the caller's options intact, and the
          # offset is reset so hits before the current page are counted too.
          all_ids = {}
          find_id_by_contents(q, options.merge(:limit => :all, :offset => 0)) do |_model, id, score, _data|
            all_ids[id] = [ all_ids.size + 1, score ]
          end
          total_hits = count_records( { self.name => all_ids }, find_options )
        else
          # the ferret result was not paged, so what came back from the
          # database already is the complete result set
          total_hits = result.length
        end
      end

      [ total_hits, result ]
    end

    # Runs the ferret query and wraps every hit in a FerretResult instead of
    # loading records. Returns [ total_hits, results ].
    def lazy_find_by_contents(q, options = {})
      hits = []
      hit_count = find_id_by_contents(q, options) do |model, id, score, data|
        hits.push(FerretResult.new(model, id, score, data))
      end
      [ hit_count, hits ]
    end


    # Resolves the class named by +model+ and delegates to its find method
    # with +id+ and +find_options+.
    def model_find(model, id, find_options = {})
      klass = model.constantize
      klass.find(id, find_options)
    end

    # retrieves search result records from a data structure like this:
    # { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] }
    #
    # TODO: in case of STI AR will filter out hits from other 
    # classes for us, but this
    # will lead to less results retrieved --> scoping of ferret query
    # to self.class is still needed.
    # from the ferret ML (thanks Curtis Hatter)
    # > I created a method in my base STI class so I can scope my query. For scoping
    # > I used something like the following line:
    # > 
    # > query << " role:#{self.class.eql?(Contents) '*' : self.class}"
    # > 
    # > Though you could make it more generic by simply asking
    # > "self.descends_from_active_record?" which is how rails decides if it should
    # > scope your "find" query for STI models. You can check out "base.rb" in
    # > activerecord to see that.
    # but maybe better do the scoping in find_id_by_contents...
    def retrieve_records(id_arrays, find_options = {})
      result = []
      # get objects for each model
      id_arrays.each do |model, id_array|
        next if id_array.empty?
        begin
          model = model.constantize
        rescue
          raise "Please use ':store_class_name => true' if you want to use multi_search.\n#{$!}"
        end

        # check for include association that might only exist on some models in case of multi_search.
        # :include may be a single association name, a Hash of nested
        # associations, or an Array of those. Wrapping and flattening gives one
        # flat list for all three forms; calling each on a bare Symbol raised,
        # and on a bare Hash it iterated key/value pairs.
        include_options = [ find_options[:include] ].flatten.compact
        filtered_include_options = include_options.select do |include_option|
          # for a Hash entry only its first key is checked against the model's associations
          association = include_option.is_a?(Hash) ? include_option.keys[0] : include_option
          model.reflections.has_key?(association.to_sym)
        end
        filtered_include_options = nil if filtered_include_options.empty?

        # fetch
        tmp_result = nil
        model.send(:with_scope, :find => find_options) do
          tmp_result = model.find( :all, :conditions => [
            "#{model.table_name}.#{model.primary_key} in (?)", id_array.keys ],
            :include => filtered_include_options )
        end

        # set scores and rank (id_array is keyed by the id's string form)
        tmp_result.each do |record|
          record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
        end
        # merge with result array
        result.concat tmp_result
      end

      # order results as they were found by ferret, unless an AR :order
      # option was given
      result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
      result
    end

    # Counts, per model, the rows whose primary key is among the given ids
    # and that also satisfy +find_options+ (applied via with_scope), and
    # returns the sum over all models.
    #
    # +id_arrays+ has the same shape as for retrieve_records:
    # { 'Model1' => { '1' => [ rank, score ], ... } }
    def count_records(id_arrays, find_options = {})
      id_arrays.inject(0) do |sum, (model, id_array)|
        # nothing to count for models without hits
        next sum if id_array.empty?
        begin
          model = model.constantize
          matching = 0
          model.send(:with_scope, :find => find_options) do
            in_clause = "#{model.table_name}.#{model.primary_key} in (?)"
            matching = model.count(:conditions => [ in_clause, id_array.keys ])
          end
          sum + matching
        rescue TypeError
          raise "#{model} must use :store_class_name option if you want to use multi_search against it.\n#{$!}"
        end
      end
    end

    # Maps the deprecated :num_docs and :first_doc options onto :limit and
    # :offset, modifying +options+ in place and logging a warning for each
    # deprecated key found. Values already given under the new names win.
    def deprecated_options_support(options)
      legacy_limit = options[:num_docs]
      if legacy_limit
        logger.warn ":num_docs is deprecated, use :limit instead!"
        options[:limit] ||= legacy_limit
      end

      legacy_offset = options[:first_doc]
      if legacy_offset
        logger.warn ":first_doc is deprecated, use :offset instead!"
        options[:offset] ||= legacy_offset
      end
    end

    # creates a new Index instance.
    def create_index_instance
      # :remote takes precedence over :single_index; LocalIndex is the fallback
      index_class =
        if aaf_configuration[:remote]
          RemoteIndex
        elsif aaf_configuration[:single_index]
          SharedIndex
        else
          LocalIndex
        end
      index_class.new(aaf_configuration)
    end

  end
  
end