# local_index.rb (7.21 KB)
module ActsAsFerret
  class LocalIndex < AbstractIndex
    include MoreLikeThis::IndexMethods

    # Runs the superclass constructor with the given index name and then
    # calls #ensure_index_exists, so a missing index is rebuilt right away.
    def initialize(index_name)
      super(index_name)
      ensure_index_exists
    end

    # Closes the current index handle (see #close) and opens it again.
    #
    # Returns whatever #ferret_index returns.
    def reopen!
      index_path = index_definition[:ferret][:path]
      logger.debug("reopening index at #{index_path}")
      close
      ferret_index
    end

    # The 'real' Ferret Index instance.
    #
    # Calls #ensure_index_exists first, then lazily creates the
    # Ferret::Index::Index from index_definition[:ferret] and caches it in
    # @ferret_index. The batch size and logger are applied on every call, not
    # only when the instance is first created (unchanged from before).
    #
    # Written with plain statements instead of Object#returning, an
    # ActiveSupport helper that has been deprecated and later removed.
    #
    # Returns the cached Ferret::Index::Index instance.
    def ferret_index
      ensure_index_exists
      @ferret_index ||= Ferret::Index::Index.new(index_definition[:ferret])
      @ferret_index.batch_size = index_definition[:reindex_batch_size]
      @ferret_index.logger = logger
      @ferret_index
    end

    # Looks for a 'segments' file inside index_definition[:index_dir].
    # When there is none, ActsAsFerret::ensure_directory is called for that
    # directory and the index is rebuilt via #rebuild_index.
    #
    # Returns nil when the segments file is present, otherwise the result of
    # #rebuild_index.
    def ensure_index_exists
      index_dir = index_definition[:index_dir]
      return if File.file?("#{index_dir}/segments")

      ActsAsFerret::ensure_directory(index_dir)
      rebuild_index
    end

    # Closes the underlying index instance, if there is one, and always
    # resets @ferret_index to nil afterwards.
    #
    # Closing stays best-effort: a StandardError raised by the index (it
    # raises when it has already been closed) never reaches the caller. It is
    # now written to the debug log instead of being dropped silently.
    #
    # Returns the result of the index's close call, or nil when no index was
    # open or an error was swallowed.
    def close
      @ferret_index.close if @ferret_index
    rescue StandardError => e
      # is raised when index already closed
      logger.debug "ignoring error while closing index: #{e.class}: #{e.message}"
      nil
    ensure
      @ferret_index = nil
    end

    # Rebuilds the index from all records of the model classes registered
    # with this index (index_definition[:registered_models]).
    #
    # The current handle is closed, a new Ferret index is instantiated with
    # :create => true and :auto_flush => false on top of the configured Ferret
    # options, the models are indexed into it, and finally the regular handle
    # is reopened.
    #
    # Returns the result of #reopen!.
    def rebuild_index
      registered = index_definition[:registered_models]
      logger.debug "rebuild index with models: #{registered.inspect}"
      close

      build_options = index_definition[:ferret].merge(
        :auto_flush  => false,
        :field_infos => ActsAsFerret::field_infos(index_definition),
        :create      => true
      )
      builder = Ferret::Index::Index.new(build_options)
      builder.batch_size = index_definition[:reindex_batch_size]
      builder.logger = logger
      builder.index_models registered
      reopen!
    end

    # Passes a batch of ids on to the Ferret index's bulk_index.
    # class_name is converted to a class with #constantize first.
    def bulk_index(class_name, ids, options)
      index = ferret_index
      model_class = class_name.constantize
      index.bulk_index(model_class, ids, options)
    end

    # Parses the given query string into a Ferret Query object. Anything that
    # is not a String is handed back untouched.
    #
    # With options[:analyzer] set, a dedicated Ferret::QueryParser is built
    # from the index's @options merged with the given options; its fields and
    # tokenized fields are taken from the index reader unless the options
    # supply them (:all_fields / :fields, :tokenized_fields). Without an
    # analyzer option, parsing is delegated to the index itself.
    def process_query(query, options = {})
      return query unless query.is_a?(String)

      ferret_index.synchronize do
        unless options[:analyzer]
          # work around ferret bug in #process_query (doesn't ensure the
          # reader is open)
          ferret_index.send(:ensure_reader_open)
          return ferret_index.process_query(query)
        end

        # a per-query analyzer was requested: use a parser of our own
        parser_options = ferret_index.instance_variable_get('@options').merge(options)
        parser = Ferret::QueryParser.new(parser_options)
        index_reader = ferret_index.reader
        unless options[:all_fields] || options[:fields]
          parser.fields = index_reader.fields
        end
        unless options[:tokenized_fields]
          parser.tokenized_fields = index_reader.tokenized_fields
        end
        return parser.parse(query)
      end
    end

    # Total number of hits for the given query: runs a search on the Ferret
    # index with the given options and returns the result's total_hits.
    def total_hits(query, options = {})
      search_result = ferret_index.search(query, options)
      search_result.total_hits
    end

    # Returns the Ferret index instance (see #ferret_index).
    # NOTE(review): presumably this is the object callers run searches
    # against - confirm against the callers.
    def searcher
      ferret_index
    end


    ######################################
    # methods working on a single record
    # called from instance_methods, here to simplify interfacing with the
    # remote ferret server
    # TODO having to pass id and class_name around like this isn't nice
    ######################################

    # Adds a record to the index. Also available as #<<.
    #
    # record   - a Hash or Ferret::Document is passed to the index as is. Any
    #            other object is asked for its #ferret_analyzer and converted
    #            with #to_doc; in that case the record's analyzer replaces
    #            the analyzer argument.
    # analyzer - analyzer passed to the index's add_document (default: nil).
    def add(record, analyzer = nil)
      case record
      when Hash, Ferret::Document
        # can be handed to the index without conversion
      else
        analyzer = record.ferret_analyzer
        record = record.to_doc
      end
      ferret_index.add_document(record, analyzer)
    end
    alias << add

    # Deletes a record from the index: the query built by #query_for_record
    # for the given id / class_name is passed to the index's query_delete.
    def remove(id, class_name)
      index = ferret_index
      record_query = query_for_record(id, class_name)
      index.query_delete(record_query)
    end

    # Highlights the search terms of query for the record with the given id.
    #
    # id, class_name - identify the record (see #document_number).
    # query          - the query; in the all-fields case it is run through
    #                  #process_query once before highlighting.
    # options        - passed on to the index's highlight method. Defaults:
    #                  :num_excerpts => 2, :pre_tag => '<em>',
    #                  :post_tag => '</em>'. With :field set, only that field
    #                  is highlighted; otherwise every field in
    #                  index_definition[:ferret_fields] is, except those
    #                  configured with :store => :no or :highlight => :no.
    #
    # The caller's options hash is not modified: defaults and the per-field
    # :field entry are applied to copies.
    #
    # Returns an Array with at most :num_excerpts excerpts (an empty Array
    # for :num_excerpts => 0); nil results from the index are dropped.
    def highlight(id, class_name, query, options = {})
      logger.debug("highlight: #{class_name} / #{id} query: #{query}")

      # apply defaults to a copy (same hash class) instead of the argument
      opts = options.dup
      { :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>' }.each do |key, value|
        opts[key] = value unless opts.key?(key)
      end

      excerpts = []
      ferret_index.synchronize do
        doc_num = document_number(id, class_name)

        if opts[:field]
          excerpts << ferret_index.highlight(query, doc_num, opts)
        else
          parsed_query = process_query(query) # process only once
          index_definition[:ferret_fields].each_pair do |field, config|
            next if config[:store] == :no || config[:highlight] == :no
            excerpts << ferret_index.highlight(parsed_query, doc_num, opts.merge(:field => field))
          end
        end
      end

      # flatten before compacting so nils nested in per-field results are
      # removed too; first(n) also behaves for n == 0, unlike [0..n-1]
      limit = [opts[:num_excerpts], 0].max
      excerpts.flatten.compact.first(limit)
    end

    # Retrieves the ferret document number of the record with the given id
    # by searching the index with the query from #query_for_record.
    #
    # Raises a RuntimeError unless that search reports exactly one hit.
    def document_number(id, class_name)
      result = ferret_index.search(query_for_record(id, class_name))
      unless result.total_hits == 1
        raise "cannot determine document number for class #{class_name} / primary key: #{id}\nresult was: #{result.inspect}"
      end
      result.hits.first.doc
    end

    # Builds a ferret query matching only the record with the given id.
    #
    # id         - the record id; converted with #to_s for the :id term.
    # class_name - only needed for a shared index configuration, where the
    #              query must match both :id and :class_name.
    #
    # Returns a Ferret::Search::TermQuery, or a Ferret::Search::BooleanQuery
    # for a shared index. Raises InvalidArgumentError when the index is
    # shared and class_name is nil.
    #
    # Written with plain statements instead of Object#returning, an
    # ActiveSupport helper that has been deprecated and later removed.
    def query_for_record(id, class_name = nil)
      return Ferret::Search::TermQuery.new(:id, id.to_s) unless shared?

      # NOTE(review): InvalidArgumentError is not defined in this file and is
      # not a Ruby core class - confirm it exists elsewhere in the project,
      # otherwise this line raises NameError instead.
      raise InvalidArgumentError.new("shared index needs class_name argument") if class_name.nil?

      record_query = Ferret::Search::BooleanQuery.new
      record_query.add_query(Ferret::Search::TermQuery.new(:id,         id.to_s),    :must)
      record_query.add_query(Ferret::Search::TermQuery.new(:class_name, class_name), :must)
      record_query
    end



    # Resolves options[:lazy] into a list of field names.
    #
    # * nil / false           - returned as is
    # * an Array              - returned unchanged
    # * any other true value  - replaced by the names of all fields in
    #                           index_definition[:ferret_fields] configured
    #                           with :store => :yes
    #
    # The outcome is also written to the debug log.
    def determine_stored_fields(options = {})
      lazy = options[:lazy]
      if lazy && !lazy.is_a?(Array)
        lazy = index_definition[:ferret_fields].inject([]) do |names, (field, config)|
          config[:store] == :yes ? names << field : names
        end
      end
      logger.debug "stored_fields: #{lazy.inspect}"
      lazy
    end

    # Loads data for fields declared as :lazy from the Ferret document.
    #
    # doc           - the document, read with []; :class_name and :id are
    #                 only used for the debug log.
    # stored_fields - list of field names, or nil / false for none.
    #
    # Returns a Hash that maps the :via setting of each field found in
    # index_definition[:ferret_fields] to the document's value for that
    # field. Field names without a configuration entry are skipped.
    def extract_stored_fields(doc, stored_fields)
      field_configs = index_definition[:ferret_fields]
      logger.debug "extracting stored fields #{stored_fields.inspect} from document #{doc[:class_name]} / #{doc[:id]}"

      extracted = {}
      (stored_fields || []).each do |field|
        config = field_configs[field]
        next unless config
        extracted[config[:via]] = doc[field]
      end

      logger.debug "done: #{extracted.inspect}"
      extracted
    end

    protected

    # returns a MultiIndex instance operating on a MultiReader
    #def multi_index(model_classes)
    #  model_classes.map!(&:constantize) if String === model_classes.first
    #  model_classes.sort! { |a, b| a.name <=> b.name }
    #  key = model_classes.inject("") { |s, clazz| s + clazz.name }
    #  multi_config = index_definition[:ferret].dup
    #  multi_config.delete :default_field  # we don't want the default field list of *this* class for multi_searching
    #  ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config)
    #end
 
  end

end