local_index.rb 7.21 KB
Edit Raw Blame History



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199


module ActsAsFerret
  class LocalIndex < AbstractIndex
    include MoreLikeThis::IndexMethods

    # Lets the parent class set itself up for index_name, then makes sure
    # the index is present (see #ensure_index_exists).
    def initialize(index_name)
      super(index_name)
      ensure_index_exists
    end

    # Closes the current Ferret index handle and hands back the one returned
    # by #ferret_index afterwards.
    def reopen!
      index_path = index_definition[:ferret][:path]
      logger.debug "reopening index at #{index_path}"
      close
      ferret_index
    end

    # The 'real' Ferret Index instance.
    #
    # Runs #ensure_index_exists, opens the Ferret index on first use (the
    # instance is memoized in @ferret_index) and applies the configured
    # batch size and logger on every call before returning the instance.
    def ferret_index
      ensure_index_exists
      # Plain Ruby instead of ActiveSupport's Object#returning, which was
      # deprecated and later removed from Rails; behaviour is unchanged.
      @ferret_index ||= Ferret::Index::Index.new(index_definition[:ferret])
      @ferret_index.batch_size = index_definition[:reindex_batch_size]
      @ferret_index.logger = logger
      @ferret_index
    end

    # Looks for a 'segments' file inside the configured index directory.
    # When there is none, ActsAsFerret::ensure_directory is called for that
    # directory and the index is rebuilt.
    def ensure_index_exists
      index_dir = index_definition[:index_dir]
      #logger.debug "LocalIndex: ensure_index_exists at #{index_dir}"
      return if File.file?("#{index_dir}/segments")

      ActsAsFerret::ensure_directory(index_dir)
      rebuild_index
    end

    # Closes the underlying index instance, if there is one, and forgets it.
    # Errors raised while closing are swallowed; @ferret_index is reset to
    # nil in every case.
    def close
      begin
        index = @ferret_index
        index.close if index
      rescue StandardError
        # is raised when the index has already been closed
        nil
      ensure
        @ferret_index = nil
      end
    end

    # Rebuilds the index from all records of the model classes registered
    # with this index, then reopens it.
    def rebuild_index
      models = index_definition[:registered_models]
      logger.debug "rebuild index with models: #{models.inspect}"
      close

      # separate index instance for the rebuild, opened with :create => true
      # and auto flushing switched off
      build_options = index_definition[:ferret].merge(
        :auto_flush  => false,
        :field_infos => ActsAsFerret::field_infos(index_definition),
        :create      => true
      )
      builder = Ferret::Index::Index.new(build_options)
      builder.batch_size = index_definition[:reindex_batch_size]
      builder.logger = logger
      builder.index_models(models)

      reopen!
    end

    # Resolves class_name to its class (via #constantize) and forwards the
    # bulk indexing request for the given ids to the Ferret index.
    def bulk_index(class_name, ids, options)
      index = ferret_index
      model_class = class_name.constantize
      index.bulk_index(model_class, ids, options)
    end

    # Parses the given query string into a Ferret Query object. Anything that
    # is not a String is handed back untouched.
    #
    # With options[:analyzer] a dedicated query parser is built from the
    # index's own options merged with the given ones; its fields and
    # tokenized fields are taken from the index reader unless the options
    # specify them (:all_fields / :fields, :tokenized_fields).
    def process_query(query, options = {})
      return query unless query.is_a?(String)

      parsed = nil
      ferret_index.synchronize do
        parsed =
          if options[:analyzer]
            # use per-query analyzer if present
            parser_options = ferret_index.instance_variable_get('@options').merge(options)
            parser = Ferret::QueryParser.new(parser_options)
            reader = ferret_index.reader
            parser.fields = reader.fields unless options[:all_fields] || options[:fields]
            parser.tokenized_fields = reader.tokenized_fields unless options[:tokenized_fields]
            parser.parse(query)
          else
            # work around ferret bug in #process_query (doesn't ensure the
            # reader is open)
            ferret_index.send(:ensure_reader_open)
            ferret_index.process_query(query)
          end
      end
      parsed
    end

    # Runs the query against the Ferret index and reports the total number
    # of hits of the result.
    def total_hits(query, options = {})
      results = ferret_index.search(query, options)
      results.total_hits
    end

    # Returns the object searches are run against; for a local index this is
    # simply the Ferret index (see #ferret_index).
    def searcher
      ferret_index
    end


    ######################################
    # methods working on a single record
    # called from instance_methods, here to simplify interfacing with the
    # remote ferret server
    # TODO having to pass id and class_name around like this isn't nice
    ######################################

    # Adds a record to the index (also available as #<<).
    #
    # record may be a Hash or a Ferret document, which is indexed as given
    # with the analyzer passed in, or any other object (e.g. the full AR
    # object), which is converted with #to_doc and indexed with its own
    # #ferret_analyzer - the analyzer argument is ignored in that case.
    def add(record, analyzer = nil)
      case record
      when Hash, Ferret::Document
        # already in a form Ferret can index
      else
        analyzer = record.ferret_analyzer
        record = record.to_doc
      end
      ferret_index.add_document(record, analyzer)
    end
    alias << add

    # Deletes from the index whatever matches the query built by
    # #query_for_record for the given id and class name.
    def remove(id, class_name)
      index = ferret_index
      index.query_delete(query_for_record(id, class_name))
    end

    # Highlights search terms for the record with the given id.
    #
    # id, class_name - identify the record; handed to #document_number to
    #                  find its Ferret document
    # query          - the query whose terms get highlighted; run through
    #                  #process_query once when several fields are highlighted
    # options        - handed on to the Ferret index's #highlight. Defaults
    #                  filled in here: :num_excerpts => 2, :pre_tag => '<em>',
    #                  :post_tag => '</em>'. With :field only that field is
    #                  highlighted; otherwise every configured field that is
    #                  neither :store => :no nor :highlight => :no.
    #
    # Returns an Array with at most :num_excerpts excerpts. The options hash
    # passed in by the caller is not modified.
    def highlight(id, class_name, query, options = {})
      logger.debug("highlight: #{class_name} / #{id} query: #{query}")
      # Work on a copy: neither the defaults nor the per-field :field entry
      # used below may leak into the caller's hash.
      options = options.dup
      { :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>' }.each do |key, value|
        options[key] = value unless options.key?(key)
      end
      highlights = []
      ferret_index.synchronize do
        doc_num = document_number(id, class_name)

        if options[:field]
          highlights << ferret_index.highlight(query, doc_num, options)
        else
          query = process_query(query) # process only once
          index_definition[:ferret_fields].each_pair do |field, config|
            next if config[:store] == :no || config[:highlight] == :no
            highlights << ferret_index.highlight(query, doc_num, options.merge(:field => field))
          end
        end
      end
      # first(n) also copes with :num_excerpts => 0; the former [0..n-1] slice
      # became [0..-1] in that case and returned every excerpt.
      highlights.compact.flatten.first(options[:num_excerpts])
    end

    # Looks up the Ferret document number of the record with the given id.
    # Raises a RuntimeError unless the lookup yields exactly one hit.
    def document_number(id, class_name)
      hits = ferret_index.search(query_for_record(id, class_name))
      unless hits.total_hits == 1
        raise "cannot determine document number for class #{class_name} / primary key: #{id}\nresult was: #{hits.inspect}"
      end
      hits.hits.first.doc
    end

    # Builds a Ferret query matching only the record with the given id.
    #
    # For a non-shared index this is a TermQuery on :id. For a shared index
    # the class name is required as well and the result is a BooleanQuery
    # requiring both the :id and the :class_name term; an error is raised if
    # class_name is nil in that case.
    def query_for_record(id, class_name = nil)
      return Ferret::Search::TermQuery.new(:id, id.to_s) unless shared?

      # NOTE(review): InvalidArgumentError is not defined in this file -
      # confirm it is declared elsewhere in the plugin.
      raise InvalidArgumentError.new("shared index needs class_name argument") if class_name.nil?

      # Built step by step instead of via ActiveSupport's Object#returning,
      # which was deprecated and later removed from Rails.
      query = Ferret::Search::BooleanQuery.new
      query.add_query(Ferret::Search::TermQuery.new(:id,         id.to_s),    :must)
      query.add_query(Ferret::Search::TermQuery.new(:class_name, class_name), :must)
      query
    end


    # Works out which stored fields to load, based on options[:lazy]:
    # an Array is returned as is, any other truthy value stands for every
    # field configured with :store => :yes, and nil/false is returned as is.
    def determine_stored_fields(options = {})
      lazy = options[:lazy]
      stored_fields =
        if lazy && !lazy.is_a?(Array)
          index_definition[:ferret_fields].map { |name, cfg| name if cfg[:store] == :yes }.compact
        else
          lazy
        end
      logger.debug "stored_fields: #{stored_fields.inspect}"
      stored_fields
    end

    # Collects the values of the given stored fields from a Ferret document.
    #
    # Only fields known to the index definition are considered; each value is
    # stored in the result under the field's :via setting. Returns a Hash,
    # which is empty when stored_fields is nil/false or nothing matched.
    def extract_stored_fields(doc, stored_fields)
      field_configs = index_definition[:ferret_fields]
      extracted = {}
      logger.debug "extracting stored fields #{stored_fields.inspect} from document #{doc[:class_name]} / #{doc[:id]}"
      if stored_fields
        stored_fields.each do |name|
          config = field_configs[name]
          next unless config
          extracted[config[:via]] = doc[name]
        end
      end
      logger.debug "done: #{extracted.inspect}"
      extracted
    end

    protected

    # returns a MultiIndex instance operating on a MultiReader
    #def multi_index(model_classes)
    #  model_classes.map!(&:constantize) if String === model_classes.first
    #  model_classes.sort! { |a, b| a.name <=> b.name }
    #  key = model_classes.inject("") { |s, clazz| s + clazz.name }
    #  multi_config = index_definition[:ferret].dup
    #  multi_config.delete :default_field  # we don't want the default field list of *this* class for multi_searching
    #  ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config)
    #end
 
  end

end