Commit 122fa1cde02f297126cae0de2df654213d35c276
1 parent
ad8ab8df
Exists in
master
and in
29 other branches
ActionItem129: added acts_as_ferret back with piston and new version
git-svn-id: https://svn.colivre.coop.br/svn/noosfero/trunk@1832 3f533792-8f58-4932-b0fe-aaf55b0a4547
Showing
38 changed files
with
3681 additions
and
2 deletions
Show diff stats
lib/acts_as_searchable.rb
1 | 1 | class << ActiveRecord::Base |
2 | 2 | |
3 | - def acts_as_searchable(options = {}, ferret_options = {}) | |
4 | - acts_as_ferret({ :remote => true }.merge(options), ferret_options) | |
3 | + def acts_as_searchable(options = {}) | |
4 | + acts_as_ferret({ :remote => true }.merge(options)) | |
5 | + def find_by_contents(*args) | |
6 | + find_with_ferret(*args) | |
7 | + end | |
5 | 8 | end |
6 | 9 | |
7 | 10 | end | ... | ... |
... | ... | @@ -0,0 +1,20 @@ |
1 | +Copyright (c) 2006 Kasper Weibel, Jens Kraemer | |
2 | + | |
3 | +Permission is hereby granted, free of charge, to any person obtaining | |
4 | +a copy of this software and associated documentation files (the | |
5 | +"Software"), to deal in the Software without restriction, including | |
6 | +without limitation the rights to use, copy, modify, merge, publish, | |
7 | +distribute, sublicense, and/or sell copies of the Software, and to | |
8 | +permit persons to whom the Software is furnished to do so, subject to | |
9 | +the following conditions: | |
10 | + | |
11 | +The above copyright notice and this permission notice shall be | |
12 | +included in all copies or substantial portions of the Software. | |
13 | + | |
14 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
15 | +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
16 | +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
17 | +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | |
18 | +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
19 | +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | |
20 | +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ... | ... |
... | ... | @@ -0,0 +1,53 @@ |
1 | += acts_as_ferret | |
2 | + | |
3 | +This ActiveRecord mixin adds full text search capabilities to any Rails model. | |
4 | + | |
5 | +It is heavily based on the original acts_as_ferret plugin done by | |
6 | +Kasper Weibel and a modified version done by Thomas Lockney, which | |
7 | +both can be found on http://ferret.davebalmain.com/trac/wiki/FerretOnRails | |
8 | + | |
9 | +== Installation | |
10 | + | |
11 | +=== Installation inside your Rails project via script/plugin | |
12 | + | |
13 | +script/plugin install svn://projects.jkraemer.net/acts_as_ferret/trunk/plugin/acts_as_ferret | |
14 | + | |
15 | +Aaf is also available via git from Rubyforge: | |
16 | +git clone git://rubyforge.org/actsasferret.git | |
17 | + | |
18 | +=== System-wide installation with Rubygems | |
19 | + | |
20 | +<tt>sudo gem install acts_as_ferret</tt> | |
21 | + | |
22 | +To use acts_as_ferret in your project, add the following line to your | |
23 | +project's config/environment.rb: | |
24 | + | |
25 | +<tt>require 'acts_as_ferret'</tt> | |
26 | + | |
27 | +Call the aaf_install script inside your project directory to install the sample | |
28 | +config file and the drb server start/stop script. | |
29 | + | |
30 | + | |
31 | +== Usage | |
32 | + | |
33 | +include the following in your model class (specifying the fields you want to get indexed): | |
34 | + | |
35 | +<tt>acts_as_ferret :fields => [ :title, :description ]</tt> | |
36 | + | |
37 | +now you can use ModelClass.find_by_contents(query) to find instances of your model | |
38 | +whose indexed fields match a given query. All query terms are required by default, | |
39 | +but explicit OR queries are possible. This differs from the ferret default, but | |
40 | +imho is the more often needed/expected behaviour (more query terms result in | |
41 | +fewer results). | |
42 | + | |
43 | +Please see ActsAsFerret::ActMethods#acts_as_ferret for more information. | |
44 | + | |
45 | +== License | |
46 | + | |
47 | +Released under the MIT license. | |
48 | + | |
49 | +== Authors | |
50 | + | |
51 | +* Kasper Weibel Nielsen-Refs (original author) | |
52 | +* Jens Kraemer <jk@jkraemer.net> (current maintainer) | |
53 | + | ... | ... |
... | ... | @@ -0,0 +1,23 @@ |
1 | +# acts_as_ferret gem install script | |
2 | +# Use inside the root of your Rails project | |
3 | +require 'fileutils' | |
4 | + | |
5 | +@basedir = File.join(File.dirname(__FILE__), '..') | |
6 | + | |
7 | +def install(dir, file, executable=false) | |
8 | + puts "Installing: #{file}" | |
9 | + target = File.join('.', dir, file) | |
10 | + if File.exists?(target) | |
11 | + puts "#{target} already exists, skipping" | |
12 | + else | |
13 | + FileUtils.cp File.join(@basedir, dir, file), target | |
14 | + FileUtils.chmod 0755, target if executable | |
15 | + end | |
16 | +end | |
17 | + | |
18 | + | |
19 | +install 'script', 'ferret_server', true | |
20 | +install 'config', 'ferret_server.yml' | |
21 | + | |
22 | +puts IO.read(File.join(@basedir, 'README')) | |
23 | + | ... | ... |
... | ... | @@ -0,0 +1,24 @@ |
1 | +# configuration for the acts_as_ferret DRb server | |
2 | +# host: where to reach the DRb server (used by application processes to contact the server) | |
3 | +# port: which port the server should listen on | |
4 | +# socket: where the DRb server should create the socket (absolute path), this setting overrides host:port configuration | |
5 | +# pid_file: location of the server's pid file (relative to RAILS_ROOT) | |
6 | +# log_file: log file (default: RAILS_ROOT/log/ferret_server.log) | |
7 | +# log_level: log level for the server's logger | |
8 | +production: | |
9 | + host: localhost | |
10 | + port: 9010 | |
11 | + pid_file: log/ferret.pid | |
12 | + log_file: log/ferret_server.log | |
13 | + log_level: warn | |
14 | + | |
15 | +# aaf won't try to use the DRb server in environments that are not | |
16 | +# configured here. | |
17 | +#development: | |
18 | +# host: localhost | |
19 | +# port: 9010 | |
20 | +# pid_file: log/ferret.pid | |
21 | +#test: | |
22 | +# host: localhost | |
23 | +# port: 9009 | |
24 | +# pid_file: log/ferret.pid | ... | ... |
... | ... | @@ -0,0 +1,23 @@ |
1 | +Credits | |
2 | +======= | |
3 | + | |
4 | +The Win32 service support scripts have been written by | |
5 | +Herryanto Siatono <herryanto@pluitsolutions.com>. | |
6 | + | |
7 | +See his accompanying blog posting at | |
8 | +http://www.pluitsolutions.com/2007/07/30/acts-as-ferret-drbserver-win32-service/ | |
9 | + | |
10 | + | |
11 | +Usage | |
12 | +===== | |
13 | + | |
14 | +There are two scripts: | |
15 | + | |
16 | +script/ferret_service is used to install/remove/start/stop the win32 service. | |
17 | + | |
18 | +script/ferret_daemon is to be called by Win32 service to start/stop the | |
19 | +DRbServer. | |
20 | + | |
21 | +Run 'ruby script/ferret_service -h' for more info. | |
22 | + | |
23 | + | ... | ... |
... | ... | @@ -0,0 +1,22 @@ |
1 | +# monit configuration snippet to watch the Ferret DRb server shipped with | |
2 | +# acts_as_ferret | |
3 | +check process ferret with pidfile /path/to/ferret.pid | |
4 | + | |
5 | + # username is the user the drb server should be running as (It's good practice | |
6 | + # to run such services as a non-privileged user) | |
7 | + start program = "/bin/su -c 'cd /path/to/your/app/current/ && script/ferret_server -e production start' username" | |
8 | + stop program = "/bin/su -c 'cd /path/to/your/app/current/ && script/ferret_server -e production stop' username" | |
9 | + | |
10 | + # cpu usage boundaries | |
11 | + if cpu > 60% for 2 cycles then alert | |
12 | + if cpu > 90% for 5 cycles then restart | |
13 | + | |
14 | + # memory usage varies with index size and usage scenarios, so check how | |
15 | + # much memory your DRb server uses up usually and add some spare to that | |
16 | + # before enabling this rule: | |
17 | + # if totalmem > 50.0 MB for 5 cycles then restart | |
18 | + | |
19 | + # adjust port numbers according to your setup: | |
20 | + if failed port 9010 then alert | |
21 | + if failed port 9010 for 2 cycles then restart | |
22 | + group ferret | ... | ... |
... | ... | @@ -0,0 +1,24 @@ |
1 | +# Copyright (c) 2006 Kasper Weibel Nielsen-Refs, Thomas Lockney, Jens Krämer | |
2 | +# | |
3 | +# Permission is hereby granted, free of charge, to any person obtaining a copy | |
4 | +# of this software and associated documentation files (the "Software"), to deal | |
5 | +# in the Software without restriction, including without limitation the rights | |
6 | +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
7 | +# copies of the Software, and to permit persons to whom the Software is | |
8 | +# furnished to do so, subject to the following conditions: | |
9 | +# | |
10 | +# The above copyright notice and this permission notice shall be included in all | |
11 | +# copies or substantial portions of the Software. | |
12 | +# | |
13 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
14 | +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15 | +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
16 | +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
17 | +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
18 | +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
19 | +# SOFTWARE. | |
20 | + | |
21 | +require 'acts_as_ferret' | |
22 | + | |
23 | +config.after_initialize { ActsAsFerret::load_config } | |
24 | +config.to_prepare { ActsAsFerret::load_config } | ... | ... |
... | ... | @@ -0,0 +1,18 @@ |
1 | +# acts_as_ferret install script | |
2 | +require 'fileutils' | |
3 | + | |
4 | +def install(file) | |
5 | + puts "Installing: #{file}" | |
6 | + target = File.join(File.dirname(__FILE__), '..', '..', '..', file) | |
7 | + if File.exists?(target) | |
8 | + puts "target #{target} already exists, skipping" | |
9 | + else | |
10 | + FileUtils.cp File.join(File.dirname(__FILE__), file), target | |
11 | + end | |
12 | +end | |
13 | + | |
14 | +install File.join( 'script', 'ferret_server' ) | |
15 | +install File.join( 'config', 'ferret_server.yml' ) | |
16 | + | |
17 | +puts IO.read(File.join(File.dirname(__FILE__), 'README')) | |
18 | + | ... | ... |
... | ... | @@ -0,0 +1,155 @@ |
1 | +module ActsAsFerret #:nodoc: | |
2 | + | |
3 | + # This module defines the acts_as_ferret method and is included into | |
4 | + # ActiveRecord::Base | |
5 | + module ActMethods | |
6 | + | |
7 | + | |
8 | + def reloadable?; false end | |
9 | + | |
10 | + # declares a class as ferret-searchable. | |
11 | + # | |
12 | + # ====options: | |
13 | + # fields:: names all fields to include in the index. If not given, | |
14 | + # all attributes of the class will be indexed. You may also give | |
15 | + # symbols pointing to instance methods of your model here, i.e. | |
16 | + # to retrieve and index data from a related model. | |
17 | + # | |
18 | + # additional_fields:: names fields to include in the index, in addition | |
19 | + # to those derived from the db schema. Use if you want | |
20 | + # to add custom fields derived from methods to the db | |
21 | + # fields (which will be picked by aaf). This option will | |
22 | + # be ignored when the fields option is given, in that | |
23 | + # case additional fields get specified there. | |
24 | + # | |
25 | + # if:: Can be set to a block that will be called with the record in question | |
26 | + # to determine if it should be indexed or not. | |
27 | + # | |
28 | + # index_dir:: declares the directory where to put the index for this class. | |
29 | + # The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME. | |
30 | + # The index directory will be created if it doesn't exist. | |
31 | + # | |
32 | + # reindex_batch_size:: reindexing is done in batches of this size, default is 1000 | |
33 | + # mysql_fast_batches:: set this to false to disable the faster mysql batching | |
34 | + # algorithm if this model uses a non-integer primary key named | |
35 | + # 'id' on MySQL. | |
36 | + # | |
37 | + # ferret:: Hash of Options that directly influence the way the Ferret engine works. You | |
38 | + # can use most of the options the Ferret::I class accepts here, too. Among the | |
39 | + # more useful are: | |
40 | + # | |
41 | + # or_default:: whether query terms are required by | |
42 | + # default (the default, false), or not (true) | |
43 | + # | |
44 | + # analyzer:: the analyzer to use for query parsing (default: nil, | |
45 | + # which means the ferret StandardAnalyzer gets used) | |
46 | + # | |
47 | + # default_field:: use to set one or more fields that are searched for query terms | |
48 | + # that don't have an explicit field list. This list should *not* | |
49 | + # contain any untokenized fields. If it does, you're asking | |
50 | + # for trouble (i.e. not getting results for queries having | |
51 | + # stop words in them). Aaf by default initializes the default field | |
52 | + # list to contain all tokenized fields. If you use :single_index => true, | |
53 | + # you really should set this option specifying your default field | |
54 | + # list (which should be equal in all your classes sharing the index). | |
55 | + # Otherwise you might get incorrect search results and you won't get | |
56 | + # any lazy loading of stored field data. | |
57 | + # | |
58 | + # NOTE: this method accepts a single options Hash; pass Ferret engine options | |
59 | + # under the :ferret key (a separate trailing Hash argument is not accepted). | |
60 | + def acts_as_ferret(options={}) | |
61 | + | |
62 | + extend ClassMethods | |
63 | + | |
64 | + include InstanceMethods | |
65 | + include MoreLikeThis::InstanceMethods | |
66 | + | |
67 | + if options[:rdig] | |
68 | + cattr_accessor :rdig_configuration | |
69 | + self.rdig_configuration = options[:rdig] | |
70 | + require 'rdig_adapter' | |
71 | + include ActsAsFerret::RdigAdapter | |
72 | + end | |
73 | + | |
74 | + unless included_modules.include?(ActsAsFerret::WithoutAR) | |
75 | + # set up AR hooks | |
76 | + after_create :ferret_create | |
77 | + after_update :ferret_update | |
78 | + after_destroy :ferret_destroy | |
79 | + end | |
80 | + | |
81 | + cattr_accessor :aaf_configuration | |
82 | + | |
83 | + # apply default config for rdig based models | |
84 | + if options[:rdig] | |
85 | + options[:fields] ||= { :title => { :boost => 3, :store => :yes }, | |
86 | + :content => { :store => :yes } } | |
87 | + end | |
88 | + | |
89 | + # name of this index | |
90 | + index_name = options.delete(:index) || self.name.underscore | |
91 | + | |
92 | + index = ActsAsFerret::register_class_with_index(self, index_name, options) | |
93 | + self.aaf_configuration = index.index_definition.dup | |
94 | + logger.debug "configured index for class #{self.name}:\n#{aaf_configuration.inspect}" | |
95 | + | |
96 | + # update our copy of the global index config with options local to this class | |
97 | + aaf_configuration[:class_name] ||= self.name | |
98 | + aaf_configuration[:if] ||= options[:if] | |
99 | + | |
100 | + # add methods for retrieving field values | |
101 | + add_fields options[:fields] | |
102 | + add_fields options[:additional_fields] | |
103 | + add_fields aaf_configuration[:fields] | |
104 | + add_fields aaf_configuration[:additional_fields] | |
105 | + | |
106 | + # not good at class level, index might get initialized too early | |
107 | + #if options[:remote] | |
108 | + # aaf_index.ensure_index_exists | |
109 | + #end | |
110 | + end | |
111 | + | |
112 | + | |
113 | + protected | |
114 | + | |
115 | + | |
116 | + # helper that defines a method which adds the given field to a ferret | |
117 | + # document instance | |
118 | + def define_to_field_method(field, options = {}) | |
119 | + method_name = "#{field}_to_ferret" | |
120 | + return if instance_methods.include?(method_name) # already defined | |
121 | + aaf_configuration[:defined_fields] ||= {} | |
122 | + aaf_configuration[:defined_fields][field] = options | |
123 | + dynamic_boost = options[:boost] if options[:boost].is_a?(Symbol) | |
124 | + via = options[:via] || field | |
125 | + define_method(method_name.to_sym) do | |
126 | + val = begin | |
127 | + content_for_field_name(field, via, dynamic_boost) | |
128 | + rescue | |
129 | + logger.warn("Error retrieving value for field #{field}: #{$!}") | |
130 | + '' | |
131 | + end | |
132 | + logger.debug("Adding field #{field} with value '#{val}' to index") | |
133 | + val | |
134 | + end | |
135 | + end | |
136 | + | |
137 | + def add_fields(field_config) | |
138 | + # TODO | |
139 | + #field_config.each do |*args| | |
140 | + # define_to_field_method *args | |
141 | + #end | |
142 | + if field_config.is_a? Hash | |
143 | + field_config.each_pair do |field, options| | |
144 | + define_to_field_method field, options | |
145 | + end | |
146 | + elsif field_config.respond_to?(:each) | |
147 | + field_config.each do |field| | |
148 | + define_to_field_method field | |
149 | + end | |
150 | + end | |
151 | + end | |
152 | + | |
153 | + end | |
154 | + | |
155 | +end | ... | ... |
... | ... | @@ -0,0 +1,567 @@ |
1 | +# Copyright (c) 2006 Kasper Weibel Nielsen-Refs, Thomas Lockney, Jens Krämer | |
2 | +# | |
3 | +# Permission is hereby granted, free of charge, to any person obtaining a copy | |
4 | +# of this software and associated documentation files (the "Software"), to deal | |
5 | +# in the Software without restriction, including without limitation the rights | |
6 | +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
7 | +# copies of the Software, and to permit persons to whom the Software is | |
8 | +# furnished to do so, subject to the following conditions: | |
9 | +# | |
10 | +# The above copyright notice and this permission notice shall be included in all | |
11 | +# copies or substantial portions of the Software. | |
12 | +# | |
13 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
14 | +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15 | +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
16 | +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
17 | +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
18 | +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
19 | +# SOFTWARE. | |
20 | + | |
21 | +require 'active_support' | |
22 | +require 'active_record' | |
23 | +require 'set' | |
24 | +require 'enumerator' | |
25 | +require 'ferret' | |
26 | + | |
27 | +require 'ferret_find_methods' | |
28 | +require 'remote_functions' | |
29 | +require 'blank_slate' | |
30 | +require 'bulk_indexer' | |
31 | +require 'ferret_extensions' | |
32 | +require 'act_methods' | |
33 | +require 'search_results' | |
34 | +require 'class_methods' | |
35 | +require 'ferret_result' | |
36 | +require 'instance_methods' | |
37 | +require 'without_ar' | |
38 | + | |
39 | +require 'multi_index' | |
40 | +require 'remote_multi_index' | |
41 | +require 'more_like_this' | |
42 | + | |
43 | +require 'index' | |
44 | +require 'local_index' | |
45 | +require 'remote_index' | |
46 | + | |
47 | +require 'ferret_server' | |
48 | + | |
49 | +require 'rdig_adapter' | |
50 | + | |
51 | +# The Rails ActiveRecord Ferret Mixin. | |
52 | +# | |
53 | +# This mixin adds full text search capabilities to any Rails model. | |
54 | +# | |
55 | +# The current version emerged from the original acts_as_ferret plugin done by | |
56 | +# Kasper Weibel and a modified version done by Thomas Lockney, which both can be | |
57 | +# found on the Ferret Wiki: http://ferret.davebalmain.com/trac/wiki/FerretOnRails. | |
58 | +# | |
59 | +# basic usage: | |
60 | +# include the following in your model class (specifying the fields you want to get indexed): | |
61 | +# acts_as_ferret :fields => [ :title, :description ] | |
62 | +# | |
63 | +# now you can use ModelClass.find_by_contents(query) to find instances of your model | |
64 | +# whose indexed fields match a given query. All query terms are required by default, but | |
65 | +# explicit OR queries are possible. This differs from the ferret default, but imho is the more | |
66 | +# often needed/expected behaviour (more query terms result in fewer results). | |
67 | +# | |
68 | +# Released under the MIT license. | |
69 | +# | |
70 | +# Authors: | |
71 | +# Kasper Weibel Nielsen-Refs (original author) | |
72 | +# Jens Kraemer <jk@jkraemer.net> (active maintainer since 2006) | |
73 | +# | |
74 | +# | |
75 | +# == Global properties | |
76 | +# | |
77 | +# raise_drb_errors:: Set this to true if you want aaf to raise Exceptions | |
78 | +# in case the DRb server cannot be reached (in other words, behave like | |
79 | +# versions up to 0.4.3). Defaults to false so DRb exceptions | |
80 | +# are logged but not raised. Be sure to set up some | |
81 | +# monitoring so you still detect when your DRb server died for | |
82 | +# whatever reason. | |
83 | +# | |
84 | +# remote:: Set this to false to force acts_as_ferret into local (non-DRb) mode even if | |
85 | +# config/ferret_server.yml contains a section for the current RAILS_ENV | |
86 | +# Usually you won't need to touch this option - just configure DRb for | |
87 | +# production mode in ferret_server.yml. | |
88 | +# | |
89 | +module ActsAsFerret | |
90 | + | |
91 | + class ActsAsFerretError < StandardError; end | |
92 | + class IndexNotDefined < ActsAsFerretError; end | |
93 | + class IndexAlreadyDefined < ActsAsFerretError; end | |
94 | + | |
95 | + # global Hash containing all multi indexes created by all classes using the plugin | |
96 | + # key is the concatenation of alphabetically sorted names of the classes the | |
97 | + # searcher searches. | |
98 | + @@multi_indexes = Hash.new | |
99 | + def self.multi_indexes; @@multi_indexes end | |
100 | + | |
101 | + # global Hash containing the ferret indexes of all classes using the plugin | |
102 | + # key is the index name. | |
103 | + @@ferret_indexes = Hash.new | |
104 | + def self.ferret_indexes; @@ferret_indexes end | |
105 | + | |
106 | + # mapping from class name to index name | |
107 | + @@index_using_classes = {} | |
108 | + | |
109 | + @@logger = Logger.new "#{RAILS_ROOT}/log/acts_as_ferret.log" | |
110 | + @@logger.level = ActiveRecord::Base.logger.level rescue Logger::DEBUG | |
111 | + mattr_accessor :logger | |
112 | + | |
113 | + | |
114 | + # Default ferret configuration for index fields | |
115 | + DEFAULT_FIELD_OPTIONS = { | |
116 | + :store => :no, | |
117 | + :highlight => :yes, | |
118 | + :index => :yes, | |
119 | + :term_vector => :with_positions_offsets, | |
120 | + :boost => 1.0 | |
121 | + } | |
122 | + | |
123 | + @@raise_drb_errors = false | |
124 | + mattr_writer :raise_drb_errors | |
125 | + def self.raise_drb_errors?; @@raise_drb_errors end | |
126 | + | |
127 | + @@remote = nil | |
128 | + mattr_accessor :remote | |
129 | + def self.remote? | |
130 | + if @@remote.nil? | |
131 | + if ENV["FERRET_USE_LOCAL_INDEX"] || ActsAsFerret::Remote::Server.running | |
132 | + @@remote = false | |
133 | + else | |
134 | + @@remote = ActsAsFerret::Remote::Config.new.uri rescue false | |
135 | + end | |
136 | + if @@remote | |
137 | + logger.info "Will use remote index server which should be available at #{@@remote}" | |
138 | + else | |
139 | + logger.info "Will use local index." | |
140 | + end | |
141 | + end | |
142 | + @@remote | |
143 | + end | |
144 | + remote? | |
145 | + | |
146 | + | |
147 | + # Globally declares an index. | |
148 | + # | |
149 | + # This method is also used to implicitly declare an index when you use the | |
150 | + # acts_as_ferret call in your class. Returns the created index instance. | |
151 | + # | |
152 | + # === Options are: | |
153 | + # | |
154 | + # +models+:: Hash of model classes and their per-class option hashes which should | |
155 | + # use this index. Any models mentioned here will automatically use | |
156 | + # the index, there is no need to explicitly call +acts_as_ferret+ in the | |
157 | + # model class definition. | |
158 | + def self.define_index(name, options = {}) | |
159 | + name = name.to_sym | |
160 | + pending_classes = nil | |
161 | + if ferret_indexes.has_key?(name) | |
162 | + # seems models have been already loaded. remove that index for now, | |
163 | + # re-register any already loaded classes later on. | |
164 | + idx = get_index(name) | |
165 | + pending_classes = idx.index_definition[:registered_models] | |
166 | + pending_classes_configs = idx.registered_models_config | |
167 | + idx.close | |
168 | + ferret_indexes.delete(name) | |
169 | + end | |
170 | + | |
171 | + index_definition = { | |
172 | + :index_dir => "#{ActsAsFerret::index_dir}/#{name}", | |
173 | + :name => name, | |
174 | + :single_index => false, | |
175 | + :reindex_batch_size => 1000, | |
176 | + :ferret => {}, | |
177 | + :ferret_fields => {}, # list of indexed fields that will be filled later | |
178 | + :enabled => true, # used for class-wide disabling of Ferret | |
179 | + :mysql_fast_batches => true, # turn off to disable the faster, id based batching mechanism for MySQL | |
180 | + :raise_drb_errors => false # handle DRb connection errors by default | |
181 | + }.update( options ) | |
182 | + | |
183 | + index_definition[:registered_models] = [] | |
184 | + | |
185 | + # build ferret configuration | |
186 | + index_definition[:ferret] = { | |
187 | + :or_default => false, | |
188 | + :handle_parse_errors => true, | |
189 | + :default_field => nil, # will be set later on | |
190 | + #:max_clauses => 512, | |
191 | + #:analyzer => Ferret::Analysis::StandardAnalyzer.new, | |
192 | + # :wild_card_downcase => true | |
193 | + }.update( options[:ferret] || {} ) | |
194 | + | |
195 | + index_definition[:user_default_field] = index_definition[:ferret][:default_field] | |
196 | + | |
197 | + unless remote? | |
198 | + ActsAsFerret::ensure_directory index_definition[:index_dir] | |
199 | + index_definition[:index_base_dir] = index_definition[:index_dir] | |
200 | + index_definition[:index_dir] = find_last_index_version(index_definition[:index_dir]) | |
201 | + logger.debug "using index in #{index_definition[:index_dir]}" | |
202 | + end | |
203 | + | |
204 | + # these properties are somewhat vital to the plugin and shouldn't | |
205 | + # be overwritten by the user: | |
206 | + index_definition[:ferret].update( | |
207 | + :key => [:id, :class_name], | |
208 | + :path => index_definition[:index_dir], | |
209 | + :auto_flush => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode? | |
210 | + :create_if_missing => true | |
211 | + ) | |
212 | + | |
213 | + # field config | |
214 | + index_definition[:ferret_fields] = build_field_config( options[:fields] ) | |
215 | + index_definition[:ferret_fields].update build_field_config( options[:additional_fields] ) | |
216 | + | |
217 | + idx = ferret_indexes[name] = create_index_instance( index_definition ) | |
218 | + | |
219 | + # re-register early loaded classes | |
220 | + if pending_classes | |
221 | + pending_classes.each { |clazz| idx.register_class clazz, { :force_re_registration => true }.merge(pending_classes_configs[clazz]) } | |
222 | + end | |
223 | + | |
224 | + if models = options[:models] | |
225 | + models.each do |clazz, config| | |
226 | + clazz.send :include, ActsAsFerret::WithoutAR unless clazz.respond_to?(:acts_as_ferret) | |
227 | + clazz.acts_as_ferret config.merge(:index => name) | |
228 | + end | |
229 | + end | |
230 | + | |
231 | + return idx | |
232 | + end | |
233 | + | |
234 | + # called internally by the acts_as_ferret method | |
235 | + # | |
236 | + # returns the index | |
237 | + def self.register_class_with_index(clazz, index_name, options = {}) | |
238 | + index_name = index_name.to_sym | |
239 | + @@index_using_classes[clazz.name] = index_name | |
240 | + unless index = ferret_indexes[index_name] | |
241 | + # index definition on the fly | |
242 | + # default to all attributes of this class | |
243 | + options[:fields] ||= clazz.new.attributes.keys.map { |k| k.to_sym } | |
244 | + index = define_index index_name, options | |
245 | + end | |
246 | + index.register_class(clazz, options) | |
247 | + return index | |
248 | + end | |
249 | + | |
250 | + def self.load_config | |
251 | + # using require_dependency to make the reloading in dev mode work. | |
252 | + require_dependency "#{RAILS_ROOT}/config/aaf.rb" | |
253 | + ActsAsFerret::logger.info "loaded configuration file aaf.rb" | |
254 | + rescue LoadError | |
255 | + ensure | |
256 | + @aaf_config_loaded = true | |
257 | + end | |
258 | + | |
259 | + # returns the index with the given name. | |
260 | + def self.get_index(name) | |
261 | + name = name.to_sym rescue nil | |
262 | + unless ferret_indexes.has_key?(name) | |
263 | + if @aaf_config_loaded | |
264 | + raise IndexNotDefined.new(name.to_s) | |
265 | + else | |
266 | + load_config and return get_index name | |
267 | + end | |
268 | + end | |
269 | + ferret_indexes[name] | |
270 | + end | |
271 | + | |
272 | + # count hits for a query with multiple models | |
273 | + def self.total_hits(query, models, options = {}) | |
274 | + find_index(models).total_hits query, options.merge( :models => models ) | |
275 | + end | |
276 | + | |
277 | + # find ids of records with multiple models | |
278 | + # TODO pagination logic? | |
279 | + def self.find_ids(query, models, options = {}, &block) | |
280 | + find_index(models).find_ids query, options.merge( :models => models ), &block | |
281 | + end | |
282 | + | |
283 | + def self.find_index(models_or_index_name) | |
284 | + case models_or_index_name | |
285 | + when Symbol | |
286 | + get_index models_or_index_name | |
287 | + when String | |
288 | + get_index models_or_index_name.to_sym | |
289 | + #when Array | |
290 | + # get_index_for models_or_index_name | |
291 | + else | |
292 | + get_index_for models_or_index_name | |
293 | + end | |
294 | + end | |
295 | + | |
# Runs +query+ against the given model class or index and returns the hits
# wrapped in a SearchResults instance.
#
# A single model class is handed straight to its own find_with_ferret.
# Anything else is resolved through find_index; paging and limit/offset are
# then split between the ferret +options+ and the ActiveRecord +ar_options+.
# Both hashes are modified in place.
def self.find(query, models_or_index_name, options = {}, ar_options = {})
  # TODO generalize local/remote index so we can remove the workaround below
  # (replace logic in class_methods#find_with_ferret)
  # maybe put pagination stuff in a module to be included by all index
  # implementations
  if Class === models_or_index_name
    return models_or_index_name.find_with_ferret(query, options, ar_options)
  end

  index = find_index(models_or_index_name)
  multi = MultiIndex === index || index.shared?

  if options[:per_page]
    page = options[:page] ? options[:page].to_i : 1
    options[:page] = page
    per_page  = options[:per_page]
    first_hit = (page - 1) * per_page
    if ar_options[:conditions] && !multi
      # conditions shrink the result set, so the database has to paginate
      ar_options[:limit]  = per_page
      ar_options[:offset] = first_hit
      options[:limit] = :all
      options.delete(:offset)
    else
      # do pagination with ferret (or after everything is done in the case
      # of multi_search)
      options[:limit]  = per_page
      options[:offset] = first_hit
    end
  elsif ar_options[:conditions]
    if multi
      # multisearch ignores find_options limit and offset
      options[:limit]  ||= ar_options.delete(:limit)
      options[:offset] ||= ar_options.delete(:offset)
    else
      # let the db do the limiting and offsetting for single-table searches
      ar_options[:limit]  ||= options.delete(:limit) unless options[:limit] == :all
      ar_options[:offset] ||= options.delete(:offset)
      options[:limit] = :all
    end
  end

  # the single-class case returned above, so no model list is passed on here
  total_hits, result = index.find_records(query, options.merge(:models => nil), ar_options)
  logger.debug "Query: #{query}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
  SearchResults.new(result, total_hits, options[:page], options[:per_page])
end
341 | + | |
# returns the index used by the given class.
#
# If multiple classes are given, either the single index shared by these
# classes, or a multi index (to be used for search only) across the indexes
# of all models, is returned.
#
# Classes may be given as Class objects or as names (anything responding to
# +constantize+). Raises ArgumentError when no class is given and
# IndexNotDefined when no index is registered for the class or any of its
# superclasses.
def self.get_index_for(*classes)
  classes.flatten!
  raise ArgumentError.new("no class specified") unless classes.any?
  classes.map!(&:constantize) unless Class === classes.first
  logger.debug "index_for #{classes.inspect}"
  index = if classes.size > 1
    indexes = classes.map { |c| get_index_for c }.uniq
    indexes.size > 1 ? multi_index(indexes) : indexes.first
  else
    # walk up the inheritance chain until a class with a registered index is found
    clazz = classes.first
    clazz = clazz.superclass while clazz && !@@index_using_classes.has_key?(clazz.name)
    # clazz is nil when the chain was exhausted; leave index nil so that the
    # IndexNotDefined below is raised instead of a NoMethodError on nil
    get_index(@@index_using_classes[clazz.name]) if clazz
  end
  raise IndexNotDefined.new("no index found for class: #{classes.map(&:name).join(',')}") if index.nil?
  return index
end
363 | + | |
364 | + | |
# Builds the index object for +definition+: a RemoteIndex when remote? is
# true, a LocalIndex otherwise.
def self.create_index_instance(definition)
  index_class = remote? ? RemoteIndex : LocalIndex
  index_class.new(definition)
end
369 | + | |
# Looks up the index registered under +name+ and asks it to rebuild itself.
def self.rebuild_index(name)
  index = get_index(name)
  index.rebuild_index
end
373 | + | |
# Looks up the index registered under +name+ and forwards +new_dir+ to its
# change_index_dir method.
def self.change_index_dir(name, new_dir)
  index = get_index(name)
  index.change_index_dir(new_dir)
end
377 | + | |
# find the most recent version of an index
#
# A version is a sub directory of +basedir+ whose name consists of digits,
# optionally followed by "_" and more digits, and which contains a
# 'segments' file. Returns the path of the newest such directory, or
# +basedir+ itself when there is none.
def self.find_last_index_version(basedir)
  # check for versioned index
  versions = Dir.entries(basedir).select do |f|
    dir = File.join(basedir, f)
    f =~ /\A\d+(_\d+)?\z/ && File.directory?(dir) && File.file?(File.join(dir, 'segments'))
  end
  return basedir if versions.empty?

  # select latest version. Compare the numeric parts rather than the raw
  # strings, otherwise "x_10" would sort before "x_2".
  latest = versions.sort_by { |version| version.split('_').map { |part| part.to_i } }.last
  File.join basedir, latest
end
393 | + | |
# Returns the multi index spanning +indexes+, creating and caching it in
# ActsAsFerret::multi_indexes on first use.
#
# +indexes+ holds either index names (Symbols or Strings, decided by the
# first element) or index objects responding to +index_name+. The cache key
# is the sorted, comma separated list of index names. A RemoteMultiIndex is
# built when remote? is true, a MultiIndex otherwise.
def self.multi_index(indexes)
  first = indexes.first
  if Symbol === first || String === first
    names   = Symbol === first ? indexes.map(&:to_s) : indexes.dup
    indexes = names.map { |index_name| get_index(index_name) }
  else
    names = indexes.map { |index| index.index_name.to_s }
  end

  cache_key = names.sort.join(",")
  ActsAsFerret::multi_indexes[cache_key] ||= begin
    index_class = remote? ? ActsAsFerret::RemoteMultiIndex : ActsAsFerret::MultiIndex
    index_class.new(indexes)
  end
end
406 | + | |
# Picks the conditions that apply to +model+.
#
# A Hash is treated as a per-model lookup keyed by the underscored model
# name as a Symbol; the entry for +model+ (possibly nil) is returned.
# Anything else is returned unchanged.
def self.conditions_for_model(model, conditions = {})
  return conditions unless Hash === conditions
  conditions[model.name.underscore.to_sym]
end
415 | + | |
416 | + # retrieves search result records from a data structure like this: | |
417 | + # { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] } | |
418 | + # | |
419 | + # TODO: in case of STI AR will filter out hits from other | |
420 | + # classes for us, but this | |
421 | + # will lead to less results retrieved --> scoping of ferret query | |
422 | + # to self.class is still needed. | |
423 | + # from the ferret ML (thanks Curtis Hatter) | |
424 | + # > I created a method in my base STI class so I can scope my query. For scoping | |
425 | + # > I used something like the following line: | |
426 | + # > | |
427 | + # > query << " role:#{self.class.eql?(Contents) '*' : self.class}" | |
428 | + # > | |
429 | + # > Though you could make it more generic by simply asking | |
430 | + # > "self.descends_from_active_record?" which is how rails decides if it should | |
431 | + # > scope your "find" query for STI models. You can check out "base.rb" in | |
432 | + # > activerecord to see that. | |
433 | + # but maybe better do the scoping in find_ids_with_ferret... | |
# Loads the records for the given search hits from the database.
#
# +id_arrays+ maps a model name to a hash of id => [ rank, score ].
# +find_options+ is passed on to each model's find(:all); its :conditions
# are combined with the id restriction and its :include associations are
# limited to those the model actually has. Every record gets ferret_rank and
# ferret_score assigned. Results are ordered by rank unless an :order option
# was given.
def self.retrieve_records(id_arrays, find_options = {})
  result = []
  # get objects for each model
  id_arrays.each do |model, id_array|
    next if id_array.empty?
    model_class = model.constantize

    # merge conditions
    conditions = conditions_for_model model_class, find_options[:conditions]
    conditions = combine_conditions([ "#{model_class.table_name}.#{model_class.primary_key} in (?)",
                                      id_array.keys ],
                                    conditions)

    # check for include association that might only exist on some models in case of multi_search
    filtered_include_options = []
    if include_options = find_options[:include]
      # A Hash responds to :each as well, but iterating it yields
      # [ key, value ] pairs. Treat it as one nested include instead.
      if Hash === include_options || !include_options.respond_to?(:each)
        include_options = [ include_options ]
      end
      include_options.each do |include_option|
        association = include_option.is_a?(Hash) ? include_option.keys[0] : include_option
        filtered_include_options << include_option if model_class.reflections.has_key?(association.to_sym)
      end
    end
    filtered_include_options = nil if filtered_include_options.empty?

    # fetch
    tmp_result = model_class.find(:all, find_options.merge(:conditions => conditions,
                                                           :include    => filtered_include_options))

    # set scores and rank
    tmp_result.each do |record|
      record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
    end
    # merge with result array
    result += tmp_result
  end

  # order results as they were found by ferret, unless an AR :order
  # option was given
  result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
  return result
end
474 | + | |
# combine our conditions with those given by user, if any
#
# +conditions+ is our own [ sql, *bind_values ] array; it is extended in
# place and returned. +additional_conditions+ may be nil, an SQL string or
# an [ sql, *bind_values ] array. The user's SQL fragment is wrapped in
# parentheses so that an OR inside it cannot bypass our own restriction.
def self.combine_conditions(conditions, additional_conditions = [])
  cust_opts = (Array === additional_conditions) ? additional_conditions.dup : [ additional_conditions ]
  sql = cust_opts.first
  # nothing to add: nil/false, empty string, empty array or empty hash
  return conditions if !sql || (sql.respond_to?(:empty?) && sql.empty?)

  logger.debug "cust_opts: #{cust_opts.inspect}"
  cust_opts.shift
  conditions.first << " and (" << sql << ")"
  conditions.concat(cust_opts)
end
486 | + | |
# Builds a hash of field name => field configuration from the :fields
# option.
#
# +fields+ may be an Array of field names or a Hash of name => options;
# every entry is run through field_config_for. A nil/false value gives an
# empty hash. Any other type raises.
def self.build_field_config(fields)
  config = {}
  return config unless fields

  if Array === fields
    fields.each { |name| config[name] = field_config_for(name) }
  elsif Hash === fields
    fields.each { |name, field_options| config[name] = field_config_for(name, field_options) }
  else
    raise InvalidArgumentError.new(":fields option must be Hash or Array")
  end
  config
end
498 | + | |
# Creates +dir+ (including missing parents) unless it already exists as a
# directory or as a symlink.
def self.ensure_directory(dir)
  return if File.directory?(dir) || File.symlink?(dir)
  FileUtils.mkdir_p(dir)
end
502 | + | |
503 | + | |
# Sets the default index base dir. By default, all indexes are created
# under RAILS_ROOT/index/RAILS_ENV. Only the path is computed and stored
# here; the directory itself is not created by this method.
def self.init_index_basedir
  @@index_dir = "#{RAILS_ROOT}/index/#{RAILS_ENV}"
end
510 | + | |
511 | + mattr_accessor :index_dir | |
512 | + init_index_basedir | |
513 | + | |
# Hook run when this module is included: after the regular inclusion the
# including class is also extended with ClassMethods.
def self.append_features(base)
  super(base)
  base.extend ClassMethods
end
518 | + | |
# builds a FieldInfos instance for creation of an index
#
# Besides the configured fields from index_definition[:ferret_fields], the
# stored, untokenized :id and :class_name fields are always added. The :via
# option and Symbol (dynamic) :boost values are left out of what is handed
# to Ferret; the configuration hashes themselves are not modified.
def self.field_infos(index_definition)
  # default attributes for fields
  infos = Ferret::Index::FieldInfos.new(:store       => :no,
                                        :index       => :yes,
                                        :term_vector => :no,
                                        :boost       => 1.0)
  # primary key and class_name
  [ :id, :class_name ].each do |field|
    infos.add_field(field, :store => :yes, :index => :untokenized)
  end

  # other fields
  index_definition[:ferret_fields].each do |field, options|
    ferret_options = options.dup
    ferret_options.delete(:via)
    ferret_options.delete(:boost) if Symbol === ferret_options[:boost] # dynamic boost
    infos.add_field(field, ferret_options)
  end
  infos
end
540 | + | |
# Closes every cached multi index and empties the cache.
def self.close_multi_indexes
  # close combined index readers, just in case
  # this seems to fix a strange test failure that seems to relate to a
  # multi_index looking at an old version of the content_base index.
  #
  # TODO only close those where necessary (watch inheritance, where
  # self.name is base class of a class where key is made from)
  multi_indexes.each_value { |index| index.close }
  multi_indexes.clear
end
553 | + | |
554 | + protected | |
555 | + | |
# Returns the configuration for a single field: DEFAULT_FIELD_OPTIONS
# overridden by +options+. :via defaults to the field name, and fields that
# are not indexed never get term vectors.
def self.field_config_for(fieldname, options = {})
  settings = DEFAULT_FIELD_OPTIONS.merge(options)
  settings[:via] ||= fieldname
  settings[:term_vector] = :no if settings[:index] == :no
  settings
end
562 | + | |
563 | +end | |
564 | + | |
565 | +# include acts_as_ferret method into ActiveRecord::Base | |
566 | +ActiveRecord::Base.extend ActsAsFerret::ActMethods | |
567 | + | ... | ... |
vendor/plugins/acts_as_ferret/lib/ar_mysql_auto_reconnect_patch.rb
0 → 100644
... | ... | @@ -0,0 +1,41 @@ |
1 | +# Source: http://pastie.caboo.se/154842 | |
2 | +# | |
# in /etc/my.cnf on the MySQL server, you can set the interactive-timeout parameter,
# for example, 8 hours = 28800 sec
# interactive-timeout=28800
6 | + | |
# in ActiveRecord, set the verification_timeout to something less than
# the interactive-timeout parameter; 14400 sec = 4 hours
9 | +ActiveRecord::Base.verification_timeout = 14400 | |
10 | +ActiveRecord::Base.establish_connection | |
11 | + | |
12 | +# Below is a monkey patch for keeping ActiveRecord connections alive. | |
13 | +# http://www.sparecycles.org/2007/7/2/saying-goodbye-to-lost-connections-in-rails | |
14 | + | |
module ActiveRecord
  module ConnectionAdapters
    class MysqlAdapter
      # Runs +sql+ on the underlying connection. When the statement fails
      # because the connection to the server was lost, the adapter reconnects
      # and retries the statement once; a second failure is raised to the
      # caller. A 'Packets out of order' failure is re-raised with a hint on
      # how to fix the mysql bindings. Every other error is re-raised as is.
      def execute(sql, name = nil) #:nodoc:
        reconnect_lost_connections = true
        begin
          log(sql, name) { @connection.query(sql) }
        rescue ActiveRecord::StatementInvalid => exception
          # the pattern has to stay on a single line: a line break inside the
          # literal would become part of the text to match
          if reconnect_lost_connections && exception.message =~ /(Lost connection to MySQL server during query|MySQL server has gone away)/
            # retry re-runs the begin block only, so the flag stays false and
            # we reconnect at most once
            reconnect_lost_connections = false
            reconnect!
            retry
          elsif exception.message.split(":").first =~ /Packets out of order/
            raise ActiveRecord::StatementInvalid,
                  "'Packets out of order' error was received from the database. " \
                  "Please update your mysql bindings (gem install mysql) and read " \
                  "http://dev.mysql.com/doc/mysql/en/password-hashing.html for more information. " \
                  "If you're on Windows, use the Instant Rails installer to get the updated mysql bindings."
          else
            raise
          end
        end
      end
    end
  end
end
41 | + | ... | ... |
... | ... | @@ -0,0 +1,53 @@ |
if defined?(BlankSlate)
  # Rails 2.x has it already
  module ActsAsFerret
    class BlankSlate < ::BlankSlate
    end
  end
else
  module ActsAsFerret
    # 'backported' for Rails pre 2.0
    #
    #--
    # Copyright 2004, 2006 by Jim Weirich (jim@weirichhouse.org).
    # All rights reserved.

    # Permission is granted for use, copying, modification, distribution,
    # and distribution of modified versions of this work as long as the
    # above copyright notice is included.
    #++

    ######################################################################
    # BlankSlate provides an abstract base class with no predefined
    # methods (except for <tt>\_\_send__</tt> and <tt>\_\_id__</tt>).
    # BlankSlate is useful as a base class when writing classes that
    # depend upon <tt>method_missing</tt> (e.g. dynamic proxies).
    #
    class BlankSlate
      class << self
        # Hide the method named +name+ in the BlankSlate class. Don't
        # hide +instance_eval+ or any method beginning with "__" or
        # "methods".
        #
        # +name+ may be a String or a Symbol: instance_methods returns
        # Strings on Ruby 1.8 and Symbols on later versions, so the check
        # must not depend on either representation.
        def hide(name)
          if method_defined?(name) && name.to_s !~ /^(__|instance_eval|methods)/
            @hidden_methods ||= {}
            @hidden_methods[name.to_sym] = instance_method(name)
            undef_method name
          end
        end

        # Redefine a previously hidden method so that it may be called on a blank
        # slate object.
        #
        # no-op here since we don't hide the methods we reveal where this is
        # used in this implementation
        def reveal(name)
        end
      end

      instance_methods.each { |m| hide(m) }

    end
  end

end
53 | + | ... | ... |
... | ... | @@ -0,0 +1,35 @@ |
module ActsAsFerret
  # Adds batches of records to an index and logs the progress after each
  # batch.
  class BulkIndexer
    # Recognized +args+:
    # batch_size:: records per batch, 1000 when not given
    # logger::     receives one info message per batch
    # model::      model class; its +name+ is logged, and its +count+ is the
    #              total when reindexing
    # index::      target index, must respond to add_document
    # reindex::    when set, the total is taken from model.count
    # total::      total number of records (used unless :reindex is set)
    def initialize(args = {})
      @batch_size  = args[:batch_size] || 1000
      @logger      = args[:logger]
      @model       = args[:model]
      @index       = args[:index]
      @work_done   = 0
      @reindex     = true if args[:reindex]
      @model_count = @reindex ? @model.count.to_f : args[:total]
    end

    # Adds every record of the batch for which ferret_enabled?(true) holds
    # to the index, then logs the percentage done and an estimate of the
    # remaining time derived from how long this batch took.
    def index_records(records, offset)
      batch_time = measure_time do
        records.each do |record|
          next unless record.ferret_enabled?(true)
          @index.add_document(record.to_doc, record.ferret_analyzer)
        end
      end.to_f

      @work_done = offset.to_f / @model_count * 100.0 if @model_count > 0
      seconds_per_record = batch_time / @batch_size
      remaining_time = seconds_per_record * ( @model_count - offset + @batch_size )
      @logger.info "#{@reindex ? 're' : 'bulk '}index model #{@model.name} : #{'%.2f' % @work_done}% complete : #{'%.2f' % remaining_time} secs to finish"
    end

    # Returns the wall clock time the given block took to run.
    def measure_time
      started_at = Time.now
      yield
      Time.now - started_at
    end

  end

end
... | ... | @@ -0,0 +1,293 @@ |
1 | +module ActsAsFerret | |
2 | + | |
3 | + module ClassMethods | |
4 | + | |
# Disables ferret index updates for this model. When a block is given,
# Ferret will be re-enabled again after executing the block, even if the
# block raises. Returns nil without a block and true with one.
def disable_ferret
  aaf_configuration[:enabled] = false
  return unless block_given?
  begin
    yield
  ensure
    # never leave indexing switched off because of a failing block
    enable_ferret
  end
  true
end

# Turns ferret index updates for this model on.
def enable_ferret
  aaf_configuration[:enabled] = true
end

# Value of the :enabled flag in this model's configuration.
def ferret_enabled?
  aaf_configuration[:enabled]
end
22 | + | |
# Rebuilds this model's index by delegating to the index object returned
# by aaf_index.
def rebuild_index
  index = aaf_index
  index.rebuild_index
end
30 | + | |
# Re-indexes the records with the given ids. Use for large indexing jobs,
# i.e. after modifying a lot of records with Ferret disabled.
#
# The ids may be given as separate arguments or as one Enumerable; a
# trailing Hash is taken as options and handed to the index together with
# this model's name and the ids.
#
# Please note that the state of Ferret (enabled or disabled at class or
# record level) is not checked by this method, so if you need to do so
# (e.g. because of a custom ferret_enabled? implementation), you have to do
# so yourself.
def bulk_index(*ids)
  options = ids.last.is_a?(Hash) ? ids.pop : {}
  only_argument = ids.first
  ids = only_argument if ids.size == 1 && only_argument.is_a?(Enumerable)
  aaf_index.bulk_index(name, ids, options)
end
42 | + | |
# true if our db and table appear to be suitable for the mysql fast batch
# hack (see
# http://weblog.jamisbuck.org/2007/4/6/faking-cursors-in-activerecord),
# nil otherwise.
#
# Suitable means: the connection class name contains "Mysql", the primary
# key is 'id' and the :mysql_fast_batches configuration option is set.
def use_fast_batches?
  suitable = connection.class.name =~ /Mysql/ &&
             primary_key == 'id' &&
             aaf_configuration[:mysql_fast_batches]
  return unless suitable

  logger.info "using mysql specific batched find :all. Turn off with :mysql_fast_batches => false if you encounter problems (i.e. because of non-integer UUIDs in the id column)"
  true
end
52 | + | |
# Returns all records modified or created at or after the specified time.
# Used by the rake rebuild task to find models that need to be updated in
# the index after the rebuild finished because they changed while the
# rebuild was running.
#
# Only the updated_at/created_at columns the table actually has are
# checked. When it has neither, a warning is logged and an empty array is
# returned. Override if your models don't stick to the
# created_at/updated_at convention.
def records_modified_since(time)
  timestamp_columns = %w(updated_at created_at).select { |col| column_names.include?(col) }
  if timestamp_columns.empty?
    logger.warn "#{name}: Override records_modified_since(time) to keep the index up to date with records changed during rebuild."
    return []
  end

  # OR, not AND: a record created long ago but updated after +time+ has to
  # be found as well
  clauses = timestamp_columns.map { |col| "#{col} >= ?" }
  find :all, :conditions => [ clauses.join(' OR '), *([time] * clauses.size) ]
end
71 | + | |
# runs across all records yielding those to be indexed when the index is rebuilt
#
# Yields batches of at most +batch_size+ records together with an offset,
# all inside one transaction. With fast batches the offset is the id of the
# last record of the batch, otherwise it is the row offset of the batch.
def records_for_rebuild(batch_size = 1000)
  transaction do
    if use_fast_batches?
      offset = 0
      # the id of the last row is the cursor for the next batch, so the rows
      # have to come back ordered by id
      order = "#{table_name}.id ASC"
      loop do
        rows = find(:all, :conditions => [ "#{table_name}.id > ?", offset ],
                          :limit => batch_size, :order => order)
        break unless rows.any?
        offset = rows.last.id
        yield rows, offset
      end
    else
      order = "#{primary_key} ASC" # fixes #212
      0.step(count, batch_size) do |offset|
        yield find( :all, :limit => batch_size, :offset => offset, :order => order ), offset
      end
    end
  end
end
89 | + | |
# yields the records with the given ids, in batches of batch_size
#
# Each batch is loaded with a single query and yielded together with the
# position of its first id within +ids+. Everything runs inside one
# transaction.
def records_for_bulk_index(ids, batch_size = 1000)
  transaction do
    offset = 0
    ids.each_slice(batch_size) do |id_slice|
      yield find( :all, :conditions => ["id in (?)", id_slice] ), offset
      offset += batch_size
    end
  end
end
102 | + | |
# Retrieve the index instance for this model class. This can either be a
# LocalIndex, or a RemoteIndex instance.
#
# The index is looked up by the :name entry of this model's configuration
# on first use and memoized afterwards.
def aaf_index
  return @index if @index
  @index = ActsAsFerret::get_index(aaf_configuration[:name])
end
109 | + | |
110 | + # Finds instances by searching the Ferret index. Terms are ANDed by default, use | |
111 | + # OR between terms for ORed queries. Or specify +:or_default => true+ in the | |
112 | + # +:ferret+ options hash of acts_as_ferret. | |
113 | + # | |
114 | + # You may either use the +offset+ and +limit+ options to implement your own | |
115 | + # pagination logic, or use the +page+ and +per_page+ options to use the | |
116 | + # built in pagination support which is compatible with will_paginate's view | |
117 | + # helpers. If +page+ and +per_page+ are given, +offset+ and +limit+ will be | |
118 | + # ignored. | |
119 | + # | |
120 | + # == options: | |
121 | + # page:: page of search results to retrieve | |
122 | + # per_page:: number of search results that are displayed per page | |
123 | + # offset:: first hit to retrieve (useful for paging) | |
124 | + # limit:: number of hits to retrieve, or :all to retrieve | |
125 | + # all results | |
126 | + # lazy:: Array of field names whose contents should be read directly | |
127 | + # from the index. Those fields have to be marked | |
128 | + # +:store => :yes+ in their field options. Give true to get all | |
129 | + # stored fields. Note that if you have a shared index, you have | |
130 | + # to explicitly state the fields you want to fetch, true won't | |
131 | + # work here) | |
132 | + # | |
133 | + # +find_options+ is a hash passed on to active_record's find when | |
134 | + # retrieving the data from db, useful to i.e. prefetch relationships with | |
135 | + # :include or to specify additional filter criteria with :conditions. | |
136 | + # | |
137 | + # This method returns a +SearchResults+ instance, which really is an Array that has | |
138 | + # been decorated with a total_hits attribute holding the total number of hits. | |
139 | + # Additionally, SearchResults is compatible with the pagination helper | |
140 | + # methods of the will_paginate plugin. | |
141 | + # | |
142 | + # Please keep in mind that the number of results delivered might be less than | |
143 | + # +limit+ if you specify any active record conditions that further limit | |
144 | + # the result. Use +limit+ and +offset+ as AR find_options instead. | |
# +page+ and +per_page+ are supposed to work regardless of any
# +conditions+ present in +find_options+.
def find_with_ferret(q, options = {}, find_options = {})
  # honour the conditions of an active scope, if this class has any
  if respond_to?(:scope) && (scoped_conditions = scope(:find, :conditions))
    find_options[:conditions] = if find_options[:conditions]
      "(#{find_options[:conditions]}) AND (#{scoped_conditions})"
    else
      scoped_conditions
    end
  end

  if options[:per_page]
    page = options[:page] ? options[:page].to_i : 1
    options[:page] = page
    per_page  = options[:per_page]
    first_hit = (page - 1) * per_page
    if find_options[:conditions]
      # conditions shrink the result set, so the database has to paginate
      find_options[:limit]  = per_page
      find_options[:offset] = first_hit
      options[:limit] = :all
      options.delete(:offset)
    else
      # do pagination with ferret
      options[:limit]  = per_page
      options[:offset] = first_hit
    end
  elsif find_options[:conditions]
    # hand limit and offset over to the database, fetch all hits from ferret
    find_options[:limit]  ||= options.delete(:limit) unless options[:limit] == :all
    find_options[:offset] ||= options.delete(:offset)
    options[:limit] = :all
  end

  total_hits, result = aaf_index.find_records(q, options.merge(:models => [self]), find_options)
  logger.debug "Query: #{q}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
  SearchResults.new(result, total_hits, options[:page], options[:per_page])
end
180 | + | |
181 | + | |
# Returns the total number of hits for the given query
#
# Note that since we don't query the database here, this method won't deliver
# the expected results when used on an AR association.
#
# Query and options are passed to the index unchanged.
def total_hits(q, options={})
  index = aaf_index
  index.total_hits(q, options)
end
190 | + | |
# Finds instance model name, ids and scores by contents.
# Useful e.g. if you want to search across models or do not want to fetch
# all result records (yet).
#
# Options are the same as for find_by_contents
#
# A block can be given too, it will be executed with every result:
# find_ids_with_ferret(q, options) do |model, id, score|
#   id_array << id
#   scores_by_id[id] = score
# end
# NOTE: in case a block is given, only the total_hits value will be returned
# instead of the [total_hits, results] array!
#
# Query, options and block are handed to the index's find_ids as they are.
def find_ids_with_ferret(q, options = {}, &block)
  index = aaf_index
  index.find_ids(q, options, &block)
end
208 | + | |
209 | + | |
210 | + protected | |
211 | + | |
212 | +# def find_records_lazy_or_not(q, options = {}, find_options = {}) | |
213 | +# if options[:lazy] | |
214 | +# logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty? | |
215 | +# lazy_find_by_contents q, options | |
216 | +# else | |
217 | +# ar_find_by_contents q, options, find_options | |
218 | +# end | |
219 | +# end | |
220 | +# | |
221 | +# def ar_find_by_contents(q, options = {}, find_options = {}) | |
222 | +# result_ids = {} | |
223 | +# total_hits = find_ids_with_ferret(q, options) do |model, id, score, data| | |
224 | +# # stores ids, index and score of each hit for later ordering of | |
225 | +# # results | |
226 | +# result_ids[id] = [ result_ids.size + 1, score ] | |
227 | +# end | |
228 | +# | |
229 | +# result = ActsAsFerret::retrieve_records( { self.name => result_ids }, find_options ) | |
230 | +# | |
231 | +# # count total_hits via sql when using conditions or when we're called | |
232 | +# # from an ActiveRecord association. | |
233 | +# if find_options[:conditions] or caller.find{ |call| call =~ %r{active_record/associations} } | |
234 | +# # chances are the ferret result count is not our total_hits value, so | |
235 | +# # we correct this here. | |
236 | +# if options[:limit] != :all || options[:page] || options[:offset] || find_options[:limit] || find_options[:offset] | |
237 | +# # our ferret result has been limited, so we need to re-run that | |
238 | +# # search to get the full result set from ferret. | |
239 | +# result_ids = {} | |
240 | +# find_ids_with_ferret(q, options.update(:limit => :all, :offset => 0)) do |model, id, score, data| | |
241 | +# result_ids[id] = [ result_ids.size + 1, score ] | |
242 | +# end | |
243 | +# # Now ask the database for the total size of the final result set. | |
244 | +# total_hits = count_records( { self.name => result_ids }, find_options ) | |
245 | +# else | |
246 | +# # what we got from the database is our full result set, so take | |
247 | +# # it's size | |
248 | +# total_hits = result.length | |
249 | +# end | |
250 | +# end | |
251 | +# | |
252 | +# [ total_hits, result ] | |
253 | +# end | |
254 | +# | |
255 | +# def lazy_find_by_contents(q, options = {}) | |
256 | +# logger.debug "lazy_find_by_contents: #{q}" | |
257 | +# result = [] | |
258 | +# rank = 0 | |
259 | +# total_hits = find_ids_with_ferret(q, options) do |model, id, score, data| | |
260 | +# logger.debug "model: #{model}, id: #{id}, data: #{data}" | |
261 | +# result << FerretResult.new(model, id, score, rank += 1, data) | |
262 | +# end | |
263 | +# [ total_hits, result ] | |
264 | +# end | |
265 | + | |
266 | + | |
# Loads the record with the given +id+ through the class named by +model+
# (resolved with +constantize+), passing +find_options+ on to its find.
def model_find(model, id, find_options = {})
  model_class = model.constantize
  model_class.find(id, find_options)
end
270 | + | |
271 | + | |
272 | +# def count_records(id_arrays, find_options = {}) | |
273 | +# count_options = find_options.dup | |
274 | +# count_options.delete :limit | |
275 | +# count_options.delete :offset | |
276 | +# count = 0 | |
277 | +# id_arrays.each do |model, id_array| | |
278 | +# next if id_array.empty? | |
279 | +# model = model.constantize | |
280 | +# # merge conditions | |
281 | +# conditions = ActsAsFerret::combine_conditions([ "#{model.table_name}.#{model.primary_key} in (?)", id_array.keys ], | |
282 | +# find_options[:conditions]) | |
283 | +# opts = find_options.merge :conditions => conditions | |
284 | +# opts.delete :limit; opts.delete :offset | |
285 | +# count += model.count opts | |
286 | +# end | |
287 | +# count | |
288 | +# end | |
289 | + | |
290 | + end | |
291 | + | |
292 | +end | |
293 | + | ... | ... |
... | ... | @@ -0,0 +1,115 @@ |
1 | +module Ferret | |
2 | + | |
3 | + module Analysis | |
4 | + | |
# = PerFieldAnalyzer
#
# This PerFieldAnalyzer is a workaround to a memory leak in
# ferret 0.11.4. It does basically do the same as the original
# Ferret::Analysis::PerFieldAnalyzer, but without the leak :)
#
# http://ferret.davebalmain.com/api/classes/Ferret/Analysis/PerFieldAnalyzer.html
#
# Thanks to Ben from omdb.org for tracking this down and creating this
# workaround.
# You can read more about the issue there:
# http://blog.omdb-beta.org/2007/7/29/tracking-down-a-memory-leak-in-ferret-0-11-4
class PerFieldAnalyzer < ::Ferret::Analysis::Analyzer
  # +default_analyzer+ is used for every field without an analyzer of its own.
  def initialize( default_analyzer = StandardAnalyzer.new )
    @analyzers = {}
    @default_analyzer = default_analyzer
  end

  # Registers +analyzer+ for +field+; also available as []=.
  def add_field( field, analyzer )
    @analyzers[field] = analyzer
  end
  alias []= add_field

  # Delegates to the analyzer registered for +field+, or to the default
  # analyzer when there is none.
  def token_stream(field, string)
    analyzer = @analyzers.has_key?(field) ? @analyzers[field] : @default_analyzer
    analyzer.token_stream(field, string)
  end
end
33 | + end | |
34 | + | |
# Indexing helpers added to Ferret's index class.
class Index::Index
  attr_accessor :batch_size, :logger

  # Reindexes all records of the given model classes, then flushes,
  # optimizes and closes this index and closes all cached multi indexes.
  def index_models(models)
    models.each { |model| index_model model }
    flush
    optimize
    close
    ActsAsFerret::close_multi_indexes
  end

  # Adds all records the model yields from records_for_rebuild to this index.
  def index_model(model)
    bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                 :model => model, :index => self, :reindex => true)
    logger.info "reindexing model #{model.name}"

    model.records_for_rebuild(@batch_size) do |records, offset|
      bulk_indexer.index_records(records, offset)
    end
  end

  # Indexes the records of +model+ with the given +ids+ in batches.
  #
  # Auto flushing is switched off while the batches are added and restored
  # afterwards, also when indexing fails. The index is flushed once at the
  # end and optimized unless :optimize => false is given. Note that
  # +options+ is modified in place (the :optimize default is merged in).
  def bulk_index(model, ids, options = {})
    options.reverse_merge! :optimize => true
    orig_flush = @auto_flush
    @auto_flush = false
    begin
      bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                   :model => model, :index => self, :total => ids.size)
      model.records_for_bulk_index(ids, @batch_size) do |records, offset|
        logger.debug "#{model} bulk indexing #{records.size} at #{offset}"
        bulk_indexer.index_records(records, offset)
      end
      logger.info 'finishing bulk index...'
      flush
      if options[:optimize]
        logger.info 'optimizing...'
        optimize
      end
    ensure
      # an error must not leave the index with auto flush switched off
      @auto_flush = orig_flush
    end
    # same return value as before the ensure was introduced
    orig_flush
  end

end
76 | + | |
# add marshalling support to SortFields
class Search::SortField
  # Marshal hook: a sort field is serialized as its to_s representation.
  def _dump(depth)
    to_s
  end

  # Marshal hook: rebuilds a sort field from the string written by _dump.
  # Recognizes the DOC_ID and SCORE constants (with a trailing "!" for the
  # reversed variants) and "name:<type>" with an optional trailing "!".
  # Raises on anything else.
  #
  # The branches use `then`, which every Ruby version accepts; the
  # `when ... :` form is a syntax error from Ruby 1.9 on.
  def self._load(string)
    case string
    when /<DOC(_ID)?>!/        then Ferret::Search::SortField::DOC_ID_REV
    when /<DOC(_ID)?>/         then Ferret::Search::SortField::DOC_ID
    when '<SCORE>!'            then Ferret::Search::SortField::SCORE_REV
    when '<SCORE>'             then Ferret::Search::SortField::SCORE
    when /^(\w+):<(\w+)>(!)?$/ then new($1.to_sym, :type => $2.to_sym, :reverse => !$3.nil?)
    else raise "invalid value: #{string}"
    end
  end
end
94 | + | |
95 | + # add marshalling support to Sort | |
class Search::Sort
  # Marshal hook: a sort is dumped as its string representation.
  def _dump(depth)
    to_s
  end

  # Marshal hook: rebuilds a Sort from the string written by #_dump by
  # loading each comma separated sort field. Raises a RuntimeError when the
  # string does not look like "Sort[...]".
  def self._load(string)
    # we exclude the last <DOC> sorting as it is appended by new anyway
    raise "invalid value: #{string}" unless string =~ /^Sort\[(.*?)(<DOC>(!)?)?\]$/

    loaded = []
    $1.split(',').each do |part|
      part.strip!
      loaded << Ferret::Search::SortField._load(part) unless part.blank?
    end
    new loaded.compact
  end
end
114 | + | |
115 | +end | ... | ... |
vendor/plugins/acts_as_ferret/lib/ferret_find_methods.rb
0 → 100644
... | ... | @@ -0,0 +1,118 @@ |
1 | +module ActsAsFerret | |
2 | + # Ferret search logic common to single-class indexes, shared indexes and | |
3 | + # multi indexes. | |
4 | + module FerretFindMethods | |
5 | + | |
# Dispatches a search to #lazy_find when options[:lazy] is set and to
# #ar_find otherwise. In lazy mode +ar_options+ are not used; a warning is
# logged when any were given.
def find_records(q, options = {}, ar_options = {})
  return ar_find(q, options, ar_options) unless options[:lazy]

  unless ar_options.empty?
    logger.warn "find_options #{ar_options} are ignored because :lazy => true"
  end
  lazy_find(q, options)
end
14 | + | |
# Runs the query and wraps every hit in a FerretResult instead of loading
# records. Returns [total_hits, results]; results carry a 1-based rank in
# the order the hits were yielded.
def lazy_find(q, options = {})
  logger.debug "lazy_find: #{q}"
  hits = []
  total = find_ids(q, options) do |model, id, score, data|
    logger.debug "model: #{model}, id: #{id}, data: #{data}"
    # the rank of a hit is its position in the result list
    hits << FerretResult.new(model, id, score, hits.size + 1, data)
  end
  [total, hits]
end
25 | + | |
# Runs the query, loads the matching records and returns
# [total_hits, records].
#
# The hit count reported by ferret is replaced when conditions are given or
# more than one model is involved: if the search was limited in any way the
# query is run again without limits and the records are counted in the
# database, otherwise the number of loaded records is used.
def ar_find(q, options = {}, ar_options = {})
  total_hits, id_arrays = find_id_model_arrays q, options
  result = ActsAsFerret::retrieve_records(id_arrays, ar_options)

  needs_recount = id_arrays.size > 1 || ar_options[:conditions]
  return [total_hits, result] unless needs_recount

  limited = options[:limit] != :all || options[:page] || options[:offset] ||
            ar_options[:limit] || ar_options[:offset]
  if limited
    # the ferret result was cut short, so fetch the complete id set and let
    # the database count what remains after applying the conditions
    _ferret_total, all_ids = find_id_model_arrays(q, options.merge(:limit => :all, :offset => 0))
    total_hits = count_records(all_ids, ar_options)
  else
    # what came back from the database already is the full result set
    total_hits = result.length
  end
  [total_hits, result]
end
49 | + | |
# Counts, per model, the records whose primary key is among the given ids
# and which satisfy the conditions from +ar_options+, and returns the sum.
# :limit and :offset are dropped from the options passed to count; the
# caller's hash is not modified.
def count_records(id_arrays, ar_options = {})
  count_options = ar_options.dup
  [:limit, :offset].each { |key| count_options.delete key }

  id_arrays.inject(0) do |sum, (model_name, ids)|
    if ids.empty?
      sum
    else
      klass = model_name.constantize
      # merge conditions
      model_conditions = ActsAsFerret::conditions_for_model klass, ar_options[:conditions]
      id_condition = [ "#{klass.table_name}.#{klass.primary_key} in (?)", ids.keys ]
      count_options[:conditions] = ActsAsFerret::combine_conditions(id_condition, model_conditions)
      sum + klass.count(count_options)
    end
  end
end
65 | + | |
# Runs the query and groups the hits by model name. Returns
# [total_hits, { model_name => { id => [rank, score] } }] where rank is the
# 1-based position of the hit across all models.
def find_id_model_arrays(q, options)
  ids_by_model = {}
  position = 0
  hit_count = find_ids(q, options) do |model, id, score, _data|
    position += 1
    (ids_by_model[model] ||= {})[id] = [ position, score ]
  end
  [hit_count, ids_by_model]
end
75 | + | |
76 | + # Queries the Ferret index to retrieve model class, id, score and the | |
77 | + # values of any fields stored in the index for each hit. | |
78 | + # If a block is given, these are yielded and the number of total hits is | |
79 | + # returned. Otherwise [total_hits, result_array] is returned. | |
def find_ids(query, options = {})
  stored_fields = determine_stored_fields options

  parsed = process_query(query, options)
  parsed = scope_query_to_models parsed, options[:models] #if shared?
  logger.debug "query: #{query}\n-->#{parsed}"

  index = searcher
  collected = []
  total = index.search_each(parsed, options) do |hit, score|
    doc = index[hit]
    class_name = doc[:class_name]
    # fetch stored fields if lazy loading
    data = extract_stored_fields(doc, stored_fields)
    if block_given?
      yield class_name, doc[:id], score, data
    else
      collected << { :model => class_name, :id => doc[:id], :score => score, :data => data }
    end
  end
  # with a block only the hit count is of interest to the caller
  block_given? ? total : [total, collected]
end
103 | + | |
# Restricts +query+ to documents whose :class_name is the name of one of
# +models+ (a single class or a list of classes). The query is returned
# unchanged when +models+ is nil or :all.
def scope_query_to_models(query, models)
  return query if models.nil? || models == :all

  model_list = Class === models ? [ models ] : models

  # any of the given class names may match ...
  class_filter = Ferret::Search::BooleanQuery.new
  model_list.each do |model|
    class_filter.add_query(Ferret::Search::TermQuery.new(:class_name, model.name), :should)
  end

  # ... but both the original query and the class filter are mandatory
  scoped = Ferret::Search::BooleanQuery.new
  scoped.add_query(query, :must)
  scoped.add_query(class_filter, :must)
  scoped
end
116 | + | |
117 | + end | |
118 | +end | ... | ... |
... | ... | @@ -0,0 +1,53 @@ |
1 | +module ActsAsFerret | |
2 | + | |
3 | + # mixed into the FerretResult and AR classes calling acts_as_ferret | |
module ResultAttributes
  # ferret_score:: the score this record had when it was found via
  #                acts_as_ferret
  # ferret_rank::  the rank assigned to this record by the search that
  #                found it
  attr_accessor :ferret_score, :ferret_rank
end
11 | + | |
# Stand-in for a record found by a lazy search. Values present in the data
# hash given to the constructor are answered directly; every other call is
# forwarded to the record, which is loaded on first use (see #to_record).
class FerretResult < ActsAsFerret::BlankSlate
  include ResultAttributes
  attr_accessor :id
  reveal :methods

  # model:: name of the model class (constantized here)
  # id::    id of the record
  # score:: ferret score of the hit
  # rank::  rank of the hit
  # data::  hash of values available without loading the record
  def initialize(model, id, score, rank, data = {})
    @model = model.constantize
    @id = id
    @ferret_score = score
    @ferret_rank  = rank
    @data = data
    @use_record = false
  end

  def inspect
    # the closing '>' was missing from the original string
    "#<FerretResult wrapper for #{@model} with id #{@id}>"
  end

  # Answers from @data when possible, otherwise delegates to the record.
  # Once the record has been loaded and @use_record is set, everything is
  # delegated to the record.
  def method_missing(method, *args, &block)
    if (@ar_record && @use_record) || !@data.has_key?(method)
      to_record.send method, *args, &block
    else
      @data[method]
    end
  end

  # Accepts the optional +include_private+ argument of Object#respond_to?
  # so two-argument calls no longer raise ArgumentError. Method names are
  # compared as strings because #methods returns strings on Ruby 1.8 and
  # symbols on later versions.
  def respond_to?(name, include_private = false)
    wanted = name.to_s
    methods.any? { |m| m.to_s == wanted } ||
      @data.has_key?(name.to_sym) ||
      to_record.respond_to?(name, include_private)
  end

  # Loads the record on first use, copies rank and score onto it and
  # returns it.
  def to_record
    unless @ar_record
      @ar_record = @model.find(id)
      @ar_record.ferret_rank  = ferret_rank
      @ar_record.ferret_score = ferret_score
      # don't try to fetch attributes from RDig based records
      @use_record = !@ar_record.class.included_modules.include?(ActsAsFerret::RdigAdapter)
    end
    @ar_record
  end
end
53 | +end | ... | ... |
... | ... | @@ -0,0 +1,238 @@ |
1 | +require 'drb' | |
2 | +require 'thread' | |
3 | +require 'yaml' | |
4 | +require 'erb' | |
5 | + | |
6 | +################################################################################ | |
7 | +module ActsAsFerret | |
8 | + module Remote | |
9 | + | |
10 | + ################################################################################ | |
class Config

  ################################################################################
  DEFAULTS = {
    'host'      => 'localhost',
    'port'      => '9009',
    'cf'        => "#{RAILS_ROOT}/config/ferret_server.yml",
    'pid_file'  => "#{RAILS_ROOT}/log/ferret_server.pid",
    'log_file'  => "#{RAILS_ROOT}/log/ferret_server.log",
    'log_level' => 'debug',
    'socket'    => nil,
    'script'    => nil
  }

  ################################################################################
  # Reads +file+ (run through ERB, then parsed as YAML) and merges the
  # section for the current RAILS_ENV over DEFAULTS. 'uri' is only set when
  # the file has a section for the current environment.
  def initialize(file = DEFAULTS['cf'])
    @everything = YAML.load(ERB.new(IO.read(file)).result)
    raise "malformed ferret server config" unless @everything.is_a?(Hash)

    env_settings = @everything[RAILS_ENV]
    @config = DEFAULTS.merge(env_settings || {})
    return unless env_settings

    # a configured unix socket takes precedence over host/port
    @config['uri'] = if socket.nil?
      "druby://#{host}:#{port}"
    else
      "drbunix:#{socket}"
    end
  end

  ################################################################################
  # treat the keys of the config data as methods
  def method_missing(name, *args)
    key = name.to_s
    return super unless @config.has_key?(key)
    @config[key]
  end

end
43 | + | |
44 | + ################################################################################# | |
45 | + # This class acts as a drb server listening for indexing and | |
46 | + # search requests from models declared to 'acts_as_ferret :remote => true' | |
47 | + # | |
48 | + # Usage: | |
49 | + # - modify RAILS_ROOT/config/ferret_server.yml to suit your needs. | |
50 | + # - environments for which no section in the config file exists will use | |
51 | + # the index locally (good for unit tests/development mode) | |
52 | + # - run script/ferret_server to start the server: | |
53 | + # script/ferret_server -e production start | |
54 | + # - to stop the server run | |
55 | + # script/ferret_server -e production stop | |
56 | + # | |
57 | + class Server | |
58 | + | |
59 | + ################################################################################# | |
60 | + # FIXME include detection of OS and include the correct file | |
61 | + require 'unix_daemon' | |
62 | + include(ActsAsFerret::Remote::UnixDaemon) | |
63 | + | |
64 | + | |
65 | + ################################################################################ | |
66 | + cattr_accessor :running | |
67 | + | |
68 | + ################################################################################ | |
# Loads the server configuration, points the ActiveRecord logger at the
# configured log file and loads the optional custom startup script.
def initialize
  ActiveRecord::Base.allow_concurrency = true
  require 'ar_mysql_auto_reconnect_patch'
  @cfg = ActsAsFerret::Remote::Config.new
  @logger = Logger.new(@cfg.log_file)
  ActiveRecord::Base.logger = @logger
  # fall back to DEBUG when the configured level cannot be resolved
  level = begin
    Logger.const_get(@cfg.log_level.upcase)
  rescue StandardError
    Logger::DEBUG
  end
  ActiveRecord::Base.logger.level = level
  return unless @cfg.script

  path = File.join(RAILS_ROOT, @cfg.script)
  load path
  @logger.info "loaded custom startup script from #{path}"
end
81 | + | |
82 | + ################################################################################ | |
83 | + # start the server as a daemon process | |
def start
  # a config without a uri (or one that raises when asked for it) means
  # there is no server section for this environment
  configured_uri = begin
    @cfg.uri
  rescue StandardError
    nil
  end
  raise "ferret_server not configured for #{RAILS_ENV}" unless configured_uri
  platform_daemon do
    run_drb_service
  end
end
88 | + | |
89 | + ################################################################################ | |
90 | + # run the server and block until it exits | |
def run
  # a config without a uri (or one that raises when asked for it) means
  # there is no server section for this environment
  configured_uri = begin
    @cfg.uri
  rescue StandardError
    nil
  end
  raise "ferret_server not configured for #{RAILS_ENV}" unless configured_uri
  run_drb_service
end
95 | + | |
# Starts the DRb service with this server as front object and joins the
# DRb thread. Anything raised on the way is logged and re-raised.
def run_drb_service
  begin
    $stdout.puts("starting ferret server...")
    self.class.running = true
    DRb.start_service(@cfg.uri, self)
    DRb.thread.join
  rescue Exception => error
    @logger.error(error.to_s)
    raise
  end
end
105 | + | |
106 | + ################################################################################# | |
107 | + # handles all incoming method calls, and sends them on to the correct local index | |
108 | + # instance. | |
109 | + # | |
110 | + # Calls are not queued, so this will block until the call returned. | |
111 | + # | |
112 | + def method_missing(name, *args) | |
113 | + @logger.debug "\#method_missing(#{name.inspect}, #{args.inspect})" | |
114 | + | |
115 | + | |
116 | + index_name = args.shift | |
117 | + index = if name.to_s =~ /^multi_(.+)/ | |
118 | + name = $1 | |
119 | + ActsAsFerret::multi_index(index_name) | |
120 | + else | |
121 | + ActsAsFerret::get_index(index_name) | |
122 | + end | |
123 | + | |
124 | + if index.nil? | |
125 | + @logger.error "\#index with name #{index_name} not found in call to #{name} with args #{args.inspect}" | |
126 | + raise ActsAsFerret::IndexNotDefined.new(index_name) | |
127 | + end | |
128 | + | |
129 | + | |
130 | + # TODO find another way to implement the reconnection logic (maybe in | |
131 | + # local_index or class_methods) | |
132 | + # reconnect_when_needed(clazz) do | |
133 | + | |
134 | + # using respond_to? here so we not have to catch NoMethodError | |
135 | + # which would silently catch those from deep inside the indexing | |
136 | + # code, too... | |
137 | + | |
138 | + if index.respond_to?(name) | |
139 | + index.send name, *args | |
140 | + # TODO check where we need this: | |
141 | + #elsif clazz.respond_to?(name) | |
142 | + # @logger.debug "no luck, trying to call class method instead" | |
143 | + # clazz.send name, *args | |
144 | + else | |
145 | + raise NoMethodError.new("method #{name} not supported by DRb server") | |
146 | + end | |
147 | + rescue => e | |
148 | + @logger.error "ferret server error #{$!}\n#{$!.backtrace.join "\n"}" | |
149 | + raise e | |
150 | + end | |
151 | + | |
# Resolves +class_name+ to its constant and logs the entry found for it in
# ActsAsFerret::ferret_indexes.
def register_class(class_name)
  @logger.debug "############ registerclass #{class_name}"
  class_name.constantize
  index = ActsAsFerret::ferret_indexes[class_name.underscore.to_sym]
  @logger.debug "index for class #{class_name}: #{index}"
end
158 | + | |
159 | + # make sure we have a versioned index in place, building one if necessary | |
def ensure_index_exists(index_name)
  @logger.debug "DRb server: ensure_index_exists for index #{index_name}"
  dir = ActsAsFerret::get_index(index_name).index_definition[:index_dir]
  # a usable index is an existing directory holding a segments file whose
  # name ends in a numeric version (optionally followed by _<number>)
  versioned_index_present = File.directory?(dir) &&
                            File.file?(File.join(dir, 'segments')) &&
                            dir =~ %r{/\d+(_\d+)?$}
  rebuild_index(index_name) unless versioned_index_present
end
168 | + | |
169 | + # disconnects the db connection for the class specified by class_name | |
170 | + # used only in unit tests to check the automatic reconnection feature | |
# Resolves +class_name+ to its class and disconnects that class's database
# connection.
#
# The class is resolved here directly: the with_class helper this method
# used to call is not defined anywhere in this class, so the call ended up
# in #method_missing instead of reaching the connection.
def db_disconnect!(class_name)
  class_name.constantize.connection.disconnect!
end
176 | + | |
177 | + # hides LocalIndex#rebuild_index to implement index versioning | |
# Builds a fresh index for +index_name+ in the 'rebuild' directory, moves it
# to a new directory named after the current UTC time and switches the
# index over to that directory.
def rebuild_index(index_name)
  definition = ActsAsFerret::get_index(index_name).index_definition.dup
  models = definition[:registered_models]
  index = new_index_for(definition)
  # TODO fix reconnection stuff
  # reconnect_when_needed(clazz) do
  #  @logger.debug "DRb server: rebuild index for class(es) #{models.inspect} in #{index.options[:path]}"
  index.index_models models
  # end
  new_version = File.join definition[:index_base_dir], Time.now.utc.strftime('%Y%m%d%H%M%S')
  # create a unique directory name (needed for unit tests where
  # multiple rebuilds per second may occur)
  # File.exist? replaces the deprecated File.exists?, which newer Rubies
  # no longer provide
  if File.exist?(new_version)
    i = 0
    i += 1 while File.exist?("#{new_version}_#{i}")
    new_version << "_#{i}"
  end

  File.rename index.options[:path], new_version
  ActsAsFerret::change_index_dir index_name, new_version
end
199 | + | |
200 | + | |
201 | + protected | |
202 | + | |
# Runs the block; when it fails with a StatementInvalid whose message says
# the MySQL server has gone away, reconnects +clazz+'s connection and runs
# the block once more. Any other failure, or a second failure after the
# retry, is re-raised.
def reconnect_when_needed(clazz)
  retried = false
  begin
    yield
  rescue ActiveRecord::StatementInvalid => e
    unless e.message =~ /MySQL server has gone away/
      @logger.error "StatementInvalid caught, but unsure what to do with it: #{e}"
      raise e
    end
    # only one reconnection attempt per call
    raise e if retried

    @logger.info "StatementInvalid caught, trying to reconnect..."
    clazz.connection.reconnect!
    retried = true
    retry
  end
end
223 | + | |
# Creates a new Ferret index in the 'rebuild' directory below the index
# base dir, using a copy of the definition's ferret settings with auto
# flush switched off, and hands it the reindex batch size and this
# server's logger.
def new_index_for(index_definition)
  rebuild_settings = index_definition[:ferret].dup
  rebuild_settings.update :auto_flush  => false,
                          :create      => true,
                          :field_infos => ActsAsFerret::field_infos(index_definition),
                          :path        => File.join(index_definition[:index_base_dir], 'rebuild')

  index = Ferret::Index::Index.new(rebuild_settings)
  index.batch_size = index_definition[:reindex_batch_size]
  index.logger = @logger
  index
end
235 | + | |
236 | + end | |
237 | + end | |
238 | +end | ... | ... |
... | ... | @@ -0,0 +1,99 @@ |
1 | +module ActsAsFerret | |
2 | + | |
# Wraps a logger and prefixes every message with the index name in square
# brackets. The level predicates (debug? and friends) are passed straight
# through to the wrapped logger.
class IndexLogger
  def initialize(logger, name)
    @logger, @index_name = logger, name
  end

  %w(debug info warn error).each do |level|
    predicate = :"#{level}?"
    define_method(level) { |message| @logger.send level, "[#{@index_name}] #{message}" }
    define_method(predicate) { @logger.send predicate }
  end
end
18 | + | |
19 | + # base class for local and remote indexes | |
# base class for local and remote indexes
class AbstractIndex
  include FerretFindMethods

  attr_reader :logger, :index_name, :index_definition, :registered_models_config

  # index_definition:: hash describing the index; its :name entry becomes
  #                    the index name used for logging.
  def initialize(index_definition)
    @index_definition = index_definition
    @registered_models_config = {}
    @index_name = index_definition[:name]
    @logger = IndexLogger.new(ActsAsFerret::logger, @index_name)
  end

  # Registers +clazz+ with this index and merges its field configuration
  # into the index definition. A class whose name is already registered is
  # refused unless :force_re_registration is given. Returns the index
  # definition.
  #
  # TODO allow for per-class field configuration (i.e. different via, boosts
  # for the same field among different models)
  def register_class(clazz, options = {})
    logger.info "register class #{clazz} with index #{index_name}"

    if options.delete(:force_re_registration)
      # The duplicate check below compares class names, so an entry with the
      # same name but a different class object has to go as well; deleting
      # by identity alone would leave it in place and the forced
      # re-registration would still be refused.
      stale = lambda { |model| model == clazz || model.name == clazz.name }
      index_definition[:registered_models].delete_if(&stale)
      @registered_models_config.delete_if { |model, _config| stale.call(model) }
    end

    if index_definition[:registered_models].map(&:name).include?(clazz.name)
      logger.info("refusing re-registration of class #{clazz}")
    else
      index_definition[:registered_models] << clazz
      @registered_models_config[clazz] = options

      # merge fields from this acts_as_ferret call with predefined fields
      already_defined_fields = index_definition[:ferret_fields]
      field_config = ActsAsFerret::build_field_config options[:fields]
      field_config.update ActsAsFerret::build_field_config( options[:additional_fields] )
      field_config.each do |field, config|
        if already_defined_fields.has_key?(field)
          logger.info "ignoring redefinition of ferret field #{field}" if shared?
        else
          already_defined_fields[field] = config
          logger.info "adding new field #{field} from class #{clazz.name} to index #{index_name}"
        end
      end

      # update default field list to be used by the query parser, unless it
      # was explicitly given by user.
      #
      # It will include all content fields *not* marked as :untokenized.
      # This fixes the otherwise failing CommentTest#test_stopwords. Basically
      # this means that by default only tokenized fields (which all fields are
      # by default) will be searched. If you want to search inside the contents
      # of an untokenized field, you'll have to explicitly specify it in your
      # query.
      unless index_definition[:user_default_field]
        # grab all tokenized fields
        ferret_fields = index_definition[:ferret_fields]
        index_definition[:ferret][:default_field] = ferret_fields.keys.select do |field|
          ferret_fields[field][:index] != :untokenized
        end
        logger.info "default field list for index #{index_name}: #{index_definition[:ferret][:default_field].inspect}"
      end
    end

    return index_definition
  end

  # true if this index is used by more than one model class
  def shared?
    index_definition[:registered_models].size > 1
  end

  # Switches the index to a new index directory.
  # Used by the DRb server when switching to a new index version.
  def change_index_dir(new_dir)
    logger.debug "[#{index_name}] changing index dir to #{new_dir}"
    index_definition[:index_dir] = index_definition[:ferret][:path] = new_dir
    reopen!
    logger.debug "[#{index_name}] index dir is now #{new_dir}"
  end

  protected

end
98 | + | |
99 | +end | ... | ... |
... | ... | @@ -0,0 +1,164 @@ |
1 | +module ActsAsFerret #:nodoc: | |
2 | + | |
3 | + module InstanceMethods | |
4 | + include ResultAttributes | |
5 | + | |
6 | + # Returns an array of strings with the matches highlighted. The +query+ can | |
7 | + # either be a String or a Ferret::Search::Query object. | |
8 | + # | |
9 | + # === Options | |
10 | + # | |
11 | + # field:: field to take the content from. This field has | |
12 | + # to have its content stored in the index | |
13 | + # (:store => :yes in your call to aaf). If not | |
14 | + # given, all stored fields are searched, and the | |
15 | + # highlighted content found in all of them is returned. | |
16 | + # set :highlight => :no in the field options to | |
17 | + # avoid highlighting of contents from a :stored field. | |
18 | + # excerpt_length:: Default: 150. Length of excerpt to show. Highlighted | |
19 | + # terms will be in the centre of the excerpt. | |
20 | + # num_excerpts:: Default: 2. Number of excerpts to return. | |
21 | + # pre_tag:: Default: "<em>". Tag to place to the left of the | |
22 | + # match. | |
23 | + # post_tag:: Default: "</em>". This tag should close the | |
24 | + # +:pre_tag+. | |
25 | + # ellipsis:: Default: "...". This is the string that is appended | |
26 | + # at the beginning and end of excerpts (unless the | |
27 | + # excerpt hits the start or end of the field). You'll | |
28 | + # probably want to change this to a Unicode ellipsis | |
29 | + # character. | |
def highlight(query, options = {})
  model = self.class
  index = model.aaf_index
  # the record is identified by its primary key value and class name
  record_id = send(model.primary_key)
  index.highlight(record_id, model.name, query, options)
end
33 | + | |
34 | + # re-enable ferret indexing for this instance after a call to #disable_ferret | |
def enable_ferret
  # a nil flag means "not disabled" (see #ferret_enabled?)
  @ferret_disabled = nil
end
alias :ferret_enable :enable_ferret # compatibility
39 | + | |
40 | + # returns true if ferret indexing is enabled for this record. | |
41 | + # | |
42 | + # The optional is_bulk_index parameter will be true if the method is called | |
43 | + # by rebuild_index or bulk_index, and false otherwise. | |
44 | + # | |
45 | + # If is_bulk_index is true, the class level ferret_enabled state will be | |
46 | + # ignored by this method (per-instance ferret_enabled checks however will | |
47 | + # take place, so if you override this method to forbid indexing of certain | |
48 | + # records you're still safe). | |
def ferret_enabled?(is_bulk_index = false)
  # a record disabled via #disable_ferret is never indexed
  return false unless @ferret_disabled.nil?

  # the class level switch only matters outside of bulk indexing
  class_allows = is_bulk_index || self.class.ferret_enabled?
  return class_allows unless class_allows

  # optional per-record condition from the acts_as_ferret configuration
  condition = aaf_configuration[:if]
  condition.nil? || condition.call(self)
end
52 | + | |
53 | + # Returns the analyzer to use when adding this record to the index. | |
54 | + # | |
55 | + # Override to return a specific analyzer for any record that is to be | |
56 | + # indexed, i.e. specify a different analyzer based on language. Returns nil | |
57 | + # by default so the global analyzer (specified with the acts_as_ferret | |
58 | + # call) is used. | |
def ferret_analyzer
  # no record specific analyzer by default
  nil
end
62 | + | |
63 | + # Disable Ferret for this record for a specified amount of time. ::once will | |
64 | + # disable Ferret for the next call to #save (this is the default), ::always | |
65 | + # will do so for all subsequent calls. | |
66 | + # | |
67 | + # Note that this will turn off only the create and update hooks, but not the | |
68 | + # destroy hook. I think that's reasonable, if you think the opposite, please | |
69 | + # tell me. | |
70 | + # | |
71 | + # To manually trigger reindexing of a record after you're finished modifying | |
72 | + # it, you can call #ferret_update directly instead of #save (remember to | |
73 | + # enable ferret again before). | |
74 | + # | |
75 | + # When given a block, this will be executed without any ferret indexing of | |
76 | + # this object taking place. The optional argument in this case can be used | |
77 | + # to indicate if the object should be indexed after executing the block | |
78 | + # (::index_when_finished). Automatic Ferret indexing of this object will be | |
79 | + # turned on after the block has been executed. If passed ::index_when_true, | |
80 | + # the index will only be updated if the block evaluated not to false or nil. | |
81 | + # | |
def disable_ferret(option = :once)
  if block_given?
    @ferret_disabled = :always
    begin
      result = yield
    ensure
      # turn indexing back on even when the block raised; otherwise the
      # record would stay disabled for good
      ferret_enable
    end
    ferret_update if option == :index_when_finished || (option == :index_when_true && result)
    result
  elsif [:once, :always].include?(option)
    @ferret_disabled = option
  else
    raise ArgumentError.new("Invalid Argument #{option}")
  end
end
95 | + | |
96 | + # add to index | |
def ferret_create
  unless ferret_enabled?
    # a :once disable is used up by this skipped call
    ferret_enable if @ferret_disabled == :once
    return true # signal success to AR
  end

  logger.debug "ferret_create/update: #{self.class.name} : #{self.id}"
  self.class.aaf_index << self
  true # signal success to AR
end
alias :ferret_update :ferret_create
107 | + | |
108 | + | |
109 | + # remove from index | |
def ferret_destroy
  logger.debug "ferret_destroy: #{self.class.name} : #{self.id}"
  begin
    index = self.class.aaf_index
    index.remove(self.id, self.class.name)
  rescue
    # a failing removal is logged only and does not abort the destroy
    logger.warn("Could not find indexed value for this object: #{$!}\n#{$!.backtrace}")
  end
  true # signal success to AR
end
119 | + | |
120 | + # turn this instance into a ferret document (which basically is a hash of | |
121 | + # fieldname => value pairs) | |
def to_doc
  logger.debug "creating doc for class: #{self.class.name}, id: #{self.id}"
  doc = Ferret::Document.new

  # store the id and class name of each item
  doc[:id] = self.id
  doc[:class_name] = self.class.name

  # iterate through the fields and add them to the document
  aaf_configuration[:defined_fields].each_pair do |field, config|
    next if config[:ignore]
    doc[field] = send("#{field}_to_ferret")
  end

  # optional document boost, taken from the instance method named by :boost
  boost_method = aaf_configuration[:boost]
  if boost_method
    if respond_to?(boost_method)
      boost = send(boost_method)
      doc.boost = boost.to_i if boost
    else
      logger.error "boost option should point to an instance method: #{aaf_configuration[:boost]}"
    end
  end
  doc
end
143 | + | |
# Asks the class's index for the document number of this record,
# identified by id and class name.
def document_number
  model = self.class
  model.aaf_index.document_number(id, model.name)
end
147 | + | |
# Asks the class's index for the query that selects this record,
# identified by id and class name.
def query_for_record
  model = self.class
  model.aaf_index.query_for_record(id, model.name)
end
151 | + | |
# Returns the content to index for +field+: the result of calling +via+, or
# the instance variable of that name when the call returns nil or false.
# When +dynamic_boost+ names a method returning a value, the content is
# wrapped in a Ferret::Field carrying that value (as integer) as boost.
def content_for_field_name(field, via = field, dynamic_boost = nil)
  content = send(via) || instance_variable_get("@#{via}")
  boost_value = dynamic_boost && send(dynamic_boost)
  return content unless boost_value

  boosted = Ferret::Field.new(content)
  boosted.boost = boost_value.to_i
  boosted
end
160 | + | |
161 | + | |
162 | + end | |
163 | + | |
164 | +end | ... | ... |
... | ... | @@ -0,0 +1,199 @@ |
1 | +module ActsAsFerret | |
2 | + class LocalIndex < AbstractIndex | |
3 | + include MoreLikeThis::IndexMethods | |
4 | + | |
# index_definition:: the index definition hash, passed on unchanged to
#                    AbstractIndex#initialize. (The parameter used to be
#                    called index_name, although the superclass reads it as
#                    a definition hash.)
#
# Makes sure an index exists on disk once the base class is set up.
def initialize(index_definition)
  super(index_definition)
  ensure_index_exists
end
9 | + | |
# Closes the current index instance and returns a freshly opened one.
def reopen!
  path = index_definition[:ferret][:path]
  logger.debug "reopening index at #{path}"
  close
  ferret_index
end
15 | + | |
16 | + # The 'real' Ferret Index instance | |
def ferret_index
  ensure_index_exists
  # the instance is created once; batch size and logger are assigned on
  # every call
  @ferret_index ||= Ferret::Index::Index.new(index_definition[:ferret])
  @ferret_index.batch_size = index_definition[:reindex_batch_size]
  @ferret_index.logger = logger
  @ferret_index
end
24 | + | |
25 | + # Checks for the presence of a segments file in the index directory | |
26 | + # Rebuilds the index if none exists. | |
def ensure_index_exists
  #logger.debug "LocalIndex: ensure_index_exists at #{index_definition[:index_dir]}"
  # nothing to do when the segments file is already there
  return if File.file? "#{index_definition[:index_dir]}/segments"

  ActsAsFerret::ensure_directory(index_definition[:index_dir])
  rebuild_index
end
34 | + | |
35 | + # Closes the underlying index instance | |
def close
  begin
    @ferret_index.close if @ferret_index
  rescue StandardError
    # is raised when index already closed
    nil
  ensure
    # forget the instance whether or not closing worked
    @ferret_index = nil
  end
end
43 | + | |
44 | + # rebuilds the index from all records of the model classes associated with this index | |
def rebuild_index
  models = index_definition[:registered_models]
  logger.debug "rebuild index with models: #{models.inspect}"
  close

  # work on a copy of the ferret settings so the definition stays untouched
  rebuild_settings = index_definition[:ferret].dup
  rebuild_settings.update(:auto_flush  => false,
                          :field_infos => ActsAsFerret::field_infos(index_definition),
                          :create      => true)

  fresh_index = Ferret::Index::Index.new(rebuild_settings)
  fresh_index.batch_size = index_definition[:reindex_batch_size]
  fresh_index.logger = logger
  fresh_index.index_models models
  reopen!
end
57 | + | |
# Hands the given ids to the Ferret index for bulk indexing.
# +class_name+ is a String that is resolved to its class via #constantize.
def bulk_index(class_name, ids, options)
  index = ferret_index
  index.bulk_index(class_name.constantize, ids, options)
end
61 | + | |
# Parses the given query string into a Ferret Query object.
# Anything that is not a String is returned untouched.
def process_query(query, options = {})
  return query unless query.is_a?(String)
  ferret_index.synchronize do
    unless options[:analyzer]
      # work around ferret bug in #process_query (doesn't ensure the
      # reader is open)
      ferret_index.send(:ensure_reader_open)
      return ferret_index.process_query(query)
    end

    # a per-query analyzer was given: build a dedicated parser from the
    # index options merged with the query options
    parser_options = ferret_index.instance_variable_get('@options').merge(options)
    parser = Ferret::QueryParser.new(parser_options)
    reader = ferret_index.reader
    unless options[:all_fields] || options[:fields]
      parser.fields = reader.fields
    end
    unless options[:tokenized_fields]
      parser.tokenized_fields = reader.tokenized_fields
    end
    return parser.parse(query)
  end
end
83 | + | |
# Total number of hits for the given query.
def total_hits(query, options = {})
  search_result = ferret_index.search(query, options)
  search_result.total_hits
end
88 | + | |
# The object searches are run against: for a local index this is the
# Ferret index itself.
def searcher
  ferret_index
end
92 | + | |
93 | + | |
94 | + ###################################### | |
95 | + # methods working on a single record | |
96 | + # called from instance_methods, here to simplify interfacing with the | |
97 | + # remote ferret server | |
98 | + # TODO having to pass id and class_name around like this isn't nice | |
99 | + ###################################### | |
100 | + | |
# Adds a record to the index.
# +record+ may be the full AR object, a Ferret document instance or a Hash.
# For anything else than a Hash / Ferret document the analyzer is taken from
# the record and the record is converted with #to_doc.
def add(record, analyzer = nil)
  ready_made = record.is_a?(Hash) || record.is_a?(Ferret::Document)
  if !ready_made
    analyzer = record.ferret_analyzer
    record = record.to_doc
  end
  ferret_index.add_document(record, analyzer)
end
alias << add
111 | + | |
# Deletes the record with the given id (and class name) from the index by
# running a delete for the query that matches exactly that record.
def remove(id, class_name)
  ferret_index.query_delete(query_for_record(id, class_name))
end
116 | + | |
# Highlights search terms for the record with the given id.
#
# options:
#   :field        - restrict highlighting to this field; without it every
#                   field not configured with :store => :no or
#                   :highlight => :no is highlighted
#   :num_excerpts - maximum number of excerpts returned (default 2)
#   :pre_tag / :post_tag - markup put around matches (default <em> / </em>)
#
# Returns an Array of excerpt strings. The options hash given by the caller
# is never modified.
def highlight(id, class_name, query, options = {})
  logger.debug("highlight: #{class_name} / #{id} query: #{query}")
  # Work on a copy: merging defaults in place and setting :field per
  # iteration would otherwise leak into the caller's hash.
  options = { :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>' }.merge(options)
  highlights = []
  ferret_index.synchronize do
    doc_num = document_number(id, class_name)

    if options[:field]
      highlights << ferret_index.highlight(query, doc_num, options)
    else
      query = process_query(query) # process only once
      index_definition[:ferret_fields].each_pair do |field, config|
        next if config[:store] == :no || config[:highlight] == :no
        highlights << ferret_index.highlight(query, doc_num, options.merge(:field => field))
      end
    end
  end
  highlights.compact.flatten[0..options[:num_excerpts]-1]
end
138 | + | |
# Retrieves the ferret document number of the record with the given id.
# Raises a RuntimeError unless exactly one document matches.
def document_number(id, class_name)
  hits = ferret_index.search(query_for_record(id, class_name))
  unless hits.total_hits == 1
    raise "cannot determine document number for class #{class_name} / primary key: #{id}\nresult was: #{hits.inspect}"
  end
  hits.hits.first.doc
end
145 | + | |
# Builds a ferret query matching only the record with the given id.
# The class name only needs to be given in case of a shared index
# configuration, where id and class name are both required to match.
# NOTE(review): InvalidArgumentError is not defined anywhere in the visible
# source - confirm it exists, otherwise this raise ends in a NameError.
def query_for_record(id, class_name = nil)
  return Ferret::Search::TermQuery.new(:id, id.to_s) unless shared?

  raise InvalidArgumentError.new("shared index needs class_name argument") if class_name.nil?
  combined = Ferret::Search::BooleanQuery.new
  combined.add_query(Ferret::Search::TermQuery.new(:id, id.to_s), :must)
  combined.add_query(Ferret::Search::TermQuery.new(:class_name, class_name), :must)
  combined
end
159 | + | |
160 | + | |
161 | + | |
# Works out which fields should be loaded from the index for lazy loading.
# options[:lazy] may be an Array of field names (returned as is) or any
# other truthy value, meaning "all fields configured with :store => :yes".
# A nil / false :lazy option is returned unchanged.
def determine_stored_fields(options = {})
  stored_fields = options[:lazy]
  if stored_fields && !stored_fields.is_a?(Array)
    stored_fields = []
    index_definition[:ferret_fields].each do |field, config|
      stored_fields << field if config[:store] == :yes
    end
  end
  logger.debug "stored_fields: #{stored_fields.inspect}"
  stored_fields
end
170 | + | |
# Loads data for fields declared as :lazy from the Ferret document.
# Returns a Hash keyed by each field's :via setting; fields without a
# configuration entry are ignored, a nil field list yields an empty Hash.
def extract_stored_fields(doc, stored_fields)
  field_configs = index_definition[:ferret_fields]
  data = {}
  logger.debug "extracting stored fields #{stored_fields.inspect} from document #{doc[:class_name]} / #{doc[:id]}"
  (stored_fields || []).each do |field|
    config = field_configs[field]
    next unless config
    data[config[:via]] = doc[field]
  end
  logger.debug "done: #{data.inspect}"
  data
end
184 | + | |
185 | + protected | |
186 | + | |
187 | + # returns a MultiIndex instance operating on a MultiReader | |
188 | + #def multi_index(model_classes) | |
189 | + # model_classes.map!(&:constantize) if String === model_classes.first | |
190 | + # model_classes.sort! { |a, b| a.name <=> b.name } | |
191 | + # key = model_classes.inject("") { |s, clazz| s + clazz.name } | |
192 | + # multi_config = index_definition[:ferret].dup | |
193 | + # multi_config.delete :default_field # we don't want the default field list of *this* class for multi_searching | |
194 | + # ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config) | |
195 | + #end | |
196 | + | |
197 | + end | |
198 | + | |
199 | +end | ... | ... |
... | ... | @@ -0,0 +1,217 @@ |
1 | +module ActsAsFerret #:nodoc: | |
2 | + | |
3 | + module MoreLikeThis | |
4 | + | |
5 | + module InstanceMethods | |
6 | + | |
7 | + # returns other instances of this class, which have similar contents | |
8 | + # like this one. Basically works like this: find out n most interesting | |
9 | + # (i.e. characteristic) terms from this document, and then build a | |
10 | + # query from those which is run against the whole index. Which terms | |
11 | + # are interesting is decided on various criteria which can be | |
12 | + # influenced by the given options. | |
13 | + # | |
14 | + # The algorithm used here is a quite straight port of the MoreLikeThis class | |
15 | + # from Apache Lucene. | |
16 | + # | |
17 | + # options are: | |
18 | + # :field_names : Array of field names to use for similarity search (mandatory) | |
19 | + # :min_term_freq => 2, # Ignore terms with less than this frequency in the source doc. | |
20 | + # :min_doc_freq => 5, # Ignore words which do not occur in at least this many docs | |
21 | + # :min_word_length => nil, # Ignore words shorter than this length (longer words tend to | |
22 | + # be more characteristic for the document they occur in). | |
23 | + # :max_word_length => nil, # Ignore words if greater than this len. | |
24 | + # :max_query_terms => 25, # maximum number of terms in the query built | |
25 | + # :max_num_tokens => 5000, # maximum number of tokens to examine in a single field | |
26 | + # :boost => false, # when true, a boost according to the relative score of | |
27 | + # a term is applied to this Term's TermQuery. | |
28 | + # :similarity => 'DefaultAAFSimilarity' # the similarity implementation to use (the default | |
29 | + # equals Ferret's internal similarity implementation) | |
30 | + # :analyzer => 'Ferret::Analysis::StandardAnalyzer' # class name of the analyzer to use | |
31 | + # :append_to_query => nil # proc taking a query object as argument, which will be called after generating the query. can be used to further manipulate the query used to find related documents, i.e. to constrain the search to a given class in single table inheritance scenarios | |
32 | + # ferret_options : Ferret options handed over to find_by_contents (i.e. for limits and sorting) | |
33 | + # ar_options : options handed over to find_by_contents for AR scoping | |
def more_like_this(options = {}, ferret_options = {}, ar_options = {})
  defaults = {
    :field_names => nil,      # Default field names
    :min_term_freq => 2,      # Ignore terms with less than this frequency in the source doc.
    :min_doc_freq => 5,       # Ignore words which do not occur in at least this many docs
    :min_word_length => 0,    # Ignore words if less than this len. Default is not to ignore any words.
    :max_word_length => 0,    # Ignore words if greater than this len. Default is not to ignore any words.
    :max_query_terms => 25,   # maximum number of terms in the query built
    :max_num_tokens => 5000,  # maximum number of tokens to analyze when analyzing contents
    :boost => false,
    :similarity => 'ActsAsFerret::MoreLikeThis::DefaultAAFSimilarity', # class name of the similarity implementation to use
    :analyzer => 'Ferret::Analysis::StandardAnalyzer',                 # class name of the analyzer to use
    :append_to_query => nil,
    :base_class => self.class # base class to use for querying, useful in STI scenarios where BaseClass.find_by_contents can be used to retrieve results from other classes, too
  }
  # caller supplied values win over the defaults
  options = defaults.merge(options)

  clazz = options[:base_class]
  # the index side gets the class name, not the class object
  options[:base_class] = clazz.name
  query = clazz.aaf_index.build_more_like_this_query(self.id, self.class.name, options)

  query_hook = options[:append_to_query]
  query_hook.call(query) if query_hook
  clazz.find_with_ferret(query, ferret_options, ar_options)
end
58 | + | |
59 | + end | |
60 | + | |
61 | + module IndexMethods | |
62 | + | |
# Builds the "more like this" query for the record with the given id.
# options[:similarity] and options[:analyzer] arrive as class names and are
# replaced (in the given hash) by fresh instances of those classes.
#
# TODO to allow morelikethis for unsaved records, we have to give the
# unsaved record's data to this method. check how this will work out
# via drb...
def build_more_like_this_query(id, class_name, options)
  options[:similarity] = options[:similarity].constantize.new
  options[:analyzer] = options[:analyzer].constantize.new
  ferret_index.synchronize do # avoid that concurrent writes close our reader
    ferret_index.send(:ensure_reader_open)
    reader = ferret_index.send(:reader)
    frequencies = retrieve_terms(id, class_name, reader, options)
    queue = create_queue(frequencies, reader, options)
    create_query(id, class_name, queue, options)
  end
end
76 | + | |
77 | + protected | |
78 | + | |
# Turns the scored terms in +priority_queue+ into a BooleanQuery: terms are
# popped off the end of the queue and added as :should clauses until the
# queue is empty, Ferret refuses more clauses or :max_query_terms is
# reached. The original record is excluded with a :must_not clause.
def create_query(id, class_name, priority_queue, options={})
  query = Ferret::Search::BooleanQuery.new
  term_limit = options[:max_query_terms]
  added = 0
  top_score = nil
  while (item = priority_queue.pop)
    clause = Ferret::Search::TermQuery.new(item.field, item.word)

    if options[:boost]
      # boost term according to relative score
      # TODO untested
      top_score ||= item.score
      clause.boost = item.score / top_score
    end

    begin
      query.add_query(clause, :should)
    rescue Ferret::Search::BooleanQuery::TooManyClauses
      break
    end

    added += 1
    break if term_limit > 0 && added >= term_limit
  end
  # exclude the original record
  query.add_query(query_for_record(id, class_name), :must_not)
  query
end
104 | + | |
105 | + | |
106 | + | |
# Creates a term/term_frequency map for terms from the fields given in
# options[:field_names].
#
# For every field the terms are taken from, in this order of preference:
# 1. the term vector stored in the index,
# 2. the field content stored in the index (run through the analyzer),
# 3. the content of the record loaded from the database (run through the
#    analyzer).
#
# Raises a RuntimeError when +id+ is nil (unsaved record).
# Returns a Hash (default value 0) mapping term text to its frequency.
def retrieve_terms(id, class_name, reader, options)
  raise "more_like_this atm only works on saved records" if id.nil?
  # best effort: the record may not be in the index, in which case only the
  # database fallback below is used
  doc_num = document_number(id, class_name) rescue nil
  field_names = options[:field_names]
  max_num_tokens = options[:max_num_tokens]
  term_freq_map = Hash.new(0)
  record = nil
  field_names.each do |field|
    term_freq_vector = reader.term_vector(doc_num, field) if doc_num
    if term_freq_vector
      # use stored term vector
      term_freq_vector.terms.each do |term|
        term_freq_map[term.text] += term.positions.size unless noise_word?(term.text, options)
      end
    else
      # no term vector stored, but we may have stored the contents in the
      # index -> extract terms from there
      content = nil
      content = reader[doc_num][field] if doc_num
      unless content
        # no term vector, no stored content, so try content from this instance
        record ||= options[:base_class].constantize.find(id)
        content = record.content_for_field_name(field.to_s)
      end
      # (a debug `puts` used to sit here; it dereferenced the index document
      # and blew up whenever the record was not in the index)
      token_count = 0

      ts = options[:analyzer].token_stream(field, content)
      while token = ts.next
        break if (token_count += 1) > max_num_tokens
        next if noise_word?(token.text, options)
        term_freq_map[token.text] += 1
      end
    end
  end
  term_freq_map
end
154 | + | |
# Creates a list of word / fieldname / score structures, ordered by
# ascending score (so the best term is popped first from the end).
# Terms below :min_term_freq, below :min_doc_freq or not found in any of
# the fields are left out.
def create_queue(term_freq_map, reader, options)
  similarity = options[:similarity]
  num_docs = reader.num_docs
  field_names = options[:field_names]
  items = []

  term_freq_map.each_pair do |word, tf|
    # filter out words that don't occur enough times in the source
    next if options[:min_term_freq] && tf < options[:min_term_freq]

    # go through all the fields and find the largest document frequency
    top_field = field_names.first
    doc_freq = 0
    field_names.each do |field_name|
      freq = reader.doc_freq(field_name, word)
      next unless freq > doc_freq
      top_field = field_name
      doc_freq = freq
    end

    # filter out words that don't occur in enough docs
    next if options[:min_doc_freq] && doc_freq < options[:min_doc_freq]
    next if doc_freq == 0 # index update problem ?

    score = tf * similarity.idf(doc_freq, num_docs)
    items << FrequencyQueueItem.new(word, top_field, score)
  end

  items.sort { |a, b| a.score <=> b.score }
end
188 | + | |
# Tells whether +text+ should be ignored when collecting terms.
#
# A word is noise when
# * :min_word_length is positive and the word is shorter than that,
# * :max_word_length is positive and the word is longer than that, or
# * :stop_words is given and contains the word.
#
# nil or 0 for a length limit disables that limit.
def noise_word?(text, options)
  len = text.length
  min_len = options[:min_word_length].to_i
  max_len = options[:max_word_length].to_i
  stop_words = options[:stop_words]

  return true if min_len > 0 && len < min_len
  return true if max_len > 0 && len > max_len
  # look the word up in the stop word list, not among the option keys
  stop_words ? stop_words.include?(text) : false
end
197 | + | |
198 | + end | |
199 | + | |
# Default similarity used by more_like_this.
class DefaultAAFSimilarity
  # Inverse document frequency: log(num_docs / (doc_freq + 1)) + 1,
  # or 0.0 for an empty index.
  def idf(doc_freq, num_docs)
    if num_docs == 0
      0.0
    else
      Math.log(num_docs.to_f / (doc_freq + 1)) + 1.0
    end
  end
end
206 | + | |
207 | + | |
# Read-only value holder for one scored term: the word, the field it scored
# best in, and the score.
class FrequencyQueueItem
  attr_reader :word, :field, :score

  def initialize(word, field, score)
    @word, @field, @score = word, field, score
  end
end
214 | + | |
215 | + end | |
216 | +end | |
217 | + | ... | ... |
... | ... | @@ -0,0 +1,126 @@ |
1 | +module ActsAsFerret #:nodoc: | |
2 | + | |
3 | + # Base class for remote and local multi-indexes | |
4 | + class MultiIndexBase | |
5 | + include FerretFindMethods | |
6 | + attr_accessor :logger | |
7 | + | |
# indexes - the single indexes this multi index spans
# options - overrides for the query parser options; :default_field defaults
#           to the combined default fields of all given indexes
def initialize(indexes, options = {})
  @indexes = indexes
  # ensure all models indexes exist
  indexes.each(&:ensure_index_exists)
  default_fields = indexes.map { |idx| idx.index_definition[:ferret][:default_field] }.flatten.uniq
  @options = { :default_field => default_fields }.merge(options)
  index_names = indexes.map(&:index_name).join(',')
  @logger = IndexLogger.new(ActsAsFerret::logger, "multi: #{index_names}")
end
20 | + | |
# Runs the inherited ar_find without limit and applies :limit / :offset to
# the combined result afterwards.
#
# Note that :limit and :offset are removed from the given options hash and
# :limit is set to :all before delegating.
#
# Returns [total_hits, results]; results is always an Array, even when the
# offset lies beyond the last result.
def ar_find(query, options = {}, ar_options = {})
  limit = options.delete(:limit)
  offset = options.delete(:offset) || 0
  options[:limit] = :all
  total_hits, result = super query, options, ar_options
  # with AR conditions the number of ferret hits says nothing about the
  # number of records that actually made it into the result
  total_hits = result.size if ar_options[:conditions]
  if limit && limit != :all
    # Array#[] returns nil for a range that starts past the end
    result = result[offset..limit+offset-1] || []
  end
  [total_hits, result]
end
32 | + | |
# Collects the stored (lazy loadable) fields of all sub indexes.
# Returns nil when no :lazy option was given, otherwise the list of field
# names without duplicates. Sub indexes that report no fields (nil / false)
# are skipped instead of breaking the concatenation.
def determine_stored_fields(options)
  return nil unless options.has_key?(:lazy)
  stored_fields = []
  @indexes.each do |index|
    fields = index.determine_stored_fields(options)
    stored_fields.concat(fields) if fields
  end
  stored_fields.uniq
end
41 | + | |
# A multi index never reports itself as a shared index.
def shared?
  false
end
45 | + | |
46 | + end | |
47 | + | |
48 | + # This class can be used to search multiple physical indexes at once. | |
49 | + class MultiIndex < MultiIndexBase | |
50 | + | |
# Delegates extraction of the stored fields to the index responsible for
# the document's class. Returns nil when no stored fields were requested.
def extract_stored_fields(doc, stored_fields)
  return if stored_fields.blank?
  ActsAsFerret::get_index_for(doc[:class_name]).extract_stored_fields(doc, stored_fields)
end
54 | + | |
# Number of hits the given query yields across all sub indexes.
def total_hits(q, options = {})
  found = search(q, options)
  found.total_hits
end
58 | + | |
# Parses the query (if it is a String) and runs it on the multi searcher.
def search(query, options={})
  parsed = process_query(query, options)
  logger.debug "parsed query: #{parsed.to_s}"
  searcher.search(parsed, options)
end
64 | + | |
# Like #search, but hands every hit to the given block through the multi
# searcher's search_each.
def search_each(query, options = {}, &block)
  parsed = process_query(query, options)
  searcher.search_each(parsed, options, &block)
end
69 | + | |
# Checks if our reader is still up to date. Falsy when no reader has been
# opened yet.
# (the original authors noted a segfault of @reader.latest? with ferret
# 0.10.4 and kept a per-sub-reader check around as commented-out code)
def latest?
  return @reader unless @reader
  @reader.latest?
end
80 | + | |
# The searcher spanning all sub indexes; (re)opened first when necessary.
def searcher
  ensure_searcher
  @searcher
end
85 | + | |
# Fetches the document with number +i+ through the multi searcher.
def doc(i)
  searcher[i]
end
alias :[] :doc
90 | + | |
# Memoized query parser configured with this multi index' options.
def query_parser
  @query_parser ||= Ferret::QueryParser.new(@options)
end
94 | + | |
# Parses String queries with the multi index' query parser; anything else
# is passed through unchanged. +options+ is accepted but not used here.
def process_query(query, options = {})
  return query unless query.is_a?(String)
  query_parser.parse(query)
end
99 | + | |
# Closes the searcher and the reader, in that order, as far as they have
# been opened. The instance variables are left untouched.
def close
  @searcher && @searcher.close
  @reader && @reader.close
end
104 | + | |
105 | + protected | |
106 | + | |
# (Re)opens readers on all sub indexes and builds a combined reader and
# searcher from them, unless the current reader is still up to date.
#
# Raises a RuntimeError naming the affected index when one of the sub
# readers cannot be opened.
def ensure_searcher
  return if latest?

  @sub_readers = @indexes.map do |idx|
    begin
      reader = Ferret::Index::IndexReader.new(idx.index_definition[:index_dir])
      logger.debug "sub-reader opened: #{reader}"
      reader
    rescue Exception
      # kept as broad as before on purpose; the message now names the index
      # instead of referencing an undefined variable
      raise "error opening reader on index #{idx.index_name}: #{$!}"
    end
  end
  # drop the outdated searcher / reader before replacing them
  close
  @reader = Ferret::Index::IndexReader.new(@sub_readers)
  @searcher = Ferret::Search::Searcher.new(@reader)
end
123 | + | |
124 | + end # of class MultiIndex | |
125 | + | |
126 | +end | ... | ... |
... | ... | @@ -0,0 +1,141 @@ |
1 | +begin | |
2 | + require 'rdig' | |
3 | +rescue LoadError | |
4 | +end | |
5 | +module ActsAsFerret | |
6 | + | |
7 | + # The RdigAdapter is automatically included into your model if you specify | |
8 | + # the +:rdig+ options hash in your call to acts_as_ferret. It overrides | |
9 | + # several methods declared by aaf to retrieve documents with the help of | |
10 | + # RDig's http crawler when you call rebuild_index. | |
11 | + module RdigAdapter | |
12 | + | |
13 | + if defined?(RDig) | |
14 | + | |
# Hook run when the adapter is included: extends the including class with
# ClassMethods and mixes InstanceMethods into it.
def self.included(target)
  target.extend(ClassMethods)
  target.send(:include, InstanceMethods)
end
19 | + | |
# Indexer class to replace RDig's original indexer.
# Collects crawled documents (wrapped in model instances) and hands them in
# batches to the block given on construction, together with the number of
# documents handed over so far.
class Indexer
  include MonitorMixin

  def initialize(batch_size, model_class, &block)
    @batch_size, @model_class, @block = batch_size, model_class, block
    @documents = []
    @offset = 0
    super()
  end

  # Queues one crawled document; flushes once a full batch is reached.
  def add(doc)
    synchronize do
      @documents.push(@model_class.new(doc.uri.to_s, doc))
      process_batch unless @documents.size < @batch_size
    end
  end
  alias << add

  # Flushes whatever is still queued.
  def close
    synchronize { process_batch }
  end

  protected

  # Passes the queued documents and the current offset to the block, then
  # advances the offset and starts a new queue.
  def process_batch
    ActsAsFerret::logger.info "RdigAdapter::Indexer#process_batch: #{@documents.size} docs in queue, offset #{@offset}"
    @block.call @documents, @offset
    @offset += @documents.size
    @documents = []
  end
end
54 | + | |
module ClassMethods
  # overriding aaf to return the documents fetched via RDig.
  # Crawls with RDig and feeds the fetched documents in batches to the
  # given block. Errors are logged, not raised; the indexer is always
  # closed at the end.
  def records_for_rebuild(batch_size = 1000, &block)
    indexer = Indexer.new(batch_size, self, &block)
    configure_rdig do
      crawler = RDig::Crawler.new(RDig.configuration, ActsAsFerret::logger)
      crawler.instance_variable_set('@indexer', indexer)
      ActsAsFerret::logger.debug "now crawling..."
      crawler.crawl
    end
  rescue StandardError => error
    ActsAsFerret::logger.error error
    ActsAsFerret::logger.debug error.backtrace.join("\n")
  ensure
    indexer && indexer.close
  end

  # overriding aaf to skip reindexing records changed during the rebuild
  # when rebuilding with the rake task
  def records_modified_since(time)
    []
  end

  # unfortunately need to modify global RDig.configuration because it's
  # used everywhere in RDig
  # NOTE(review): the backup is a shallow #dup and the crawler settings are
  # changed through setters on the live crawler object - confirm that the
  # restore below really brings the previous crawler settings back.
  def configure_rdig
    # back up original config
    old_logger = RDig.logger
    old_cfg = RDig.configuration.dup
    RDig.logger = ActsAsFerret.logger
    if rdig_configuration[:crawler]
      rdig_configuration[:crawler].each do |key, value|
        RDig.configuration.crawler.send(:"#{key}=", value)
      end
    end
    ce_config = rdig_configuration[:content_extraction]
    if ce_config
      RDig.configuration.content_extraction = OpenStruct.new(:hpricot => OpenStruct.new(ce_config))
    end
    yield
  ensure
    # restore original config
    RDig.configuration.crawler = old_cfg.crawler
    RDig.configuration.content_extraction = old_cfg.content_extraction
    RDig.logger = old_logger
  end

  # overriding aaf to enforce loading page title and content from the
  # ferret index
  def find_with_ferret(q, options = {}, find_options = {})
    options[:lazy] = true
    super
  end

  # Builds a page object for the given id (the url) without crawling.
  def find_for_id(id)
    new(id)
  end
end
108 | + | |
module InstanceMethods
  # uri           - url of the page, doubles as the record id
  # rdig_document - the crawled RDig document, if available
  def initialize(uri, rdig_document = nil)
    @id, @rdig_document = uri, rdig_document
  end

  # Title of the document.
  # Use the +:title_tag_selector+ option to declare the hpricot expression
  # that should be used for selecting the content for this field.
  def title
    @rdig_document.title
  end

  # Content of the document.
  # Use the +:content_tag_selector+ option to declare the hpricot expression
  # that should be used for selecting the content for this field.
  def content
    @rdig_document.body
  end

  # Url of this document.
  def id
    @id
  end

  # Short human readable description of the page.
  def to_s
    "Page at #{id}, title: #{title}"
  end
end
138 | + end | |
139 | + end | |
140 | + | |
141 | +end | ... | ... |
... | ... | @@ -0,0 +1,23 @@ |
1 | +module ActsAsFerret | |
# Helpers shared by the index classes talking to the remote ferret server.
module RemoteFunctions

  private

  # Yields model, id, score and data of every result hash to the block.
  # Returns +total_hits+.
  def yield_results(total_hits, results)
    results.each { |hit| yield hit[:model], hit[:id], hit[:score], hit[:data] }
    total_hits
  end

  # Runs the block and returns its value. A DRb connection error is logged
  # and then either re-raised (when ActsAsFerret::raise_drb_errors? says so)
  # or answered with +return_value_in_case_of_error+.
  def handle_drb_error(return_value_in_case_of_error = false)
    begin
      yield
    rescue DRb::DRbConnError => e
      logger.error "DRb connection error: #{e}"
      logger.warn e.backtrace.join("\n")
      raise e if ActsAsFerret::raise_drb_errors?
      return_value_in_case_of_error
    end
  end
end
23 | +end | ... | ... |
... | ... | @@ -0,0 +1,54 @@ |
1 | +require 'drb' | |
2 | +module ActsAsFerret | |
3 | + | |
# This index implementation connects to a remote ferret server instance. It
# basically forwards all calls to the remote server, prefixing the
# arguments with the name of this index.
class RemoteIndex < AbstractIndex
  include RemoteFunctions

  def initialize(config)
    super
    @server = DRbObject.new(nil, ActsAsFerret::remote)
  end

  # Cause model classes to be loaded (and indexes get declared) on the DRb
  # side of things. Only the class name is sent; +options+ is not used.
  def register_class(clazz, options)
    handle_drb_error { @server.register_class(clazz.name) }
  end

  # Everything not defined here goes to the server.
  def method_missing(method_name, *args)
    remote_args = [index_name] + args
    handle_drb_error { @server.send(method_name, *remote_args) }
  end

  # Proxy any methods that require special return values in case of errors
  { :highlight => [] }.each do |method_name, default_result|
    define_method(method_name) do |*args|
      remote_args = [index_name] + args
      handle_drb_error(default_result) { @server.send(method_name, *remote_args) }
    end
  end

  # Asks the server for the ids matching +q+. With a block every result is
  # yielded and the total hit count returned, without a block the pair
  # [total_hits, results] is returned. A connection error counts as
  # "nothing found".
  def find_ids(q, options = {}, &proc)
    total_hits, results = handle_drb_error([0, []]) { @server.find_ids(index_name, q, options) }
    if block_given?
      yield_results(total_hits, results, &proc)
    else
      [total_hits, results]
    end
  end

  # add record to index; the record is converted with #to_doc before it is
  # sent to the server
  def add(record)
    handle_drb_error { @server.add(index_name, record.to_doc) }
  end
  alias << add

end
53 | + | |
54 | +end | ... | ... |
... | ... | @@ -0,0 +1,20 @@ |
1 | +module ActsAsFerret | |
# Multi index whose searches are run by the remote ferret server: calls are
# forwarded as multi_* methods together with the names of the indexes.
class RemoteMultiIndex < MultiIndexBase
  include RemoteFunctions

  def initialize(indexes, options = {})
    @index_names = indexes.map { |index| index.index_name }
    @server = DRbObject.new(nil, ActsAsFerret::remote)
    super
  end

  # With a block every result is yielded and the total hit count returned,
  # otherwise [total_hits, results]. A connection error counts as "nothing
  # found".
  def find_ids(query, options, &proc)
    total_hits, results = handle_drb_error([0, []]) { @server.multi_find_ids(@index_names, query, options) }
    if block_given?
      yield_results(total_hits, results, &proc)
    else
      [total_hits, results]
    end
  end

  # Forwards unknown methods to their multi_ counterpart on the server.
  def method_missing(name, *args)
    remote_method = :"multi_#{name}"
    handle_drb_error { @server.send(remote_method, @index_names, *args) }
  end
end
20 | +end | ... | ... |
... | ... | @@ -0,0 +1,50 @@ |
1 | +module ActsAsFerret | |
2 | + | |
# decorator that adds a total_hits accessor and will_paginate compatible
# paging support to search result arrays.
# Everything not defined here is forwarded to the wrapped result array.
class SearchResults < ActsAsFerret::BlankSlate
  reveal :methods
  attr_reader :current_page, :per_page, :total_hits, :total_pages
  alias total_entries total_hits  # will_paginate compatibility
  alias page_count total_pages    # will_paginate backwards compatibility

  # results      - the records of the current page
  # total_hits   - overall number of hits of the search
  # current_page - number of the page +results+ represents
  # per_page     - page size; defaults to total_hits (i.e. a single page)
  def initialize(results, total_hits, current_page = 1, per_page = nil)
    @results = results
    @total_hits = total_hits
    @current_page = current_page
    @per_page = (per_page || total_hits)
    @total_pages = @per_page > 0 ? (@total_hits / @per_page.to_f).ceil : 0
  end

  def method_missing(symbol, *args, &block)
    @results.send(symbol, *args, &block)
  end

  # True for methods of the decorator itself as well as for methods of the
  # wrapped results. Method names are compared as strings, so this works
  # whether #methods returns Strings or Symbols, and the optional second
  # argument of Object#respond_to? is accepted.
  def respond_to?(name, include_private = false)
    methods.map { |m| m.to_s }.include?(name.to_s) || @results.respond_to?(name, include_private)
  end


  # code from here on was directly taken from will_paginate's collection.rb

  # Current offset of the paginated collection. If we're on the first page,
  # it is always 0. If we're on the 2nd page and there are 30 entries per page,
  # the offset is 30. This property is useful if you want to render ordinals
  # besides your records: simply start with offset + 1.
  #
  def offset
    (current_page - 1) * per_page
  end

  # current_page - 1 or nil if there is no previous page
  def previous_page
    current_page > 1 ? (current_page - 1) : nil
  end

  # current_page + 1 or nil if there is no next page
  def next_page
    current_page < total_pages ? (current_page + 1) : nil
  end
end
49 | + | |
50 | +end | ... | ... |
... | ... | @@ -0,0 +1,58 @@ |
1 | +################################################################################ | |
2 | +require 'optparse' | |
3 | + | |
4 | +################################################################################ | |
# Defaults for the command line options; filled in by the parser below.
$ferret_server_options = { 'environment' => nil, 'debug' => nil, 'root' => nil }

################################################################################
# Parse the command line. Exactly one action (start, stop or run) has to
# remain after the options, otherwise usage is printed and we exit with 1.
OptionParser.new do |optparser|
  optparser.banner = "Usage: #{File.basename($0)} [options] {start|stop|run}"

  optparser.on('-h', '--help', "This message") do
    puts optparser
    exit
  end

  optparser.on('-R', '--root=PATH', 'Set RAILS_ROOT to the given string') do |root|
    $ferret_server_options['root'] = root
  end

  optparser.on('-e', '--environment=NAME', 'Set RAILS_ENV to the given string') do |env|
    $ferret_server_options['environment'] = env
  end

  optparser.on('--debug', 'Include full stack traces on exceptions') do
    $ferret_server_options['debug'] = true
  end

  $ferret_server_action = optparser.permute!(ARGV)
  unless $ferret_server_action.size == 1
    puts optparser
    exit(1)
  end

  $ferret_server_action = $ferret_server_action.first
  unless %w(start stop run).include?($ferret_server_action)
    puts optparser
    exit(1)
  end
end
38 | + | |
39 | +################################################################################ | |
# Boot the Rails environment and dispatch the requested action
# (start, stop or run) to the acts_as_ferret DRb server.
begin
  ENV['FERRET_USE_LOCAL_INDEX'] = 'true'
  # Only override RAILS_ENV when -e was given: assigning nil to an ENV key
  # deletes it, which would throw away a RAILS_ENV inherited from the caller.
  if $ferret_server_options['environment']
    ENV['RAILS_ENV'] = $ferret_server_options['environment']
  end

  # determine RAILS_ROOT unless already set
  RAILS_ROOT = $ferret_server_options['root'] || File.join(File.dirname(__FILE__), *(['..']*4)) unless defined? RAILS_ROOT
  # check if environment.rb is present
  rails_env_file = File.join(RAILS_ROOT, 'config', 'environment')
  raise "Unable to find Rails environment.rb at \n#{rails_env_file}.rb\nPlease use the --root option of ferret_server to point it to your RAILS_ROOT." unless File.exist?(rails_env_file+'.rb')
  # load it
  require rails_env_file

  require 'acts_as_ferret'
  ActsAsFerret::Remote::Server.new.send($ferret_server_action)
rescue SystemExit
  # a deliberate exit is not an error: keep its exit status untouched
  raise
rescue Exception => e
  # everything else (including load errors) is reported and ends the script
  # with a failure status; --debug adds the backtrace
  $stderr.puts(e.message)
  $stderr.puts(e.backtrace.join("\n")) if $ferret_server_options['debug']
  exit(1)
end
... | ... | @@ -0,0 +1,64 @@ |
1 | +################################################################################ | |
module ActsAsFerret
  module Remote

    # Methods for becoming a daemon on Unix-like operating systems.
    #
    # Every method reads the PID file location from @cfg.pid_file, so the
    # object mixing this module in has to provide @cfg.
    module UnixDaemon

      # Forks a child process (see #safefork) which writes the PID file,
      # exits cleanly on TERM, starts a new session, redirects the standard
      # streams (STDIN from /dev/null, STDOUT and STDERR appended to
      # RAILS_ROOT/log/ferret_server.out) and finally calls +block+.
      def platform_daemon(&block)
        safefork do
          write_pid_file
          trap("TERM") { exit(0) }
          Process.setsid
          STDIN.reopen("/dev/null")
          STDOUT.reopen("#{RAILS_ROOT}/log/ferret_server.out", "a")
          STDERR.reopen(STDOUT)
          block.call
        end
      end

      # Stops the daemon, nicely at first, and then forcefully if necessary:
      # sends TERM, probes the process with signal 0 every half second up to
      # 30 times, then sends KILL. Errno::ESRCH from any of these signals
      # means the process is gone. The PID file is removed in every case.
      #
      # Raises RuntimeError when no usable PID can be read.
      def stop
        pid = read_pid_file
        raise "ferret_server doesn't appear to be running" unless pid
        $stdout.puts("stopping ferret server...")
        Process.kill("TERM", pid)
        30.times { Process.kill(0, pid); sleep(0.5) }
        $stdout.puts("using kill -9 #{pid}")
        Process.kill(9, pid)
      rescue Errno::ESRCH
        $stdout.puts("process #{pid} has stopped")
      ensure
        File.unlink(@cfg.pid_file) if File.exist?(@cfg.pid_file)
      end

      # Calls fork with +block+, retrying every 5 seconds when the fork
      # fails with Errno::EWOULDBLOCK; the error is re-raised after 20 retries.
      def safefork(&block)
        @fork_tries ||= 0
        fork(&block)
      rescue Errno::EWOULDBLOCK
        raise if @fork_tries >= 20
        @fork_tries += 1
        sleep 5
        retry
      end

      # Creates the PID file holding the current process id and installs an
      # at_exit handler that removes the file again, provided it still
      # contains this process' id.
      #
      # Raises RuntimeError when the PID file already names a process.
      def write_pid_file
        raise "ferret_server may already be running, a pid file exists: #{@cfg.pid_file}" if read_pid_file
        # File.open instead of Kernel#open: the latter would run a command
        # if the configured path started with a pipe character
        File.open(@cfg.pid_file, "w") {|f| f << Process.pid << "\n"}
        at_exit { File.unlink(@cfg.pid_file) if read_pid_file == Process.pid }
      end

      # Returns the process id stored in the PID file, or nil when the file
      # is missing or does not contain a positive number. An empty or
      # corrupt file must not yield 0: 0 is truthy in Ruby and
      # Process.kill(signal, 0) addresses the caller's whole process group.
      def read_pid_file
        return nil unless File.exist?(@cfg.pid_file)
        pid = File.read(@cfg.pid_file).to_i
        pid > 0 ? pid : nil
      end

    end
  end
end
... | ... | @@ -0,0 +1,49 @@ |
module ActsAsFerret

  # Mix this module into model classes that are not based on ActiveRecord
  # in order to use acts_as_ferret with them.
  #
  # The model class has to implement the find_for_id(id) class method for
  # search to work.
  module WithoutAR
    # Include hook: extends the including class with ClassMethods and
    # ActsAsFerret::ActMethods and includes InstanceMethods into it.
    def self.included(target)
      target.extend(ClassMethods)
      target.extend(ActsAsFerret::ActMethods)
      target.send(:include, InstanceMethods)
    end

    # Class-level methods acts_as_ferret calls on a model class.
    module ClassMethods
      # The Rails default logger.
      def logger
        RAILS_DEFAULT_LOGGER
      end

      # Underscored class name.
      def table_name
        name.underscore
      end

      # Name of the key attribute, always 'id'.
      def primary_key
        'id'
      end

      # find(:all, :conditions => [sql, ids]) looks up every id found at
      # index 1 of the conditions; any other first argument is treated as a
      # single id and handed to find_for_id.
      def find(what, args = {})
        return find_for_id(what) unless :all == what

        wanted_ids = args[:conditions][1]
        wanted_ids.map { |record_id| find(record_id) }
      end

      # To be overridden by the model class; this default only raises
      # NotImplementedError.
      def find_for_id(id)
        raise NotImplementedError, "implement find_for_id in class #{self.name}"
      end

      # Always 0.
      def count
        0
      end
    end

    # Instance-level methods added to the model.
    module InstanceMethods
      # Same logger as the one of the model class.
      def logger
        self.class.logger
      end
    end
  end

end
... | ... | @@ -0,0 +1,134 @@ |
1 | +# rakefile for acts_as_ferret. | |
2 | +# use to create a gem or generate rdoc api documentation. | |
3 | +# | |
4 | +# RELEASE creation: | |
5 | +# rake release REL=x.y.z | |
6 | + | |
7 | +require 'rake' | |
8 | +require 'rake/rdoctask' | |
9 | +require 'rake/packagetask' | |
10 | +require 'rake/gempackagetask' | |
11 | +require 'rake/testtask' | |
12 | +require 'rake/contrib/rubyforgepublisher' | |
13 | + | |
# Prints +msg+ (an empty line by default) to STDERR.
def announce(msg='')
  STDERR.puts(msg)
end
17 | + | |
18 | + | |
# Package metadata. PKG_VERSION is taken from the REL environment variable
# and is nil unless a release is being built (rake release REL=x.y.z).
PKG_NAME = 'acts_as_ferret'
PKG_VERSION = ENV['REL']
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
RUBYFORGE_PROJECT = 'actsasferret'
RUBYFORGE_USER = 'jkraemer'

desc 'Default: run unit tests.'
task :default => :test

desc 'Test the acts_as_ferret plugin.'
Rake::TestTask.new(:test) do |test_task|
  test_task.libs << 'lib'
  test_task.pattern = 'test/**/*_test.rb'
  test_task.verbose = true
end

desc 'Generate documentation for the acts_as_ferret plugin.'
Rake::RDocTask.new(:rdoc) do |doc_task|
  doc_task.rdoc_dir = 'html'
  doc_task.title = "acts_as_ferret - Ferret based full text search for any ActiveRecord model"
  doc_task.options << '--line-numbers'
  doc_task.options << '--inline-source'
  doc_task.options << '--main'
  doc_task.options << 'README'
  doc_task.rdoc_files.include('README', 'LICENSE')
  # an alternative rdoc template may be selected via the template env variable
  doc_task.template = "#{ENV['template']}.rb" if ENV['template']
  doc_task.rdoc_files.include('lib/**/*.rb')
end

desc "Publish the API documentation"
task :pdoc => [:rdoc] do
  publisher = Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, RUBYFORGE_USER)
  publisher.upload
end
50 | + | |
# The packaging and release tasks are only defined when a release version
# was given via REL.
if PKG_VERSION
  spec = Gem::Specification.new do |gem|
    gem.name = PKG_NAME
    gem.version = PKG_VERSION
    gem.platform = Gem::Platform::RUBY
    gem.summary = "acts_as_ferret - Ferret based full text search for any ActiveRecord model"
    # package every file (dot files included) whose path matches none of
    # these patterns
    gem.files = Dir.glob('**/*', File::FNM_DOTMATCH).reject do |path|
      excluded = [ /\.$/, /sqlite$/, /\.log$/, /^pkg/, /\.svn/, /\.\w+\.sw.$/,
                   /^html/, /\~$/, /\/\._/, /\/#/ ]
      excluded.any? { |pattern| path =~ pattern }
    end
    #s.files = FileList["{lib,test}/**/*"].to_a + %w(README MIT-LICENSE CHANGELOG)
    # s.files.delete ...
    gem.require_path = 'lib'
    gem.bindir = "bin"
    gem.executables = ["aaf_install"]
    gem.default_executable = "aaf_install"
    gem.autorequire = 'acts_as_ferret'
    gem.has_rdoc = true
    # s.test_files = Dir['test/**/*_test.rb']
    gem.author = "Jens Kraemer"
    gem.email = "jk@jkraemer.net"
    gem.homepage = "http://projects.jkraemer.net/acts_as_ferret"
  end

  package_task = Rake::GemPackageTask.new(spec) { |pkg| pkg.need_tar = true }

  # Validate that everything is ready to go for a release.
  task :prerelease do
    announce
    announce "**************************************************************"
    announce "* Making RubyGem Release #{PKG_VERSION}"
    announce "**************************************************************"
    announce
    # Are all source files checked in?
    if ENV['RELTEST']
      announce "Release Task Testing, skipping checked-in file test"
    else
      announce "Pulling in svn..."
      `svk pull .`
      announce "Checking for unchecked-in files..."
      data = `svk st`
      fail "SVK status is not clean ... do you have unchecked-in files?" unless data =~ /^$/
      announce "No outstanding checkins found ... OK"
#      announce "Pushing to svn..."
#      `svk push .`
    end
  end


  desc "tag the new release"
  task :tag => [ :prerelease ] do
    # NOTE(review): reltag is computed but none of the svn commands below
    # uses it; they tag with PKG_VERSION directly.
    reltag = "REL_#{PKG_VERSION.gsub(/\./, '_')}"
    reltag << ENV['REUSE'].gsub(/\./, '_') if ENV['REUSE']
    announce "Tagging with [#{PKG_VERSION}]"
    if ENV['RELTEST']
      announce "Release Task Testing, skipping tagging"
    else
      `svn copy -m 'tagging version #{PKG_VERSION}' svn://projects.jkraemer.net/acts_as_ferret/trunk/plugin svn://projects.jkraemer.net/acts_as_ferret/tags/#{PKG_VERSION}`
      `svn del -m 'remove old stable' svn://projects.jkraemer.net/acts_as_ferret/tags/stable`
      `svn copy -m 'tagging version #{PKG_VERSION} as stable' svn://projects.jkraemer.net/acts_as_ferret/tags/#{PKG_VERSION} svn://projects.jkraemer.net/acts_as_ferret/tags/stable`
    end
  end

  # Upload release to rubyforge
  desc "Upload release to rubyforge"
  task :prel => [ :tag, :prerelease, :package ] do
    `rubyforge login`
    # first the gem ...
    gem_upload = "rubyforge add_release #{RUBYFORGE_PROJECT} #{PKG_NAME} '#{PKG_VERSION}' pkg/#{PKG_NAME}-#{PKG_VERSION}.gem"
    puts gem_upload
    system(gem_upload)
    `rubyforge config #{RUBYFORGE_PROJECT}`
    # ... then the tarball
    tgz_upload = "rubyforge add_file #{RUBYFORGE_PROJECT} #{PKG_NAME} '#{PKG_VERSION}' pkg/#{PKG_NAME}-#{PKG_VERSION}.tgz"
    puts tgz_upload
    system(tgz_upload)
  end

  desc 'Publish the gem and API docs'
  task :release => [:pdoc, :prel ]

end
... | ... | @@ -0,0 +1,97 @@ |
1 | +# Ferret DRb server Capistrano tasks | |
2 | +# | |
3 | +# Usage: | |
4 | +# in your Capfile, add acts_as_ferret's recipes directory to your load path and | |
5 | +# load the ferret tasks: | |
6 | +# | |
7 | +# load_paths << 'vendor/plugins/acts_as_ferret/recipes' | |
8 | +# load 'aaf_recipes' | |
9 | +# | |
10 | +# This will hook aaf's DRb start/stop tasks into the standard | |
11 | +# deploy:{start|restart|stop} tasks so the server will be restarted along with | |
12 | +# the rest of your application. | |
13 | +# Also an index directory in the shared folder will be created and symlinked | |
14 | +# into current/ when you deploy. | |
15 | +# | |
16 | +# In order to use the ferret:index:rebuild task, declare the indexes you intend to | |
17 | +# rebuild remotely in config/deploy.rb: | |
18 | +# | |
19 | +# set :ferret_indexes, %w( model another_model shared ) | |
20 | +# | |
21 | +# HINT: To be very sure that your DRb server and application are always using | |
22 | +# the same model and schema versions, and you never lose any index updates because | |
23 | +# of the DRb server being restarted in that moment, use the following sequence | |
24 | +# to update your application: | |
25 | +# | |
26 | +# cap deploy:stop deploy:update deploy:migrate deploy:start | |
27 | +# | |
28 | +# That will stop the DRb server after stopping your application, and bring it | |
29 | +# up before starting the application again. Plus they'll never use different | |
30 | +# versions of model classes (which might happen otherwise) | |
31 | +# Downside: Your downtime is a bit longer than with the usual deploy, so be sure to | |
32 | +# put up some maintenance page for the meantime. Obviously this won't work if | |
33 | +# your migrations need acts_as_ferret (i.e. if you update model instances which | |
34 | +# would lead to index updates). In this case bring up the DRb server before | |
35 | +# running your migrations: | |
36 | +# | |
37 | +# cap deploy:stop deploy:update ferret:start deploy:migrate ferret:stop deploy:start | |
38 | +# | |
39 | +# Chances are that you're still not safe if your migrations not only modify the index, | |
40 | +# but also change the structure of your models. So just don't do both things in | |
41 | +# one go - I can't think of an easy way to handle this case automatically. | |
42 | +# Suggestions and patches are of course very welcome :-) | |
43 | + | |
# Capistrano tasks controlling the Ferret DRb server and its index directory.
namespace :ferret do

  # The trailing "|| true" keeps the deployment going when the stop command
  # fails.
  desc "Stop the Ferret DRb server"
  task :stop, :roles => :app do
    rails_env = fetch(:rails_env, 'production')
    run "cd #{current_path}; script/ferret_server -e #{rails_env} stop || true"
  end

  desc "Start the Ferret DRb server"
  task :start, :roles => :app do
    rails_env = fetch(:rails_env, 'production')
    run "cd #{current_path}; script/ferret_server -e #{rails_env} start"
  end

  desc "Restart the Ferret DRb server"
  task :restart, :roles => :app do
    top.ferret.stop
    sleep 1
    top.ferret.start
  end

  namespace :index do

    # Runs "rake ferret:rebuild" remotely for the indexes listed in the
    # :ferret_indexes variable; does nothing when none are configured.
    # Capistrano's task takes the task name plus an options hash; the
    # rake-style ":rebuild => :environment" dependency would hand it a Hash
    # as the task name, so it is not used here.
    desc "Rebuild the Ferret index. See aaf_recipes.rb for instructions."
    task :rebuild, :roles => :app do
      rake = fetch(:rake, 'rake')
      rails_env = fetch(:rails_env, 'production')
      indexes = fetch(:ferret_indexes, nil)
      if indexes and indexes.any?
        run "cd #{current_path}; RAILS_ENV=#{rails_env} INDEXES='#{indexes.join(' ')}' #{rake} ferret:rebuild"
      end
    end

    desc "purges all indexes for the current environment"
    task :purge, :roles => :app do
      # same default as the other tasks, so the task also works when
      # :rails_env has not been set
      rails_env = fetch(:rails_env, 'production')
      run "rm -fr #{shared_path}/index/#{rails_env}"
    end

    desc "symlinks index folder"
    task :symlink, :roles => :app do
      run "mkdir -p #{shared_path}/index && rm -rf #{release_path}/index && ln -nfs #{shared_path}/index #{release_path}/index"
    end

  end

end

# Hook the DRb server tasks into the standard deployment tasks: stop the
# server after the application stops, start it before the application starts.
after "deploy:stop", "ferret:stop"
before "deploy:start", "ferret:start"

before "deploy:restart", "ferret:stop"
after "deploy:restart", "ferret:start"
after "deploy:symlink", "ferret:index:symlink"
97 | + | ... | ... |
... | ... | @@ -0,0 +1,94 @@ |
1 | +# Ferret Win32 Service Daemon, called by Win 32 service, | |
2 | +# created by Herryanto Siatono <herryanto@pluitsolutions.com> | |
3 | +# | |
4 | +# see doc/README.win32 for usage instructions | |
5 | +# | |
6 | +require 'optparse' | |
7 | +require 'win32/service' | |
8 | +include Win32 | |
9 | + | |
# Read the command line switches into the options hash. The -e switch
# additionally exports the chosen environment as RAILS_ENV.
options = {}
ARGV.options do |parser|
  parser.banner = 'Usage: ferret_daemon [options]'
  parser.on("-l", "--log FILE", "Daemon log file") do |file|
    options[:log] = file
  end
  parser.on("-c","--console","Run Ferret server on console.") do
    options[:console] = true
  end
  parser.on_tail("-h","--help", "Show this help message") do
    puts parser
    exit
  end
  parser.on("-e", "--environment ENV ", "Rails environment") do |env|
    options[:environment] = env
    ENV['RAILS_ENV'] = env
  end
  parser.parse!
end
23 | + | |
24 | +require File.dirname(__FILE__) + '/../config/environment' | |
25 | + | |
# Ferret Win32 Service Daemon, called by the Win32 service.
# To run it on the console instead, use the -c or --console option.
module Ferret
  class FerretDaemon < Daemon
    # Minimal IO stand-in used to redirect STDOUT and STDERR to a log file:
    # everything written to it is logged at info level.
    class FerretStandardLogger
      def initialize(logger)
        @logger = logger
      end

      def write(s)
        @logger.info s
      end
    end

    # options - Hash; the keys read here are :log (log file path) and
    #           :console (run in the foreground).
    #
    # In console mode the service is initialized and its main loop entered
    # right away.
    def initialize(options={})
      @options = options

      # initialize logger: explicit log file, or a per-environment default
      log_target = options[:log] || (RAILS_ROOT + "/log/ferret_service_#{RAILS_ENV}.log")
      @logger = Logger.new(log_target)

      # redirect stdout and stderr to the Ferret logger if running as windows service
      unless @options[:console]
        redirect = FerretStandardLogger.new(@logger)
        $stderr = redirect
        $stdout = redirect
      end

      log "Initializing FerretDaemon..."
      return unless @options[:console]

      service_init
      service_main
    end

    # Service main loop: logs and sleeps for as long as running? holds.
    def service_main
      log "Service main enterred..."

      loop do
        break unless running?
        log "Listening..."
        sleep
      end

      log "Service main exit..."
    end

    # Starts the Ferret DRb server.
    def service_init
      log "Starting Ferret DRb server..."
      ActsAsFerret::Remote::Server.start
      log "FerretDaemon started."
    end

    # Stops the DRb service.
    def service_stop
      log "Stopping service..."
      DRb.stop_service
      log "FerretDaemon stopped."
    end

    # Logs +msg+ at info level and echoes it via puts in console mode.
    def log(msg)
      @logger.info msg
      puts msg if @options[:console]
    end
  end
end
90 | + | |
# Only start the daemon when this file is executed directly.
Ferret::FerretDaemon.new(options).mainloop if __FILE__ == $0
... | ... | @@ -0,0 +1,178 @@ |
1 | +# Ferret Win32 Service Daemon install script | |
2 | +# created by Herryanto Siatono <herryanto@pluitsolutions.com> | |
3 | +# | |
4 | +# see doc/README.win32 for usage instructions | |
5 | +# | |
6 | +require 'optparse' | |
7 | +require 'win32/service' | |
8 | +include Win32 | |
9 | + | |
10 | +module Ferret | |
# Parses and validates the service command (first command line word) and
# its options.
class FerretServiceCommand
  COMMANDS = ['install', 'remove', 'start', 'stop', 'help']
  BANNER = "Usage: ruby script/ferret_service <command> [options]"

  attr_reader :options, :command

  def initialize
    @options = {}
  end

  # True when the parsed command is one of COMMANDS.
  def valid_command?
    COMMANDS.include?(@command)
  end

  # Truthy when a non-empty service name was given.
  def valid_options?
    @options[:name] and !@options[:name].empty?
  end

  # Prints the usage banner with all available commands, then exits.
  def print_command_list
    puts BANNER
    puts "\nAvailable commands:\n"
    puts COMMANDS.map {|cmd| " - #{cmd}\n"}
    puts "\nUse option -h for each command to help."
    exit
  end

  # Prints the problems found and exits with a failure status unless all
  # required options (currently only the service name) are present.
  def validate_options
    errors = []
    errors << "Service name is required." unless @options[:name]

    if (errors.size > 0)
      errors << "Error found. Use: 'ruby script/ferret_service #{@command} -h' for to get help."
      puts errors.join("\n")
      exit(1)
    end
  end

  # Parses +args+ (modified in place): the first word is the command, which
  # is matched case-insensitively; the remaining words are its options.
  #
  # Prints the command list and exits for a missing or unknown command and
  # for "help". "help" is a member of COMMANDS but has no service action
  # behind it, so it must never reach option validation.
  def run(args)
    @command = args.shift
    @command = @command.dup.downcase if @command

    # validate command and options
    print_command_list if !valid_command? || @command == 'help'

    opts_parser = create_options_parser
    begin
      opts_parser.parse!(args)
    rescue OptionParser::ParseError => e
      puts e
      puts opts_parser
      # do not go on with options that were only parsed halfway
      exit(1)
    end

    # validate required options
    validate_options
  end

  # Builds the OptionParser for the current command; the display name, log
  # file and environment switches are only offered for "install". The
  # environment switch also exports the value as RAILS_ENV.
  def create_options_parser
    opts_parser = OptionParser.new
    opts_parser.banner = BANNER
    opts_parser.on("-n", "--name=NAME", "Service name") {|name| @options[:name] = name }
    opts_parser.on_tail("-t", "--trace", "Display stack trace when exception thrown") { @options[:trace] = true }
    opts_parser.on_tail("-h", "--help", "Show this help message") { puts opts_parser; exit }

    if ['install'].include?(@command)
      opts_parser.on("-d", "--display=NAME", "Service display name") {|name| @options[:display] = name }

      opts_parser.on("-l", "--log FILE", "Service log file") {|file| @options[:log] = file }
      opts_parser.on("-e", "--environment ENV ", "Rails environment") { |env|
        @options[:environment] = env
        ENV['RAILS_ENV'] = env
      }
    end
    opts_parser
  end
end
87 | + | |
# Installs, removes, starts and stops the Ferret DRb server Win32 service.
class FerretService
  FERRET_DAEMON = 'ferret_daemon'

  def initialize
  end

  # Creates the service named by the :name option unless a service of that
  # name exists already.
  def install
    svc = Service.new

    begin
      name = @options[:name]
      if Service.exists?(name)
        puts "Service name '#{name}' already exists."
        return
      end

      svc.create_service do |definition|
        definition.service_name = @options[:name]
        definition.display_name = @options[:display]
        definition.binary_path_name = binary_path_name
        definition.dependencies = []
      end

      svc.close
      puts "'#{@options[:name]}' service installed."
    rescue => e
      handle_error(e)
    end
  end

  # Stops the service (any error while stopping is ignored) and then
  # deletes it.
  def remove
    begin
      Service.stop(@options[:name])
    rescue
      # stopping is best effort only
    end

    Service.delete(@options[:name])
    puts "'#{@options[:name]}' service removed."
  rescue => e
    handle_error(e)
  end

  # Starts the service.
  def start
    Service.start(@options[:name])
    puts "'#{@options[:name]}' successfully started."
  rescue => e
    handle_error(e)
  end

  # Stops the service.
  def stop
    Service.stop(@options[:name])
    puts "'#{@options[:name]}' successfully stopped.\n"
  rescue => e
    handle_error(e)
  end

  # Parses +args+ with FerretServiceCommand and invokes the method named
  # like the parsed command.
  def run(args)
    svc_cmd = FerretServiceCommand.new
    svc_cmd.run(args)
    @options = svc_cmd.options
    action = svc_cmd.command.to_sym
    send(action)
  end

  protected
    # Re-raises +e+ when the :trace option is set, otherwise just prints it.
    def handle_error(e)
      raise e if @options[:trace]
      puts e
    end

    # Command line the service executes: ruby.exe (taken from RUBY_HOME/bin
    # when that variable is set), the expanded path of the daemon script
    # and, if given, the environment and log file switches.
    def binary_path_name
      command = ENV['RUBY_HOME'] ? "#{ENV['RUBY_HOME']}/bin/" : ""
      command += "ruby.exe " + File.expand_path("script/" + FERRET_DAEMON)
      command += " -e #{@options[:environment]} " if @options[:environment]
      command += " -l #{@options[:log]} " if @options[:log]
      command
    end
end
176 | +end | |
177 | + | |
# Script entry point: run the service command given on the command line.
service = Ferret::FerretService.new
service.run(ARGV)
... | ... | @@ -0,0 +1,24 @@ |
namespace :ferret do

  # Rebuild index task. Declare the indexes to be rebuilt with the INDEXES
  # environment variable:
  #
  # INDEXES="my_model shared" rake ferret:rebuild
  desc "Rebuild a Ferret index. Specify what indexes to rebuild with the INDEXES environment variable."
  task :rebuild do
    require File.join(RAILS_ROOT, 'config', 'environment')

    # to_s guards against INDEXES not being set at all, which would
    # otherwise end in a NoMethodError on nil
    indexes = ENV['INDEXES'].to_s.split
    raise "No index given. Usage: INDEXES=\"my_model shared\" rake ferret:rebuild" if indexes.empty?

    indexes.each do |index_name|
      # remember a point in time slightly before the rebuild starts
      start = 1.minute.ago
      ActsAsFerret::rebuild_index index_name
      idx = ActsAsFerret::get_index index_name
      # update records that have changed since the rebuild started
      idx.index_definition[:registered_models].each do |m|
        m.records_modified_since(start).each do |object|
          object.ferret_update
        end
      end
    end
  end
end