diff --git a/app/models/search_term.rb b/app/models/search_term.rb index 8031e89..f45b22a 100644 --- a/app/models/search_term.rb +++ b/app/models/search_term.rb @@ -8,26 +8,55 @@ class SearchTerm < ActiveRecord::Base attr_accessible :term, :context, :asset def self.calculate_scores - find_each { |search_term| search_term.calculate_score } + os = occurrences_scores + find_each { |search_term| search_term.calculate_score(os) } end def self.find_or_create(term, context, asset='all') context.search_terms.where(:term => term, :asset => asset).first || context.search_terms.create!(:term => term, :asset=> asset) end - def calculate_score + # Fast way of getting the occurrences score for each search_term. Ugly but fast! + # + # Each occurrence of a search_term has a score that is smaller the older the + # occurrence happened. We subtract the amount of time between now and the + # moment it happened from the total time any occurrence is valid to happen. E.g.: + # The expiration time is 100 days and an occurrence happened 3 days ago. + # Therefore the score is 97. Then we sum every score to get the total score + # for a search term. + def self.occurrences_scores + ActiveSupport::OrderedHash[*ActiveRecord::Base.connection.execute( + joins(:occurrences). + select("search_terms.id, sum(#{SearchTermOccurrence::EXPIRATION_TIME.to_i} - extract(epoch from (now() - search_term_occurrences.created_at))) as value"). + where("search_term_occurrences.created_at > ?", DateTime.now - SearchTermOccurrence::EXPIRATION_TIME). + group("search_terms.id"). + order('value DESC'). 
+ to_sql + ).map {|result| [result['id'].to_i, result['value'].to_i]}.flatten] + end + + def calculate_occurrence(occurrences_scores) + max_score = occurrences_scores.first[1] + (occurrences_scores[id]/max_score.to_f)*100 + end + + def calculate_relevance(valid_occurrences) + indexed = valid_occurrences.last.indexed.to_f + total = valid_occurrences.last.total.to_f + (1 - indexed/total)*100 + end + + def calculate_score(occurrences_scores) valid_occurrences = occurrences.valid if valid_occurrences.present? - indexed = valid_occurrences.last.indexed - total = valid_occurrences.last.total - # Using the formula described on this paper: http://www.soi.city.ac.uk/~ser/papers/RSJ76.pdf - current_relevance = indexed > 0 && total >= indexed ? -Math.log(indexed.to_f/total.to_f) : 0 - # Damp number of occurrences with log function to decrease it's effect over relevance. - damped_occurrences = Math.log(valid_occurrences.count) - self.score = (damped_occurrences * current_relevance).to_f + # These scores vary from 1~100 + self.occurrence_score = calculate_occurrence(occurrences_scores) + self.relevance_score = calculate_relevance(valid_occurrences) else - self.score = 0 + self.occurrence_score = 0 + self.relevance_score = 0 end + self.score = (occurrence_score * relevance_score)/100.0 self.save! 
end end diff --git a/app/models/search_term_occurrence.rb b/app/models/search_term_occurrence.rb index fc3e20a..75a8f29 100644 --- a/app/models/search_term_occurrence.rb +++ b/app/models/search_term_occurrence.rb @@ -3,8 +3,7 @@ class SearchTermOccurrence < ActiveRecord::Base validates_presence_of :search_term attr_accessible :search_term, :created_at, :total, :indexed - #TODO Verify this value - EXPIRATION_TIME = 1.month + EXPIRATION_TIME = 1.year - scope :valid, :conditions => ["search_term_occurrences.created_at >= ?", DateTime.now - EXPIRATION_TIME] + scope :valid, :conditions => ["search_term_occurrences.created_at > ?", DateTime.now - EXPIRATION_TIME] end diff --git a/db/migrate/20140507205338_create_search_terms.rb b/db/migrate/20140507205338_create_search_terms.rb index 2e01a8d..6e5fc1f 100644 --- a/db/migrate/20140507205338_create_search_terms.rb +++ b/db/migrate/20140507205338_create_search_terms.rb @@ -5,13 +5,23 @@ class CreateSearchTerms < ActiveRecord::Migration t.references :context, :polymorphic => true t.string :asset, :default => 'all' t.float :score, :default => 0 + t.float :relevance_score, :default => 0 + t.float :occurrence_score, :default => 0 end - add_index :search_terms, [:term, :asset, :score] + add_index :search_terms, :term + add_index :search_terms, :asset + add_index :search_terms, :score + add_index :search_terms, :relevance_score + add_index :search_terms, :occurrence_score end def down - remove_index :search_terms, [:term, :asset, :score] + remove_index :search_terms, :term + remove_index :search_terms, :asset + remove_index :search_terms, :score + remove_index :search_terms, :relevance_score + remove_index :search_terms, :occurrence_score drop_table :search_terms end end diff --git a/db/schema.rb b/db/schema.rb index fa3123d..72a9e29 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -563,11 +563,17 @@ ActiveRecord::Schema.define(:version => 20140507205338) do t.string "term" t.integer "context_id" t.string "context_type" - t.string 
"asset", :default => "all" - t.float "score", :default => 0.0 + t.string "asset", :default => "all" + t.float "score", :default => 0.0 + t.float "relevance_score", :default => 0.0 + t.float "occurrence_score", :default => 0.0 end - add_index "search_terms", ["term", "asset", "score"], :name => "index_search_terms_on_term_and_asset_and_score" + add_index "search_terms", ["asset"], :name => "index_search_terms_on_asset" + add_index "search_terms", ["occurrence_score"], :name => "index_search_terms_on_occurrence_score" + add_index "search_terms", ["relevance_score"], :name => "index_search_terms_on_relevance_score" + add_index "search_terms", ["score"], :name => "index_search_terms_on_score" + add_index "search_terms", ["term"], :name => "index_search_terms_on_term" create_table "sessions", :force => true do |t| t.string "session_id", :null => false diff --git a/test/unit/search_term_test.rb b/test/unit/search_term_test.rb index faa825d..9cba835 100644 --- a/test/unit/search_term_test.rb +++ b/test/unit/search_term_test.rb @@ -51,14 +51,16 @@ class SearchTermTest < ActiveSupport::TestCase SearchTermOccurrence.create!(:search_term => search_term, :total => 10, :indexed => 3) # Search term must happens at least two times to be considered SearchTermOccurrence.create!(:search_term => search_term, :total => 10, :indexed => 3) - search_term.calculate_score + SearchTerm.calculate_scores + search_term.reload assert search_term.score > 0, "Score was not calculated." end should 'not consider expired occurrences to calculate the score' do search_term = SearchTerm.find_or_create('universe', Environment.default) occurrence = SearchTermOccurrence.create!(:search_term => search_term, :total => 10, :indexed => 3, :created_at => DateTime.now - (SearchTermOccurrence::EXPIRATION_TIME + 1.day)) - search_term.calculate_score + SearchTerm.calculate_scores + search_term.reload assert search_term.score == 0, "Considered expired occurrence to calculate the score." 
end @@ -80,4 +82,19 @@ class SearchTermTest < ActiveSupport::TestCase assert st2.score > 0, "Did not calculate st2 score." end + should 'the older the occurrence the less it should influence the score' do + st1 = SearchTerm.find_or_create('st1', Environment.default) + SearchTermOccurrence.create!(:search_term => st1, :total => 10, :indexed => 3, :created_at => 1.month.ago) + SearchTermOccurrence.create!(:search_term => st1, :total => 20, :indexed => 8, :created_at => 1.month.ago) + st2 = SearchTerm.find_or_create('st2', Environment.default) + SearchTermOccurrence.create!(:search_term => st2, :total => 10, :indexed => 3, :created_at => 2.months.ago) + SearchTermOccurrence.create!(:search_term => st2, :total => 20, :indexed => 8, :created_at => 2.months.ago) + + SearchTerm.calculate_scores + st1.reload + st2.reload + + assert st1.score > st2.score, "Older occurrences are not influencing score less than newer ones." + end + end -- libgit2 0.21.2