Commit b374f2c57ee9a01617191cb02753613d6e47d674
1 parent
ab66aa9b
Exists in
master
and in
29 other branches
[search-improvements] Add infrastructure for search_terms score calculation
Showing
9 changed files
with
211 additions
and
1 deletion
Show diff stats
app/models/environment.rb
... | ... | @@ -10,6 +10,7 @@ class Environment < ActiveRecord::Base |
10 | 10 | self.partial_updates = false |
11 | 11 | |
12 | 12 | has_many :tasks, :dependent => :destroy, :as => 'target' |
13 | + has_many :search_terms, :as => :context | |
13 | 14 | |
14 | 15 | IDENTIFY_SCRIPTS = /(php[0-9s]?|[sp]htm[l]?|pl|py|cgi|rb)/ |
15 | 16 | ... | ... |
app/models/profile.rb
... | ... | @@ -138,6 +138,8 @@ class Profile < ActiveRecord::Base |
138 | 138 | |
139 | 139 | has_many :comments_received, :class_name => 'Comment', :through => :articles, :source => :comments |
140 | 140 | |
141 | + has_many :search_terms, :as => :context | |
142 | + | |
141 | 143 | def scraps(scrap=nil) |
142 | 144 | scrap = scrap.is_a?(Scrap) ? scrap.id : scrap |
143 | 145 | scrap.nil? ? Scrap.all_scraps(self) : Scrap.all_scraps(self).find(scrap) | ... | ... |
... | ... | @@ -0,0 +1,34 @@ |
# A term that was searched within a given context (polymorphic association)
# and asset, together with a relevance score derived from its recorded
# occurrences.
class SearchTerm < ActiveRecord::Base
  validates_presence_of :term, :context
  validates_uniqueness_of :term, :scope => [:context_id, :context_type, :asset]

  belongs_to :context, :polymorphic => true
  has_many :occurrences, :class_name => 'SearchTermOccurrence'

  attr_accessible :term, :context, :asset

  # The score is refreshed every time the record is saved.
  before_save :calculate_score

  # Recalculates and persists the score of every search term.
  #
  # The recalculation itself is done by the before_save callback, so saving
  # each record is what actually stores the new score. Validation is skipped
  # because only the derived score column is being refreshed.
  def self.calculate_scores
    find_each { |search_term| search_term.save(:validate => false) }
  end

  # Returns the search term matching +term+ and +asset+ within +context+,
  # creating it when it does not exist yet.
  #
  # term    - the searched text.
  # context - an object that responds to +search_terms+.
  # asset   - the asset the search was restricted to (defaults to 'all').
  #
  # Raises ActiveRecord::RecordInvalid if the new record fails validation.
  def self.find_or_create(term, context, asset='all')
    attributes = { :term => term, :asset => asset }
    context.search_terms.where(attributes).first || context.search_terms.create!(attributes)
  end

  # Computes the score from the non-expired occurrences and assigns it to
  # +score+ (without saving). The score is 0 when there is no valid occurrence.
  #
  # Returns the assigned score.
  def calculate_score
    valid_occurrences = occurrences.valid
    # Pick the most recent occurrence explicitly; an unordered relation gives
    # no guarantee about which row comes last.
    latest = valid_occurrences.order('search_term_occurrences.created_at DESC, search_term_occurrences.id DESC').first

    if latest.nil?
      self.score = 0
    else
      indexed = latest.indexed.to_i
      total = latest.total.to_i
      # Using the formula described in this paper: http://www.soi.city.ac.uk/~ser/papers/RSJ76.pdf
      current_relevance = indexed > 0 && total >= indexed ? -Math.log(indexed.to_f / total) : 0
      # Damp the number of occurrences with a log function to decrease its
      # effect over relevance. One is added so that a single occurrence does
      # not zero the whole score (log(1) == 0).
      damped_occurrences = Math.log(valid_occurrences.count + 1)
      self.score = (damped_occurrences * current_relevance).to_f
    end
  end
end
... | ... | @@ -0,0 +1,10 @@ |
# A single recorded occurrence of a search term, carrying the +total+ and
# +indexed+ counters that SearchTerm uses to calculate its score.
class SearchTermOccurrence < ActiveRecord::Base
  belongs_to :search_term
  validates_presence_of :search_term
  attr_accessible :search_term, :created_at, :total, :indexed

  #TODO Verify this value
  EXPIRATION_TIME = 1.month

  # Occurrences created within the last EXPIRATION_TIME.
  #
  # The condition is wrapped in a lambda so the cutoff is computed on every
  # call; a plain conditions hash would evaluate DateTime.now only once, when
  # the class is loaded, and the cutoff would never move forward afterwards.
  scope :valid, lambda { where("search_term_occurrences.created_at >= ?", DateTime.now - EXPIRATION_TIME) }
end
db/migrate/20140507195924_create_search_term_occurrences.rb
0 → 100644
... | ... | @@ -0,0 +1,16 @@ |
# Creates the search_term_occurrences table and an index on its created_at
# column.
class CreateSearchTermOccurrences < ActiveRecord::Migration
  TABLE = :search_term_occurrences

  # Builds the table, then indexes the creation timestamp.
  def up
    create_table TABLE do |table|
      table.references :search_term
      table.datetime :created_at
      table.integer :total, :default => 0
      table.integer :indexed, :default => 0
    end

    add_index TABLE, :created_at
  end

  # Reverts #up: removes the index first, then the table itself.
  def down
    remove_index TABLE, :column => :created_at
    drop_table TABLE
  end
end
... | ... | @@ -0,0 +1,17 @@ |
# Creates the search_terms table and a composite index over term, asset and
# score.
class CreateSearchTerms < ActiveRecord::Migration
  TABLE = :search_terms
  INDEXED_COLUMNS = [:term, :asset, :score]

  # Builds the table (the polymorphic reference adds context_id and
  # context_type), then adds the composite index.
  def up
    create_table TABLE do |table|
      table.string :term
      table.references :context, :polymorphic => true
      table.string :asset, :default => 'all'
      table.float :score, :default => 0
    end

    add_index TABLE, INDEXED_COLUMNS
  end

  # Reverts #up: removes the composite index first, then the table itself.
  def down
    remove_index TABLE, :column => INDEXED_COLUMNS
    drop_table TABLE
  end
end
db/schema.rb
... | ... | @@ -11,7 +11,7 @@ |
11 | 11 | # |
12 | 12 | # It's strongly recommended to check this file into your version control system. |
13 | 13 | |
14 | -ActiveRecord::Schema.define(:version => 20140408172149) do | |
14 | +ActiveRecord::Schema.define(:version => 20140507205338) do | |
15 | 15 | |
16 | 16 | create_table "abuse_reports", :force => true do |t| |
17 | 17 | t.integer "reporter_id" |
... | ... | @@ -550,6 +550,25 @@ ActiveRecord::Schema.define(:version => 20140408172149) do |
550 | 550 | t.integer "context_id" |
551 | 551 | end |
552 | 552 | |
553 | + create_table "search_term_occurrences", :force => true do |t| | |
554 | + t.integer "search_term_id" | |
555 | + t.datetime "created_at" | |
556 | + t.integer "total", :default => 0 | |
557 | + t.integer "indexed", :default => 0 | |
558 | + end | |
559 | + | |
560 | + add_index "search_term_occurrences", ["created_at"], :name => "index_search_term_occurrences_on_created_at" | |
561 | + | |
562 | + create_table "search_terms", :force => true do |t| | |
563 | + t.string "term" | |
564 | + t.integer "context_id" | |
565 | + t.string "context_type" | |
566 | + t.string "asset", :default => "all" | |
567 | + t.float "score", :default => 0.0 | |
568 | + end | |
569 | + | |
570 | + add_index "search_terms", ["term", "asset", "score"], :name => "index_search_terms_on_term_and_asset_and_score" | |
571 | + | |
553 | 572 | create_table "sessions", :force => true do |t| |
554 | 573 | t.string "session_id", :null => false |
555 | 574 | t.text "data" | ... | ... |
... | ... | @@ -0,0 +1,30 @@ |
1 | +require 'test_helper' | |
2 | + | |
# Unit tests for SearchTermOccurrence: presence validation, creation and the
# +valid+ scope.
class SearchTermOccurrenceTest < ActiveSupport::TestCase

  attr_reader :search_term

  # Every test uses the same term, registered on the default environment.
  def setup
    @search_term = SearchTerm.find_or_create('universe', Environment.default)
  end

  should 'have term' do
    occurrence = SearchTermOccurrence.new
    assert !occurrence.valid?
    assert occurrence.errors.has_key?(:search_term)
  end

  should 'create a search term occurence' do
    assert_nothing_raised { SearchTermOccurrence.create!(:search_term => search_term) }
  end

  should 'fetch only valid occurrences' do
    fresh = Array.new(2) { SearchTermOccurrence.create!(:search_term => search_term) }
    # One extra occurrence created a day before the expiration window starts.
    expired_at = DateTime.now - (SearchTermOccurrence::EXPIRATION_TIME + 1.day)
    SearchTermOccurrence.create!(:search_term => search_term, :created_at => expired_at)

    assert_equivalent fresh, search_term.occurrences.valid
  end
end
... | ... | @@ -0,0 +1,81 @@ |
1 | +require 'test_helper' | |
2 | + | |
# Unit tests for SearchTerm: validations, find_or_create, the occurrences
# association and score calculation.
class SearchTermTest < ActiveSupport::TestCase
  should 'have term' do
    search_term = SearchTerm.new
    assert !search_term.valid?
    assert search_term.errors.has_key?(:term)
  end

  should 'have context' do
    search_term = SearchTerm.new
    assert !search_term.valid?
    assert search_term.errors.has_key?(:context)
  end

  should 'have unique term within specific context and asset' do
    SearchTerm.create!(:term => 'galaxy', :context => Environment.default, :asset => 'universe')
    search_term = SearchTerm.new(:term => 'galaxy', :context => Environment.default, :asset => 'universe')
    search_term.valid?
    assert search_term.errors.has_key?(:term)

    # The same term under a different asset must not clash.
    search_term.asset = 'alternate_universe'
    search_term.valid?
    assert !search_term.errors.has_key?(:term)
  end

  should 'create a search term' do
    assert_nothing_raised do
      SearchTerm.create!(:term => 'universe', :context => Environment.default)
    end
  end

  should 'find or create by term' do
    # The second call must find the record created by the first one.
    assert_difference 'SearchTerm.count', 1 do
      SearchTerm.find_or_create('universe', Environment.default)
      search_term = SearchTerm.find_or_create('universe', Environment.default)
      assert_equal 'universe', search_term.term
    end
  end

  should 'have occurrences' do
    search_term = SearchTerm.find_or_create('universe', Environment.default)
    o1 = SearchTermOccurrence.create!(:search_term => search_term)
    o2 = SearchTermOccurrence.create!(:search_term => search_term)

    assert_equivalent [o1,o2], search_term.occurrences
  end

  should 'calculate score' do
    search_term = SearchTerm.find_or_create('universe', Environment.default)
    SearchTermOccurrence.create!(:search_term => search_term, :total => 10, :indexed => 3)
    search_term.save!
    assert search_term.score > 0, "Score was not calculated."
  end

  should 'not consider expired occurrences to calculate the score' do
    search_term = SearchTerm.find_or_create('universe', Environment.default)
    expired_at = DateTime.now - (SearchTermOccurrence::EXPIRATION_TIME + 1.day)
    SearchTermOccurrence.create!(:search_term => search_term, :total => 10, :indexed => 3, :created_at => expired_at)
    search_term.save!
    assert search_term.score == 0, "Considered expired occurrence to calculate the score."
  end

  should 'calculate search_terms scores' do
    # A context is mandatory (validates_presence_of :context); without it
    # create! raises before any score is calculated.
    st1 = SearchTerm.create!(:term => 'st1', :context => Environment.default)
    SearchTermOccurrence.create!(:search_term => st1, :total => 10, :indexed => 3)
    SearchTermOccurrence.create!(:search_term => st1, :total => 20, :indexed => 8)
    SearchTermOccurrence.create!(:search_term => st1, :total => 30, :indexed => 9)
    st2 = SearchTerm.create!(:term => 'st2', :context => Environment.default)
    SearchTermOccurrence.create!(:search_term => st2, :total => 10, :indexed => 7)
    SearchTermOccurrence.create!(:search_term => st2, :total => 20, :indexed => 16)
    SearchTermOccurrence.create!(:search_term => st2, :total => 30, :indexed => 21)

    SearchTerm.calculate_scores
    st1.reload
    st2.reload

    assert st1.score > 0, "Did not calculate st1 score."
    assert st2.score > 0, "Did not calculate st2 score."
  end

end