Commit fd9ec40e5cc949421a351f28ae912a0fcb5dab0c

Authored by Rodrigo Souto
1 parent d8fd6277

profile-suggestions: refactoring suggestion calculation

Building full sql to calculate every suggestion of every rule
app/models/profile_suggestion.rb
@@ -28,16 +28,13 @@ class ProfileSuggestion < ActiveRecord::Base @@ -28,16 +28,13 @@ class ProfileSuggestion < ActiveRecord::Base
28 28
29 # {:category_type => ['category-icon', 'category-label']} 29 # {:category_type => ['category-icon', 'category-label']}
30 CATEGORIES = { 30 CATEGORIES = {
31 - :common_friends => ['menu-people', _('Friends in common')],  
32 - :common_communities => ['menu-community',_('Communities in common')],  
33 - :common_tags => ['edit', _('Tags in common')] 31 + :people_with_common_friends => ['menu-people', _('Friends in common')],
  32 + :people_with_common_communities => ['menu-community',_('Communities in common')],
  33 + :people_with_common_tags => ['edit', _('Tags in common')],
  34 + :communities_with_common_friends => ['menu-people', _('Friends in common')],
  35 + :communities_with_common_tags => ['edit', _('Tags in common')]
34 } 36 }
35 37
36 - CATEGORIES.keys.each do |category|  
37 - settings_items category.to_sym  
38 - attr_accessible category.to_sym  
39 - end  
40 -  
41 def category_icon(category) 38 def category_icon(category)
42 'icon-' + ProfileSuggestion::CATEGORIES[category][0] 39 'icon-' + ProfileSuggestion::CATEGORIES[category][0]
43 end 40 end
@@ -46,166 +43,192 @@ class ProfileSuggestion < ActiveRecord::Base @@ -46,166 +43,192 @@ class ProfileSuggestion < ActiveRecord::Base
46 ProfileSuggestion::CATEGORIES[category][1] 43 ProfileSuggestion::CATEGORIES[category][1]
47 end 44 end
48 45
49 - RULES = %w[  
50 - people_with_common_friends  
51 - people_with_common_communities  
52 - people_with_common_tags  
53 - communities_with_common_friends  
54 - communities_with_common_tags  
55 - ] 46 + RULES = {
  47 + :people_with_common_communities => {
  48 + :threshold => 2, :weight => 1
  49 + },
  50 + :people_with_common_friends => {
  51 + :threshold => 2, :weight => 1
  52 + },
  53 + :people_with_common_tags => {
  54 + :threshold => 2, :weight => 1
  55 + },
  56 + :communities_with_common_friends => {
  57 + :threshold => 2, :weight => 1
  58 + },
  59 + :communities_with_common_tags => {
  60 + :threshold => 2, :weight => 1
  61 + }
  62 + }
  63 +
  64 + RULES.keys.each do |rule|
  65 + settings_items rule
  66 + attr_accessible rule
  67 + end
56 68
57 # Number of suggestions by rule 69 # Number of suggestions by rule
58 - SUGGESTIONS_BY_RULE = 10 70 + N_SUGGESTIONS = 30
59 71
60 # Minimum number of suggestions 72 # Minimum number of suggestions
61 - MIN_LIMIT = 15  
62 -  
63 - # Number of friends in common  
64 - COMMON_FRIENDS = 2 73 + MIN_LIMIT = 10
65 74
66 - # Number of communities in common  
67 - COMMON_COMMUNITIES = 2  
68 -  
69 - # Number of tags in common  
70 - COMMON_TAGS = 5 75 + def self.profile_id(rule)
  76 + "#{rule}_profile_id"
  77 + end
71 78
72 def self.connections(rule) 79 def self.connections(rule)
73 - rule.split(/.*_with_/).last + '_connections' 80 + "#{rule}_connections"
74 end 81 end
75 82
76 def self.counter(rule) 83 def self.counter(rule)
77 - rule.split(/.*_with_/).last + '_count' 84 + "#{rule}_count"
78 end 85 end
79 86
80 - def self.register_suggestions(person, suggested_profiles, rule) 87 + # If you are about to rewrite the following sql queries, think twice. After
  88 + # that make sure that whatever you are writing to replace it should be faster
  89 + # than how it is now. Yes, sqls are ugly but are fast! And fast is what we
  90 + # need here.
  91 + #
  92 + # The logic behind this code is to produce a table somewhat like this:
  93 + # profile_id | rule1_count | rule1_connections | rule2_count | rule2_connections | ... | score |
  94 + # 12 | 2 | {32,54} | 3 | {8,22,27} | ... | 13 |
  95 + # 13 | 4 | {3,12,32,54} | 2 | {11,24} | ... | 15 |
  96 + # 14 | | | 2 | {44,56} | ... | 17 |
  97 + # ...
  98 + # ...
  99 + #
  100 + # This table has the suggested profile id and the count and connections of
  101 + # each rule that made this profile be suggested. Each suggestion has a score
  102 + # associated based on the rules' counts and rules' weights.
  103 + #
  104 + # From this table, we can sort suggestions by the score and save a small
  105 + # amount of them in the database. At this moment we also register the
  106 + # connections of each suggestion.
  107 +
  108 + def self.calculate_suggestions(person)
  109 + suggested_profiles = all_suggestions(person)
81 return if suggested_profiles.nil? 110 return if suggested_profiles.nil?
  111 +
82 already_suggested_profiles = person.profile_suggestions.map(&:suggestion_id).join(',') 112 already_suggested_profiles = person.profile_suggestions.map(&:suggestion_id).join(',')
83 suggested_profiles = suggested_profiles.where("profiles.id NOT IN (#{already_suggested_profiles})") if already_suggested_profiles.present? 113 suggested_profiles = suggested_profiles.where("profiles.id NOT IN (#{already_suggested_profiles})") if already_suggested_profiles.present?
84 - suggested_profiles = suggested_profiles.limit(SUGGESTIONS_BY_RULE) 114 + #TODO suggested_profiles = suggested_profiles.order('score DESC')
  115 + suggested_profiles = suggested_profiles.limit(N_SUGGESTIONS)
85 return if suggested_profiles.blank? 116 return if suggested_profiles.blank?
  117 +
86 suggested_profiles.each do |suggested_profile| 118 suggested_profiles.each do |suggested_profile|
87 suggestion = person.profile_suggestions.find_or_initialize_by_suggestion_id(suggested_profile.id) 119 suggestion = person.profile_suggestions.find_or_initialize_by_suggestion_id(suggested_profile.id)
88 - suggestion.send(counter(rule)+'=', suggested_profile.common_count.to_i) 120 + RULES.each do |rule, options|
  121 + value = suggested_profile.send("#{rule}_count").to_i
  122 + suggestion.send("#{rule}=", value)
  123 + #TODO Create suggestion connections.
  124 + suggestion.score += value * options[:weight]
  125 + end
89 suggestion.save! 126 suggestion.save!
90 end 127 end
91 end 128 end
92 129
93 - def self.calculate_suggestions(person)  
94 - ProfileSuggestion::RULES.each do |rule|  
95 - register_suggestions(person, ProfileSuggestion.send(rule, person), rule)  
96 - end  
97 - end  
98 -  
99 - # If you are about to rewrite the following sql queries, think twice. After  
100 - # that make sure that whatever you are writing to replace it should be faster  
101 - # than how it is now. Yes, sqls are ugly but are fast! And fast is what we  
102 - # need here.  
103 -  
104 def self.people_with_common_friends(person) 130 def self.people_with_common_friends(person)
105 - "SELECT person_id as profiles_id, array_agg(friend_id) as common_friends_connections, count(person_id) as common_friends_count FROM  
106 - friendships WHERE friend_id IN (#{person_friends.join(',')}) AND  
107 - person_id NOT IN (#{(person_friends << person.id).join(',')})  
108 - GROUP BY person_id  
109 - HAVING count(person_id) >= #{COMMON_FRIENDS}"  
110 - end  
111 -  
112 - def self.all_suggestions(person)  
113 - select_string = "profiles.*, " + RULES.map { rule| "suggestions.#{counter(rule)} as #{counter(rule)}, suggestions.#{connections(rule)} as #{connections(rule)}" }.join(',')  
114 - suggestions_join = RULES.map.with_index do |rule, i|  
115 - if i == 0  
116 - "(#{self.send(rule, person)}) AS #{rule}"  
117 - else  
118 - previous_rule = RULES[i-1]  
119 - "LEFT OUTER JOIN (#{self.send(rule, person)}) AS #{rule} ON #{previous_rule}.profiles_id = #{rule}.profiles_id"  
120 - end  
121 - end.join(' ')  
122 - join_string = "INNER JOIN (SELECT * FROM #{suggestions_join}) AS suggestions ON profiles.id = suggestions.profiles_id"  
123 - person.environment.profiles.  
124 - select(select_string).  
125 - joins(join_string)  
126 - 131 + person_friends = person.friends.map(&:id)
  132 + rule = "people_with_common_friends"
  133 + return if person_friends.blank?
  134 + "SELECT person_id as #{profile_id(rule)},
  135 + array_agg(friend_id) as #{connections(rule)},
  136 + count(person_id) as #{counter(rule)}
  137 + FROM friendships WHERE friend_id IN (#{person_friends.join(',')})
  138 + AND person_id NOT IN (#{(person_friends << person.id).join(',')})
  139 + GROUP BY person_id"
127 end 140 end
128 141
129 -# def self.people_with_common_friends(person)  
130 -# person_friends = person.friends.map(&:id)  
131 -# return if person_friends.blank?  
132 -# person.environment.people.  
133 -# select("profiles.*, suggestions.count as common_count, suggestions.array_agg as connections").  
134 -# joins("  
135 -# INNER JOIN (SELECT person_id, array_agg(friend_id), count(person_id) FROM  
136 -# friendships WHERE friend_id IN (#{person_friends.join(',')}) AND  
137 -# person_id NOT IN (#{(person_friends << person.id).join(',')})  
138 -# GROUP BY person_id  
139 -# HAVING count(person_id) >= #{COMMON_FRIENDS}) AS suggestions  
140 -# ON profiles.id = suggestions.person_id")  
141 -# end  
142 -  
143 def self.people_with_common_communities(person) 142 def self.people_with_common_communities(person)
144 person_communities = person.communities.map(&:id) 143 person_communities = person.communities.map(&:id)
  144 + rule = "people_with_common_communities"
145 return if person_communities.blank? 145 return if person_communities.blank?
146 - person.environment.people.  
147 - select("profiles.*, suggestions.count AS common_count").  
148 - joins("  
149 - INNER JOIN (SELECT common_members.accessor_id, count(common_members.accessor_id) FROM  
150 - (SELECT DISTINCT accessor_id, resource_id FROM  
151 - role_assignments WHERE role_assignments.resource_id IN (#{person_communities.join(',')}) AND  
152 - role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND  
153 - role_assignments.accessor_type = 'Profile') AS common_members  
154 - GROUP BY common_members.accessor_id  
155 - HAVING count(common_members.accessor_id) >= #{COMMON_COMMUNITIES})  
156 - AS suggestions ON profiles.id = suggestions.accessor_id") 146 + "SELECT common_members.accessor_id as #{profile_id(rule)},
  147 + array_agg(common_members.resource_id) as #{connections(rule)},
  148 + count(common_members.accessor_id) as #{counter(rule)}
  149 + FROM
  150 + (SELECT DISTINCT accessor_id, resource_id FROM
  151 + role_assignments WHERE role_assignments.resource_id IN (#{person_communities.join(',')}) AND
  152 + role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND
  153 + role_assignments.accessor_type = 'Profile') AS common_members
  154 + GROUP BY common_members.accessor_id"
157 end 155 end
158 156
159 def self.people_with_common_tags(person) 157 def self.people_with_common_tags(person)
160 profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id) 158 profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id)
  159 + rule = "people_with_common_tags"
161 return if profile_tags.blank? 160 return if profile_tags.blank?
162 - person.environment.people.  
163 - select("profiles.*, suggestions.count as common_count").  
164 - joins("  
165 - INNER JOIN (  
166 - SELECT results.profiles_id as profiles_id, count(results.profiles_id) FROM (  
167 - SELECT DISTINCT tags.id, profiles.id as profiles_id FROM profiles  
168 - INNER JOIN articles ON articles.profile_id = profiles.id  
169 - INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article'  
170 - INNER JOIN tags ON tags.id = taggings.tag_id  
171 - WHERE (tags.id in (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results  
172 - GROUP BY results.profiles_id  
173 - HAVING count(results.profiles_id) >= #{COMMON_TAGS})  
174 - as suggestions on profiles.id = suggestions.profiles_id") 161 + "SELECT results.profiles_id as #{profile_id(rule)},
  162 + array_agg(results.tags_id) as #{connections(rule)},
  163 + count(results.profiles_id) as #{counter(rule)}
  164 + FROM (
  165 + SELECT DISTINCT tags.id as tags_id, profiles.id as profiles_id FROM profiles
  166 + INNER JOIN articles ON articles.profile_id = profiles.id
  167 + INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article'
  168 + INNER JOIN tags ON tags.id = taggings.tag_id
  169 + WHERE (tags.id in (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results
  170 + GROUP BY results.profiles_id"
175 end 171 end
176 172
177 def self.communities_with_common_friends(person) 173 def self.communities_with_common_friends(person)
178 person_friends = person.friends.map(&:id) 174 person_friends = person.friends.map(&:id)
  175 + rule = "communities_with_common_friends"
179 return if person_friends.blank? 176 return if person_friends.blank?
180 - person.environment.communities.  
181 - select("profiles.*, suggestions.count AS common_count").  
182 - joins("  
183 - INNER JOIN (SELECT common_communities.resource_id, count(common_communities.resource_id) FROM  
184 - (SELECT DISTINCT accessor_id, resource_id FROM  
185 - role_assignments WHERE role_assignments.accessor_id IN (#{person_friends.join(',')}) AND  
186 - role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND  
187 - role_assignments.accessor_type = 'Profile') AS common_communities  
188 - GROUP BY common_communities.resource_id  
189 - HAVING count(common_communities.resource_id) >= #{COMMON_FRIENDS})  
190 - AS suggestions ON profiles.id = suggestions.resource_id") 177 + "SELECT common_communities.resource_id as #{profile_id(rule)},
  178 + array_agg(common_communities.accessor_id) as #{connections(rule)},
  179 + count(common_communities.resource_id) as #{counter(rule)}
  180 + FROM
  181 + (SELECT DISTINCT accessor_id, resource_id FROM
  182 + role_assignments WHERE role_assignments.accessor_id IN (#{person_friends.join(',')}) AND
  183 + role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND
  184 + role_assignments.accessor_type = 'Profile') AS common_communities
  185 + GROUP BY common_communities.resource_id"
191 end 186 end
192 187
193 def self.communities_with_common_tags(person) 188 def self.communities_with_common_tags(person)
194 profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id) 189 profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id)
  190 + rule = "communities_with_common_tags"
195 return if profile_tags.blank? 191 return if profile_tags.blank?
196 - person.environment.communities.  
197 - select("profiles.*, suggestions.count AS common_count").  
198 - joins("  
199 - INNER JOIN (  
200 - SELECT results.profiles_id AS profiles_id, count(results.profiles_id) FROM (  
201 - SELECT DISTINCT tags.id, profiles.id AS profiles_id FROM profiles  
202 - INNER JOIN articles ON articles.profile_id = profiles.id  
203 - INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article'  
204 - INNER JOIN tags ON tags.id = taggings.tag_id  
205 - WHERE (tags.id IN (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results  
206 - GROUP BY results.profiles_id  
207 - HAVING count(results.profiles_id) >= #{COMMON_TAGS})  
208 - AS suggestions ON profiles.id = suggestions.profiles_id") 192 + "SELECT results.profiles_id as #{profile_id(rule)},
  193 + array_agg(results.tags_id) as #{connections(rule)},
  194 + count(results.profiles_id) as #{counter(rule)}
  195 + FROM
  196 + (SELECT DISTINCT tags.id as tags_id, profiles.id AS profiles_id FROM profiles
  197 + INNER JOIN articles ON articles.profile_id = profiles.id
  198 + INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article'
  199 + INNER JOIN tags ON tags.id = taggings.tag_id
  200 + WHERE (tags.id IN (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results
  201 + GROUP BY results.profiles_id"
  202 + end
  203 +
  204 + def self.all_suggestions(person)
  205 + select_string = "profiles.*, " + RULES.keys.map { |rule| "suggestions.#{counter(rule)} as #{counter(rule)}, suggestions.#{connections(rule)} as #{connections(rule)}" }.join(',')
  206 + previous_rule = nil
  207 + suggestions_join = RULES.keys.map do |rule|
  208 + rule_select = "
  209 + (SELECT profiles.id as #{profile_id(rule)},
  210 + #{rule}_sub.#{counter(rule)} as #{counter(rule)},
  211 + #{rule}_sub.#{connections(rule)} as #{connections(rule)}
  212 + FROM profiles
  213 + LEFT OUTER JOIN (#{self.send(rule, person)}) as #{rule}_sub
  214 + ON profiles.id = #{rule}_sub.#{profile_id(rule)}) AS #{rule}"
  215 +
  216 + if previous_rule.nil?
  217 + result = rule_select
  218 + else
  219 + result = "INNER JOIN #{rule_select}
  220 + ON #{previous_rule}.#{profile_id(previous_rule)} = #{rule}.#{profile_id(rule)}"
  221 + end
  222 + previous_rule = rule
  223 + result
  224 + end.join(' ')
  225 + join_string = "INNER JOIN (SELECT * FROM #{suggestions_join}) AS suggestions ON profiles.id = suggestions.#{profile_id(RULES.keys.first)}"
  226 + where_string = RULES.map { |rule, options| "#{counter(rule)} >= #{options[:threshold]}"}.join(' OR ')
  227 +
  228 + person.environment.profiles.
  229 + select(select_string).
  230 + joins(join_string).
  231 + where(where_string)
209 end 232 end
210 233
211 def disable 234 def disable
db/migrate/20140720144212_create_profile_suggestions.rb
@@ -6,7 +6,7 @@ class CreateProfileSuggestions &lt; ActiveRecord::Migration @@ -6,7 +6,7 @@ class CreateProfileSuggestions &lt; ActiveRecord::Migration
6 t.string :suggestion_type 6 t.string :suggestion_type
7 t.text :categories 7 t.text :categories
8 t.boolean :enabled, :default => true 8 t.boolean :enabled, :default => true
9 - t.float :score 9 + t.float :score, :default => 0
10 10
11 t.timestamps 11 t.timestamps
12 end 12 end
@@ -452,7 +452,7 @@ ActiveRecord::Schema.define(:version =&gt; 20140805205626) do @@ -452,7 +452,7 @@ ActiveRecord::Schema.define(:version =&gt; 20140805205626) do
452 t.string "suggestion_type" 452 t.string "suggestion_type"
453 t.text "categories" 453 t.text "categories"
454 t.boolean "enabled", :default => true 454 t.boolean "enabled", :default => true
455 - t.float "score" 455 + t.float "score", :default => 0.0
456 t.datetime "created_at", :null => false 456 t.datetime "created_at", :null => false
457 t.datetime "updated_at", :null => false 457 t.datetime "updated_at", :null => false
458 end 458 end