Commit fd9ec40e5cc949421a351f28ae912a0fcb5dab0c
1 parent
d8fd6277
Exists in
master
and in
27 other branches
profile-suggestions: refactoring suggestion calculation
Building full sql to calculate every suggestion of every rule
Showing
3 changed files
with
150 additions
and
127 deletions
Show diff stats
app/models/profile_suggestion.rb
@@ -28,16 +28,13 @@ class ProfileSuggestion < ActiveRecord::Base | @@ -28,16 +28,13 @@ class ProfileSuggestion < ActiveRecord::Base | ||
28 | 28 | ||
29 | # {:category_type => ['category-icon', 'category-label']} | 29 | # {:category_type => ['category-icon', 'category-label']} |
30 | CATEGORIES = { | 30 | CATEGORIES = { |
31 | - :common_friends => ['menu-people', _('Friends in common')], | ||
32 | - :common_communities => ['menu-community',_('Communities in common')], | ||
33 | - :common_tags => ['edit', _('Tags in common')] | 31 | + :people_with_common_friends => ['menu-people', _('Friends in common')], |
32 | + :people_with_common_communities => ['menu-community',_('Communities in common')], | ||
33 | + :people_with_common_tags => ['edit', _('Tags in common')], | ||
34 | + :communities_with_common_friends => ['menu-people', _('Friends in common')], | ||
35 | + :communities_with_common_tags => ['edit', _('Tags in common')] | ||
34 | } | 36 | } |
35 | 37 | ||
36 | - CATEGORIES.keys.each do |category| | ||
37 | - settings_items category.to_sym | ||
38 | - attr_accessible category.to_sym | ||
39 | - end | ||
40 | - | ||
41 | def category_icon(category) | 38 | def category_icon(category) |
42 | 'icon-' + ProfileSuggestion::CATEGORIES[category][0] | 39 | 'icon-' + ProfileSuggestion::CATEGORIES[category][0] |
43 | end | 40 | end |
@@ -46,166 +43,192 @@ class ProfileSuggestion < ActiveRecord::Base | @@ -46,166 +43,192 @@ class ProfileSuggestion < ActiveRecord::Base | ||
46 | ProfileSuggestion::CATEGORIES[category][1] | 43 | ProfileSuggestion::CATEGORIES[category][1] |
47 | end | 44 | end |
48 | 45 | ||
49 | - RULES = %w[ | ||
50 | - people_with_common_friends | ||
51 | - people_with_common_communities | ||
52 | - people_with_common_tags | ||
53 | - communities_with_common_friends | ||
54 | - communities_with_common_tags | ||
55 | - ] | 46 | + RULES = { |
47 | + :people_with_common_communities => { | ||
48 | + :threshold => 2, :weight => 1 | ||
49 | + }, | ||
50 | + :people_with_common_friends => { | ||
51 | + :threshold => 2, :weight => 1 | ||
52 | + }, | ||
53 | + :people_with_common_tags => { | ||
54 | + :threshold => 2, :weight => 1 | ||
55 | + }, | ||
56 | + :communities_with_common_friends => { | ||
57 | + :threshold => 2, :weight => 1 | ||
58 | + }, | ||
59 | + :communities_with_common_tags => { | ||
60 | + :threshold => 2, :weight => 1 | ||
61 | + } | ||
62 | + } | ||
63 | + | ||
64 | + RULES.keys.each do |rule| | ||
65 | + settings_items rule | ||
66 | + attr_accessible rule | ||
67 | + end | ||
56 | 68 | ||
57 | # Number of suggestions by rule | 69 | # Number of suggestions by rule |
58 | - SUGGESTIONS_BY_RULE = 10 | 70 | + N_SUGGESTIONS = 30 |
59 | 71 | ||
60 | # Minimum number of suggestions | 72 | # Minimum number of suggestions |
61 | - MIN_LIMIT = 15 | ||
62 | - | ||
63 | - # Number of friends in common | ||
64 | - COMMON_FRIENDS = 2 | 73 | + MIN_LIMIT = 10 |
65 | 74 | ||
66 | - # Number of communities in common | ||
67 | - COMMON_COMMUNITIES = 2 | ||
68 | - | ||
69 | - # Number of tags in common | ||
70 | - COMMON_TAGS = 5 | 75 | + def self.profile_id(rule) |
76 | + "#{rule}_profile_id" | ||
77 | + end | ||
71 | 78 | ||
72 | def self.connections(rule) | 79 | def self.connections(rule) |
73 | - rule.split(/.*_with_/).last + '_connections' | 80 | + "#{rule}_connections" |
74 | end | 81 | end |
75 | 82 | ||
76 | def self.counter(rule) | 83 | def self.counter(rule) |
77 | - rule.split(/.*_with_/).last + '_count' | 84 | + "#{rule}_count" |
78 | end | 85 | end |
79 | 86 | ||
80 | - def self.register_suggestions(person, suggested_profiles, rule) | 87 | + # If you are about to rewrite the following sql queries, think twice. After |
88 | + # that make sure that whatever you are writing to replace it should be faster | ||
89 | + # than how it is now. Yes, sqls are ugly but are fast! And fast is what we | ||
90 | + # need here. | ||
91 | + # | ||
92 | + # The logic behind this code is to produce a table somewhat like this: | ||
93 | + # profile_id | rule1_count | rule1_connections | rule2_count | rule2_connections | ... | score | | ||
94 | + # 12 | 2 | {32,54} | 3 | {8,22,27} | ... | 13 | | ||
95 | + # 13 | 4 | {3,12,32,54} | 2 | {11,24} | ... | 15 | | ||
96 | + # 14 | | | 2 | {44,56} | ... | 17 | | ||
97 | + # ... | ||
98 | + # ... | ||
99 | + # | ||
100 | + # This table has the suggested profile id and the count and connections of | ||
101 | + # each rule that made this profile be suggested. Each suggestion has a score | ||
102 | + # associated based on the rules' counts and rules' weights. | ||
103 | + # | ||
104 | + # From this table, we can sort suggestions by the score and save a small | ||
105 | + # amount of them in the database. At this moment we also register the | ||
106 | + # connections of each suggestion. | ||
107 | + | ||
108 | + def self.calculate_suggestions(person) | ||
109 | + suggested_profiles = all_suggestions(person) | ||
81 | return if suggested_profiles.nil? | 110 | return if suggested_profiles.nil? |
111 | + | ||
82 | already_suggested_profiles = person.profile_suggestions.map(&:suggestion_id).join(',') | 112 | already_suggested_profiles = person.profile_suggestions.map(&:suggestion_id).join(',') |
83 | suggested_profiles = suggested_profiles.where("profiles.id NOT IN (#{already_suggested_profiles})") if already_suggested_profiles.present? | 113 | suggested_profiles = suggested_profiles.where("profiles.id NOT IN (#{already_suggested_profiles})") if already_suggested_profiles.present? |
84 | - suggested_profiles = suggested_profiles.limit(SUGGESTIONS_BY_RULE) | 114 | + #TODO suggested_profiles = suggested_profiles.order('score DESC') |
115 | + suggested_profiles = suggested_profiles.limit(N_SUGGESTIONS) | ||
85 | return if suggested_profiles.blank? | 116 | return if suggested_profiles.blank? |
117 | + | ||
86 | suggested_profiles.each do |suggested_profile| | 118 | suggested_profiles.each do |suggested_profile| |
87 | suggestion = person.profile_suggestions.find_or_initialize_by_suggestion_id(suggested_profile.id) | 119 | suggestion = person.profile_suggestions.find_or_initialize_by_suggestion_id(suggested_profile.id) |
88 | - suggestion.send(counter(rule)+'=', suggested_profile.common_count.to_i) | 120 | + RULES.each do |rule, options| |
121 | + value = suggested_profile.send("#{rule}_count").to_i | ||
122 | + suggestion.send("#{rule}=", value) | ||
123 | + #TODO Create suggestion connections. | ||
124 | + suggestion.score += value * options[:weight] | ||
125 | + end | ||
89 | suggestion.save! | 126 | suggestion.save! |
90 | end | 127 | end |
91 | end | 128 | end |
92 | 129 | ||
93 | - def self.calculate_suggestions(person) | ||
94 | - ProfileSuggestion::RULES.each do |rule| | ||
95 | - register_suggestions(person, ProfileSuggestion.send(rule, person), rule) | ||
96 | - end | ||
97 | - end | ||
98 | - | ||
99 | - # If you are about to rewrite the following sql queries, think twice. After | ||
100 | - # that make sure that whatever you are writing to replace it should be faster | ||
101 | - # than how it is now. Yes, sqls are ugly but are fast! And fast is what we | ||
102 | - # need here. | ||
103 | - | ||
104 | def self.people_with_common_friends(person) | 130 | def self.people_with_common_friends(person) |
105 | - "SELECT person_id as profiles_id, array_agg(friend_id) as common_friends_connections, count(person_id) as common_friends_count FROM | ||
106 | - friendships WHERE friend_id IN (#{person_friends.join(',')}) AND | ||
107 | - person_id NOT IN (#{(person_friends << person.id).join(',')}) | ||
108 | - GROUP BY person_id | ||
109 | - HAVING count(person_id) >= #{COMMON_FRIENDS}" | ||
110 | - end | ||
111 | - | ||
112 | - def self.all_suggestions(person) | ||
113 | - select_string = "profiles.*, " + RULES.map { rule| "suggestions.#{counter(rule)} as #{counter(rule)}, suggestions.#{connections(rule)} as #{connections(rule)}" }.join(',') | ||
114 | - suggestions_join = RULES.map.with_index do |rule, i| | ||
115 | - if i == 0 | ||
116 | - "(#{self.send(rule, person)}) AS #{rule}" | ||
117 | - else | ||
118 | - previous_rule = RULES[i-1] | ||
119 | - "LEFT OUTER JOIN (#{self.send(rule, person)}) AS #{rule} ON #{previous_rule}.profiles_id = #{rule}.profiles_id" | ||
120 | - end | ||
121 | - end.join(' ') | ||
122 | - join_string = "INNER JOIN (SELECT * FROM #{suggestions_join}) AS suggestions ON profiles.id = suggestions.profiles_id" | ||
123 | - person.environment.profiles. | ||
124 | - select(select_string). | ||
125 | - joins(join_string) | ||
126 | - | 131 | + person_friends = person.friends.map(&:id) |
132 | + rule = "people_with_common_friends" | ||
133 | + return if person_friends.blank? | ||
134 | + "SELECT person_id as #{profile_id(rule)}, | ||
135 | + array_agg(friend_id) as #{connections(rule)}, | ||
136 | + count(person_id) as #{counter(rule)} | ||
137 | + FROM friendships WHERE friend_id IN (#{person_friends.join(',')}) | ||
138 | + AND person_id NOT IN (#{(person_friends << person.id).join(',')}) | ||
139 | + GROUP BY person_id" | ||
127 | end | 140 | end |
128 | 141 | ||
129 | -# def self.people_with_common_friends(person) | ||
130 | -# person_friends = person.friends.map(&:id) | ||
131 | -# return if person_friends.blank? | ||
132 | -# person.environment.people. | ||
133 | -# select("profiles.*, suggestions.count as common_count, suggestions.array_agg as connections"). | ||
134 | -# joins(" | ||
135 | -# INNER JOIN (SELECT person_id, array_agg(friend_id), count(person_id) FROM | ||
136 | -# friendships WHERE friend_id IN (#{person_friends.join(',')}) AND | ||
137 | -# person_id NOT IN (#{(person_friends << person.id).join(',')}) | ||
138 | -# GROUP BY person_id | ||
139 | -# HAVING count(person_id) >= #{COMMON_FRIENDS}) AS suggestions | ||
140 | -# ON profiles.id = suggestions.person_id") | ||
141 | -# end | ||
142 | - | ||
143 | def self.people_with_common_communities(person) | 142 | def self.people_with_common_communities(person) |
144 | person_communities = person.communities.map(&:id) | 143 | person_communities = person.communities.map(&:id) |
144 | + rule = "people_with_common_communities" | ||
145 | return if person_communities.blank? | 145 | return if person_communities.blank? |
146 | - person.environment.people. | ||
147 | - select("profiles.*, suggestions.count AS common_count"). | ||
148 | - joins(" | ||
149 | - INNER JOIN (SELECT common_members.accessor_id, count(common_members.accessor_id) FROM | ||
150 | - (SELECT DISTINCT accessor_id, resource_id FROM | ||
151 | - role_assignments WHERE role_assignments.resource_id IN (#{person_communities.join(',')}) AND | ||
152 | - role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | ||
153 | - role_assignments.accessor_type = 'Profile') AS common_members | ||
154 | - GROUP BY common_members.accessor_id | ||
155 | - HAVING count(common_members.accessor_id) >= #{COMMON_COMMUNITIES}) | ||
156 | - AS suggestions ON profiles.id = suggestions.accessor_id") | 146 | + "SELECT common_members.accessor_id as #{profile_id(rule)}, |
147 | + array_agg(common_members.resource_id) as #{connections(rule)}, | ||
148 | + count(common_members.accessor_id) as #{counter(rule)} | ||
149 | + FROM | ||
150 | + (SELECT DISTINCT accessor_id, resource_id FROM | ||
151 | + role_assignments WHERE role_assignments.resource_id IN (#{person_communities.join(',')}) AND | ||
152 | + role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | ||
153 | + role_assignments.accessor_type = 'Profile') AS common_members | ||
154 | + GROUP BY common_members.accessor_id" | ||
157 | end | 155 | end |
158 | 156 | ||
159 | def self.people_with_common_tags(person) | 157 | def self.people_with_common_tags(person) |
160 | profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id) | 158 | profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id) |
159 | + rule = "people_with_common_tags" | ||
161 | return if profile_tags.blank? | 160 | return if profile_tags.blank? |
162 | - person.environment.people. | ||
163 | - select("profiles.*, suggestions.count as common_count"). | ||
164 | - joins(" | ||
165 | - INNER JOIN ( | ||
166 | - SELECT results.profiles_id as profiles_id, count(results.profiles_id) FROM ( | ||
167 | - SELECT DISTINCT tags.id, profiles.id as profiles_id FROM profiles | ||
168 | - INNER JOIN articles ON articles.profile_id = profiles.id | ||
169 | - INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | ||
170 | - INNER JOIN tags ON tags.id = taggings.tag_id | ||
171 | - WHERE (tags.id in (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | ||
172 | - GROUP BY results.profiles_id | ||
173 | - HAVING count(results.profiles_id) >= #{COMMON_TAGS}) | ||
174 | - as suggestions on profiles.id = suggestions.profiles_id") | 161 | + "SELECT results.profiles_id as #{profile_id(rule)}, |
162 | + array_agg(results.tags_id) as #{connections(rule)}, | ||
163 | + count(results.profiles_id) as #{counter(rule)} | ||
164 | + FROM ( | ||
165 | + SELECT DISTINCT tags.id as tags_id, profiles.id as profiles_id FROM profiles | ||
166 | + INNER JOIN articles ON articles.profile_id = profiles.id | ||
167 | + INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | ||
168 | + INNER JOIN tags ON tags.id = taggings.tag_id | ||
169 | + WHERE (tags.id in (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | ||
170 | + GROUP BY results.profiles_id" | ||
175 | end | 171 | end |
176 | 172 | ||
177 | def self.communities_with_common_friends(person) | 173 | def self.communities_with_common_friends(person) |
178 | person_friends = person.friends.map(&:id) | 174 | person_friends = person.friends.map(&:id) |
175 | + rule = "communities_with_common_friends" | ||
179 | return if person_friends.blank? | 176 | return if person_friends.blank? |
180 | - person.environment.communities. | ||
181 | - select("profiles.*, suggestions.count AS common_count"). | ||
182 | - joins(" | ||
183 | - INNER JOIN (SELECT common_communities.resource_id, count(common_communities.resource_id) FROM | ||
184 | - (SELECT DISTINCT accessor_id, resource_id FROM | ||
185 | - role_assignments WHERE role_assignments.accessor_id IN (#{person_friends.join(',')}) AND | ||
186 | - role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | ||
187 | - role_assignments.accessor_type = 'Profile') AS common_communities | ||
188 | - GROUP BY common_communities.resource_id | ||
189 | - HAVING count(common_communities.resource_id) >= #{COMMON_FRIENDS}) | ||
190 | - AS suggestions ON profiles.id = suggestions.resource_id") | 177 | + "SELECT common_communities.resource_id as #{profile_id(rule)}, |
178 | + array_agg(common_communities.accessor_id) as #{connections(rule)}, | ||
179 | + count(common_communities.resource_id) as #{counter(rule)} | ||
180 | + FROM | ||
181 | + (SELECT DISTINCT accessor_id, resource_id FROM | ||
182 | + role_assignments WHERE role_assignments.accessor_id IN (#{person_friends.join(',')}) AND | ||
183 | + role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | ||
184 | + role_assignments.accessor_type = 'Profile') AS common_communities | ||
185 | + GROUP BY common_communities.resource_id" | ||
191 | end | 186 | end |
192 | 187 | ||
193 | def self.communities_with_common_tags(person) | 188 | def self.communities_with_common_tags(person) |
194 | profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id) | 189 | profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id) |
190 | + rule = "communities_with_common_tags" | ||
195 | return if profile_tags.blank? | 191 | return if profile_tags.blank? |
196 | - person.environment.communities. | ||
197 | - select("profiles.*, suggestions.count AS common_count"). | ||
198 | - joins(" | ||
199 | - INNER JOIN ( | ||
200 | - SELECT results.profiles_id AS profiles_id, count(results.profiles_id) FROM ( | ||
201 | - SELECT DISTINCT tags.id, profiles.id AS profiles_id FROM profiles | ||
202 | - INNER JOIN articles ON articles.profile_id = profiles.id | ||
203 | - INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | ||
204 | - INNER JOIN tags ON tags.id = taggings.tag_id | ||
205 | - WHERE (tags.id IN (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | ||
206 | - GROUP BY results.profiles_id | ||
207 | - HAVING count(results.profiles_id) >= #{COMMON_TAGS}) | ||
208 | - AS suggestions ON profiles.id = suggestions.profiles_id") | 192 | + "SELECT results.profiles_id as #{profile_id(rule)}, |
193 | + array_agg(results.tags_id) as #{connections(rule)}, | ||
194 | + count(results.profiles_id) as #{counter(rule)} | ||
195 | + FROM | ||
196 | + (SELECT DISTINCT tags.id as tags_id, profiles.id AS profiles_id FROM profiles | ||
197 | + INNER JOIN articles ON articles.profile_id = profiles.id | ||
198 | + INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | ||
199 | + INNER JOIN tags ON tags.id = taggings.tag_id | ||
200 | + WHERE (tags.id IN (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | ||
201 | + GROUP BY results.profiles_id" | ||
202 | + end | ||
203 | + | ||
204 | + def self.all_suggestions(person) | ||
205 | + select_string = "profiles.*, " + RULES.keys.map { |rule| "suggestions.#{counter(rule)} as #{counter(rule)}, suggestions.#{connections(rule)} as #{connections(rule)}" }.join(',') | ||
206 | + previous_rule = nil | ||
207 | + suggestions_join = RULES.keys.map do |rule| | ||
208 | + rule_select = " | ||
209 | + (SELECT profiles.id as #{profile_id(rule)}, | ||
210 | + #{rule}_sub.#{counter(rule)} as #{counter(rule)}, | ||
211 | + #{rule}_sub.#{connections(rule)} as #{connections(rule)} | ||
212 | + FROM profiles | ||
213 | + LEFT OUTER JOIN (#{self.send(rule, person)}) as #{rule}_sub | ||
214 | + ON profiles.id = #{rule}_sub.#{profile_id(rule)}) AS #{rule}" | ||
215 | + | ||
216 | + if previous_rule.nil? | ||
217 | + result = rule_select | ||
218 | + else | ||
219 | + result = "INNER JOIN #{rule_select} | ||
220 | + ON #{previous_rule}.#{profile_id(previous_rule)} = #{rule}.#{profile_id(rule)}" | ||
221 | + end | ||
222 | + previous_rule = rule | ||
223 | + result | ||
224 | + end.join(' ') | ||
225 | + join_string = "INNER JOIN (SELECT * FROM #{suggestions_join}) AS suggestions ON profiles.id = suggestions.#{profile_id(RULES.keys.first)}" | ||
226 | + where_string = RULES.map { |rule, options| "#{counter(rule)} >= #{options[:threshold]}"}.join(' OR ') | ||
227 | + | ||
228 | + person.environment.profiles. | ||
229 | + select(select_string). | ||
230 | + joins(join_string). | ||
231 | + where(where_string) | ||
209 | end | 232 | end |
210 | 233 | ||
211 | def disable | 234 | def disable |
db/migrate/20140720144212_create_profile_suggestions.rb
@@ -6,7 +6,7 @@ class CreateProfileSuggestions < ActiveRecord::Migration | @@ -6,7 +6,7 @@ class CreateProfileSuggestions < ActiveRecord::Migration | ||
6 | t.string :suggestion_type | 6 | t.string :suggestion_type |
7 | t.text :categories | 7 | t.text :categories |
8 | t.boolean :enabled, :default => true | 8 | t.boolean :enabled, :default => true |
9 | - t.float :score | 9 | + t.float :score, :default => 0 |
10 | 10 | ||
11 | t.timestamps | 11 | t.timestamps |
12 | end | 12 | end |
db/schema.rb
@@ -452,7 +452,7 @@ ActiveRecord::Schema.define(:version => 20140805205626) do | @@ -452,7 +452,7 @@ ActiveRecord::Schema.define(:version => 20140805205626) do | ||
452 | t.string "suggestion_type" | 452 | t.string "suggestion_type" |
453 | t.text "categories" | 453 | t.text "categories" |
454 | t.boolean "enabled", :default => true | 454 | t.boolean "enabled", :default => true |
455 | - t.float "score" | 455 | + t.float "score", :default => 0.0 |
456 | t.datetime "created_at", :null => false | 456 | t.datetime "created_at", :null => false |
457 | t.datetime "updated_at", :null => false | 457 | t.datetime "updated_at", :null => false |
458 | end | 458 | end |