Commit fd9ec40e5cc949421a351f28ae912a0fcb5dab0c
1 parent
d8fd6277
Exists in
master
and in
27 other branches
profile-suggestions: refactoring suggestion calculation
Building full sql to calculate every suggestion of every rule
Showing
3 changed files
with
150 additions
and
127 deletions
Show diff stats
app/models/profile_suggestion.rb
... | ... | @@ -28,16 +28,13 @@ class ProfileSuggestion < ActiveRecord::Base |
28 | 28 | |
29 | 29 | # {:category_type => ['category-icon', 'category-label']} |
30 | 30 | CATEGORIES = { |
31 | - :common_friends => ['menu-people', _('Friends in common')], | |
32 | - :common_communities => ['menu-community',_('Communities in common')], | |
33 | - :common_tags => ['edit', _('Tags in common')] | |
31 | + :people_with_common_friends => ['menu-people', _('Friends in common')], | |
32 | + :people_with_common_communities => ['menu-community',_('Communities in common')], | |
33 | + :people_with_common_tags => ['edit', _('Tags in common')], | |
34 | + :communities_with_common_friends => ['menu-people', _('Friends in common')], | |
35 | + :communities_with_common_tags => ['edit', _('Tags in common')] | |
34 | 36 | } |
35 | 37 | |
36 | - CATEGORIES.keys.each do |category| | |
37 | - settings_items category.to_sym | |
38 | - attr_accessible category.to_sym | |
39 | - end | |
40 | - | |
41 | 38 | def category_icon(category) |
42 | 39 | 'icon-' + ProfileSuggestion::CATEGORIES[category][0] |
43 | 40 | end |
... | ... | @@ -46,166 +43,192 @@ class ProfileSuggestion < ActiveRecord::Base |
46 | 43 | ProfileSuggestion::CATEGORIES[category][1] |
47 | 44 | end |
48 | 45 | |
49 | - RULES = %w[ | |
50 | - people_with_common_friends | |
51 | - people_with_common_communities | |
52 | - people_with_common_tags | |
53 | - communities_with_common_friends | |
54 | - communities_with_common_tags | |
55 | - ] | |
46 | + RULES = { | |
47 | + :people_with_common_communities => { | |
48 | + :threshold => 2, :weight => 1 | |
49 | + }, | |
50 | + :people_with_common_friends => { | |
51 | + :threshold => 2, :weight => 1 | |
52 | + }, | |
53 | + :people_with_common_tags => { | |
54 | + :threshold => 2, :weight => 1 | |
55 | + }, | |
56 | + :communities_with_common_friends => { | |
57 | + :threshold => 2, :weight => 1 | |
58 | + }, | |
59 | + :communities_with_common_tags => { | |
60 | + :threshold => 2, :weight => 1 | |
61 | + } | |
62 | + } | |
63 | + | |
64 | + RULES.keys.each do |rule| | |
65 | + settings_items rule | |
66 | + attr_accessible rule | |
67 | + end | |
56 | 68 | |
57 | 69 | # Number of suggestions by rule |
58 | - SUGGESTIONS_BY_RULE = 10 | |
70 | + N_SUGGESTIONS = 30 | |
59 | 71 | |
60 | 72 | # Minimum number of suggestions |
61 | - MIN_LIMIT = 15 | |
62 | - | |
63 | - # Number of friends in common | |
64 | - COMMON_FRIENDS = 2 | |
73 | + MIN_LIMIT = 10 | |
65 | 74 | |
66 | - # Number of communities in common | |
67 | - COMMON_COMMUNITIES = 2 | |
68 | - | |
69 | - # Number of tags in common | |
70 | - COMMON_TAGS = 5 | |
75 | + def self.profile_id(rule) | |
76 | + "#{rule}_profile_id" | |
77 | + end | |
71 | 78 | |
72 | 79 | def self.connections(rule) |
73 | - rule.split(/.*_with_/).last + '_connections' | |
80 | + "#{rule}_connections" | |
74 | 81 | end |
75 | 82 | |
76 | 83 | def self.counter(rule) |
77 | - rule.split(/.*_with_/).last + '_count' | |
84 | + "#{rule}_count" | |
78 | 85 | end |
79 | 86 | |
80 | - def self.register_suggestions(person, suggested_profiles, rule) | |
87 | + # If you are about to rewrite the following sql queries, think twice. After | |
88 | + # that make sure that whatever you are writing to replace it should be faster | |
89 | + # than how it is now. Yes, sqls are ugly but are fast! And fast is what we | |
90 | + # need here. | |
91 | + # | |
92 | + # The logic behind this code is to produce a table somewhat like this: | |
93 | + # profile_id | rule1_count | rule1_connections | rule2_count | rule2_connections | ... | score | | |
94 | + # 12 | 2 | {32,54} | 3 | {8,22,27} | ... | 13 | | |
95 | + # 13 | 4 | {3,12,32,54} | 2 | {11,24} | ... | 15 | | |
96 | + # 14 | | | 2 | {44,56} | ... | 17 | | |
97 | + # ... | |
98 | + # ... | |
99 | + # | |
100 | + # This table has the suggested profile id and the count and connections of | |
101 | + # each rule that made this profile be suggested. Each suggestion has a score | |
102 | + # associated based on the rules' counts and rules' weights. | |
103 | + # | |
104 | + # From this table, we can sort suggestions by the score and save a small | |
105 | + # amount of them in the database. At this moment we also register the | |
106 | + # connections of each suggestion. | |
107 | + | |
108 | + def self.calculate_suggestions(person) | |
109 | + suggested_profiles = all_suggestions(person) | |
81 | 110 | return if suggested_profiles.nil? |
111 | + | |
82 | 112 | already_suggested_profiles = person.profile_suggestions.map(&:suggestion_id).join(',') |
83 | 113 | suggested_profiles = suggested_profiles.where("profiles.id NOT IN (#{already_suggested_profiles})") if already_suggested_profiles.present? |
84 | - suggested_profiles = suggested_profiles.limit(SUGGESTIONS_BY_RULE) | |
114 | + #TODO suggested_profiles = suggested_profiles.order('score DESC') | |
115 | + suggested_profiles = suggested_profiles.limit(N_SUGGESTIONS) | |
85 | 116 | return if suggested_profiles.blank? |
117 | + | |
86 | 118 | suggested_profiles.each do |suggested_profile| |
87 | 119 | suggestion = person.profile_suggestions.find_or_initialize_by_suggestion_id(suggested_profile.id) |
88 | - suggestion.send(counter(rule)+'=', suggested_profile.common_count.to_i) | |
120 | + RULES.each do |rule, options| | |
121 | + value = suggested_profile.send("#{rule}_count").to_i | |
122 | + suggestion.send("#{rule}=", value) | |
123 | + #TODO Create suggestion connections. | |
124 | + suggestion.score += value * options[:weight] | |
125 | + end | |
89 | 126 | suggestion.save! |
90 | 127 | end |
91 | 128 | end |
92 | 129 | |
93 | - def self.calculate_suggestions(person) | |
94 | - ProfileSuggestion::RULES.each do |rule| | |
95 | - register_suggestions(person, ProfileSuggestion.send(rule, person), rule) | |
96 | - end | |
97 | - end | |
98 | - | |
99 | - # If you are about to rewrite the following sql queries, think twice. After | |
100 | - # that make sure that whatever you are writing to replace it should be faster | |
101 | - # than how it is now. Yes, sqls are ugly but are fast! And fast is what we | |
102 | - # need here. | |
103 | - | |
104 | 130 | def self.people_with_common_friends(person) |
105 | - "SELECT person_id as profiles_id, array_agg(friend_id) as common_friends_connections, count(person_id) as common_friends_count FROM | |
106 | - friendships WHERE friend_id IN (#{person_friends.join(',')}) AND | |
107 | - person_id NOT IN (#{(person_friends << person.id).join(',')}) | |
108 | - GROUP BY person_id | |
109 | - HAVING count(person_id) >= #{COMMON_FRIENDS}" | |
110 | - end | |
111 | - | |
112 | - def self.all_suggestions(person) | |
113 | - select_string = "profiles.*, " + RULES.map { rule| "suggestions.#{counter(rule)} as #{counter(rule)}, suggestions.#{connections(rule)} as #{connections(rule)}" }.join(',') | |
114 | - suggestions_join = RULES.map.with_index do |rule, i| | |
115 | - if i == 0 | |
116 | - "(#{self.send(rule, person)}) AS #{rule}" | |
117 | - else | |
118 | - previous_rule = RULES[i-1] | |
119 | - "LEFT OUTER JOIN (#{self.send(rule, person)}) AS #{rule} ON #{previous_rule}.profiles_id = #{rule}.profiles_id" | |
120 | - end | |
121 | - end.join(' ') | |
122 | - join_string = "INNER JOIN (SELECT * FROM #{suggestions_join}) AS suggestions ON profiles.id = suggestions.profiles_id" | |
123 | - person.environment.profiles. | |
124 | - select(select_string). | |
125 | - joins(join_string) | |
126 | - | |
131 | + person_friends = person.friends.map(&:id) | |
132 | + rule = "people_with_common_friends" | |
133 | + return if person_friends.blank? | |
134 | + "SELECT person_id as #{profile_id(rule)}, | |
135 | + array_agg(friend_id) as #{connections(rule)}, | |
136 | + count(person_id) as #{counter(rule)} | |
137 | + FROM friendships WHERE friend_id IN (#{person_friends.join(',')}) | |
138 | + AND person_id NOT IN (#{(person_friends << person.id).join(',')}) | |
139 | + GROUP BY person_id" | |
127 | 140 | end |
128 | 141 | |
129 | -# def self.people_with_common_friends(person) | |
130 | -# person_friends = person.friends.map(&:id) | |
131 | -# return if person_friends.blank? | |
132 | -# person.environment.people. | |
133 | -# select("profiles.*, suggestions.count as common_count, suggestions.array_agg as connections"). | |
134 | -# joins(" | |
135 | -# INNER JOIN (SELECT person_id, array_agg(friend_id), count(person_id) FROM | |
136 | -# friendships WHERE friend_id IN (#{person_friends.join(',')}) AND | |
137 | -# person_id NOT IN (#{(person_friends << person.id).join(',')}) | |
138 | -# GROUP BY person_id | |
139 | -# HAVING count(person_id) >= #{COMMON_FRIENDS}) AS suggestions | |
140 | -# ON profiles.id = suggestions.person_id") | |
141 | -# end | |
142 | - | |
143 | 142 | def self.people_with_common_communities(person) |
144 | 143 | person_communities = person.communities.map(&:id) |
144 | + rule = "people_with_common_communities" | |
145 | 145 | return if person_communities.blank? |
146 | - person.environment.people. | |
147 | - select("profiles.*, suggestions.count AS common_count"). | |
148 | - joins(" | |
149 | - INNER JOIN (SELECT common_members.accessor_id, count(common_members.accessor_id) FROM | |
150 | - (SELECT DISTINCT accessor_id, resource_id FROM | |
151 | - role_assignments WHERE role_assignments.resource_id IN (#{person_communities.join(',')}) AND | |
152 | - role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | |
153 | - role_assignments.accessor_type = 'Profile') AS common_members | |
154 | - GROUP BY common_members.accessor_id | |
155 | - HAVING count(common_members.accessor_id) >= #{COMMON_COMMUNITIES}) | |
156 | - AS suggestions ON profiles.id = suggestions.accessor_id") | |
146 | + "SELECT common_members.accessor_id as #{profile_id(rule)}, | |
147 | + array_agg(common_members.resource_id) as #{connections(rule)}, | |
148 | + count(common_members.accessor_id) as #{counter(rule)} | |
149 | + FROM | |
150 | + (SELECT DISTINCT accessor_id, resource_id FROM | |
151 | + role_assignments WHERE role_assignments.resource_id IN (#{person_communities.join(',')}) AND | |
152 | + role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | |
153 | + role_assignments.accessor_type = 'Profile') AS common_members | |
154 | + GROUP BY common_members.accessor_id" | |
157 | 155 | end |
158 | 156 | |
159 | 157 | def self.people_with_common_tags(person) |
160 | 158 | profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id) |
159 | + rule = "people_with_common_tags" | |
161 | 160 | return if profile_tags.blank? |
162 | - person.environment.people. | |
163 | - select("profiles.*, suggestions.count as common_count"). | |
164 | - joins(" | |
165 | - INNER JOIN ( | |
166 | - SELECT results.profiles_id as profiles_id, count(results.profiles_id) FROM ( | |
167 | - SELECT DISTINCT tags.id, profiles.id as profiles_id FROM profiles | |
168 | - INNER JOIN articles ON articles.profile_id = profiles.id | |
169 | - INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | |
170 | - INNER JOIN tags ON tags.id = taggings.tag_id | |
171 | - WHERE (tags.id in (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | |
172 | - GROUP BY results.profiles_id | |
173 | - HAVING count(results.profiles_id) >= #{COMMON_TAGS}) | |
174 | - as suggestions on profiles.id = suggestions.profiles_id") | |
161 | + "SELECT results.profiles_id as #{profile_id(rule)}, | |
162 | + array_agg(results.tags_id) as #{connections(rule)}, | |
163 | + count(results.profiles_id) as #{counter(rule)} | |
164 | + FROM ( | |
165 | + SELECT DISTINCT tags.id as tags_id, profiles.id as profiles_id FROM profiles | |
166 | + INNER JOIN articles ON articles.profile_id = profiles.id | |
167 | + INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | |
168 | + INNER JOIN tags ON tags.id = taggings.tag_id | |
169 | + WHERE (tags.id in (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | |
170 | + GROUP BY results.profiles_id" | |
175 | 171 | end |
176 | 172 | |
177 | 173 | def self.communities_with_common_friends(person) |
178 | 174 | person_friends = person.friends.map(&:id) |
175 | + rule = "communities_with_common_friends" | |
179 | 176 | return if person_friends.blank? |
180 | - person.environment.communities. | |
181 | - select("profiles.*, suggestions.count AS common_count"). | |
182 | - joins(" | |
183 | - INNER JOIN (SELECT common_communities.resource_id, count(common_communities.resource_id) FROM | |
184 | - (SELECT DISTINCT accessor_id, resource_id FROM | |
185 | - role_assignments WHERE role_assignments.accessor_id IN (#{person_friends.join(',')}) AND | |
186 | - role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | |
187 | - role_assignments.accessor_type = 'Profile') AS common_communities | |
188 | - GROUP BY common_communities.resource_id | |
189 | - HAVING count(common_communities.resource_id) >= #{COMMON_FRIENDS}) | |
190 | - AS suggestions ON profiles.id = suggestions.resource_id") | |
177 | + "SELECT common_communities.resource_id as #{profile_id(rule)}, | |
178 | + array_agg(common_communities.accessor_id) as #{connections(rule)}, | |
179 | + count(common_communities.resource_id) as #{counter(rule)} | |
180 | + FROM | |
181 | + (SELECT DISTINCT accessor_id, resource_id FROM | |
182 | + role_assignments WHERE role_assignments.accessor_id IN (#{person_friends.join(',')}) AND | |
183 | + role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | |
184 | + role_assignments.accessor_type = 'Profile') AS common_communities | |
185 | + GROUP BY common_communities.resource_id" | |
191 | 186 | end |
192 | 187 | |
193 | 188 | def self.communities_with_common_tags(person) |
194 | 189 | profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id) |
190 | + rule = "communities_with_common_tags" | |
195 | 191 | return if profile_tags.blank? |
196 | - person.environment.communities. | |
197 | - select("profiles.*, suggestions.count AS common_count"). | |
198 | - joins(" | |
199 | - INNER JOIN ( | |
200 | - SELECT results.profiles_id AS profiles_id, count(results.profiles_id) FROM ( | |
201 | - SELECT DISTINCT tags.id, profiles.id AS profiles_id FROM profiles | |
202 | - INNER JOIN articles ON articles.profile_id = profiles.id | |
203 | - INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | |
204 | - INNER JOIN tags ON tags.id = taggings.tag_id | |
205 | - WHERE (tags.id IN (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | |
206 | - GROUP BY results.profiles_id | |
207 | - HAVING count(results.profiles_id) >= #{COMMON_TAGS}) | |
208 | - AS suggestions ON profiles.id = suggestions.profiles_id") | |
192 | + "SELECT results.profiles_id as #{profile_id(rule)}, | |
193 | + array_agg(results.tags_id) as #{connections(rule)}, | |
194 | + count(results.profiles_id) as #{counter(rule)} | |
195 | + FROM | |
196 | + (SELECT DISTINCT tags.id as tags_id, profiles.id AS profiles_id FROM profiles | |
197 | + INNER JOIN articles ON articles.profile_id = profiles.id | |
198 | + INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | |
199 | + INNER JOIN tags ON tags.id = taggings.tag_id | |
200 | + WHERE (tags.id IN (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | |
201 | + GROUP BY results.profiles_id" | |
202 | + end | |
203 | + | |
204 | + def self.all_suggestions(person) | |
205 | + select_string = "profiles.*, " + RULES.keys.map { |rule| "suggestions.#{counter(rule)} as #{counter(rule)}, suggestions.#{connections(rule)} as #{connections(rule)}" }.join(',') | |
206 | + previous_rule = nil | |
207 | + suggestions_join = RULES.keys.map do |rule| | |
208 | + rule_select = " | |
209 | + (SELECT profiles.id as #{profile_id(rule)}, | |
210 | + #{rule}_sub.#{counter(rule)} as #{counter(rule)}, | |
211 | + #{rule}_sub.#{connections(rule)} as #{connections(rule)} | |
212 | + FROM profiles | |
213 | + LEFT OUTER JOIN (#{self.send(rule, person)}) as #{rule}_sub | |
214 | + ON profiles.id = #{rule}_sub.#{profile_id(rule)}) AS #{rule}" | |
215 | + | |
216 | + if previous_rule.nil? | |
217 | + result = rule_select | |
218 | + else | |
219 | + result = "INNER JOIN #{rule_select} | |
220 | + ON #{previous_rule}.#{profile_id(previous_rule)} = #{rule}.#{profile_id(rule)}" | |
221 | + end | |
222 | + previous_rule = rule | |
223 | + result | |
224 | + end.join(' ') | |
225 | + join_string = "INNER JOIN (SELECT * FROM #{suggestions_join}) AS suggestions ON profiles.id = suggestions.#{profile_id(RULES.keys.first)}" | |
226 | + where_string = RULES.map { |rule, options| "#{counter(rule)} >= #{options[:threshold]}"}.join(' OR ') | |
227 | + | |
228 | + person.environment.profiles. | |
229 | + select(select_string). | |
230 | + joins(join_string). | |
231 | + where(where_string) | |
209 | 232 | end |
210 | 233 | |
211 | 234 | def disable | ... | ... |
db/migrate/20140720144212_create_profile_suggestions.rb
db/schema.rb
... | ... | @@ -452,7 +452,7 @@ ActiveRecord::Schema.define(:version => 20140805205626) do |
452 | 452 | t.string "suggestion_type" |
453 | 453 | t.text "categories" |
454 | 454 | t.boolean "enabled", :default => true |
455 | - t.float "score" | |
455 | + t.float "score", :default => 0.0 | |
456 | 456 | t.datetime "created_at", :null => false |
457 | 457 | t.datetime "updated_at", :null => false |
458 | 458 | end | ... | ... |