Commit fd9ec40e5cc949421a351f28ae912a0fcb5dab0c
1 parent
d8fd6277
Exists in
staging
and in
26 other branches
profile-suggestions: refactoring suggestion calculation
Building full sql to calculate every suggestion of every rule
Showing
3 changed files
with
150 additions
and
127 deletions
Show diff stats
app/models/profile_suggestion.rb
| ... | ... | @@ -28,16 +28,13 @@ class ProfileSuggestion < ActiveRecord::Base |
| 28 | 28 | |
| 29 | 29 | # {:category_type => ['category-icon', 'category-label']} |
| 30 | 30 | CATEGORIES = { |
| 31 | - :common_friends => ['menu-people', _('Friends in common')], | |
| 32 | - :common_communities => ['menu-community',_('Communities in common')], | |
| 33 | - :common_tags => ['edit', _('Tags in common')] | |
| 31 | + :people_with_common_friends => ['menu-people', _('Friends in common')], | |
| 32 | + :people_with_common_communities => ['menu-community',_('Communities in common')], | |
| 33 | + :people_with_common_tags => ['edit', _('Tags in common')], | |
| 34 | + :communities_with_common_friends => ['menu-people', _('Friends in common')], | |
| 35 | + :communities_with_common_tags => ['edit', _('Tags in common')] | |
| 34 | 36 | } |
| 35 | 37 | |
| 36 | - CATEGORIES.keys.each do |category| | |
| 37 | - settings_items category.to_sym | |
| 38 | - attr_accessible category.to_sym | |
| 39 | - end | |
| 40 | - | |
| 41 | 38 | def category_icon(category) |
| 42 | 39 | 'icon-' + ProfileSuggestion::CATEGORIES[category][0] |
| 43 | 40 | end |
| ... | ... | @@ -46,166 +43,192 @@ class ProfileSuggestion < ActiveRecord::Base |
| 46 | 43 | ProfileSuggestion::CATEGORIES[category][1] |
| 47 | 44 | end |
| 48 | 45 | |
| 49 | - RULES = %w[ | |
| 50 | - people_with_common_friends | |
| 51 | - people_with_common_communities | |
| 52 | - people_with_common_tags | |
| 53 | - communities_with_common_friends | |
| 54 | - communities_with_common_tags | |
| 55 | - ] | |
| 46 | + RULES = { | |
| 47 | + :people_with_common_communities => { | |
| 48 | + :threshold => 2, :weight => 1 | |
| 49 | + }, | |
| 50 | + :people_with_common_friends => { | |
| 51 | + :threshold => 2, :weight => 1 | |
| 52 | + }, | |
| 53 | + :people_with_common_tags => { | |
| 54 | + :threshold => 2, :weight => 1 | |
| 55 | + }, | |
| 56 | + :communities_with_common_friends => { | |
| 57 | + :threshold => 2, :weight => 1 | |
| 58 | + }, | |
| 59 | + :communities_with_common_tags => { | |
| 60 | + :threshold => 2, :weight => 1 | |
| 61 | + } | |
| 62 | + } | |
| 63 | + | |
| 64 | + RULES.keys.each do |rule| | |
| 65 | + settings_items rule | |
| 66 | + attr_accessible rule | |
| 67 | + end | |
| 56 | 68 | |
| 57 | 69 | # Number of suggestions by rule |
| 58 | - SUGGESTIONS_BY_RULE = 10 | |
| 70 | + N_SUGGESTIONS = 30 | |
| 59 | 71 | |
| 60 | 72 | # Minimum number of suggestions |
| 61 | - MIN_LIMIT = 15 | |
| 62 | - | |
| 63 | - # Number of friends in common | |
| 64 | - COMMON_FRIENDS = 2 | |
| 73 | + MIN_LIMIT = 10 | |
| 65 | 74 | |
| 66 | - # Number of communities in common | |
| 67 | - COMMON_COMMUNITIES = 2 | |
| 68 | - | |
| 69 | - # Number of tags in common | |
| 70 | - COMMON_TAGS = 5 | |
| 75 | + def self.profile_id(rule) | |
| 76 | + "#{rule}_profile_id" | |
| 77 | + end | |
| 71 | 78 | |
| 72 | 79 | def self.connections(rule) |
| 73 | - rule.split(/.*_with_/).last + '_connections' | |
| 80 | + "#{rule}_connections" | |
| 74 | 81 | end |
| 75 | 82 | |
| 76 | 83 | def self.counter(rule) |
| 77 | - rule.split(/.*_with_/).last + '_count' | |
| 84 | + "#{rule}_count" | |
| 78 | 85 | end |
| 79 | 86 | |
| 80 | - def self.register_suggestions(person, suggested_profiles, rule) | |
| 87 | + # If you are about to rewrite the following sql queries, think twice. After | |
| 88 | + # that make sure that whatever you are writing to replace it should be faster | |
| 89 | + # than how it is now. Yes, sqls are ugly but are fast! And fast is what we | |
| 90 | + # need here. | |
| 91 | + # | |
| 92 | + # The logic behind this code is to produce a table somewhat like this: | |
| 93 | + # profile_id | rule1_count | rule1_connections | rule2_count | rule2_connections | ... | score | | |
| 94 | + # 12 | 2 | {32,54} | 3 | {8,22,27} | ... | 13 | | |
| 95 | + # 13 | 4 | {3,12,32,54} | 2 | {11,24} | ... | 15 | | |
| 96 | + # 14 | | | 2 | {44,56} | ... | 17 | | |
| 97 | + # ... | |
| 98 | + # ... | |
| 99 | + # | |
| 100 | + # This table has the suggested profile id and the count and connections of | |
| 101 | + # each rule that made this profile be suggested. Each suggestion has a score | |
| 102 | + # associated based on the rules' counts and rules' weights. | |
| 103 | + # | |
| 104 | + # From this table, we can sort suggestions by the score and save a small | |
| 105 | + # amount of them in the database. At this moment we also register the | |
| 106 | + # connections of each suggestion. | |
| 107 | + | |
| 108 | + def self.calculate_suggestions(person) | |
| 109 | + suggested_profiles = all_suggestions(person) | |
| 81 | 110 | return if suggested_profiles.nil? |
| 111 | + | |
| 82 | 112 | already_suggested_profiles = person.profile_suggestions.map(&:suggestion_id).join(',') |
| 83 | 113 | suggested_profiles = suggested_profiles.where("profiles.id NOT IN (#{already_suggested_profiles})") if already_suggested_profiles.present? |
| 84 | - suggested_profiles = suggested_profiles.limit(SUGGESTIONS_BY_RULE) | |
| 114 | + #TODO suggested_profiles = suggested_profiles.order('score DESC') | |
| 115 | + suggested_profiles = suggested_profiles.limit(N_SUGGESTIONS) | |
| 85 | 116 | return if suggested_profiles.blank? |
| 117 | + | |
| 86 | 118 | suggested_profiles.each do |suggested_profile| |
| 87 | 119 | suggestion = person.profile_suggestions.find_or_initialize_by_suggestion_id(suggested_profile.id) |
| 88 | - suggestion.send(counter(rule)+'=', suggested_profile.common_count.to_i) | |
| 120 | + RULES.each do |rule, options| | |
| 121 | + value = suggested_profile.send("#{rule}_count").to_i | |
| 122 | + suggestion.send("#{rule}=", value) | |
| 123 | + #TODO Create suggestion connections. | |
| 124 | + suggestion.score += value * options[:weight] | |
| 125 | + end | |
| 89 | 126 | suggestion.save! |
| 90 | 127 | end |
| 91 | 128 | end |
| 92 | 129 | |
| 93 | - def self.calculate_suggestions(person) | |
| 94 | - ProfileSuggestion::RULES.each do |rule| | |
| 95 | - register_suggestions(person, ProfileSuggestion.send(rule, person), rule) | |
| 96 | - end | |
| 97 | - end | |
| 98 | - | |
| 99 | - # If you are about to rewrite the following sql queries, think twice. After | |
| 100 | - # that make sure that whatever you are writing to replace it should be faster | |
| 101 | - # than how it is now. Yes, sqls are ugly but are fast! And fast is what we | |
| 102 | - # need here. | |
| 103 | - | |
| 104 | 130 | def self.people_with_common_friends(person) |
| 105 | - "SELECT person_id as profiles_id, array_agg(friend_id) as common_friends_connections, count(person_id) as common_friends_count FROM | |
| 106 | - friendships WHERE friend_id IN (#{person_friends.join(',')}) AND | |
| 107 | - person_id NOT IN (#{(person_friends << person.id).join(',')}) | |
| 108 | - GROUP BY person_id | |
| 109 | - HAVING count(person_id) >= #{COMMON_FRIENDS}" | |
| 110 | - end | |
| 111 | - | |
| 112 | - def self.all_suggestions(person) | |
| 113 | - select_string = "profiles.*, " + RULES.map { rule| "suggestions.#{counter(rule)} as #{counter(rule)}, suggestions.#{connections(rule)} as #{connections(rule)}" }.join(',') | |
| 114 | - suggestions_join = RULES.map.with_index do |rule, i| | |
| 115 | - if i == 0 | |
| 116 | - "(#{self.send(rule, person)}) AS #{rule}" | |
| 117 | - else | |
| 118 | - previous_rule = RULES[i-1] | |
| 119 | - "LEFT OUTER JOIN (#{self.send(rule, person)}) AS #{rule} ON #{previous_rule}.profiles_id = #{rule}.profiles_id" | |
| 120 | - end | |
| 121 | - end.join(' ') | |
| 122 | - join_string = "INNER JOIN (SELECT * FROM #{suggestions_join}) AS suggestions ON profiles.id = suggestions.profiles_id" | |
| 123 | - person.environment.profiles. | |
| 124 | - select(select_string). | |
| 125 | - joins(join_string) | |
| 126 | - | |
| 131 | + person_friends = person.friends.map(&:id) | |
| 132 | + rule = "people_with_common_friends" | |
| 133 | + return if person_friends.blank? | |
| 134 | + "SELECT person_id as #{profile_id(rule)}, | |
| 135 | + array_agg(friend_id) as #{connections(rule)}, | |
| 136 | + count(person_id) as #{counter(rule)} | |
| 137 | + FROM friendships WHERE friend_id IN (#{person_friends.join(',')}) | |
| 138 | + AND person_id NOT IN (#{(person_friends << person.id).join(',')}) | |
| 139 | + GROUP BY person_id" | |
| 127 | 140 | end |
| 128 | 141 | |
| 129 | -# def self.people_with_common_friends(person) | |
| 130 | -# person_friends = person.friends.map(&:id) | |
| 131 | -# return if person_friends.blank? | |
| 132 | -# person.environment.people. | |
| 133 | -# select("profiles.*, suggestions.count as common_count, suggestions.array_agg as connections"). | |
| 134 | -# joins(" | |
| 135 | -# INNER JOIN (SELECT person_id, array_agg(friend_id), count(person_id) FROM | |
| 136 | -# friendships WHERE friend_id IN (#{person_friends.join(',')}) AND | |
| 137 | -# person_id NOT IN (#{(person_friends << person.id).join(',')}) | |
| 138 | -# GROUP BY person_id | |
| 139 | -# HAVING count(person_id) >= #{COMMON_FRIENDS}) AS suggestions | |
| 140 | -# ON profiles.id = suggestions.person_id") | |
| 141 | -# end | |
| 142 | - | |
| 143 | 142 | def self.people_with_common_communities(person) |
| 144 | 143 | person_communities = person.communities.map(&:id) |
| 144 | + rule = "people_with_common_communities" | |
| 145 | 145 | return if person_communities.blank? |
| 146 | - person.environment.people. | |
| 147 | - select("profiles.*, suggestions.count AS common_count"). | |
| 148 | - joins(" | |
| 149 | - INNER JOIN (SELECT common_members.accessor_id, count(common_members.accessor_id) FROM | |
| 150 | - (SELECT DISTINCT accessor_id, resource_id FROM | |
| 151 | - role_assignments WHERE role_assignments.resource_id IN (#{person_communities.join(',')}) AND | |
| 152 | - role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | |
| 153 | - role_assignments.accessor_type = 'Profile') AS common_members | |
| 154 | - GROUP BY common_members.accessor_id | |
| 155 | - HAVING count(common_members.accessor_id) >= #{COMMON_COMMUNITIES}) | |
| 156 | - AS suggestions ON profiles.id = suggestions.accessor_id") | |
| 146 | + "SELECT common_members.accessor_id as #{profile_id(rule)}, | |
| 147 | + array_agg(common_members.resource_id) as #{connections(rule)}, | |
| 148 | + count(common_members.accessor_id) as #{counter(rule)} | |
| 149 | + FROM | |
| 150 | + (SELECT DISTINCT accessor_id, resource_id FROM | |
| 151 | + role_assignments WHERE role_assignments.resource_id IN (#{person_communities.join(',')}) AND | |
| 152 | + role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | |
| 153 | + role_assignments.accessor_type = 'Profile') AS common_members | |
| 154 | + GROUP BY common_members.accessor_id" | |
| 157 | 155 | end |
| 158 | 156 | |
| 159 | 157 | def self.people_with_common_tags(person) |
| 160 | 158 | profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id) |
| 159 | + rule = "people_with_common_tags" | |
| 161 | 160 | return if profile_tags.blank? |
| 162 | - person.environment.people. | |
| 163 | - select("profiles.*, suggestions.count as common_count"). | |
| 164 | - joins(" | |
| 165 | - INNER JOIN ( | |
| 166 | - SELECT results.profiles_id as profiles_id, count(results.profiles_id) FROM ( | |
| 167 | - SELECT DISTINCT tags.id, profiles.id as profiles_id FROM profiles | |
| 168 | - INNER JOIN articles ON articles.profile_id = profiles.id | |
| 169 | - INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | |
| 170 | - INNER JOIN tags ON tags.id = taggings.tag_id | |
| 171 | - WHERE (tags.id in (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | |
| 172 | - GROUP BY results.profiles_id | |
| 173 | - HAVING count(results.profiles_id) >= #{COMMON_TAGS}) | |
| 174 | - as suggestions on profiles.id = suggestions.profiles_id") | |
| 161 | + "SELECT results.profiles_id as #{profile_id(rule)}, | |
| 162 | + array_agg(results.tags_id) as #{connections(rule)}, | |
| 163 | + count(results.profiles_id) as #{counter(rule)} | |
| 164 | + FROM ( | |
| 165 | + SELECT DISTINCT tags.id as tags_id, profiles.id as profiles_id FROM profiles | |
| 166 | + INNER JOIN articles ON articles.profile_id = profiles.id | |
| 167 | + INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | |
| 168 | + INNER JOIN tags ON tags.id = taggings.tag_id | |
| 169 | + WHERE (tags.id in (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | |
| 170 | + GROUP BY results.profiles_id" | |
| 175 | 171 | end |
| 176 | 172 | |
| 177 | 173 | def self.communities_with_common_friends(person) |
| 178 | 174 | person_friends = person.friends.map(&:id) |
| 175 | + rule = "communities_with_common_friends" | |
| 179 | 176 | return if person_friends.blank? |
| 180 | - person.environment.communities. | |
| 181 | - select("profiles.*, suggestions.count AS common_count"). | |
| 182 | - joins(" | |
| 183 | - INNER JOIN (SELECT common_communities.resource_id, count(common_communities.resource_id) FROM | |
| 184 | - (SELECT DISTINCT accessor_id, resource_id FROM | |
| 185 | - role_assignments WHERE role_assignments.accessor_id IN (#{person_friends.join(',')}) AND | |
| 186 | - role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | |
| 187 | - role_assignments.accessor_type = 'Profile') AS common_communities | |
| 188 | - GROUP BY common_communities.resource_id | |
| 189 | - HAVING count(common_communities.resource_id) >= #{COMMON_FRIENDS}) | |
| 190 | - AS suggestions ON profiles.id = suggestions.resource_id") | |
| 177 | + "SELECT common_communities.resource_id as #{profile_id(rule)}, | |
| 178 | + array_agg(common_communities.accessor_id) as #{connections(rule)}, | |
| 179 | + count(common_communities.resource_id) as #{counter(rule)} | |
| 180 | + FROM | |
| 181 | + (SELECT DISTINCT accessor_id, resource_id FROM | |
| 182 | + role_assignments WHERE role_assignments.accessor_id IN (#{person_friends.join(',')}) AND | |
| 183 | + role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND | |
| 184 | + role_assignments.accessor_type = 'Profile') AS common_communities | |
| 185 | + GROUP BY common_communities.resource_id" | |
| 191 | 186 | end |
| 192 | 187 | |
| 193 | 188 | def self.communities_with_common_tags(person) |
| 194 | 189 | profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id) |
| 190 | + rule = "communities_with_common_tags" | |
| 195 | 191 | return if profile_tags.blank? |
| 196 | - person.environment.communities. | |
| 197 | - select("profiles.*, suggestions.count AS common_count"). | |
| 198 | - joins(" | |
| 199 | - INNER JOIN ( | |
| 200 | - SELECT results.profiles_id AS profiles_id, count(results.profiles_id) FROM ( | |
| 201 | - SELECT DISTINCT tags.id, profiles.id AS profiles_id FROM profiles | |
| 202 | - INNER JOIN articles ON articles.profile_id = profiles.id | |
| 203 | - INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | |
| 204 | - INNER JOIN tags ON tags.id = taggings.tag_id | |
| 205 | - WHERE (tags.id IN (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | |
| 206 | - GROUP BY results.profiles_id | |
| 207 | - HAVING count(results.profiles_id) >= #{COMMON_TAGS}) | |
| 208 | - AS suggestions ON profiles.id = suggestions.profiles_id") | |
| 192 | + "SELECT results.profiles_id as #{profile_id(rule)}, | |
| 193 | + array_agg(results.tags_id) as #{connections(rule)}, | |
| 194 | + count(results.profiles_id) as #{counter(rule)} | |
| 195 | + FROM | |
| 196 | + (SELECT DISTINCT tags.id as tags_id, profiles.id AS profiles_id FROM profiles | |
| 197 | + INNER JOIN articles ON articles.profile_id = profiles.id | |
| 198 | + INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article' | |
| 199 | + INNER JOIN tags ON tags.id = taggings.tag_id | |
| 200 | + WHERE (tags.id IN (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results | |
| 201 | + GROUP BY results.profiles_id" | |
| 202 | + end | |
| 203 | + | |
| 204 | + def self.all_suggestions(person) | |
| 205 | + select_string = "profiles.*, " + RULES.keys.map { |rule| "suggestions.#{counter(rule)} as #{counter(rule)}, suggestions.#{connections(rule)} as #{connections(rule)}" }.join(',') | |
| 206 | + previous_rule = nil | |
| 207 | + suggestions_join = RULES.keys.map do |rule| | |
| 208 | + rule_select = " | |
| 209 | + (SELECT profiles.id as #{profile_id(rule)}, | |
| 210 | + #{rule}_sub.#{counter(rule)} as #{counter(rule)}, | |
| 211 | + #{rule}_sub.#{connections(rule)} as #{connections(rule)} | |
| 212 | + FROM profiles | |
| 213 | + LEFT OUTER JOIN (#{self.send(rule, person)}) as #{rule}_sub | |
| 214 | + ON profiles.id = #{rule}_sub.#{profile_id(rule)}) AS #{rule}" | |
| 215 | + | |
| 216 | + if previous_rule.nil? | |
| 217 | + result = rule_select | |
| 218 | + else | |
| 219 | + result = "INNER JOIN #{rule_select} | |
| 220 | + ON #{previous_rule}.#{profile_id(previous_rule)} = #{rule}.#{profile_id(rule)}" | |
| 221 | + end | |
| 222 | + previous_rule = rule | |
| 223 | + result | |
| 224 | + end.join(' ') | |
| 225 | + join_string = "INNER JOIN (SELECT * FROM #{suggestions_join}) AS suggestions ON profiles.id = suggestions.#{profile_id(RULES.keys.first)}" | |
| 226 | + where_string = RULES.map { |rule, options| "#{counter(rule)} >= #{options[:threshold]}"}.join(' OR ') | |
| 227 | + | |
| 228 | + person.environment.profiles. | |
| 229 | + select(select_string). | |
| 230 | + joins(join_string). | |
| 231 | + where(where_string) | |
| 209 | 232 | end |
| 210 | 233 | |
| 211 | 234 | def disable | ... | ... |
db/migrate/20140720144212_create_profile_suggestions.rb
db/schema.rb
| ... | ... | @@ -452,7 +452,7 @@ ActiveRecord::Schema.define(:version => 20140805205626) do |
| 452 | 452 | t.string "suggestion_type" |
| 453 | 453 | t.text "categories" |
| 454 | 454 | t.boolean "enabled", :default => true |
| 455 | - t.float "score" | |
| 455 | + t.float "score", :default => 0.0 | |
| 456 | 456 | t.datetime "created_at", :null => false |
| 457 | 457 | t.datetime "updated_at", :null => false |
| 458 | 458 | end | ... | ... |