Commit fd9ec40e5cc949421a351f28ae912a0fcb5dab0c

Authored by Rodrigo Souto
1 parent d8fd6277

profile-suggestions: refactoring suggestion calculation

Building full sql to calculate every suggestion of every rule
app/models/profile_suggestion.rb
... ... @@ -28,16 +28,13 @@ class ProfileSuggestion < ActiveRecord::Base
28 28  
29 29 # {:category_type => ['category-icon', 'category-label']}
30 30 CATEGORIES = {
31   - :common_friends => ['menu-people', _('Friends in common')],
32   - :common_communities => ['menu-community',_('Communities in common')],
33   - :common_tags => ['edit', _('Tags in common')]
  31 + :people_with_common_friends => ['menu-people', _('Friends in common')],
  32 + :people_with_common_communities => ['menu-community',_('Communities in common')],
  33 + :people_with_common_tags => ['edit', _('Tags in common')],
  34 + :communities_with_common_friends => ['menu-people', _('Friends in common')],
  35 + :communities_with_common_tags => ['edit', _('Tags in common')]
34 36 }
35 37  
36   - CATEGORIES.keys.each do |category|
37   - settings_items category.to_sym
38   - attr_accessible category.to_sym
39   - end
40   -
41 38 def category_icon(category)
42 39 'icon-' + ProfileSuggestion::CATEGORIES[category][0]
43 40 end
... ... @@ -46,166 +43,192 @@ class ProfileSuggestion < ActiveRecord::Base
46 43 ProfileSuggestion::CATEGORIES[category][1]
47 44 end
48 45  
49   - RULES = %w[
50   - people_with_common_friends
51   - people_with_common_communities
52   - people_with_common_tags
53   - communities_with_common_friends
54   - communities_with_common_tags
55   - ]
  46 + RULES = {
  47 + :people_with_common_communities => {
  48 + :threshold => 2, :weight => 1
  49 + },
  50 + :people_with_common_friends => {
  51 + :threshold => 2, :weight => 1
  52 + },
  53 + :people_with_common_tags => {
  54 + :threshold => 2, :weight => 1
  55 + },
  56 + :communities_with_common_friends => {
  57 + :threshold => 2, :weight => 1
  58 + },
  59 + :communities_with_common_tags => {
  60 + :threshold => 2, :weight => 1
  61 + }
  62 + }
  63 +
  64 + RULES.keys.each do |rule|
  65 + settings_items rule
  66 + attr_accessible rule
  67 + end
56 68  
57 69 # Number of suggestions by rule
58   - SUGGESTIONS_BY_RULE = 10
  70 + N_SUGGESTIONS = 30
59 71  
60 72 # Minimum number of suggestions
61   - MIN_LIMIT = 15
62   -
63   - # Number of friends in common
64   - COMMON_FRIENDS = 2
  73 + MIN_LIMIT = 10
65 74  
66   - # Number of communities in common
67   - COMMON_COMMUNITIES = 2
68   -
69   - # Number of tags in common
70   - COMMON_TAGS = 5
  75 + def self.profile_id(rule)
  76 + "#{rule}_profile_id"
  77 + end
71 78  
72 79 def self.connections(rule)
73   - rule.split(/.*_with_/).last + '_connections'
  80 + "#{rule}_connections"
74 81 end
75 82  
76 83 def self.counter(rule)
77   - rule.split(/.*_with_/).last + '_count'
  84 + "#{rule}_count"
78 85 end
79 86  
80   - def self.register_suggestions(person, suggested_profiles, rule)
  87 + # If you are about to rewrite the following sql queries, think twice. After
  88 + # that make sure that whatever you are writing to replace it should be faster
  89 + # than how it is now. Yes, sqls are ugly but are fast! And fast is what we
  90 + # need here.
  91 + #
  92 + # The logic behind this code is to produce a table somewhat like this:
  93 + # profile_id | rule1_count | rule1_connections | rule2_count | rule2_connections | ... | score |
  94 + # 12 | 2 | {32,54} | 3 | {8,22,27} | ... | 13 |
  95 + # 13 | 4 | {3,12,32,54} | 2 | {11,24} | ... | 15 |
  96 + # 14 | | | 2 | {44,56} | ... | 17 |
  97 + # ...
  98 + # ...
  99 + #
  100 + # This table has the suggested profile id and the count and connections of
  101 + # each rule that made this profile be suggested. Each suggestion has a score
  102 + # associated based on the rules' counts and rules' weights.
  103 + #
  104 + # From this table, we can sort suggestions by the score and save a small
  105 + # amount of them in the database. At this moment we also register the
  106 + # connections of each suggestion.
  107 +
  108 + def self.calculate_suggestions(person)
  109 + suggested_profiles = all_suggestions(person)
81 110 return if suggested_profiles.nil?
  111 +
82 112 already_suggested_profiles = person.profile_suggestions.map(&:suggestion_id).join(',')
83 113 suggested_profiles = suggested_profiles.where("profiles.id NOT IN (#{already_suggested_profiles})") if already_suggested_profiles.present?
84   - suggested_profiles = suggested_profiles.limit(SUGGESTIONS_BY_RULE)
  114 + #TODO suggested_profiles = suggested_profiles.order('score DESC')
  115 + suggested_profiles = suggested_profiles.limit(N_SUGGESTIONS)
85 116 return if suggested_profiles.blank?
  117 +
86 118 suggested_profiles.each do |suggested_profile|
87 119 suggestion = person.profile_suggestions.find_or_initialize_by_suggestion_id(suggested_profile.id)
88   - suggestion.send(counter(rule)+'=', suggested_profile.common_count.to_i)
  120 + RULES.each do |rule, options|
  121 + value = suggested_profile.send("#{rule}_count").to_i
  122 + suggestion.send("#{rule}=", value)
  123 + #TODO Create suggestion connections.
  124 + suggestion.score += value * options[:weight]
  125 + end
89 126 suggestion.save!
90 127 end
91 128 end
92 129  
93   - def self.calculate_suggestions(person)
94   - ProfileSuggestion::RULES.each do |rule|
95   - register_suggestions(person, ProfileSuggestion.send(rule, person), rule)
96   - end
97   - end
98   -
99   - # If you are about to rewrite the following sql queries, think twice. After
100   - # that make sure that whatever you are writing to replace it should be faster
101   - # than how it is now. Yes, sqls are ugly but are fast! And fast is what we
102   - # need here.
103   -
104 130 def self.people_with_common_friends(person)
105   - "SELECT person_id as profiles_id, array_agg(friend_id) as common_friends_connections, count(person_id) as common_friends_count FROM
106   - friendships WHERE friend_id IN (#{person_friends.join(',')}) AND
107   - person_id NOT IN (#{(person_friends << person.id).join(',')})
108   - GROUP BY person_id
109   - HAVING count(person_id) >= #{COMMON_FRIENDS}"
110   - end
111   -
112   - def self.all_suggestions(person)
113   - select_string = "profiles.*, " + RULES.map { rule| "suggestions.#{counter(rule)} as #{counter(rule)}, suggestions.#{connections(rule)} as #{connections(rule)}" }.join(',')
114   - suggestions_join = RULES.map.with_index do |rule, i|
115   - if i == 0
116   - "(#{self.send(rule, person)}) AS #{rule}"
117   - else
118   - previous_rule = RULES[i-1]
119   - "LEFT OUTER JOIN (#{self.send(rule, person)}) AS #{rule} ON #{previous_rule}.profiles_id = #{rule}.profiles_id"
120   - end
121   - end.join(' ')
122   - join_string = "INNER JOIN (SELECT * FROM #{suggestions_join}) AS suggestions ON profiles.id = suggestions.profiles_id"
123   - person.environment.profiles.
124   - select(select_string).
125   - joins(join_string)
126   -
  131 + person_friends = person.friends.map(&:id)
  132 + rule = "people_with_common_friends"
  133 + return if person_friends.blank?
  134 + "SELECT person_id as #{profile_id(rule)},
  135 + array_agg(friend_id) as #{connections(rule)},
  136 + count(person_id) as #{counter(rule)}
  137 + FROM friendships WHERE friend_id IN (#{person_friends.join(',')})
  138 + AND person_id NOT IN (#{(person_friends << person.id).join(',')})
  139 + GROUP BY person_id"
127 140 end
128 141  
129   -# def self.people_with_common_friends(person)
130   -# person_friends = person.friends.map(&:id)
131   -# return if person_friends.blank?
132   -# person.environment.people.
133   -# select("profiles.*, suggestions.count as common_count, suggestions.array_agg as connections").
134   -# joins("
135   -# INNER JOIN (SELECT person_id, array_agg(friend_id), count(person_id) FROM
136   -# friendships WHERE friend_id IN (#{person_friends.join(',')}) AND
137   -# person_id NOT IN (#{(person_friends << person.id).join(',')})
138   -# GROUP BY person_id
139   -# HAVING count(person_id) >= #{COMMON_FRIENDS}) AS suggestions
140   -# ON profiles.id = suggestions.person_id")
141   -# end
142   -
143 142 def self.people_with_common_communities(person)
144 143 person_communities = person.communities.map(&:id)
  144 + rule = "people_with_common_communities"
145 145 return if person_communities.blank?
146   - person.environment.people.
147   - select("profiles.*, suggestions.count AS common_count").
148   - joins("
149   - INNER JOIN (SELECT common_members.accessor_id, count(common_members.accessor_id) FROM
150   - (SELECT DISTINCT accessor_id, resource_id FROM
151   - role_assignments WHERE role_assignments.resource_id IN (#{person_communities.join(',')}) AND
152   - role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND
153   - role_assignments.accessor_type = 'Profile') AS common_members
154   - GROUP BY common_members.accessor_id
155   - HAVING count(common_members.accessor_id) >= #{COMMON_COMMUNITIES})
156   - AS suggestions ON profiles.id = suggestions.accessor_id")
  146 + "SELECT common_members.accessor_id as #{profile_id(rule)},
  147 + array_agg(common_members.resource_id) as #{connections(rule)},
  148 + count(common_members.accessor_id) as #{counter(rule)}
  149 + FROM
  150 + (SELECT DISTINCT accessor_id, resource_id FROM
  151 + role_assignments WHERE role_assignments.resource_id IN (#{person_communities.join(',')}) AND
  152 + role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND
  153 + role_assignments.accessor_type = 'Profile') AS common_members
  154 + GROUP BY common_members.accessor_id"
157 155 end
158 156  
159 157 def self.people_with_common_tags(person)
160 158 profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id)
  159 + rule = "people_with_common_tags"
161 160 return if profile_tags.blank?
162   - person.environment.people.
163   - select("profiles.*, suggestions.count as common_count").
164   - joins("
165   - INNER JOIN (
166   - SELECT results.profiles_id as profiles_id, count(results.profiles_id) FROM (
167   - SELECT DISTINCT tags.id, profiles.id as profiles_id FROM profiles
168   - INNER JOIN articles ON articles.profile_id = profiles.id
169   - INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article'
170   - INNER JOIN tags ON tags.id = taggings.tag_id
171   - WHERE (tags.id in (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results
172   - GROUP BY results.profiles_id
173   - HAVING count(results.profiles_id) >= #{COMMON_TAGS})
174   - as suggestions on profiles.id = suggestions.profiles_id")
  161 + "SELECT results.profiles_id as #{profile_id(rule)},
  162 + array_agg(results.tags_id) as #{connections(rule)},
  163 + count(results.profiles_id) as #{counter(rule)}
  164 + FROM (
  165 + SELECT DISTINCT tags.id as tags_id, profiles.id as profiles_id FROM profiles
  166 + INNER JOIN articles ON articles.profile_id = profiles.id
  167 + INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article'
  168 + INNER JOIN tags ON tags.id = taggings.tag_id
  169 + WHERE (tags.id in (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results
  170 + GROUP BY results.profiles_id"
175 171 end
176 172  
177 173 def self.communities_with_common_friends(person)
178 174 person_friends = person.friends.map(&:id)
  175 + rule = "communities_with_common_friends"
179 176 return if person_friends.blank?
180   - person.environment.communities.
181   - select("profiles.*, suggestions.count AS common_count").
182   - joins("
183   - INNER JOIN (SELECT common_communities.resource_id, count(common_communities.resource_id) FROM
184   - (SELECT DISTINCT accessor_id, resource_id FROM
185   - role_assignments WHERE role_assignments.accessor_id IN (#{person_friends.join(',')}) AND
186   - role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND
187   - role_assignments.accessor_type = 'Profile') AS common_communities
188   - GROUP BY common_communities.resource_id
189   - HAVING count(common_communities.resource_id) >= #{COMMON_FRIENDS})
190   - AS suggestions ON profiles.id = suggestions.resource_id")
  177 + "SELECT common_communities.resource_id as #{profile_id(rule)},
  178 + array_agg(common_communities.accessor_id) as #{connections(rule)},
  179 + count(common_communities.resource_id) as #{counter(rule)}
  180 + FROM
  181 + (SELECT DISTINCT accessor_id, resource_id FROM
  182 + role_assignments WHERE role_assignments.accessor_id IN (#{person_friends.join(',')}) AND
  183 + role_assignments.accessor_id != #{person.id} AND role_assignments.resource_type = 'Profile' AND
  184 + role_assignments.accessor_type = 'Profile') AS common_communities
  185 + GROUP BY common_communities.resource_id"
191 186 end
192 187  
193 188 def self.communities_with_common_tags(person)
194 189 profile_tags = person.articles.select('tags.id').joins(:tags).map(&:id)
  190 + rule = "communities_with_common_tags"
195 191 return if profile_tags.blank?
196   - person.environment.communities.
197   - select("profiles.*, suggestions.count AS common_count").
198   - joins("
199   - INNER JOIN (
200   - SELECT results.profiles_id AS profiles_id, count(results.profiles_id) FROM (
201   - SELECT DISTINCT tags.id, profiles.id AS profiles_id FROM profiles
202   - INNER JOIN articles ON articles.profile_id = profiles.id
203   - INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article'
204   - INNER JOIN tags ON tags.id = taggings.tag_id
205   - WHERE (tags.id IN (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results
206   - GROUP BY results.profiles_id
207   - HAVING count(results.profiles_id) >= #{COMMON_TAGS})
208   - AS suggestions ON profiles.id = suggestions.profiles_id")
  192 + "SELECT results.profiles_id as #{profile_id(rule)},
  193 + array_agg(results.tags_id) as #{connections(rule)},
  194 + count(results.profiles_id) as #{counter(rule)}
  195 + FROM
  196 + (SELECT DISTINCT tags.id as tags_id, profiles.id AS profiles_id FROM profiles
  197 + INNER JOIN articles ON articles.profile_id = profiles.id
  198 + INNER JOIN taggings ON taggings.taggable_id = articles.id AND taggings.context = ('tags') AND taggings.taggable_type = 'Article'
  199 + INNER JOIN tags ON tags.id = taggings.tag_id
  200 + WHERE (tags.id IN (#{profile_tags.join(',')}) AND profiles.id != #{person.id})) AS results
  201 + GROUP BY results.profiles_id"
  202 + end
  203 +
  204 + def self.all_suggestions(person)
  205 + select_string = "profiles.*, " + RULES.keys.map { |rule| "suggestions.#{counter(rule)} as #{counter(rule)}, suggestions.#{connections(rule)} as #{connections(rule)}" }.join(',')
  206 + previous_rule = nil
  207 + suggestions_join = RULES.keys.map do |rule|
  208 + rule_select = "
  209 + (SELECT profiles.id as #{profile_id(rule)},
  210 + #{rule}_sub.#{counter(rule)} as #{counter(rule)},
  211 + #{rule}_sub.#{connections(rule)} as #{connections(rule)}
  212 + FROM profiles
  213 + LEFT OUTER JOIN (#{self.send(rule, person)}) as #{rule}_sub
  214 + ON profiles.id = #{rule}_sub.#{profile_id(rule)}) AS #{rule}"
  215 +
  216 + if previous_rule.nil?
  217 + result = rule_select
  218 + else
  219 + result = "INNER JOIN #{rule_select}
  220 + ON #{previous_rule}.#{profile_id(previous_rule)} = #{rule}.#{profile_id(rule)}"
  221 + end
  222 + previous_rule = rule
  223 + result
  224 + end.join(' ')
  225 + join_string = "INNER JOIN (SELECT * FROM #{suggestions_join}) AS suggestions ON profiles.id = suggestions.#{profile_id(RULES.keys.first)}"
  226 + where_string = RULES.map { |rule, options| "#{counter(rule)} >= #{options[:threshold]}"}.join(' OR ')
  227 +
  228 + person.environment.profiles.
  229 + select(select_string).
  230 + joins(join_string).
  231 + where(where_string)
209 232 end
210 233  
211 234 def disable
... ...
db/migrate/20140720144212_create_profile_suggestions.rb
... ... @@ -6,7 +6,7 @@ class CreateProfileSuggestions &lt; ActiveRecord::Migration
6 6 t.string :suggestion_type
7 7 t.text :categories
8 8 t.boolean :enabled, :default => true
9   - t.float :score
  9 + t.float :score, :default => 0
10 10  
11 11 t.timestamps
12 12 end
... ...
db/schema.rb
... ... @@ -452,7 +452,7 @@ ActiveRecord::Schema.define(:version =&gt; 20140805205626) do
452 452 t.string "suggestion_type"
453 453 t.text "categories"
454 454 t.boolean "enabled", :default => true
455   - t.float "score"
  455 + t.float "score", :default => 0.0
456 456 t.datetime "created_at", :null => false
457 457 t.datetime "updated_at", :null => false
458 458 end
... ...