Commit 5af15ae174e8afcf8ecaa6bb8b71886615279086
1 parent
7f4319d5
Exists in
master
and in
1 other branch
Recommendation strategies refactoring.
Showing
1 changed file
with
175 additions
and
150 deletions
Show diff stats
src/strategy.py
@@ -23,7 +23,9 @@ __license__ = """ | @@ -23,7 +23,9 @@ __license__ = """ | ||
23 | import xapian | 23 | import xapian |
24 | from singleton import Singleton | 24 | from singleton import Singleton |
25 | import recommender | 25 | import recommender |
26 | -from data import * | 26 | +import data |
27 | +import logging | ||
28 | +from error import Error | ||
27 | 29 | ||
28 | class PkgMatchDecider(xapian.MatchDecider): | 30 | class PkgMatchDecider(xapian.MatchDecider): |
29 | """ | 31 | """ |
@@ -71,79 +73,13 @@ class PkgExpandDecider(xapian.ExpandDecider): | @@ -71,79 +73,13 @@ class PkgExpandDecider(xapian.ExpandDecider): | ||
71 | return is_new_pkg and "gnome" in self.pkgs_list | 73 | return is_new_pkg and "gnome" in self.pkgs_list |
72 | return is_new_pkg | 74 | return is_new_pkg |
73 | 75 | ||
74 | -#class AppMatchDecider(xapian.MatchDecider): | ||
75 | -# """ | ||
76 | -# Extend xapian.MatchDecider to not consider only applications packages. | ||
77 | -# """ | ||
78 | -# def __init__(self, pkgs_list, axi): | ||
79 | -# """ | ||
80 | -# Set initial parameters. | ||
81 | -# """ | ||
82 | -# xapian.MatchDecider.__init__(self) | ||
83 | -# self.pkgs_list = pkgs_list | ||
84 | -# self.axi = axi | ||
85 | -# | ||
86 | -# def __call__(self, doc): | ||
87 | -# """ | ||
88 | -# True if the package is not already installed. | ||
89 | -# """ | ||
90 | -# tags = axi_search_pkg_tags(self.axi,doc.get_data()) | ||
91 | -# return (("XTrole::program" in tags) and | ||
92 | -# (doc.get_data() not in self.pkgs_list)) | ||
93 | -# | ||
94 | -#class UserMatchDecider(xapian.MatchDecider): | ||
95 | -# """ | ||
96 | -# Extend xapian.MatchDecider to match similar profiles. | ||
97 | -# """ | ||
98 | -# | ||
99 | -# def __init__(self, profile): | ||
100 | -# """ | ||
101 | -# Set initial parameters. | ||
102 | -# """ | ||
103 | -# xapian.MatchDecider.__init__(self) | ||
104 | -# self.profile = profile | ||
105 | -# | ||
106 | -# def __call__(self, doc): | ||
107 | -# """ | ||
108 | -# True if the user has more the half of packages from profile. | ||
109 | -# """ | ||
110 | -# match=0 | ||
111 | -# for term in doc: | ||
112 | -# if term.term in self.profile: | ||
113 | -# match = match+1 | ||
114 | -# return (match >= len(self.profile)/2) | ||
115 | - | ||
116 | -#class AppExpandDecider(xapian.ExpandDecider): | ||
117 | -# """ | ||
118 | -# Extend xapian.ExpandDecider to consider applications only. | ||
119 | -# """ | ||
120 | -# def __init__(self,axi): | ||
121 | -# xapian.ExpandDecider.__init__(self) | ||
122 | -# self.axi = axi | ||
123 | -# | ||
124 | -# def __call__(self, term): | ||
125 | -# """ | ||
126 | -# True if the term is a package. | ||
127 | -# """ | ||
128 | -# if not term.startswith("XT"): | ||
129 | -# package = term.lstrip("XP") | ||
130 | -# print package | ||
131 | -# tags = axi_search_pkg_tags(self.axi,package) | ||
132 | -# if "XTrole::program" in tags: | ||
133 | -# print tags | ||
134 | -# return True | ||
135 | -# else: | ||
136 | -# return False | ||
137 | -# else: | ||
138 | -# return False | ||
139 | - | ||
140 | class TagExpandDecider(xapian.ExpandDecider): | 76 | class TagExpandDecider(xapian.ExpandDecider): |
141 | """ | 77 | """ |
142 | Extend xapian.ExpandDecider to consider tags only. | 78 | Extend xapian.ExpandDecider to consider tags only. |
143 | """ | 79 | """ |
144 | def __call__(self, term): | 80 | def __call__(self, term): |
145 | """ | 81 | """ |
146 | - True if the term is a tag. | 82 | + True if the term is a package tag. |
147 | """ | 83 | """ |
148 | return term.startswith("XT") | 84 | return term.startswith("XT") |
149 | 85 | ||
@@ -153,7 +89,7 @@ class RecommendationStrategy: | @@ -153,7 +89,7 @@ class RecommendationStrategy: | ||
153 | """ | 89 | """ |
154 | pass | 90 | pass |
155 | 91 | ||
156 | -class ContentBasedStrategy(RecommendationStrategy): | 92 | +class ContentBased(RecommendationStrategy): |
157 | """ | 93 | """ |
158 | Content-based recommendation strategy based on Apt-xapian-index. | 94 | Content-based recommendation strategy based on Apt-xapian-index. |
159 | """ | 95 | """ |
@@ -162,158 +98,247 @@ class ContentBasedStrategy(RecommendationStrategy): | @@ -162,158 +98,247 @@ class ContentBasedStrategy(RecommendationStrategy): | ||
162 | self.content = content | 98 | self.content = content |
163 | self.profile_size = profile_size | 99 | self.profile_size = profile_size |
164 | 100 | ||
165 | - def run(self,rec,user,recommendation_size): | ||
166 | - """ | ||
167 | - Perform recommendation strategy. | ||
168 | - """ | ||
169 | - logging.debug("Composing user profile...") | ||
170 | - profile = user.content_profile(rec.items_repository,self.content, | ||
171 | - self.profile_size) | ||
172 | - logging.debug(profile) | ||
173 | - # prepair index for querying user profile | 101 | + def get_sugestion_from_profile(self,rec,user,profile,recommendation_size): |
174 | query = xapian.Query(xapian.Query.OP_OR,profile) | 102 | query = xapian.Query(xapian.Query.OP_OR,profile) |
175 | enquire = xapian.Enquire(rec.items_repository) | 103 | enquire = xapian.Enquire(rec.items_repository) |
176 | enquire.set_weighting_scheme(rec.weight) | 104 | enquire.set_weighting_scheme(rec.weight) |
177 | enquire.set_query(query) | 105 | enquire.set_query(query) |
106 | + # Retrieve matching packages | ||
178 | try: | 107 | try: |
179 | - # retrieve matching packages | ||
180 | mset = enquire.get_mset(0, recommendation_size, None, | 108 | mset = enquire.get_mset(0, recommendation_size, None, |
181 | PkgMatchDecider(user.items())) | 109 | PkgMatchDecider(user.items())) |
182 | - #AppMatchDecider(user.items(), | ||
183 | - # rec.items_repository)) | ||
184 | except xapian.DatabaseError as error: | 110 | except xapian.DatabaseError as error: |
185 | logging.critical("Content-based strategy: "+error.get_msg()) | 111 | logging.critical("Content-based strategy: "+error.get_msg()) |
186 | - # compose result dictionary | 112 | + |
113 | + # Compose result dictionary | ||
187 | item_score = {} | 114 | item_score = {} |
188 | ranking = [] | 115 | ranking = [] |
189 | for m in mset: | 116 | for m in mset: |
190 | - #[FIXME] set this constraint somehow | ||
191 | - #tags = axi_search_pkg_tags(rec.items_repository,m.document.get_data()) | ||
192 | - #if "XTrole::program" in tags: | ||
193 | item_score[m.document.get_data()] = m.weight | 117 | item_score[m.document.get_data()] = m.weight |
194 | ranking.append(m.document.get_data()) | 118 | ranking.append(m.document.get_data()) |
195 | 119 | ||
196 | - return recommender.RecommendationResult(item_score,ranking) | ||
197 | - | ||
198 | -class CollaborativeStrategy(RecommendationStrategy): | ||
199 | - """ | ||
200 | - Colaborative recommendation strategy. | ||
201 | - """ | ||
202 | - def __init__(self,k): | ||
203 | - self.description = "Collaborative" | ||
204 | - self.neighbours = k | 120 | + result = recommender.RecommendationResult(item_score,ranking) |
121 | + return result | ||
205 | 122 | ||
206 | def run(self,rec,user,recommendation_size): | 123 | def run(self,rec,user,recommendation_size): |
207 | """ | 124 | """ |
208 | Perform recommendation strategy. | 125 | Perform recommendation strategy. |
209 | """ | 126 | """ |
210 | logging.debug("Composing user profile...") | 127 | logging.debug("Composing user profile...") |
128 | + profile = user.content_profile(rec.items_repository,self.content, | ||
129 | + self.profile_size,rec.valid_tags) | ||
130 | + logging.debug(profile) | ||
131 | + result = self.get_sugestion_from_profile(rec,user,profile,recommendation_size) | ||
132 | + return result | ||
133 | + | ||
134 | +class Collaborative(RecommendationStrategy): | ||
135 | + """ | ||
136 | + Collaborative recommendation strategy. | ||
137 | + """ | ||
138 | + def get_user_profile(self,user,rec): | ||
139 | + logging.debug("Composing user profile...") | ||
211 | profile = ["XP"+package for package in | 140 | profile = ["XP"+package for package in |
212 | user.filter_pkg_profile(rec.valid_pkgs)] | 141 | user.filter_pkg_profile(rec.valid_pkgs)] |
213 | logging.debug(profile) | 142 | logging.debug(profile) |
214 | - # prepair index for querying user profile | ||
215 | - query = xapian.Query(xapian.Query.OP_OR,profile) | 143 | + return profile |
144 | + | ||
145 | + def get_enquire(self,rec): | ||
216 | enquire = xapian.Enquire(rec.users_repository) | 146 | enquire = xapian.Enquire(rec.users_repository) |
217 | enquire.set_weighting_scheme(rec.weight) | 147 | enquire.set_weighting_scheme(rec.weight) |
148 | + return enquire | ||
149 | + | ||
150 | + def get_rset_from_profile(self,profile): | ||
151 | + # Create document to represent user profile and mark it as relevant | ||
152 | + return rset | ||
153 | + | ||
154 | + def get_neighborhood(self,user,rec): | ||
155 | + profile = self.get_user_profile(user,rec) | ||
156 | + #query = xapian.Query(xapian.Query.OP_OR,profile) | ||
157 | + query = xapian.Query(xapian.Query.OP_ELITE_SET,profile) | ||
158 | + enquire = self.get_enquire(rec) | ||
218 | enquire.set_query(query) | 159 | enquire.set_query(query) |
160 | + # Retrieve matching users | ||
219 | try: | 161 | try: |
220 | - # retrieve matching users | ||
221 | mset = enquire.get_mset(0, self.neighbours) | 162 | mset = enquire.get_mset(0, self.neighbours) |
222 | except xapian.DatabaseError as error: | 163 | except xapian.DatabaseError as error: |
223 | - logging.critical("Collaborative strategy: "+error.get_msg()) | 164 | + logging.critical("Could not compose user neighborhood.\n "+error.get_msg()) |
165 | + raise Error | ||
166 | + return mset | ||
167 | + | ||
168 | + def get_neighborhood_rset(self,user,rec): | ||
169 | + mset = self.get_neighborhood(user,rec) | ||
224 | rset = xapian.RSet() | 170 | rset = xapian.RSet() |
225 | - logging.debug("Neighborhood composed by the following users (by hash)") | ||
226 | for m in mset: | 171 | for m in mset: |
227 | rset.add_document(m.document.get_docid()) | 172 | rset.add_document(m.document.get_docid()) |
228 | - logging.debug(m.document.get_data()) | ||
229 | - # retrieve most relevant packages | ||
230 | - #eset = enquire.get_eset(recommendation_size,rset, | ||
231 | - # AppExpandDecider(rec.items_repository)) | ||
232 | - eset = enquire.get_eset(recommendation_size,rset, | ||
233 | - PkgExpandDecider(user.items())) | 173 | + return rset |
174 | + | ||
175 | + def get_result_from_eset(self,eset): | ||
234 | # compose result dictionary | 176 | # compose result dictionary |
235 | item_score = {} | 177 | item_score = {} |
236 | ranking = [] | 178 | ranking = [] |
237 | for e in eset: | 179 | for e in eset: |
238 | package = e.term.lstrip("XP") | 180 | package = e.term.lstrip("XP") |
239 | - #tags = axi_search_pkg_tags(rec.items_repository,package) | ||
240 | - #[FIXME] set this constraint somehow | ||
241 | - #if "XTrole::program" in tags: | ||
242 | item_score[package] = e.weight | 181 | item_score[package] = e.weight |
243 | - ranking.append(m.document.get_data()) | 182 | + ranking.append(package) |
244 | return recommender.RecommendationResult(item_score, ranking) | 183 | return recommender.RecommendationResult(item_score, ranking) |
245 | 184 | ||
246 | -class DemographicStrategy(RecommendationStrategy): | 185 | +class Knn(Collaborative): |
247 | """ | 186 | """ |
248 | - Recommendation strategy based on demographic data. | 187 | + KNN based on packages' tf-idf weights. |
249 | """ | 188 | """ |
250 | - #def __init__(self, result): | ||
251 | - #self.result = result | ||
252 | - def __init__(self): | ||
253 | - self.description = "Demographic" | ||
254 | - logging.debug("Demographic recommendation not yet implemented.") | ||
255 | - raise Error | 189 | + def __init__(self,k): |
190 | + self.description = "Knn" | ||
191 | + self.neighbours = k | ||
256 | 192 | ||
257 | def run(self,rec,user,recommendation_size): | 193 | def run(self,rec,user,recommendation_size): |
258 | """ | 194 | """ |
259 | Perform recommendation strategy. | 195 | Perform recommendation strategy. |
260 | """ | 196 | """ |
261 | - ordered_result = self.result.get_prediction() | ||
262 | - | ||
263 | - for item,weight in ordered_result: | ||
264 | - pass | ||
265 | - | 197 | + neighborhood = self.get_neighborhood(user,rec) |
198 | + weights = data.tfidf_weighting(rec.users_repository,neighborhood, | ||
199 | + PkgExpandDecider(user.items())) | ||
200 | + item_score = {} | ||
201 | + ranking = [] | ||
202 | + for pkg in weights[:recommendation_size]: | ||
203 | + package = pkg[0].lstrip("XP") | ||
204 | + item_score[package] = pkg[1] | ||
205 | + ranking.append(package) | ||
206 | + result = recommender.RecommendationResult(item_score, ranking) | ||
207 | + return result | ||
266 | 208 | ||
267 | -class KnowledgeBasedStrategy(RecommendationStrategy): | 209 | +class KnnPlus(Collaborative): |
268 | """ | 210 | """ |
269 | - Knowledge-based recommendation strategy. | 211 | + KNN based on packages' tf-idf weights. |
270 | """ | 212 | """ |
271 | - def __init__(self): | ||
272 | - self.description = "Knowledge-based" | ||
273 | - logging.debug("Knowledge-based recommendation not yet implemented.") | ||
274 | - raise Error | 213 | + def __init__(self,k): |
214 | + self.description = "Knn" | ||
215 | + self.neighbours = k | ||
275 | 216 | ||
276 | - def run(self,user,knowledge_repository): | 217 | + def run(self,rec,user,recommendation_size): |
277 | """ | 218 | """ |
278 | Perform recommendation strategy. | 219 | Perform recommendation strategy. |
279 | """ | 220 | """ |
280 | - pass | 221 | + neighborhood = self.get_neighborhood(user,rec) |
222 | + weights = data.tfidf_plus(rec.users_repository,neighborhood, | ||
223 | + PkgExpandDecider(user.items())) | ||
224 | + item_score = {} | ||
225 | + ranking = [] | ||
226 | + for pkg in weights[:recommendation_size]: | ||
227 | + package = pkg[0].lstrip("XP") | ||
228 | + item_score[package] = pkg[1] | ||
229 | + ranking.append(package) | ||
230 | + result = recommender.RecommendationResult(item_score, ranking) | ||
231 | + return result | ||
281 | 232 | ||
282 | -class ReputationHeuristic(Singleton): | 233 | +class KnnEset(Collaborative): |
283 | """ | 234 | """ |
284 | - Abstraction for diferent reputation heuristics. | 235 | + KNN based on query expansion. |
285 | """ | 236 | """ |
286 | - pass | 237 | + def __init__(self,k): |
238 | + self.description = "KnnEset" | ||
239 | + self.neighbours = k | ||
287 | 240 | ||
288 | -class BugsHeuristic(ReputationHeuristic): | 241 | + def run(self,rec,user,recommendation_size): |
242 | + """ | ||
243 | + Perform recommendation strategy. | ||
244 | + """ | ||
245 | + neighbors_rset = self.get_neighborhood_rset(user,rec) | ||
246 | + enquire = self.get_enquire(rec) | ||
247 | + # Retrieve new packages based on neighborhood profile expansion | ||
248 | + eset = enquire.get_eset(recommendation_size,neighbors_rset, | ||
249 | + PkgExpandDecider(user.items())) | ||
250 | + result = self.get_result_from_eset(eset) | ||
251 | + return result | ||
252 | + | ||
253 | +class CollaborativeEset(Collaborative): | ||
289 | """ | 254 | """ |
290 | - Reputation heuristic based on quantity of open bugs. | 255 | + Collaborative strategy based on query expansion. |
291 | """ | 256 | """ |
292 | - pass | 257 | + def __init__(self): |
258 | + self.description = "Collaborative-Eset" | ||
259 | + | ||
260 | + def run(self,rec,user,recommendation_size): | ||
261 | + """ | ||
262 | + Perform recommendation strategy. | ||
263 | + """ | ||
264 | + temp_index = xapian.WritableDatabase("/tmp/Database",xapian.DB_CREATE_OR_OVERWRITE) | ||
265 | + profile = self.get_user_profile(user,rec) | ||
266 | + doc = xapian.Document() | ||
267 | + for pkg in profile: | ||
268 | + doc.add_term(pkg) | ||
269 | + doc.add_term("TO_BE_DELETED") | ||
270 | + docid = temp_index.add_document(doc) | ||
271 | + temp_index.add_database(rec.users_repository) | ||
272 | + rset = xapian.RSet() | ||
273 | + rset.add_document(docid) | ||
274 | + # rset = self.get_rset_from_profile(profile) | ||
275 | + enquire = xapian.Enquire(temp_index) | ||
276 | + enquire.set_weighting_scheme(rec.weight) | ||
277 | + eset = enquire.get_eset(recommendation_size,rset, | ||
278 | + PkgExpandDecider(user.items())) | ||
279 | + result = self.get_result_from_eset(eset) | ||
280 | + return result | ||
293 | 281 | ||
294 | -class RCBugsHeuristic(ReputationHeuristic): | 282 | +class KnnContent(Collaborative): |
295 | """ | 283 | """ |
296 | - Reputation heuristic based on quantity of RC bugs. | 284 | + Hybrid "Collaborative through content" recommendation strategy. |
297 | """ | 285 | """ |
298 | - pass | 286 | + def __init__(self,k): |
287 | + self.description = "Knn-Content" | ||
288 | + self.neighbours = k | ||
299 | 289 | ||
300 | -class PopularityHeuristic(ReputationHeuristic): | 290 | + def run(self,rec,user,recommendation_size): |
291 | + """ | ||
292 | + Perform recommendation strategy. | ||
293 | + """ | ||
294 | + neighborhood = self.get_neighborhood(user,rec) | ||
295 | + weights = data.tfidf_weighting(rec.users_repository,neighborhood, | ||
296 | + PkgExpandDecider(user.items())) | ||
297 | + profile = [w[0] for w in weights][:rec.cfg.profile_size] | ||
298 | + result = ContentBased().get_sugestion_from_profile(rec,user,profile,recommendation_size) | ||
299 | + return result | ||
300 | + | ||
301 | +class KnnContentEset(Collaborative): | ||
301 | """ | 302 | """ |
302 | - Reputation heuristic based on popularity of packages. | 303 | + Hybrid "Collaborative through content" recommendation strategy. |
303 | """ | 304 | """ |
304 | - pass | 305 | + def __init__(self,k): |
306 | + self.description = "Knn-Content-Eset" | ||
307 | + self.neighbours = k | ||
308 | + | ||
309 | + def run(self,rec,user,recommendation_size): | ||
310 | + """ | ||
311 | + Perform recommendation strategy. | ||
312 | + """ | ||
313 | + neighbors_rset = self.get_neighborhood_rset(user,rec) | ||
314 | + enquire = self.get_enquire(rec) | ||
315 | + # Retrieve relevant tags based on neighborhood profile expansion | ||
316 | + eset = enquire.get_eset(rec.cfg.profile_size,rset, | ||
317 | + TagExpandDecider()) | ||
318 | + profile = [e.term for e in eset] | ||
319 | + result = ContentBased().get_sugestion_from_profile(rec,user,profile,recommendation_size) | ||
320 | + return result | ||
305 | 321 | ||
306 | -class ItemReputationStrategy(RecommendationStrategy): | 322 | +class Demographic(RecommendationStrategy): |
307 | """ | 323 | """ |
308 | - Recommendation strategy based on items reputation. | 324 | + Hybrid rotation strategy based on demographic data. |
309 | """ | 325 | """ |
310 | - def __init__(self): | ||
311 | - self.description = "Item reputation" | ||
312 | - logging.debug("Item reputation recommendation not yet implemented.") | ||
313 | - raise Error | 326 | + def __init__(self,strategy_str): |
327 | + self.description = "Demographic" | ||
328 | + self.strategy_str = strategy_str.lstrip("demo_") | ||
314 | 329 | ||
315 | - def run(self,items_list,heuristic): | 330 | + def run(self,rec,user,recommendation_size): |
316 | """ | 331 | """ |
317 | Perform recommendation strategy. | 332 | Perform recommendation strategy. |
318 | """ | 333 | """ |
319 | - pass | 334 | + program_profile = user.filter_pkg_profile(os.path.join(rec.cfg.filters_dir,"programs")) |
335 | + desktop_profile = user.filter_pkg_profile(os.path.join(rec.cfg.filters_dir,"desktopapps")) | ||
336 | + if (len(desktop_profile)>10 or | ||
337 | + len(desktop_profile)>len(program_profile)/2): | ||
338 | + rec.set_strategy(self.strategy_str) | ||
339 | + # Redefine repositories after configuring strategy | ||
340 | + rec.items_repository = rec.axi_desktopapps | ||
341 | + rec.valid_pkgs = rec.valid_desktopapps | ||
342 | + if "col" in self.strategy_str: | ||
343 | + rec.users_repository = rec.popcon_desktopapps | ||
344 | + return rec.get_recommendation(user,recommendation_size) |