Commit 5af15ae174e8afcf8ecaa6bb8b71886615279086
1 parent
7f4319d5
Exists in
master
and in
1 other branch
Recommendation strategies refactoring.
Showing
1 changed file
with
175 additions
and
150 deletions
Show diff stats
src/strategy.py
... | ... | @@ -23,7 +23,9 @@ __license__ = """ |
23 | 23 | import xapian |
24 | 24 | from singleton import Singleton |
25 | 25 | import recommender |
26 | -from data import * | |
26 | +import data | |
27 | +import logging | |
28 | +from error import Error | |
27 | 29 | |
28 | 30 | class PkgMatchDecider(xapian.MatchDecider): |
29 | 31 | """ |
... | ... | @@ -71,79 +73,13 @@ class PkgExpandDecider(xapian.ExpandDecider): |
71 | 73 | return is_new_pkg and "gnome" in self.pkgs_list |
72 | 74 | return is_new_pkg |
73 | 75 | |
74 | -#class AppMatchDecider(xapian.MatchDecider): | |
75 | -# """ | |
76 | -# Extend xapian.MatchDecider to not consider only applications packages. | |
77 | -# """ | |
78 | -# def __init__(self, pkgs_list, axi): | |
79 | -# """ | |
80 | -# Set initial parameters. | |
81 | -# """ | |
82 | -# xapian.MatchDecider.__init__(self) | |
83 | -# self.pkgs_list = pkgs_list | |
84 | -# self.axi = axi | |
85 | -# | |
86 | -# def __call__(self, doc): | |
87 | -# """ | |
88 | -# True if the package is not already installed. | |
89 | -# """ | |
90 | -# tags = axi_search_pkg_tags(self.axi,doc.get_data()) | |
91 | -# return (("XTrole::program" in tags) and | |
92 | -# (doc.get_data() not in self.pkgs_list)) | |
93 | -# | |
94 | -#class UserMatchDecider(xapian.MatchDecider): | |
95 | -# """ | |
96 | -# Extend xapian.MatchDecider to match similar profiles. | |
97 | -# """ | |
98 | -# | |
99 | -# def __init__(self, profile): | |
100 | -# """ | |
101 | -# Set initial parameters. | |
102 | -# """ | |
103 | -# xapian.MatchDecider.__init__(self) | |
104 | -# self.profile = profile | |
105 | -# | |
106 | -# def __call__(self, doc): | |
107 | -# """ | |
108 | -# True if the user has more the half of packages from profile. | |
109 | -# """ | |
110 | -# match=0 | |
111 | -# for term in doc: | |
112 | -# if term.term in self.profile: | |
113 | -# match = match+1 | |
114 | -# return (match >= len(self.profile)/2) | |
115 | - | |
116 | -#class AppExpandDecider(xapian.ExpandDecider): | |
117 | -# """ | |
118 | -# Extend xapian.ExpandDecider to consider applications only. | |
119 | -# """ | |
120 | -# def __init__(self,axi): | |
121 | -# xapian.ExpandDecider.__init__(self) | |
122 | -# self.axi = axi | |
123 | -# | |
124 | -# def __call__(self, term): | |
125 | -# """ | |
126 | -# True if the term is a package. | |
127 | -# """ | |
128 | -# if not term.startswith("XT"): | |
129 | -# package = term.lstrip("XP") | |
130 | -# print package | |
131 | -# tags = axi_search_pkg_tags(self.axi,package) | |
132 | -# if "XTrole::program" in tags: | |
133 | -# print tags | |
134 | -# return True | |
135 | -# else: | |
136 | -# return False | |
137 | -# else: | |
138 | -# return False | |
139 | - | |
140 | 76 | class TagExpandDecider(xapian.ExpandDecider): |
141 | 77 | """ |
142 | 78 | Extend xapian.ExpandDecider to consider tags only. |
143 | 79 | """ |
144 | 80 | def __call__(self, term): |
145 | 81 | """ |
146 | - True if the term is a tag. | |
82 | + True if the term is a package tag. | |
147 | 83 | """ |
148 | 84 | return term.startswith("XT") |
149 | 85 | |
... | ... | @@ -153,7 +89,7 @@ class RecommendationStrategy: |
153 | 89 | """ |
154 | 90 | pass |
155 | 91 | |
156 | -class ContentBasedStrategy(RecommendationStrategy): | |
92 | +class ContentBased(RecommendationStrategy): | |
157 | 93 | """ |
158 | 94 | Content-based recommendation strategy based on Apt-xapian-index. |
159 | 95 | """ |
... | ... | @@ -162,158 +98,247 @@ class ContentBasedStrategy(RecommendationStrategy): |
162 | 98 | self.content = content |
163 | 99 | self.profile_size = profile_size |
164 | 100 | |
165 | - def run(self,rec,user,recommendation_size): | |
166 | - """ | |
167 | - Perform recommendation strategy. | |
168 | - """ | |
169 | - logging.debug("Composing user profile...") | |
170 | - profile = user.content_profile(rec.items_repository,self.content, | |
171 | - self.profile_size) | |
172 | - logging.debug(profile) | |
173 | - # prepair index for querying user profile | |
101 | + def get_sugestion_from_profile(self,rec,user,profile,recommendation_size): | |
174 | 102 | query = xapian.Query(xapian.Query.OP_OR,profile) |
175 | 103 | enquire = xapian.Enquire(rec.items_repository) |
176 | 104 | enquire.set_weighting_scheme(rec.weight) |
177 | 105 | enquire.set_query(query) |
106 | + # Retrieve matching packages | |
178 | 107 | try: |
179 | - # retrieve matching packages | |
180 | 108 | mset = enquire.get_mset(0, recommendation_size, None, |
181 | 109 | PkgMatchDecider(user.items())) |
182 | - #AppMatchDecider(user.items(), | |
183 | - # rec.items_repository)) | |
184 | 110 | except xapian.DatabaseError as error: |
185 | 111 | logging.critical("Content-based strategy: "+error.get_msg()) |
186 | - # compose result dictionary | |
112 | + | |
113 | + # Compose result dictionary | |
187 | 114 | item_score = {} |
188 | 115 | ranking = [] |
189 | 116 | for m in mset: |
190 | - #[FIXME] set this constraint somehow | |
191 | - #tags = axi_search_pkg_tags(rec.items_repository,m.document.get_data()) | |
192 | - #if "XTrole::program" in tags: | |
193 | 117 | item_score[m.document.get_data()] = m.weight |
194 | 118 | ranking.append(m.document.get_data()) |
195 | 119 | |
196 | - return recommender.RecommendationResult(item_score,ranking) | |
197 | - | |
198 | -class CollaborativeStrategy(RecommendationStrategy): | |
199 | - """ | |
200 | - Colaborative recommendation strategy. | |
201 | - """ | |
202 | - def __init__(self,k): | |
203 | - self.description = "Collaborative" | |
204 | - self.neighbours = k | |
120 | + result = recommender.RecommendationResult(item_score,ranking) | |
121 | + return result | |
205 | 122 | |
206 | 123 | def run(self,rec,user,recommendation_size): |
207 | 124 | """ |
208 | 125 | Perform recommendation strategy. |
209 | 126 | """ |
210 | 127 | logging.debug("Composing user profile...") |
128 | + profile = user.content_profile(rec.items_repository,self.content, | |
129 | + self.profile_size,rec.valid_tags) | |
130 | + logging.debug(profile) | |
131 | + result = self.get_sugestion_from_profile(rec,user,profile,recommendation_size) | |
132 | + return result | |
133 | + | |
134 | +class Collaborative(RecommendationStrategy): | |
135 | + """ | |
135 | + Collaborative recommendation strategy. | |
137 | + """ | |
138 | + def get_user_profile(self,user,rec): | |
139 | + logging.debug("Composing user profile...") | |
211 | 140 | profile = ["XP"+package for package in |
212 | 141 | user.filter_pkg_profile(rec.valid_pkgs)] |
213 | 142 | logging.debug(profile) |
214 | - # prepair index for querying user profile | |
215 | - query = xapian.Query(xapian.Query.OP_OR,profile) | |
143 | + return profile | |
144 | + | |
145 | + def get_enquire(self,rec): | |
216 | 146 | enquire = xapian.Enquire(rec.users_repository) |
217 | 147 | enquire.set_weighting_scheme(rec.weight) |
148 | + return enquire | |
149 | + | |
150 | + def get_rset_from_profile(self,profile): | |
151 | + # Create document to represent user profile and mark it as relevant | |
152 | + return rset | |
153 | + | |
154 | + def get_neighborhood(self,user,rec): | |
155 | + profile = self.get_user_profile(user,rec) | |
156 | + #query = xapian.Query(xapian.Query.OP_OR,profile) | |
157 | + query = xapian.Query(xapian.Query.OP_ELITE_SET,profile) | |
158 | + enquire = self.get_enquire(rec) | |
218 | 159 | enquire.set_query(query) |
160 | + # Retrieve matching users | |
219 | 161 | try: |
220 | - # retrieve matching users | |
221 | 162 | mset = enquire.get_mset(0, self.neighbours) |
222 | 163 | except xapian.DatabaseError as error: |
223 | - logging.critical("Collaborative strategy: "+error.get_msg()) | |
164 | + logging.critical("Could not compose user neighborhood.\n "+error.get_msg()) | |
165 | + raise Error | |
166 | + return mset | |
167 | + | |
168 | + def get_neighborhood_rset(self,user,rec): | |
169 | + mset = self.get_neighborhood(user,rec) | |
224 | 170 | rset = xapian.RSet() |
225 | - logging.debug("Neighborhood composed by the following users (by hash)") | |
226 | 171 | for m in mset: |
227 | 172 | rset.add_document(m.document.get_docid()) |
228 | - logging.debug(m.document.get_data()) | |
229 | - # retrieve most relevant packages | |
230 | - #eset = enquire.get_eset(recommendation_size,rset, | |
231 | - # AppExpandDecider(rec.items_repository)) | |
232 | - eset = enquire.get_eset(recommendation_size,rset, | |
233 | - PkgExpandDecider(user.items())) | |
173 | + return rset | |
174 | + | |
175 | + def get_result_from_eset(self,eset): | |
234 | 176 | # compose result dictionary |
235 | 177 | item_score = {} |
236 | 178 | ranking = [] |
237 | 179 | for e in eset: |
238 | 180 | package = e.term.lstrip("XP") |
239 | - #tags = axi_search_pkg_tags(rec.items_repository,package) | |
240 | - #[FIXME] set this constraint somehow | |
241 | - #if "XTrole::program" in tags: | |
242 | 181 | item_score[package] = e.weight |
243 | - ranking.append(m.document.get_data()) | |
182 | + ranking.append(package) | |
244 | 183 | return recommender.RecommendationResult(item_score, ranking) |
245 | 184 | |
246 | -class DemographicStrategy(RecommendationStrategy): | |
185 | +class Knn(Collaborative): | |
247 | 186 | """ |
248 | - Recommendation strategy based on demographic data. | |
187 | + KNN based on packages' tf-idf weights. | |
249 | 188 | """ |
250 | - #def __init__(self, result): | |
251 | - #self.result = result | |
252 | - def __init__(self): | |
253 | - self.description = "Demographic" | |
254 | - logging.debug("Demographic recommendation not yet implemented.") | |
255 | - raise Error | |
189 | + def __init__(self,k): | |
190 | + self.description = "Knn" | |
191 | + self.neighbours = k | |
256 | 192 | |
257 | 193 | def run(self,rec,user,recommendation_size): |
258 | 194 | """ |
259 | 195 | Perform recommendation strategy. |
260 | 196 | """ |
261 | - ordered_result = self.result.get_prediction() | |
262 | - | |
263 | - for item,weight in ordered_result: | |
264 | - pass | |
265 | - | |
197 | + neighborhood = self.get_neighborhood(user,rec) | |
198 | + weights = data.tfidf_weighting(rec.users_repository,neighborhood, | |
199 | + PkgExpandDecider(user.items())) | |
200 | + item_score = {} | |
201 | + ranking = [] | |
202 | + for pkg in weights[:recommendation_size]: | |
203 | + package = pkg[0].lstrip("XP") | |
204 | + item_score[package] = pkg[1] | |
205 | + ranking.append(package) | |
206 | + result = recommender.RecommendationResult(item_score, ranking) | |
207 | + return result | |
266 | 208 | |
267 | -class KnowledgeBasedStrategy(RecommendationStrategy): | |
209 | +class KnnPlus(Collaborative): | |
268 | 210 | """ |
269 | - Knowledge-based recommendation strategy. | |
211 | + KNN based on packages' tf-idf weights. | |
270 | 212 | """ |
271 | - def __init__(self): | |
272 | - self.description = "Knowledge-based" | |
273 | - logging.debug("Knowledge-based recommendation not yet implemented.") | |
274 | - raise Error | |
213 | + def __init__(self,k): | |
214 | + self.description = "Knn" | |
215 | + self.neighbours = k | |
275 | 216 | |
276 | - def run(self,user,knowledge_repository): | |
217 | + def run(self,rec,user,recommendation_size): | |
277 | 218 | """ |
278 | 219 | Perform recommendation strategy. |
279 | 220 | """ |
280 | - pass | |
221 | + neighborhood = self.get_neighborhood(user,rec) | |
222 | + weights = data.tfidf_plus(rec.users_repository,neighborhood, | |
223 | + PkgExpandDecider(user.items())) | |
224 | + item_score = {} | |
225 | + ranking = [] | |
226 | + for pkg in weights[:recommendation_size]: | |
227 | + package = pkg[0].lstrip("XP") | |
228 | + item_score[package] = pkg[1] | |
229 | + ranking.append(package) | |
230 | + result = recommender.RecommendationResult(item_score, ranking) | |
231 | + return result | |
281 | 232 | |
282 | -class ReputationHeuristic(Singleton): | |
233 | +class KnnEset(Collaborative): | |
283 | 234 | """ |
284 | - Abstraction for diferent reputation heuristics. | |
235 | + KNN based on query expansion. | |
285 | 236 | """ |
286 | - pass | |
237 | + def __init__(self,k): | |
238 | + self.description = "KnnEset" | |
239 | + self.neighbours = k | |
287 | 240 | |
288 | -class BugsHeuristic(ReputationHeuristic): | |
241 | + def run(self,rec,user,recommendation_size): | |
242 | + """ | |
243 | + Perform recommendation strategy. | |
244 | + """ | |
245 | + neighbors_rset = self.get_neighborhood_rset(user,rec) | |
246 | + enquire = self.get_enquire(rec) | |
247 | + # Retrieve new packages based on neighborhood profile expansion | |
248 | + eset = enquire.get_eset(recommendation_size,neighbors_rset, | |
249 | + PkgExpandDecider(user.items())) | |
250 | + result = self.get_result_from_eset(eset) | |
251 | + return result | |
252 | + | |
253 | +class CollaborativeEset(Collaborative): | |
289 | 254 | """ |
290 | - Reputation heuristic based on quantity of open bugs. | |
255 | + Collaborative strategy based on query expansion. | |
291 | 256 | """ |
292 | - pass | |
257 | + def __init__(self): | |
258 | + self.description = "Collaborative-Eset" | |
259 | + | |
260 | + def run(self,rec,user,recommendation_size): | |
261 | + """ | |
262 | + Perform recommendation strategy. | |
263 | + """ | |
264 | + temp_index = xapian.WritableDatabase("/tmp/Database",xapian.DB_CREATE_OR_OVERWRITE) | |
265 | + profile = self.get_user_profile(user,rec) | |
266 | + doc = xapian.Document() | |
267 | + for pkg in profile: | |
268 | + doc.add_term(pkg) | |
269 | + doc.add_term("TO_BE_DELETED") | |
270 | + docid = temp_index.add_document(doc) | |
271 | + temp_index.add_database(rec.users_repository) | |
272 | + rset = xapian.RSet() | |
273 | + rset.add_document(docid) | |
274 | + # rset = self.get_rset_from_profile(profile) | |
275 | + enquire = xapian.Enquire(temp_index) | |
276 | + enquire.set_weighting_scheme(rec.weight) | |
277 | + eset = enquire.get_eset(recommendation_size,rset, | |
278 | + PkgExpandDecider(user.items())) | |
279 | + result = self.get_result_from_eset(eset) | |
280 | + return result | |
293 | 281 | |
294 | -class RCBugsHeuristic(ReputationHeuristic): | |
282 | +class KnnContent(Collaborative): | |
295 | 283 | """ |
296 | - Reputation heuristic based on quantity of RC bugs. | |
284 | + Hybrid "Collaborative through content" recommendation strategy. | |
297 | 285 | """ |
298 | - pass | |
286 | + def __init__(self,k): | |
287 | + self.description = "Knn-Content" | |
288 | + self.neighbours = k | |
299 | 289 | |
300 | -class PopularityHeuristic(ReputationHeuristic): | |
290 | + def run(self,rec,user,recommendation_size): | |
291 | + """ | |
292 | + Perform recommendation strategy. | |
293 | + """ | |
294 | + neighborhood = self.get_neighborhood(user,rec) | |
295 | + weights = data.tfidf_weighting(rec.users_repository,neighborhood, | |
296 | + PkgExpandDecider(user.items())) | |
297 | + profile = [w[0] for w in weights][:rec.cfg.profile_size] | |
298 | + result = ContentBased().get_sugestion_from_profile(rec,user,profile,recommendation_size) | |
299 | + return result | |
300 | + | |
301 | +class KnnContentEset(Collaborative): | |
301 | 302 | """ |
302 | - Reputation heuristic based on popularity of packages. | |
303 | + Hybrid "Collaborative through content" recommendation strategy. | |
303 | 304 | """ |
304 | - pass | |
305 | + def __init__(self,k): | |
306 | + self.description = "Knn-Content-Eset" | |
307 | + self.neighbours = k | |
308 | + | |
309 | + def run(self,rec,user,recommendation_size): | |
310 | + """ | |
311 | + Perform recommendation strategy. | |
312 | + """ | |
313 | + neighbors_rset = self.get_neighborhood_rset(user,rec) | |
314 | + enquire = self.get_enquire(rec) | |
315 | + # Retrieve relevant tags based on neighborhood profile expansion | |
316 | + eset = enquire.get_eset(rec.cfg.profile_size,rset, | |
317 | + TagExpandDecider()) | |
318 | + profile = [e.term for e in eset] | |
319 | + result = ContentBased().get_sugestion_from_profile(rec,user,profile,recommendation_size) | |
320 | + return result | |
305 | 321 | |
306 | -class ItemReputationStrategy(RecommendationStrategy): | |
322 | +class Demographic(RecommendationStrategy): | |
307 | 323 | """ |
308 | - Recommendation strategy based on items reputation. | |
324 | + Hybrid rotation strategy based on demographic data. | |
309 | 325 | """ |
310 | - def __init__(self): | |
311 | - self.description = "Item reputation" | |
312 | - logging.debug("Item reputation recommendation not yet implemented.") | |
313 | - raise Error | |
326 | + def __init__(self,strategy_str): | |
327 | + self.description = "Demographic" | |
328 | + self.strategy_str = strategy_str.lstrip("demo_") | |
314 | 329 | |
315 | - def run(self,items_list,heuristic): | |
330 | + def run(self,rec,user,recommendation_size): | |
316 | 331 | """ |
317 | 332 | Perform recommendation strategy. |
318 | 333 | """ |
319 | - pass | |
334 | + program_profile = user.filter_pkg_profile(os.path.join(rec.cfg.filters_dir,"programs")) | |
335 | + desktop_profile = user.filter_pkg_profile(os.path.join(rec.cfg.filters_dir,"desktopapps")) | |
336 | + if (len(desktop_profile)>10 or | |
337 | + len(desktop_profile)>len(program_profile)/2): | |
338 | + rec.set_strategy(self.strategy_str) | |
339 | + # Redefine repositories after configuring strategy | |
340 | + rec.items_repository = rec.axi_desktopapps | |
341 | + rec.valid_pkgs = rec.valid_desktopapps | |
342 | + if "col" in self.strategy_str: | |
343 | + rec.users_repository = rec.popcon_desktopapps | |
344 | + return rec.get_recommendation(user,recommendation_size) | ... | ... |