Commit 5af15ae174e8afcf8ecaa6bb8b71886615279086

Authored by Tássia Camões Araújo
1 parent 7f4319d5
Exists in master and in 1 other branch add_vagrant

Recommendation strategies refactoring.

Showing 1 changed file with 175 additions and 150 deletions   Show diff stats
src/strategy.py
@@ -23,7 +23,9 @@ __license__ = """ @@ -23,7 +23,9 @@ __license__ = """
23 import xapian 23 import xapian
24 from singleton import Singleton 24 from singleton import Singleton
25 import recommender 25 import recommender
26 -from data import * 26 +import data
  27 +import logging
  28 +from error import Error
27 29
28 class PkgMatchDecider(xapian.MatchDecider): 30 class PkgMatchDecider(xapian.MatchDecider):
29 """ 31 """
@@ -71,79 +73,13 @@ class PkgExpandDecider(xapian.ExpandDecider): @@ -71,79 +73,13 @@ class PkgExpandDecider(xapian.ExpandDecider):
71 return is_new_pkg and "gnome" in self.pkgs_list 73 return is_new_pkg and "gnome" in self.pkgs_list
72 return is_new_pkg 74 return is_new_pkg
73 75
74 -#class AppMatchDecider(xapian.MatchDecider):  
75 -# """  
76 -# Extend xapian.MatchDecider to not consider only applications packages.  
77 -# """  
78 -# def __init__(self, pkgs_list, axi):  
79 -# """  
80 -# Set initial parameters.  
81 -# """  
82 -# xapian.MatchDecider.__init__(self)  
83 -# self.pkgs_list = pkgs_list  
84 -# self.axi = axi  
85 -#  
86 -# def __call__(self, doc):  
87 -# """  
88 -# True if the package is not already installed.  
89 -# """  
90 -# tags = axi_search_pkg_tags(self.axi,doc.get_data())  
91 -# return (("XTrole::program" in tags) and  
92 -# (doc.get_data() not in self.pkgs_list))  
93 -#  
94 -#class UserMatchDecider(xapian.MatchDecider):  
95 -# """  
96 -# Extend xapian.MatchDecider to match similar profiles.  
97 -# """  
98 -#  
99 -# def __init__(self, profile):  
100 -# """  
101 -# Set initial parameters.  
102 -# """  
103 -# xapian.MatchDecider.__init__(self)  
104 -# self.profile = profile  
105 -#  
106 -# def __call__(self, doc):  
107 -# """  
108 -# True if the user has more the half of packages from profile.  
109 -# """  
110 -# match=0  
111 -# for term in doc:  
112 -# if term.term in self.profile:  
113 -# match = match+1  
114 -# return (match >= len(self.profile)/2)  
115 -  
116 -#class AppExpandDecider(xapian.ExpandDecider):  
117 -# """  
118 -# Extend xapian.ExpandDecider to consider applications only.  
119 -# """  
120 -# def __init__(self,axi):  
121 -# xapian.ExpandDecider.__init__(self)  
122 -# self.axi = axi  
123 -#  
124 -# def __call__(self, term):  
125 -# """  
126 -# True if the term is a package.  
127 -# """  
128 -# if not term.startswith("XT"):  
129 -# package = term.lstrip("XP")  
130 -# print package  
131 -# tags = axi_search_pkg_tags(self.axi,package)  
132 -# if "XTrole::program" in tags:  
133 -# print tags  
134 -# return True  
135 -# else:  
136 -# return False  
137 -# else:  
138 -# return False  
139 -  
class TagExpandDecider(xapian.ExpandDecider):
    """
    Expand decider that keeps package tag terms only.
    """
    def __call__(self, term):
        """
        Tell whether the term is a package tag, i.e. starts with "XT".
        """
        is_tag = term.startswith("XT")
        return is_tag
149 85
@@ -153,7 +89,7 @@ class RecommendationStrategy: @@ -153,7 +89,7 @@ class RecommendationStrategy:
153 """ 89 """
154 pass 90 pass
155 91
156 -class ContentBasedStrategy(RecommendationStrategy): 92 +class ContentBased(RecommendationStrategy):
157 """ 93 """
158 Content-based recommendation strategy based on Apt-xapian-index. 94 Content-based recommendation strategy based on Apt-xapian-index.
159 """ 95 """
@@ -162,158 +98,247 @@ class ContentBasedStrategy(RecommendationStrategy): @@ -162,158 +98,247 @@ class ContentBasedStrategy(RecommendationStrategy):
162 self.content = content 98 self.content = content
163 self.profile_size = profile_size 99 self.profile_size = profile_size
164 100
165 - def run(self,rec,user,recommendation_size):  
166 - """  
167 - Perform recommendation strategy.  
168 - """  
169 - logging.debug("Composing user profile...")  
170 - profile = user.content_profile(rec.items_repository,self.content,  
171 - self.profile_size)  
172 - logging.debug(profile)  
def get_sugestion_from_profile(self, rec, user, profile, recommendation_size):
    """
    Query the items repository with a profile and build the result.

    The profile terms are combined with OP_OR into one query, which is
    run against rec.items_repository with rec.weight as weighting scheme.
    PkgMatchDecider(user.items()) is passed as match decider and at most
    recommendation_size matches are requested.

    Returns a recommender.RecommendationResult built from a dictionary
    mapping the data of each matched document to its weight, and from
    the list of those data in match order.

    Raises Error if the repository cannot be queried.
    """
    query = xapian.Query(xapian.Query.OP_OR, profile)
    enquire = xapian.Enquire(rec.items_repository)
    enquire.set_weighting_scheme(rec.weight)
    enquire.set_query(query)
    # Retrieve matching packages
    try:
        mset = enquire.get_mset(0, recommendation_size, None,
                                PkgMatchDecider(user.items()))
    except xapian.DatabaseError as error:
        logging.critical("Content-based strategy: "+error.get_msg())
        # Without a match set there is nothing to rank; carrying on would
        # only fail a few lines below with a NameError on mset.
        raise Error

    # Compose result dictionary
    item_score = {}
    ranking = []
    for m in mset:
        package = m.document.get_data()
        item_score[package] = m.weight
        ranking.append(package)

    return recommender.RecommendationResult(item_score, ranking)
205 122
def run(self, rec, user, recommendation_size):
    """
    Compose the user content profile and recommend packages from it.

    The profile comes from user.content_profile(), called with the items
    repository, self.content, self.profile_size and rec.valid_tags; the
    recommendation itself is delegated to get_sugestion_from_profile().
    """
    logging.debug("Composing user profile...")
    content_profile = user.content_profile(rec.items_repository,
                                           self.content,
                                           self.profile_size,
                                           rec.valid_tags)
    logging.debug(content_profile)
    return self.get_sugestion_from_profile(rec, user, content_profile,
                                           recommendation_size)
  133 +
class Collaborative(RecommendationStrategy):
    """
    Collaborative recommendation strategy.

    Gathers the helpers shared by the neighbourhood-based strategies.
    No __init__ is defined here: get_neighborhood() reads
    self.neighbours, so that attribute must be set before it is called.
    """
    def get_user_profile(self, user, rec):
        """
        Return the user profile as a list of "XP"-prefixed terms.

        The packages come from user.filter_pkg_profile(rec.valid_pkgs).
        """
        logging.debug("Composing user profile...")
        profile = ["XP"+package for package in
                   user.filter_pkg_profile(rec.valid_pkgs)]
        logging.debug(profile)
        return profile

    def get_enquire(self, rec):
        """
        Return an Enquire on rec.users_repository weighted by rec.weight.
        """
        enquire = xapian.Enquire(rec.users_repository)
        enquire.set_weighting_scheme(rec.weight)
        return enquire

    def get_rset_from_profile(self, profile):
        """
        Placeholder for building an RSet out of a user profile.

        The intended behaviour is to create a document representing the
        user profile and mark it as relevant. It was never implemented
        (the body used to return an undefined name), so it now fails
        explicitly.
        """
        raise NotImplementedError("get_rset_from_profile is not implemented")

    def get_neighborhood(self, user, rec):
        """
        Return the MSet of users matching the user profile.

        At most self.neighbours matches are requested from
        rec.users_repository. Raises Error if the query fails.
        """
        profile = self.get_user_profile(user, rec)
        # OP_ELITE_SET is used here where the code previously used OP_OR
        query = xapian.Query(xapian.Query.OP_ELITE_SET, profile)
        enquire = self.get_enquire(rec)
        enquire.set_query(query)
        # Retrieve matching users
        try:
            mset = enquire.get_mset(0, self.neighbours)
        except xapian.DatabaseError as error:
            logging.critical("Could not compose user neighborhood.\n "+error.get_msg())
            raise Error
        return mset

    def get_neighborhood_rset(self, user, rec):
        """
        Return an RSet holding the docid of every user in the neighborhood.
        """
        mset = self.get_neighborhood(user, rec)
        rset = xapian.RSet()
        for m in mset:
            rset.add_document(m.document.get_docid())
        return rset

    def get_result_from_eset(self, eset):
        """
        Compose a RecommendationResult from the terms of an ESet.

        Each term has its "XP" prefix removed; the result maps the
        remaining package name to the term weight and ranks the packages
        in ESet order.
        """
        item_score = {}
        ranking = []
        for e in eset:
            term = e.term
            # Remove the prefix itself: lstrip("XP") would strip every
            # leading "X" or "P" character, not just the two-letter prefix.
            package = term[2:] if term.startswith("XP") else term
            item_score[package] = e.weight
            ranking.append(package)
        return recommender.RecommendationResult(item_score, ranking)
245 184
class Knn(Collaborative):
    """
    KNN based packages tf-idf weights.
    """
    def __init__(self, k):
        """
        Set the strategy description and the number of neighbours k.
        """
        self.description = "Knn"
        self.neighbours = k

    def run(self, rec, user, recommendation_size):
        """
        Perform recommendation strategy.

        The neighborhood of the user is weighted with
        data.tfidf_weighting() and the first recommendation_size entries
        of the outcome become the recommendation.
        """
        neighborhood = self.get_neighborhood(user, rec)
        weights = data.tfidf_weighting(rec.users_repository, neighborhood,
                                       PkgExpandDecider(user.items()))
        item_score = {}
        ranking = []
        # Each entry is indexed as (term, weight).
        # NOTE(review): presumably already ordered by weight - confirm in data
        for pkg in weights[:recommendation_size]:
            term = pkg[0]
            # Remove the prefix itself: lstrip("XP") would strip every
            # leading "X" or "P" character, not just the two-letter prefix.
            package = term[2:] if term.startswith("XP") else term
            item_score[package] = pkg[1]
            ranking.append(package)
        return recommender.RecommendationResult(item_score, ranking)
266 208
class KnnPlus(Collaborative):
    """
    KNN strategy weighting packages with data.tfidf_plus().
    """
    def __init__(self, k):
        """
        Set the strategy description and the number of neighbours k.
        """
        # Was "Knn", which made this strategy indistinguishable from Knn
        self.description = "KnnPlus"
        self.neighbours = k

    def run(self, rec, user, recommendation_size):
        """
        Perform recommendation strategy.

        The neighborhood of the user is weighted with data.tfidf_plus()
        and the first recommendation_size entries of the outcome become
        the recommendation.
        """
        neighborhood = self.get_neighborhood(user, rec)
        weights = data.tfidf_plus(rec.users_repository, neighborhood,
                                  PkgExpandDecider(user.items()))
        item_score = {}
        ranking = []
        # Each entry is indexed as (term, weight).
        # NOTE(review): presumably already ordered by weight - confirm in data
        for pkg in weights[:recommendation_size]:
            term = pkg[0]
            # Remove the prefix itself: lstrip("XP") would strip every
            # leading "X" or "P" character, not just the two-letter prefix.
            package = term[2:] if term.startswith("XP") else term
            item_score[package] = pkg[1]
            ranking.append(package)
        return recommender.RecommendationResult(item_score, ranking)
281 232
class KnnEset(Collaborative):
    """
    KNN strategy relying on query expansion.
    """
    def __init__(self, k):
        """
        Store the strategy description and the number of neighbours k.
        """
        self.description = "KnnEset"
        self.neighbours = k

    def run(self, rec, user, recommendation_size):
        """
        Recommend the terms expanded from the user's neighborhood.

        The neighborhood RSet is expanded into at most
        recommendation_size terms, with PkgExpandDecider(user.items())
        as expand decider, and the ESet is turned into the result.
        """
        relevant_users = self.get_neighborhood_rset(user, rec)
        expander = self.get_enquire(rec)
        # Retrieve new packages based on neighborhood profile expansion
        expanded = expander.get_eset(recommendation_size, relevant_users,
                                     PkgExpandDecider(user.items()))
        return self.get_result_from_eset(expanded)
  252 +
class CollaborativeEset(Collaborative):
    """
    Collaborative strategy based on query expansion.
    """
    def __init__(self):
        """
        Set the strategy description.
        """
        self.description = "Collaborative-Eset"

    def run(self, rec, user, recommendation_size):
        """
        Perform recommendation strategy.

        The user profile is stored as a single document in a scratch
        index, rec.users_repository is added to that index, and the
        document is marked as relevant. The relevance set is then
        expanded into at most recommendation_size terms, with
        PkgExpandDecider(user.items()) as expand decider.
        """
        import shutil
        import tempfile

        profile = self.get_user_profile(user, rec)
        doc = xapian.Document()
        for pkg in profile:
            doc.add_term(pkg)
        doc.add_term("TO_BE_DELETED")

        # A private scratch directory replaces the fixed "/tmp/Database"
        # path, which concurrent runs would overwrite under each other.
        scratch_dir = tempfile.mkdtemp()
        try:
            temp_index = xapian.WritableDatabase(scratch_dir,
                                                 xapian.DB_CREATE_OR_OVERWRITE)
            docid = temp_index.add_document(doc)
            temp_index.add_database(rec.users_repository)
            rset = xapian.RSet()
            rset.add_document(docid)
            enquire = xapian.Enquire(temp_index)
            enquire.set_weighting_scheme(rec.weight)
            eset = enquire.get_eset(recommendation_size, rset,
                                    PkgExpandDecider(user.items()))
            result = self.get_result_from_eset(eset)
            # Drop the index handles before the directory is removed.
            # The index is deliberately not closed explicitly.
            # NOTE(review): closing it might also close the users
            # repository that was added to it - confirm before changing.
            del enquire, temp_index
        finally:
            shutil.rmtree(scratch_dir, ignore_errors=True)
        return result
293 281
class KnnContent(Collaborative):
    """
    Hybrid "Collaborative through content" recommendation strategy.
    """
    def __init__(self, k):
        """
        Store the strategy description and the number of neighbours k.
        """
        self.description = "Knn-Content"
        self.neighbours = k

    def run(self, rec, user, recommendation_size):
        """
        Recommend by content, using a profile taken from the neighborhood.

        The neighborhood is weighted with data.tfidf_weighting(); the
        first rec.cfg.profile_size terms of the outcome form the profile
        handed to ContentBased.get_sugestion_from_profile().
        """
        neighborhood = self.get_neighborhood(user, rec)
        weights = data.tfidf_weighting(rec.users_repository, neighborhood,
                                       PkgExpandDecider(user.items()))
        terms = []
        for entry in weights:
            terms.append(entry[0])
        profile = terms[:rec.cfg.profile_size]
        # NOTE(review): ContentBased is built without arguments here, while
        # its constructor is not visible from this class - confirm that it
        # accepts a call with no arguments.
        content_strategy = ContentBased()
        return content_strategy.get_sugestion_from_profile(rec, user, profile,
                                                           recommendation_size)
  300 +
class KnnContentEset(Collaborative):
    """
    Hybrid "Collaborative through content" recommendation strategy.

    The content profile is obtained by expanding the neighborhood into
    tag terms.
    """
    def __init__(self, k):
        """
        Set the strategy description and the number of neighbours k.
        """
        self.description = "Knn-Content-Eset"
        self.neighbours = k

    def run(self, rec, user, recommendation_size):
        """
        Perform recommendation strategy.

        The neighborhood RSet is expanded into at most
        rec.cfg.profile_size terms accepted by TagExpandDecider; those
        terms form the profile handed to
        ContentBased.get_sugestion_from_profile().
        """
        neighbors_rset = self.get_neighborhood_rset(user, rec)
        enquire = self.get_enquire(rec)
        # Retrieve relevant tags based on neighborhood profile expansion
        # (the relevance set was previously passed under the undefined
        # name rset, which raised a NameError)
        eset = enquire.get_eset(rec.cfg.profile_size, neighbors_rset,
                                TagExpandDecider())
        profile = [e.term for e in eset]
        # NOTE(review): ContentBased is built without arguments here, while
        # its constructor is not visible from this class - confirm that it
        # accepts a call with no arguments.
        result = ContentBased().get_sugestion_from_profile(rec, user, profile,
                                                           recommendation_size)
        return result
305 321
class Demographic(RecommendationStrategy):
    """
    Hybrid rotation strategy based on demographic data.
    """
    def __init__(self, strategy_str):
        """
        Set the description and keep the strategy name without "demo_".
        """
        self.description = "Demographic"
        # Remove the prefix itself: lstrip("demo_") would strip every
        # leading "d", "e", "m", "o" or "_" character of the name.
        prefix = "demo_"
        if strategy_str.startswith(prefix):
            strategy_str = strategy_str[len(prefix):]
        self.strategy_str = strategy_str

    def run(self, rec, user, recommendation_size):
        """
        Perform recommendation strategy.

        The user packages are filtered with the "programs" and
        "desktopapps" files of rec.cfg.filters_dir. When the desktop
        profile has more than 10 packages, or more than half the size of
        the program profile, rec is switched to self.strategy_str and
        its repositories to the desktopapps ones. The recommendation is
        then delegated to rec.get_recommendation().
        """
        # Imported locally: no module-level import of os is visible in
        # this part of the file.
        import os

        filters_dir = rec.cfg.filters_dir
        program_profile = user.filter_pkg_profile(
            os.path.join(filters_dir, "programs"))
        desktop_profile = user.filter_pkg_profile(
            os.path.join(filters_dir, "desktopapps"))
        # NOTE(review): the nesting below was reconstructed from a
        # flattened diff - confirm against the repository. Also confirm
        # that rec.get_recommendation() does not re-enter this strategy
        # when the condition is false and rec is left untouched.
        if (len(desktop_profile) > 10 or
                len(desktop_profile) > len(program_profile)/2):
            rec.set_strategy(self.strategy_str)
            # Redefine repositories after configuring strategy
            rec.items_repository = rec.axi_desktopapps
            rec.valid_pkgs = rec.valid_desktopapps
            if "col" in self.strategy_str:
                rec.users_repository = rec.popcon_desktopapps
        return rec.get_recommendation(user, recommendation_size)