Commit 5af15ae174e8afcf8ecaa6bb8b71886615279086

Authored by Tássia Camões Araújo
1 parent 7f4319d5
Exists in master and in 1 other branch: add_vagrant

Recommendation strategies refactoring.

Showing 1 changed file with 175 additions and 150 deletions.
src/strategy.py
... ... @@ -23,7 +23,9 @@ __license__ = """
23 23 import xapian
24 24 from singleton import Singleton
25 25 import recommender
26   -from data import *
  26 +import data
  27 +import logging
  28 +from error import Error
27 29  
28 30 class PkgMatchDecider(xapian.MatchDecider):
29 31 """
... ... @@ -71,79 +73,13 @@ class PkgExpandDecider(xapian.ExpandDecider):
71 73 return is_new_pkg and "gnome" in self.pkgs_list
72 74 return is_new_pkg
73 75  
74   -#class AppMatchDecider(xapian.MatchDecider):
75   -# """
76   -# Extend xapian.MatchDecider to not consider only applications packages.
77   -# """
78   -# def __init__(self, pkgs_list, axi):
79   -# """
80   -# Set initial parameters.
81   -# """
82   -# xapian.MatchDecider.__init__(self)
83   -# self.pkgs_list = pkgs_list
84   -# self.axi = axi
85   -#
86   -# def __call__(self, doc):
87   -# """
88   -# True if the package is not already installed.
89   -# """
90   -# tags = axi_search_pkg_tags(self.axi,doc.get_data())
91   -# return (("XTrole::program" in tags) and
92   -# (doc.get_data() not in self.pkgs_list))
93   -#
94   -#class UserMatchDecider(xapian.MatchDecider):
95   -# """
96   -# Extend xapian.MatchDecider to match similar profiles.
97   -# """
98   -#
99   -# def __init__(self, profile):
100   -# """
101   -# Set initial parameters.
102   -# """
103   -# xapian.MatchDecider.__init__(self)
104   -# self.profile = profile
105   -#
106   -# def __call__(self, doc):
107   -# """
108   -# True if the user has more the half of packages from profile.
109   -# """
110   -# match=0
111   -# for term in doc:
112   -# if term.term in self.profile:
113   -# match = match+1
114   -# return (match >= len(self.profile)/2)
115   -
116   -#class AppExpandDecider(xapian.ExpandDecider):
117   -# """
118   -# Extend xapian.ExpandDecider to consider applications only.
119   -# """
120   -# def __init__(self,axi):
121   -# xapian.ExpandDecider.__init__(self)
122   -# self.axi = axi
123   -#
124   -# def __call__(self, term):
125   -# """
126   -# True if the term is a package.
127   -# """
128   -# if not term.startswith("XT"):
129   -# package = term.lstrip("XP")
130   -# print package
131   -# tags = axi_search_pkg_tags(self.axi,package)
132   -# if "XTrole::program" in tags:
133   -# print tags
134   -# return True
135   -# else:
136   -# return False
137   -# else:
138   -# return False
139   -
140 76 class TagExpandDecider(xapian.ExpandDecider):
141 77 """
142 78 Extend xapian.ExpandDecider to consider tags only.
143 79 """
144 80 def __call__(self, term):
145 81 """
146   - True if the term is a tag.
  82 + True if the term is a package tag.
147 83 """
148 84 return term.startswith("XT")
149 85  
... ... @@ -153,7 +89,7 @@ class RecommendationStrategy:
153 89 """
154 90 pass
155 91  
156   -class ContentBasedStrategy(RecommendationStrategy):
  92 +class ContentBased(RecommendationStrategy):
157 93 """
158 94 Content-based recommendation strategy based on Apt-xapian-index.
159 95 """
... ... @@ -162,158 +98,247 @@ class ContentBasedStrategy(RecommendationStrategy):
162 98 self.content = content
163 99 self.profile_size = profile_size
164 100  
165   - def run(self,rec,user,recommendation_size):
166   - """
167   - Perform recommendation strategy.
168   - """
169   - logging.debug("Composing user profile...")
170   - profile = user.content_profile(rec.items_repository,self.content,
171   - self.profile_size)
172   - logging.debug(profile)
173   - # prepair index for querying user profile
  101 + def get_sugestion_from_profile(self,rec,user,profile,recommendation_size):
174 102 query = xapian.Query(xapian.Query.OP_OR,profile)
175 103 enquire = xapian.Enquire(rec.items_repository)
176 104 enquire.set_weighting_scheme(rec.weight)
177 105 enquire.set_query(query)
  106 + # Retrieve matching packages
178 107 try:
179   - # retrieve matching packages
180 108 mset = enquire.get_mset(0, recommendation_size, None,
181 109 PkgMatchDecider(user.items()))
182   - #AppMatchDecider(user.items(),
183   - # rec.items_repository))
184 110 except xapian.DatabaseError as error:
185 111 logging.critical("Content-based strategy: "+error.get_msg())
186   - # compose result dictionary
  112 +
  113 + # Compose result dictionary
187 114 item_score = {}
188 115 ranking = []
189 116 for m in mset:
190   - #[FIXME] set this constraint somehow
191   - #tags = axi_search_pkg_tags(rec.items_repository,m.document.get_data())
192   - #if "XTrole::program" in tags:
193 117 item_score[m.document.get_data()] = m.weight
194 118 ranking.append(m.document.get_data())
195 119  
196   - return recommender.RecommendationResult(item_score,ranking)
197   -
198   -class CollaborativeStrategy(RecommendationStrategy):
199   - """
200   - Colaborative recommendation strategy.
201   - """
202   - def __init__(self,k):
203   - self.description = "Collaborative"
204   - self.neighbours = k
  120 + result = recommender.RecommendationResult(item_score,ranking)
  121 + return result
205 122  
206 123 def run(self,rec,user,recommendation_size):
207 124 """
208 125 Perform recommendation strategy.
209 126 """
210 127 logging.debug("Composing user profile...")
  128 + profile = user.content_profile(rec.items_repository,self.content,
  129 + self.profile_size,rec.valid_tags)
  130 + logging.debug(profile)
  131 + result = self.get_sugestion_from_profile(rec,user,profile,recommendation_size)
  132 + return result
  133 +
  134 +class Collaborative(RecommendationStrategy):
  135 + """
  136 + Colaborative recommendation strategy.
  137 + """
  138 + def get_user_profile(self,user,rec):
  139 + logging.debug("Composing user profile...")
211 140 profile = ["XP"+package for package in
212 141 user.filter_pkg_profile(rec.valid_pkgs)]
213 142 logging.debug(profile)
214   - # prepair index for querying user profile
215   - query = xapian.Query(xapian.Query.OP_OR,profile)
  143 + return profile
  144 +
  145 + def get_enquire(self,rec):
216 146 enquire = xapian.Enquire(rec.users_repository)
217 147 enquire.set_weighting_scheme(rec.weight)
  148 + return enquire
  149 +
  150 + def get_rset_from_profile(self,profile):
  151 + # Create document to represent user profile and mark it as relevant
  152 + return rset
  153 +
  154 + def get_neighborhood(self,user,rec):
  155 + profile = self.get_user_profile(user,rec)
  156 + #query = xapian.Query(xapian.Query.OP_OR,profile)
  157 + query = xapian.Query(xapian.Query.OP_ELITE_SET,profile)
  158 + enquire = self.get_enquire(rec)
218 159 enquire.set_query(query)
  160 + # Retrieve matching users
219 161 try:
220   - # retrieve matching users
221 162 mset = enquire.get_mset(0, self.neighbours)
222 163 except xapian.DatabaseError as error:
223   - logging.critical("Collaborative strategy: "+error.get_msg())
  164 + logging.critical("Could not compose user neighborhood.\n "+error.get_msg())
  165 + raise Error
  166 + return mset
  167 +
  168 + def get_neighborhood_rset(self,user,rec):
  169 + mset = self.get_neighborhood(user,rec)
224 170 rset = xapian.RSet()
225   - logging.debug("Neighborhood composed by the following users (by hash)")
226 171 for m in mset:
227 172 rset.add_document(m.document.get_docid())
228   - logging.debug(m.document.get_data())
229   - # retrieve most relevant packages
230   - #eset = enquire.get_eset(recommendation_size,rset,
231   - # AppExpandDecider(rec.items_repository))
232   - eset = enquire.get_eset(recommendation_size,rset,
233   - PkgExpandDecider(user.items()))
  173 + return rset
  174 +
  175 + def get_result_from_eset(self,eset):
234 176 # compose result dictionary
235 177 item_score = {}
236 178 ranking = []
237 179 for e in eset:
238 180 package = e.term.lstrip("XP")
239   - #tags = axi_search_pkg_tags(rec.items_repository,package)
240   - #[FIXME] set this constraint somehow
241   - #if "XTrole::program" in tags:
242 181 item_score[package] = e.weight
243   - ranking.append(m.document.get_data())
  182 + ranking.append(package)
244 183 return recommender.RecommendationResult(item_score, ranking)
245 184  
246   -class DemographicStrategy(RecommendationStrategy):
  185 +class Knn(Collaborative):
247 186 """
248   - Recommendation strategy based on demographic data.
  187 + KNN based packages tf-idf weights.
249 188 """
250   - #def __init__(self, result):
251   - #self.result = result
252   - def __init__(self):
253   - self.description = "Demographic"
254   - logging.debug("Demographic recommendation not yet implemented.")
255   - raise Error
  189 + def __init__(self,k):
  190 + self.description = "Knn"
  191 + self.neighbours = k
256 192  
257 193 def run(self,rec,user,recommendation_size):
258 194 """
259 195 Perform recommendation strategy.
260 196 """
261   - ordered_result = self.result.get_prediction()
262   -
263   - for item,weight in ordered_result:
264   - pass
265   -
  197 + neighborhood = self.get_neighborhood(user,rec)
  198 + weights = data.tfidf_weighting(rec.users_repository,neighborhood,
  199 + PkgExpandDecider(user.items()))
  200 + item_score = {}
  201 + ranking = []
  202 + for pkg in weights[:recommendation_size]:
  203 + package = pkg[0].lstrip("XP")
  204 + item_score[package] = pkg[1]
  205 + ranking.append(package)
  206 + result = recommender.RecommendationResult(item_score, ranking)
  207 + return result
266 208  
267   -class KnowledgeBasedStrategy(RecommendationStrategy):
  209 +class KnnPlus(Collaborative):
268 210 """
269   - Knowledge-based recommendation strategy.
  211 + KNN based packages tf-idf weights.
270 212 """
271   - def __init__(self):
272   - self.description = "Knowledge-based"
273   - logging.debug("Knowledge-based recommendation not yet implemented.")
274   - raise Error
  213 + def __init__(self,k):
  214 + self.description = "Knn"
  215 + self.neighbours = k
275 216  
276   - def run(self,user,knowledge_repository):
  217 + def run(self,rec,user,recommendation_size):
277 218 """
278 219 Perform recommendation strategy.
279 220 """
280   - pass
  221 + neighborhood = self.get_neighborhood(user,rec)
  222 + weights = data.tfidf_plus(rec.users_repository,neighborhood,
  223 + PkgExpandDecider(user.items()))
  224 + item_score = {}
  225 + ranking = []
  226 + for pkg in weights[:recommendation_size]:
  227 + package = pkg[0].lstrip("XP")
  228 + item_score[package] = pkg[1]
  229 + ranking.append(package)
  230 + result = recommender.RecommendationResult(item_score, ranking)
  231 + return result
281 232  
282   -class ReputationHeuristic(Singleton):
  233 +class KnnEset(Collaborative):
283 234 """
284   - Abstraction for diferent reputation heuristics.
  235 + KNN based on query expansion.
285 236 """
286   - pass
  237 + def __init__(self,k):
  238 + self.description = "KnnEset"
  239 + self.neighbours = k
287 240  
288   -class BugsHeuristic(ReputationHeuristic):
  241 + def run(self,rec,user,recommendation_size):
  242 + """
  243 + Perform recommendation strategy.
  244 + """
  245 + neighbors_rset = self.get_neighborhood_rset(user,rec)
  246 + enquire = self.get_enquire(rec)
  247 + # Retrieve new packages based on neighborhood profile expansion
  248 + eset = enquire.get_eset(recommendation_size,neighbors_rset,
  249 + PkgExpandDecider(user.items()))
  250 + result = self.get_result_from_eset(eset)
  251 + return result
  252 +
  253 +class CollaborativeEset(Collaborative):
289 254 """
290   - Reputation heuristic based on quantity of open bugs.
  255 + Colaborative strategy based on query expansion.
291 256 """
292   - pass
  257 + def __init__(self):
  258 + self.description = "Collaborative-Eset"
  259 +
  260 + def run(self,rec,user,recommendation_size):
  261 + """
  262 + Perform recommendation strategy.
  263 + """
  264 + temp_index = xapian.WritableDatabase("/tmp/Database",xapian.DB_CREATE_OR_OVERWRITE)
  265 + profile = self.get_user_profile(user,rec)
  266 + doc = xapian.Document()
  267 + for pkg in profile:
  268 + doc.add_term(pkg)
  269 + doc.add_term("TO_BE_DELETED")
  270 + docid = temp_index.add_document(doc)
  271 + temp_index.add_database(rec.users_repository)
  272 + rset = xapian.RSet()
  273 + rset.add_document(docid)
  274 + # rset = self.get_rset_from_profile(profile)
  275 + enquire = xapian.Enquire(temp_index)
  276 + enquire.set_weighting_scheme(rec.weight)
  277 + eset = enquire.get_eset(recommendation_size,rset,
  278 + PkgExpandDecider(user.items()))
  279 + result = self.get_result_from_eset(eset)
  280 + return result
293 281  
294   -class RCBugsHeuristic(ReputationHeuristic):
  282 +class KnnContent(Collaborative):
295 283 """
296   - Reputation heuristic based on quantity of RC bugs.
  284 + Hybrid "Colaborative through content" recommendation strategy.
297 285 """
298   - pass
  286 + def __init__(self,k):
  287 + self.description = "Knn-Content"
  288 + self.neighbours = k
299 289  
300   -class PopularityHeuristic(ReputationHeuristic):
  290 + def run(self,rec,user,recommendation_size):
  291 + """
  292 + Perform recommendation strategy.
  293 + """
  294 + neighborhood = self.get_neighborhood(user,rec)
  295 + weights = data.tfidf_weighting(rec.users_repository,neighborhood,
  296 + PkgExpandDecider(user.items()))
  297 + profile = [w[0] for w in weights][:rec.cfg.profile_size]
  298 + result = ContentBased().get_sugestion_from_profile(rec,user,profile,recommendation_size)
  299 + return result
  300 +
  301 +class KnnContentEset(Collaborative):
301 302 """
302   - Reputation heuristic based on popularity of packages.
  303 + Hybrid "Colaborative through content" recommendation strategy.
303 304 """
304   - pass
  305 + def __init__(self,k):
  306 + self.description = "Knn-Content-Eset"
  307 + self.neighbours = k
  308 +
  309 + def run(self,rec,user,recommendation_size):
  310 + """
  311 + Perform recommendation strategy.
  312 + """
  313 + neighbors_rset = self.get_neighborhood_rset(user,rec)
  314 + enquire = self.get_enquire(rec)
  315 + # Retrieve relevant tags based on neighborhood profile expansion
  316 + eset = enquire.get_eset(rec.cfg.profile_size,rset,
  317 + TagExpandDecider())
  318 + profile = [e.term for e in eset]
  319 + result = ContentBased().get_sugestion_from_profile(rec,user,profile,recommendation_size)
  320 + return result
305 321  
306   -class ItemReputationStrategy(RecommendationStrategy):
  322 +class Demographic(RecommendationStrategy):
307 323 """
308   - Recommendation strategy based on items reputation.
  324 + Hybrid rotation strategy based on demographic data.
309 325 """
310   - def __init__(self):
311   - self.description = "Item reputation"
312   - logging.debug("Item reputation recommendation not yet implemented.")
313   - raise Error
  326 + def __init__(self,strategy_str):
  327 + self.description = "Demographic"
  328 + self.strategy_str = strategy_str.lstrip("demo_")
314 329  
315   - def run(self,items_list,heuristic):
  330 + def run(self,rec,user,recommendation_size):
316 331 """
317 332 Perform recommendation strategy.
318 333 """
319   - pass
  334 + program_profile = user.filter_pkg_profile(os.path.join(rec.cfg.filters_dir,"programs"))
  335 + desktop_profile = user.filter_pkg_profile(os.path.join(rec.cfg.filters_dir,"desktopapps"))
  336 + if (len(desktop_profile)>10 or
  337 + len(desktop_profile)>len(program_profile)/2):
  338 + rec.set_strategy(self.strategy_str)
  339 + # Redefine repositories after configuring strategy
  340 + rec.items_repository = rec.axi_desktopapps
  341 + rec.valid_pkgs = rec.valid_desktopapps
  342 + if "col" in self.strategy_str:
  343 + rec.users_repository = rec.popcon_desktopapps
  344 + return rec.get_recommendation(user,recommendation_size)
... ...