Commit 9332e65ed65a9b72a2db23629c03129ba499f8ca

Authored by Tássia Camões Araújo
1 parent 129d0e43
Exists in master and in 1 other branch add_vagrant

Fixed logging bugs, improved filtering by file, new PkgsListSystem class, survey …

…bug fixes, and some other work-in-progress updates.
src/bin/get_highinst.py
... ... @@ -1,10 +0,0 @@
1   -#!/usr/bin/env python
2   -
3   -if __name__ == '__main__':
4   - with open("/root/org/popcon.debian.org/popcon-mail/results") as results:
5   - for line in results.readlines():
6   - if line.startswith("Package"):
7   - fields = line.split()
8   - inst = int(fields[2])+int(fields[3])+int(fields[4])
9   - if inst > 20:
10   - print fields[1], inst
src/bin/get_pkgs_inst.py 0 → 100755
... ... @@ -0,0 +1,15 @@
#!/usr/bin/env python
"""
List the packages of a popcon results file whose summed counters exceed a
minimum, one "name count" pair per line, ordered by increasing count.
"""

import sys
from operator import itemgetter

# Results file read when no path is given on the command line.
RESULTS_PATH = "/root/org/popcon.debian.org/popcon-mail/results"
# A package is reported only when its count is strictly above this value.
MIN_INSTALLATIONS = 20


def collect_installations(lines, min_inst=MIN_INSTALLATIONS):
    """
    Return a dict mapping package name to its summed counters.

    lines: iterable of text lines; only those starting with "Package" are
           considered, the others are ignored.
    min_inst: a package is kept only if its count is strictly greater than
              this value.

    The count is the sum of the third, fourth and fifth whitespace-separated
    fields of the line (presumably the popcon installation counters -- to be
    confirmed against the results file format). The second field is used as
    the package name. When a name appears more than once, the last
    qualifying line wins.

    Raises ValueError if one of the summed fields is not an integer.
    """
    pkgs_inst = {}
    for line in lines:
        if not line.startswith("Package"):
            continue
        fields = line.split()
        # A truncated line has no counters to sum; skip it instead of
        # failing with IndexError.
        if len(fields) < 5:
            continue
        inst = int(fields[2]) + int(fields[3]) + int(fields[4])
        if inst > min_inst:
            pkgs_inst[fields[1]] = inst
    return pkgs_inst


def sort_by_installations(pkgs_inst):
    """
    Return the (package, count) pairs of pkgs_inst ordered by increasing count.
    """
    return sorted(pkgs_inst.items(), key=itemgetter(1))


if __name__ == '__main__':
    # An optional first argument overrides the default results file.
    results_path = sys.argv[1] if len(sys.argv) > 1 else RESULTS_PATH
    with open(results_path) as results:
        pkgs_inst = collect_installations(results)
    for pkg, inst in sort_by_installations(pkgs_inst):
        # Same output as the Python 2 statement "print pkg, inst", but also
        # valid on Python 3.
        print("%s %d" % (pkg, inst))
... ...
src/recommender.py
... ... @@ -19,6 +19,8 @@ __license__ = """
19 19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 20 """
21 21  
  22 +import logging
  23 +import os
22 24 import xapian
23 25 import operator
24 26 import data
... ... @@ -73,14 +75,20 @@ class Recommender:
73 75 self.weight = xapian.BM25Weight()
74 76 else:
75 77 self.weight = xapian.TradWeight()
  78 + self.valid_pkgs = []
  79 + # file format: one pkg_name per line
  80 + with open(os.path.join(cfg.filters,cfg.pkgs_filter)) as valid_pkgs:
  81 + self.valid_pkgs = [line.strip() for line in valid_pkgs
  82 + if not line.startswith("#")]
76 83  
77 84 def set_strategy(self,strategy_str):
78 85 """
79 86 Set the recommendation strategy.
80 87 """
  88 + logging.info("Setting recommender strategy to \'%s\'" % strategy_str)
81 89 self.items_repository = xapian.Database(self.cfg.axi)
82 90 if "desktop" in strategy_str:
83   - self.items_repository = xapian.Database("/root/.app-recommender/DesktopAxi")
  91 + self.items_repository = xapian.Database("/root/.app-recommender/axi_desktop")
84 92 self.cfg.popcon_index = "/root/.app-recommender/popcon-index_desktop_1000"
85 93  
86 94 if strategy_str == "cb" or strategy_str == "cb_desktop":
... ...
src/strategy.py
... ... @@ -40,85 +40,102 @@ class PkgMatchDecider(xapian.MatchDecider):
40 40 """
41 41 True if the package is not already installed.
42 42 """
43   - return doc.get_data() not in self.pkgs_list
44   -
45   -class AppMatchDecider(xapian.MatchDecider):
46   - """
47   - Extend xapian.MatchDecider to not consider only applications packages.
48   - """
49   - def __init__(self, pkgs_list, axi):
50   - """
51   - Set initial parameters.
52   - """
53   - xapian.MatchDecider.__init__(self)
54   - self.pkgs_list = pkgs_list
55   - self.axi = axi
56   -
57   - def __call__(self, doc):
58   - """
59   - True if the package is not already installed.
60   - """
61   - tags = axi_search_pkg_tags(self.axi,doc.get_data())
62   - return (("XTrole::program" in tags) and
63   - (doc.get_data() not in self.pkgs_list))
64   -
65   -class UserMatchDecider(xapian.MatchDecider):
66   - """
67   - Extend xapian.MatchDecider to match similar profiles.
68   - """
69   -
70   - def __init__(self, profile):
71   - """
72   - Set initial parameters.
73   - """
74   - xapian.MatchDecider.__init__(self)
75   - self.profile = profile
76   -
77   - def __call__(self, doc):
78   - """
79   - True if the user has more the half of packages from profile.
80   - """
81   - match=0
82   - for term in doc:
83   - if term.term in self.profile:
84   - match = match+1
85   - return (match >= len(self.profile)/2)
  43 + pkg = doc.get_data()
  44 + is_new = pkg not in self.pkgs_list
  45 + if "kde" in pkg:
  46 + return is_new and "kde" in self.pkgs_list
  47 + if "gnome" in pkg:
  48 + return is_new and "gnome" in self.pkgs_list
  49 + return is_new
86 50  
87 51 class PkgExpandDecider(xapian.ExpandDecider):
88 52 """
89 53 Extend xapian.ExpandDecider to consider packages only.
90 54 """
91   - def __call__(self, term):
  55 + def __init__(self, pkgs_list):
92 56 """
93   - True if the term is a package.
  57 + Set initial parameters.
94 58 """
95   - # [FIXME] return term.startswith("XP")
96   - #return not term.startswith("XT")
97   - return term.startswith("XP")
98   -
99   -class AppExpandDecider(xapian.ExpandDecider):
100   - """
101   - Extend xapian.ExpandDecider to consider applications only.
102   - """
103   - def __init__(self,axi):
104 59 xapian.ExpandDecider.__init__(self)
105   - self.axi = axi
  60 + self.pkgs_list = pkgs_list
106 61  
107 62 def __call__(self, term):
108 63 """
109 64 True if the term is a package.
110 65 """
111   - if not term.startswith("XT"):
112   - package = term.lstrip("XP")
113   - print package
114   - tags = axi_search_pkg_tags(self.axi,package)
115   - if "XTrole::program" in tags:
116   - print tags
117   - return True
118   - else:
119   - return False
120   - else:
121   - return False
  66 + pkg = term.lstrip("XP")
  67 + is_new_pkg = pkg not in self.pkgs_list and term.startswith("XP")
  68 + if "kde" in pkg:
  69 + return is_new_pkg and "kde" in self.pkgs_list
  70 + if "gnome" in pkg:
  71 + return is_new_pkg and "gnome" in self.pkgs_list
  72 + return is_new_pkg
  73 +
  74 +#class AppMatchDecider(xapian.MatchDecider):
  75 +# """
  76 +# Extend xapian.MatchDecider to not consider only applications packages.
  77 +# """
  78 +# def __init__(self, pkgs_list, axi):
  79 +# """
  80 +# Set initial parameters.
  81 +# """
  82 +# xapian.MatchDecider.__init__(self)
  83 +# self.pkgs_list = pkgs_list
  84 +# self.axi = axi
  85 +#
  86 +# def __call__(self, doc):
  87 +# """
  88 +# True if the package is not already installed.
  89 +# """
  90 +# tags = axi_search_pkg_tags(self.axi,doc.get_data())
  91 +# return (("XTrole::program" in tags) and
  92 +# (doc.get_data() not in self.pkgs_list))
  93 +#
  94 +#class UserMatchDecider(xapian.MatchDecider):
  95 +# """
  96 +# Extend xapian.MatchDecider to match similar profiles.
  97 +# """
  98 +#
  99 +# def __init__(self, profile):
  100 +# """
  101 +# Set initial parameters.
  102 +# """
  103 +# xapian.MatchDecider.__init__(self)
  104 +# self.profile = profile
  105 +#
  106 +# def __call__(self, doc):
  107 +# """
  108 +# True if the user has more the half of packages from profile.
  109 +# """
  110 +# match=0
  111 +# for term in doc:
  112 +# if term.term in self.profile:
  113 +# match = match+1
  114 +# return (match >= len(self.profile)/2)
  115 +
  116 +#class AppExpandDecider(xapian.ExpandDecider):
  117 +# """
  118 +# Extend xapian.ExpandDecider to consider applications only.
  119 +# """
  120 +# def __init__(self,axi):
  121 +# xapian.ExpandDecider.__init__(self)
  122 +# self.axi = axi
  123 +#
  124 +# def __call__(self, term):
  125 +# """
  126 +# True if the term is a package.
  127 +# """
  128 +# if not term.startswith("XT"):
  129 +# package = term.lstrip("XP")
  130 +# print package
  131 +# tags = axi_search_pkg_tags(self.axi,package)
  132 +# if "XTrole::program" in tags:
  133 +# print tags
  134 +# return True
  135 +# else:
  136 +# return False
  137 +# else:
  138 +# return False
122 139  
123 140 class TagExpandDecider(xapian.ExpandDecider):
124 141 """
... ... @@ -149,8 +166,10 @@ class ContentBasedStrategy(RecommendationStrategy):
149 166 """
150 167 Perform recommendation strategy.
151 168 """
  169 + logging.debug("Composing user profile...")
152 170 profile = user.content_profile(rec.items_repository,self.content,
153 171 self.profile_size)
  172 + logging.debug(profile)
154 173 # prepair index for querying user profile
155 174 query = xapian.Query(xapian.Query.OP_OR,profile)
156 175 enquire = xapian.Enquire(rec.items_repository)
... ... @@ -188,8 +207,10 @@ class CollaborativeStrategy(RecommendationStrategy):
188 207 """
189 208 Perform recommendation strategy.
190 209 """
  210 + logging.debug("Composing user profile...")
191 211 profile = ["XP"+package for package in
192   - user.filter_pkg_profile("/root/.app-recommender/filters/program")]
  212 + user.filter_pkg_profile(rec.valid_pkgs)]
  213 + logging.debug(profile)
193 214 # prepair index for querying user profile
194 215 query = xapian.Query(xapian.Query.OP_OR,profile)
195 216 enquire = xapian.Enquire(rec.users_repository)
... ... @@ -208,13 +229,14 @@ class CollaborativeStrategy(RecommendationStrategy):
208 229 # retrieve most relevant packages
209 230 #eset = enquire.get_eset(recommendation_size,rset,
210 231 # AppExpandDecider(rec.items_repository))
211   - eset = enquire.get_eset(recommendation_size,rset,PkgExpandDecider())
  232 + eset = enquire.get_eset(recommendation_size,rset,
  233 + PkgExpandDecider(user.items()))
212 234 # compose result dictionary
213 235 item_score = {}
214 236 ranking = []
215 237 for e in eset:
216 238 package = e.term.lstrip("XP")
217   - tags = axi_search_pkg_tags(rec.items_repository,package)
  239 + #tags = axi_search_pkg_tags(rec.items_repository,package)
218 240 #[FIXME] set this constraint somehow
219 241 #if "XTrole::program" in tags:
220 242 item_score[package] = e.weight
... ...
src/user.py
... ... @@ -26,6 +26,7 @@ import datetime
26 26 import xapian
27 27 import logging
28 28 import apt
  29 +from error import Error
29 30 from singleton import Singleton
30 31 import data
31 32  
... ... @@ -113,9 +114,14 @@ class User:
113 114 Get user profile for a specific type of content: packages tags,
114 115 description or both (full_profile)
115 116 """
116   - if content == "tag": return self.tag_profile(items_repository,size)
117   - if content == "desc": return self.desc_profile(items_repository,size)
118   - if content == "full": return self.full_profile(items_repository,size)
  117 + if content == "tag":
  118 + profile = self.tag_profile(items_repository,size)
  119 + if content == "desc":
  120 + profile = self.desc_profile(items_repository,size)
  121 + if content == "full":
  122 + profile = self.full_profile(items_repository,size)
  123 + logging.debug("User profile: %s" % profile)
  124 + return profile
119 125  
120 126 def tag_profile(self,items_repository,size):
121 127 """
... ... @@ -155,17 +161,28 @@ class User:
155 161 desc_profile = self.desc_profile(items_repository,size)[:size/2]
156 162 return tag_profile+desc_profile
157 163  
158   - def filter_pkg_profile(self,filter_file):
  164 + def filter_pkg_profile(self,filter_list_or_file):
159 165 """
160 166 Return list of packages from profile listed in the filter_file.
161 167 """
  168 + if type(filter_list_or_file).__name__ == "list":
  169 + valid_pkgs = filter_list_or_file
  170 + elif type(filter_list_or_file).__name__ == "str":
  171 + try:
  172 + with open(filter_list_or_file) as valid:
  173 + valid_pkgs = [line.strip() for line in valid]
  174 + except IOError:
  175 + logging.critical("Could not open profile filter file.")
  176 + raise Error
  177 + else:
  178 + logging.debug("No filter provided for user profiling.")
  179 + return self.pkg_profile
  180 +
162 181 old_profile_size = len(self.pkg_profile)
163   - with open(filter_file) as valid:
164   - valid_pkgs = [line.strip() for line in valid]
165   - for pkg in self.pkg_profile[:]: #iterate list copy
166   - if pkg not in valid_pkgs:
167   - self.pkg_profile.remove(pkg)
168   - logging.debug("Discarded package %s during profile filtering" % pkg)
  182 + for pkg in self.pkg_profile[:]: #iterate list copy
  183 + if pkg not in valid_pkgs:
  184 + self.pkg_profile.remove(pkg)
  185 + logging.debug("Discarded package %s during profile filtering" % pkg)
169 186 profile_size = len(self.pkg_profile)
170 187 logging.debug("Filtered package profile: reduced packages profile size \
171 188 from %d to %d." % (old_profile_size, profile_size))
... ... @@ -199,7 +216,6 @@ class RandomPopcon(User):
199 216 """
200 217 Set initial parameters.
201 218 """
202   - item_score = {}
203 219 len_profile = 0
204 220 while len_profile < 100:
205 221 path = random.choice([os.path.join(root, submission) for
... ... @@ -217,10 +233,29 @@ class PopconSystem(User):
217 233 """
218 234 Set initial parameters.
219 235 """
220   - item_score = {}
221 236 submission = data.PopconSubmission(path)
222 237 User.__init__(self,submission.packages,submission.user_id)
223 238  
class PkgsListSystem(User):
    """
    Extend the class User to build a user from an explicit list of packages,
    given either in memory or as a file.
    """
    def __init__(self,pkgs_list_or_file):
        """
        Set initial parameters.

        pkgs_list_or_file: either a list of package names, or the path of a
        file holding one package per line. Only the first
        whitespace-separated token of each line is used as the package name;
        blank lines are ignored. Any other value yields an empty packages
        list.

        Raises Error if the packages file cannot be opened.
        """
        if isinstance(pkgs_list_or_file,list):
            # The original assigned the undefined name 'filter_list_or_file'
            # here, which raised NameError.
            pkgs_list = pkgs_list_or_file
        elif isinstance(pkgs_list_or_file,str):
            try:
                with open(pkgs_list_or_file) as pkgs_list_file:
                    # Skip blank lines: line.split()[0] would raise
                    # IndexError on them.
                    pkgs_list = [line.split()[0] for line in pkgs_list_file
                                 if line.strip()]
            except IOError:
                logging.critical("Could not open packages list file.")
                raise Error
        else:
            logging.debug("No packages provided for user profiling.")
            # __init__ must not return a value, and self.pkg_profile is not
            # set before User.__init__ runs, so fall back to an empty list.
            # NOTE(review): assumes User.__init__ accepts an empty dict --
            # confirm.
            pkgs_list = []

        # Every package gets the same score of 1.
        User.__init__(self,dict.fromkeys(pkgs_list,1))
  258 +
224 259 class LocalSystem(User):
225 260 """
226 261 Extend the class User to consider the packages installed on the local
... ...
src/web/survey.py
... ... @@ -11,7 +11,8 @@ import re
11 11  
12 12 sys.path.insert(0,"../")
13 13  
14   -from config import *
  14 +import logging
  15 +from config import Config
15 16 from recommender import *
16 17 from user import *
17 18  
... ... @@ -30,7 +31,7 @@ class Thanks:
30 31 web_input = web.input()
31 32 user_id = web_input['user_id'].encode('utf8')
32 33 with open("./submissions/%s/ident" % user_id,'w') as ident:
33   - for key in ["name","email","country","public","comments"]:
  34 + for key in ["name","email","comments"]:
34 35 if web_input.has_key(key):
35 36 ident.write("%s: %s\n" % (key,web_input[key].encode("utf-8")))
36 37 return render.thanks_id()
... ... @@ -79,22 +80,30 @@ class Package:
79 80 class Request:
80 81 def __init__(self,web_input,submissions_dir,user_id=0,pkgs_list=0):
81 82 self.strategy = ""
82   - print "Request from user",user_id
83 83 if user_id:
84 84 self.user_id = user_id
85 85 self.outputdir = os.path.join(submissions_dir,user_id)
  86 + logging.info("New round for user %s" % self.user_id)
86 87 else:
87 88 self.outputdir = tempfile.mkdtemp(prefix='',dir=submissions_dir)
88   - print ("created dir %s" % self.outputdir)
89 89 self.user_id = self.outputdir.lstrip(submissions_dir)
  90 + logging.info("Request from user %s" % self.user_id)
  91 + logging.debug("Created dir %s" % self.outputdir)
90 92  
  93 + pkgs_list_file = os.path.join(self.outputdir,"packages_list")
91 94 if pkgs_list:
92 95 self.pkgs_list = pkgs_list
  96 + if not os.path.exists(pkgs_list_file):
  97 + with open(pkgs_list_file,"w") as f:
  98 + for pkg in pkgs_list:
  99 + f.write(pkg+"\n")
93 100 else:
94 101 self.pkgs_list = []
95 102 if web_input['pkgs_file'].value:
96   - f = open(self.outputdir + "/packages_list", "wb")
  103 + f = open(pkgs_list_file, "w")
97 104 lines = web_input['pkgs_file'].file.readlines()
  105 + with open(os.path.join(self.outputdir,"upload"), "w") as upload:
  106 + upload.writelines(lines)
98 107 # popcon submission format
99 108 if lines[0].startswith('POPULARITY-CONTEST'):
100 109 del lines[0]
... ... @@ -122,15 +131,15 @@ class Request:
122 131 class Save:
123 132 def POST(self):
124 133 web_input = web.input()
125   - print web_input
  134 + logging.info("Saving user evaluation...")
  135 + logging.info(web_input)
126 136 user_id = web_input['user_id'].encode('utf8')
127 137 with open("./submissions/%s/packages_list" % user_id) as packages_list:
128 138 pkgs_list = [line.strip() for line in packages_list.readlines()]
129 139 strategy = web_input['strategy']
130   - print user_id,strategy,pkgs_list
131   - output_dir = "./submissions/%s/%s/" % (user_id,strategy)
132   - if not os.path.exists(output_dir):
133   - os.makedirs(output_dir)
  140 + logging.debug("Saving evaluation for user %s, strategy %s and packages..."
  141 + % (user_id,strategy))
  142 + logging.debug(pkgs_list)
134 143 evaluations = {}
135 144 evaluations["poor"] = []
136 145 evaluations["good"] = []
... ... @@ -138,16 +147,17 @@ class Save:
138 147 for key, value in web_input.items():
139 148 if key.startswith("evaluation-"):
140 149 evaluations[value.encode('utf8')].append(key.lstrip("evaluation-"))
  150 + output_dir = ("./submissions/%s/%s/" % (user_id,strategy))
141 151 for key,value in evaluations.items():
142   - with open(output_dir+key,'w') as output:
  152 + with open(os.path.join(output_dir,key),'w') as output:
143 153 for item in value:
144 154 output.write(item+"\n")
145   - with open(output_dir+"report",'w') as report:
  155 + with open(os.path.join(output_dir,"report"),'w') as report:
146 156 report.write("# User: %s\n# Strategy: %s\n# TP FP\n%d %d\n" %
147 157 (user_id,strategy,
148 158 len(evaluations["good"])+len(evaluations["surprising"]),
149 159 len(evaluations["poor"])))
150   - if web_input.has_key('strategy_button'):
  160 + if web_input.has_key('continue_button'):
151 161 return Survey().POST()
152 162 elif web_input.has_key('finish_button'):
153 163 return render.thanks(user_id)
... ... @@ -156,23 +166,21 @@ class Save:
156 166  
157 167 class Survey:
158 168 def __init__(self):
159   - self.strategies = ["cb","cbd","cbt","col","cb-desktop","cbd-desktop",
160   - "cbt-desktop","col-desktop"]
  169 + logging.info("Setting up survey...")
161 170 self.rec = Recommender(Config())
162   - #print rec.users_repository.get_doccount()
163 171 self.submissions_dir = "./submissions/"
164 172 if not os.path.exists(self.submissions_dir):
165 173 os.makedirs(self.submissions_dir)
166 174  
167 175 def POST(self):
168 176 web_input = web.input(pkgs_file={})
169   - print "WEB_INPUT",web_input
  177 + logging.debug("Survey web_input %s" % str(web_input))
  178 + self.strategies = ["cb","cbd","cbt","col"]
170 179 # If it is not the first strategy round, save the previous evaluation
171 180 if not web_input.has_key('user_id'):
172 181 request = Request(web_input,self.submissions_dir)
173 182 else:
174 183 user_id = web_input['user_id'].encode('utf8')
175   - print "Continue", user_id
176 184 with open("./submissions/%s/packages_list" % user_id) as packages_list:
177 185 pkgs_list = [line.strip() for line in packages_list.readlines()]
178 186 request = Request(web_input,self.submissions_dir,user_id,pkgs_list)
... ... @@ -180,30 +188,43 @@ class Survey:
180 188 return render.error_survey()
181 189 else:
182 190 user = User(dict.fromkeys(request.pkgs_list,1),request.user_id)
183   - user.maximal_pkg_profile()
184   - results = dict()
  191 + program_profile = user.filter_pkg_profile(os.path.join(self.rec.cfg.filters,"program"))
  192 + desktop_profile = user.filter_pkg_profile(os.path.join(self.rec.cfg.filters,"desktop"))
  193 + if (len(desktop_profile)>10 or
  194 + len(desktop_profile)>len(program_profile)/2):
  195 + self.strategies = [strategy_str+"_desktop" for strategy_str
  196 + in self.strategies[:]]
185 197 old_strategies = [dirs for root, dirs, files in
186 198 os.walk(os.path.join(self.submissions_dir,
187 199 request.user_id))]
188 200 if old_strategies:
189 201 strategies = [s for s in self.strategies if s not in old_strategies[0]]
190   - print "OLD Strategies", old_strategies[0]
  202 + logging.info("Already used strategies %s" % old_strategies[0])
191 203 else:
192 204 strategies = self.strategies
193   - print "LEFT",strategies
194 205 if not strategies:
195 206 return render.thanks(user_id)
196 207 request.strategy = random.choice(strategies)
197   - print "selected",request.strategy
  208 + logging.info("Selected \'%s\' from %s" % (request.strategy,strategies))
198 209 self.rec.set_strategy(request.strategy)
199 210 prediction = self.rec.get_recommendation(user,10).get_prediction()
200   - print prediction
  211 + logging.info("Prediction for user %s" % user.user_id)
  212 + logging.info(str(prediction))
  213 + output_dir = ("./submissions/%s/%s/" %
  214 + (user.user_id,request.strategy))
  215 + os.makedirs(output_dir)
  216 + with open(os.path.join(output_dir,"prediction"),"w") as prediction_file:
  217 + for pkg,rating in prediction:
  218 + prediction_file.write("%s %f.2\n" % (pkg,rating))
  219 + logging.debug("Saved %s/%s prediction to file" %
  220 + (user.user_id,request.strategy))
201 221 recommendation = [result[0] for result in prediction]
202 222 pkg_summaries = {}
203 223 pkg_details = []
204 224 cache = apt.Cache()
205 225 for pkg in recommendation:
206 226 try:
  227 + logging.debug("Getting details of package %s" % pkg)
207 228 pkg_details.append(Package().get_details_from_dde(pkg))
208 229 pkg_summaries[pkg] = cache[pkg].candidate.summary
209 230 except:
... ... @@ -236,7 +257,7 @@ urls = (&#39;/&#39;, &#39;Index&#39;,
236 257 web.webapi.internalerror = web.debugerror
237 258  
238 259 if __name__ == "__main__":
239   - apprec = web.application(urls, globals())
240   - apprec.add_processor(add_global_hook())
241   - apprec.run()
242   -
  260 + cfg = Config()
  261 + apprec = web.application(urls, globals())
  262 + apprec.add_processor(add_global_hook())
  263 + apprec.run()
... ...