Commit 6003449181d2e86b4566e854c29c469d385163ed

Authored by Tássia Camões Araújo
2 parents c1d556c0 08c59678
Exists in master and in 1 other branch: add_vagrant

Merge branch 'master' of github.com:tassia/AppRecommender

Showing 3 changed files with 31 additions and 12 deletions
@@ -40,7 +40,7 @@ class Config(): @@ -40,7 +40,7 @@ class Config():
40 self.output = "/dev/null" 40 self.output = "/dev/null"
41 self.survey_mode = 0 41 self.survey_mode = 0
42 self.axi = "/var/lib/apt-xapian-index/index" 42 self.axi = "/var/lib/apt-xapian-index/index"
43 -# self.dde_url = "http://dde.debian.net/dde/q/udd/packs/all/%s?t=json" 43 + #self.dde_url = "http://dde.debian.net/dde/q/udd/packs/all/%s?t=json"
44 self.dde_url = "http://46.4.235.200:8000/q/udd/packages/all/%s?t=json" 44 self.dde_url = "http://46.4.235.200:8000/q/udd/packages/all/%s?t=json"
45 self.popcon_index = os.path.expanduser("~/.app-recommender/popcon_index") 45 self.popcon_index = os.path.expanduser("~/.app-recommender/popcon_index")
46 self.popcon_dir = os.path.expanduser("~/.app-recommender/popcon_dir") 46 self.popcon_dir = os.path.expanduser("~/.app-recommender/popcon_dir")
@@ -51,6 +51,9 @@ class Config(): @@ -51,6 +51,9 @@ class Config():
51 self.strategy = "cb" 51 self.strategy = "cb"
52 self.weight = "bm25" 52 self.weight = "bm25"
53 self.profile_size = 50 53 self.profile_size = 50
  54 + # options: maximal, voted, desktop
  55 + self.profiling = "maximal"
  56 + self.k_neighbors = 100
54 self.load_options() 57 self.load_options()
55 self.set_logger() 58 self.set_logger()
56 59
@@ -77,7 +80,9 @@ class Config(): @@ -77,7 +80,9 @@ class Config():
77 print " [ recommender ]" 80 print " [ recommender ]"
78 print " -w, --weight=OPTION Search weighting scheme" 81 print " -w, --weight=OPTION Search weighting scheme"
79 print " -s, --strategy=OPTION Recommendation strategy" 82 print " -s, --strategy=OPTION Recommendation strategy"
80 - print " -z, --profile_size=SIZE Size of user profile" 83 + print " -z, --profilesize=k Size of user profile"
  84 + print " -f, --profiling=OPTION Profile filter strategy"
  85 + print " -n, --neighbors=k Size of neighborhood for collaboration"
81 print "" 86 print ""
82 print " [ weight options ] " 87 print " [ weight options ] "
83 print " trad = traditional probabilistic weighting" 88 print " trad = traditional probabilistic weighting"
@@ -132,12 +137,15 @@ class Config(): @@ -132,12 +137,15 @@ class Config():
132 self.strategy = self.read_option('recommender', 'strategy') 137 self.strategy = self.read_option('recommender', 'strategy')
133 self.profile_size = int(self.read_option('recommender', 138 self.profile_size = int(self.read_option('recommender',
134 'profile_size')) 139 'profile_size'))
  140 + self.profiling = self.read_option('recommender', 'profiling')
  141 + self.k_neighbors = int(self.read_option('recommender',
  142 + 'k_neighbors'))
135 143
136 - short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:" 144 + short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:f:n:"
137 long_options = ["help", "debug", "verbose", "output=", 145 long_options = ["help", "debug", "verbose", "output=",
138 "axi=", "dde=", "popconindex=", "popcondir=", "indexmode=", 146 "axi=", "dde=", "popconindex=", "popcondir=", "indexmode=",
139 - "clustersdir=", "kmedoids=", "max_popcon=", "weight=", "strategy=",  
140 - "profile_size="] 147 + "clustersdir=", "kmedoids=", "maxpopcon=", "weight=", "strategy=",
  148 + "profile_size=", "profiling=", "neighbors="]
141 try: 149 try:
142 opts, args = getopt.getopt(sys.argv[1:], short_options, 150 opts, args = getopt.getopt(sys.argv[1:], short_options,
143 long_options) 151 long_options)
@@ -178,7 +186,11 @@ class Config(): @@ -178,7 +186,11 @@ class Config():
178 elif o in ("-s", "--strategy"): 186 elif o in ("-s", "--strategy"):
179 self.strategy = p 187 self.strategy = p
180 elif o in ("-z", "--profile_size"): 188 elif o in ("-z", "--profile_size"):
181 - self.strategy = int(p) 189 + self.profile_size = int(p)
  190 + elif o in ("-z", "--profiling"):
  191 + self.profiling = p
  192 + elif o in ("-n", "--neighbors"):
  193 + self.k_neighbors = int(p)
182 else: 194 else:
183 assert False, "unhandled option" 195 assert False, "unhandled option"
184 196
@@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg): @@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg):
45 enquire.set_query(xapian.Query("XP"+pkg)) 45 enquire.set_query(xapian.Query("XP"+pkg))
46 matches = enquire.get_mset(0,1) 46 matches = enquire.get_mset(0,1)
47 if not matches: 47 if not matches:
48 - logging.debug("Package %s not found in items repository" % pkg) 48 + #logging.debug("Package %s not found in items repository" % pkg)
49 return [] 49 return []
50 for m in matches: 50 for m in matches:
51 tags = [term.term for term in axi.get_document(m.docid).termlist() if 51 tags = [term.term for term in axi.get_document(m.docid).termlist() if
@@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase):
158 self.axi = xapian.Database(cfg.axi) 158 self.axi = xapian.Database(cfg.axi)
159 self.path = os.path.expanduser(cfg.popcon_index) 159 self.path = os.path.expanduser(cfg.popcon_index)
160 self.source_dir = os.path.expanduser(cfg.popcon_dir) 160 self.source_dir = os.path.expanduser(cfg.popcon_dir)
  161 + self.max_popcon = cfg.max_popcon
161 if not cfg.index_mode == "old" or not self.load_index(): 162 if not cfg.index_mode == "old" or not self.load_index():
162 if not os.path.exists(cfg.popcon_dir): 163 if not os.path.exists(cfg.popcon_dir):
163 os.makedirs(cfg.popcon_dir) 164 os.makedirs(cfg.popcon_dir)
164 if not os.listdir(cfg.popcon_dir): 165 if not os.listdir(cfg.popcon_dir):
165 logging.critical("Popcon dir seems to be empty.") 166 logging.critical("Popcon dir seems to be empty.")
166 raise Error 167 raise Error
167 - if cfg.index_mode == "reindex": 168 + if cfg.index_mode == "reindex" or cfg.index_mode == "old":
168 self.source_dir = os.path.expanduser(cfg.popcon_dir) 169 self.source_dir = os.path.expanduser(cfg.popcon_dir)
169 logging.debug(self.source_dir) 170 logging.debug(self.source_dir)
170 else: 171 else:
@@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase):
229 logging.critical(str(e)) 230 logging.critical(str(e))
230 raise Error 231 raise Error
231 232
  233 + doc_count = 0
232 for root, dirs, files in os.walk(self.source_dir): 234 for root, dirs, files in os.walk(self.source_dir):
  235 + if doc_count == self.max_popcon:
  236 + break
233 for popcon_file in files: 237 for popcon_file in files:
  238 + if doc_count == self.max_popcon:
  239 + break
234 submission = PopconSubmission(os.path.join(root, popcon_file)) 240 submission = PopconSubmission(os.path.join(root, popcon_file))
235 doc = xapian.Document() 241 doc = xapian.Document()
236 doc.set_data(submission.user_id) 242 doc.set_data(submission.user_id)
@@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase):
238 submission.user_id) 244 submission.user_id)
239 for pkg, freq in submission.packages.items(): 245 for pkg, freq in submission.packages.items():
240 doc.add_term("XP"+pkg,freq) 246 doc.add_term("XP"+pkg,freq)
241 - if axi_search_pkg_tags(self.axi,pkg):  
242 - for tag in axi_search_pkg_tags(self.axi,pkg):  
243 - doc.add_term(tag,freq) 247 + #if axi_search_pkg_tags(self.axi,pkg):
  248 + # for tag in axi_search_pkg_tags(self.axi,pkg):
  249 + # doc.add_term(tag,freq)
244 doc_id = self.add_document(doc) 250 doc_id = self.add_document(doc)
  251 + doc_count += 1
245 logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) 252 logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
246 # python garbage collector 253 # python garbage collector
247 gc.collect() 254 gc.collect()
src/web/survey.py
@@ -32,7 +32,7 @@ class Thanks: @@ -32,7 +32,7 @@ class Thanks:
32 with open("./submissions/%s/ident" % user_id,'w') as ident: 32 with open("./submissions/%s/ident" % user_id,'w') as ident:
33 for key in ["name","email","country","public","comments"]: 33 for key in ["name","email","country","public","comments"]:
34 if web_input.has_key(key): 34 if web_input.has_key(key):
35 - ident.write("%s: %s\n" % (key,web_input[key])) 35 + ident.write("%s: %s\n" % (key,web_input[key].encode("utf-8")))
36 return render.thanks_id() 36 return render.thanks_id()
37 37
38 class Package: 38 class Package: