Commit 6003449181d2e86b4566e854c29c469d385163ed

Authored by Tássia Camões Araújo
2 parents c1d556c0 08c59678
Exists in master and in 1 other branch add_vagrant

Merge branch 'master' of github.com:tassia/AppRecommender

Showing 3 changed files with 31 additions and 12 deletions   Show diff stats
src/config.py
... ... @@ -40,7 +40,7 @@ class Config():
40 40 self.output = "/dev/null"
41 41 self.survey_mode = 0
42 42 self.axi = "/var/lib/apt-xapian-index/index"
43   -# self.dde_url = "http://dde.debian.net/dde/q/udd/packs/all/%s?t=json"
  43 + #self.dde_url = "http://dde.debian.net/dde/q/udd/packs/all/%s?t=json"
44 44 self.dde_url = "http://46.4.235.200:8000/q/udd/packages/all/%s?t=json"
45 45 self.popcon_index = os.path.expanduser("~/.app-recommender/popcon_index")
46 46 self.popcon_dir = os.path.expanduser("~/.app-recommender/popcon_dir")
... ... @@ -51,6 +51,9 @@ class Config():
51 51 self.strategy = "cb"
52 52 self.weight = "bm25"
53 53 self.profile_size = 50
  54 + # options: maximal, voted, desktop
  55 + self.profiling = "maximal"
  56 + self.k_neighbors = 100
54 57 self.load_options()
55 58 self.set_logger()
56 59  
... ... @@ -77,7 +80,9 @@ class Config():
77 80 print " [ recommender ]"
78 81 print " -w, --weight=OPTION Search weighting scheme"
79 82 print " -s, --strategy=OPTION Recommendation strategy"
80   - print " -z, --profile_size=SIZE Size of user profile"
  83 + print " -z, --profilesize=k Size of user profile"
  84 + print " -f, --profiling=OPTION Profile filter strategy"
  85 + print " -n, --neighbors=k Size of neighborhood for collaboration"
81 86 print ""
82 87 print " [ weight options ] "
83 88 print " trad = traditional probabilistic weighting"
... ... @@ -132,12 +137,15 @@ class Config():
132 137 self.strategy = self.read_option('recommender', 'strategy')
133 138 self.profile_size = int(self.read_option('recommender',
134 139 'profile_size'))
  140 + self.profiling = self.read_option('recommender', 'profiling')
  141 + self.k_neighbors = int(self.read_option('recommender',
  142 + 'k_neighbors'))
135 143  
136   - short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:"
  144 + short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:f:n:"
137 145 long_options = ["help", "debug", "verbose", "output=",
138 146 "axi=", "dde=", "popconindex=", "popcondir=", "indexmode=",
139   - "clustersdir=", "kmedoids=", "max_popcon=", "weight=", "strategy=",
140   - "profile_size="]
  147 + "clustersdir=", "kmedoids=", "maxpopcon=", "weight=", "strategy=",
  148 + "profile_size=", "profiling=", "neighbors="]
141 149 try:
142 150 opts, args = getopt.getopt(sys.argv[1:], short_options,
143 151 long_options)
... ... @@ -178,7 +186,11 @@ class Config():
178 186 elif o in ("-s", "--strategy"):
179 187 self.strategy = p
180 188 elif o in ("-z", "--profile_size"):
181   - self.strategy = int(p)
  189 + self.profile_size = int(p)
  190 + elif o in ("-z", "--profiling"):
  191 + self.profiling = p
  192 + elif o in ("-n", "--neighbors"):
  193 + self.k_neighbors = int(p)
182 194 else:
183 195 assert False, "unhandled option"
184 196  
... ...
src/data.py
... ... @@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg):
45 45 enquire.set_query(xapian.Query("XP"+pkg))
46 46 matches = enquire.get_mset(0,1)
47 47 if not matches:
48   - logging.debug("Package %s not found in items repository" % pkg)
  48 + #logging.debug("Package %s not found in items repository" % pkg)
49 49 return []
50 50 for m in matches:
51 51 tags = [term.term for term in axi.get_document(m.docid).termlist() if
... ... @@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase):
158 158 self.axi = xapian.Database(cfg.axi)
159 159 self.path = os.path.expanduser(cfg.popcon_index)
160 160 self.source_dir = os.path.expanduser(cfg.popcon_dir)
  161 + self.max_popcon = cfg.max_popcon
161 162 if not cfg.index_mode == "old" or not self.load_index():
162 163 if not os.path.exists(cfg.popcon_dir):
163 164 os.makedirs(cfg.popcon_dir)
164 165 if not os.listdir(cfg.popcon_dir):
165 166 logging.critical("Popcon dir seems to be empty.")
166 167 raise Error
167   - if cfg.index_mode == "reindex":
  168 + if cfg.index_mode == "reindex" or cfg.index_mode == "old":
168 169 self.source_dir = os.path.expanduser(cfg.popcon_dir)
169 170 logging.debug(self.source_dir)
170 171 else:
... ... @@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase):
229 230 logging.critical(str(e))
230 231 raise Error
231 232  
  233 + doc_count = 0
232 234 for root, dirs, files in os.walk(self.source_dir):
  235 + if doc_count == self.max_popcon:
  236 + break
233 237 for popcon_file in files:
  238 + if doc_count == self.max_popcon:
  239 + break
234 240 submission = PopconSubmission(os.path.join(root, popcon_file))
235 241 doc = xapian.Document()
236 242 doc.set_data(submission.user_id)
... ... @@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase):
238 244 submission.user_id)
239 245 for pkg, freq in submission.packages.items():
240 246 doc.add_term("XP"+pkg,freq)
241   - if axi_search_pkg_tags(self.axi,pkg):
242   - for tag in axi_search_pkg_tags(self.axi,pkg):
243   - doc.add_term(tag,freq)
  247 + #if axi_search_pkg_tags(self.axi,pkg):
  248 + # for tag in axi_search_pkg_tags(self.axi,pkg):
  249 + # doc.add_term(tag,freq)
244 250 doc_id = self.add_document(doc)
  251 + doc_count += 1
245 252 logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
246 253 # python garbage collector
247 254 gc.collect()
... ...
src/web/survey.py
... ... @@ -32,7 +32,7 @@ class Thanks:
32 32 with open("./submissions/%s/ident" % user_id,'w') as ident:
33 33 for key in ["name","email","country","public","comments"]:
34 34 if web_input.has_key(key):
35   - ident.write("%s: %s\n" % (key,web_input[key]))
  35 + ident.write("%s: %s\n" % (key,web_input[key].encode("utf-8")))
36 36 return render.thanks_id()
37 37  
38 38 class Package:
... ...