Commit 6003449181d2e86b4566e854c29c469d385163ed
Exists in
master
and in
1 other branch
Merge branch 'master' of github.com:tassia/AppRecommender
Showing
3 changed files
with
31 additions
and
12 deletions
Show diff stats
src/config.py
... | ... | @@ -40,7 +40,7 @@ class Config(): |
40 | 40 | self.output = "/dev/null" |
41 | 41 | self.survey_mode = 0 |
42 | 42 | self.axi = "/var/lib/apt-xapian-index/index" |
43 | -# self.dde_url = "http://dde.debian.net/dde/q/udd/packs/all/%s?t=json" | |
43 | + #self.dde_url = "http://dde.debian.net/dde/q/udd/packs/all/%s?t=json" | |
44 | 44 | self.dde_url = "http://46.4.235.200:8000/q/udd/packages/all/%s?t=json" |
45 | 45 | self.popcon_index = os.path.expanduser("~/.app-recommender/popcon_index") |
46 | 46 | self.popcon_dir = os.path.expanduser("~/.app-recommender/popcon_dir") |
... | ... | @@ -51,6 +51,9 @@ class Config(): |
51 | 51 | self.strategy = "cb" |
52 | 52 | self.weight = "bm25" |
53 | 53 | self.profile_size = 50 |
54 | + # options: maximal, voted, desktop | |
55 | + self.profiling = "maximal" | |
56 | + self.k_neighbors = 100 | |
54 | 57 | self.load_options() |
55 | 58 | self.set_logger() |
56 | 59 | |
... | ... | @@ -77,7 +80,9 @@ class Config(): |
77 | 80 | print " [ recommender ]" |
78 | 81 | print " -w, --weight=OPTION Search weighting scheme" |
79 | 82 | print " -s, --strategy=OPTION Recommendation strategy" |
80 | - print " -z, --profile_size=SIZE Size of user profile" | |
83 | + print " -z, --profile_size=k Size of user profile" | |
84 | + print " -f, --profiling=OPTION Profile filter strategy" | |
85 | + print " -n, --neighbors=k Size of neighborhood for collaboration" | |
81 | 86 | print "" |
82 | 87 | print " [ weight options ] " |
83 | 88 | print " trad = traditional probabilistic weighting" |
... | ... | @@ -132,12 +137,15 @@ class Config(): |
132 | 137 | self.strategy = self.read_option('recommender', 'strategy') |
133 | 138 | self.profile_size = int(self.read_option('recommender', |
134 | 139 | 'profile_size')) |
140 | + self.profiling = self.read_option('recommender', 'profiling') | |
141 | + self.k_neighbors = int(self.read_option('recommender', | |
142 | + 'k_neighbors')) | |
135 | 143 | |
136 | - short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:" | |
144 | + short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:f:n:" | |
137 | 145 | long_options = ["help", "debug", "verbose", "output=", |
138 | 146 | "axi=", "dde=", "popconindex=", "popcondir=", "indexmode=", |
139 | - "clustersdir=", "kmedoids=", "max_popcon=", "weight=", "strategy=", | |
140 | - "profile_size="] | |
147 | + "clustersdir=", "kmedoids=", "maxpopcon=", "weight=", "strategy=", | |
148 | + "profile_size=", "profiling=", "neighbors="] | |
141 | 149 | try: |
142 | 150 | opts, args = getopt.getopt(sys.argv[1:], short_options, |
143 | 151 | long_options) |
... | ... | @@ -178,7 +186,11 @@ class Config(): |
178 | 186 | elif o in ("-s", "--strategy"): |
179 | 187 | self.strategy = p |
180 | 188 | elif o in ("-z", "--profile_size"): |
181 | - self.strategy = int(p) | |
189 | + self.profile_size = int(p) | |
190 | + elif o in ("-f", "--profiling"): | |
191 | + self.profiling = p | |
192 | + elif o in ("-n", "--neighbors"): | |
193 | + self.k_neighbors = int(p) | |
182 | 194 | else: |
183 | 195 | assert False, "unhandled option" |
184 | 196 | ... | ... |
src/data.py
... | ... | @@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg): |
45 | 45 | enquire.set_query(xapian.Query("XP"+pkg)) |
46 | 46 | matches = enquire.get_mset(0,1) |
47 | 47 | if not matches: |
48 | - logging.debug("Package %s not found in items repository" % pkg) | |
48 | + #logging.debug("Package %s not found in items repository" % pkg) | |
49 | 49 | return [] |
50 | 50 | for m in matches: |
51 | 51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if |
... | ... | @@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase): |
158 | 158 | self.axi = xapian.Database(cfg.axi) |
159 | 159 | self.path = os.path.expanduser(cfg.popcon_index) |
160 | 160 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
161 | + self.max_popcon = cfg.max_popcon | |
161 | 162 | if not cfg.index_mode == "old" or not self.load_index(): |
162 | 163 | if not os.path.exists(cfg.popcon_dir): |
163 | 164 | os.makedirs(cfg.popcon_dir) |
164 | 165 | if not os.listdir(cfg.popcon_dir): |
165 | 166 | logging.critical("Popcon dir seems to be empty.") |
166 | 167 | raise Error |
167 | - if cfg.index_mode == "reindex": | |
168 | + if cfg.index_mode == "reindex" or cfg.index_mode == "old": | |
168 | 169 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
169 | 170 | logging.debug(self.source_dir) |
170 | 171 | else: |
... | ... | @@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase): |
229 | 230 | logging.critical(str(e)) |
230 | 231 | raise Error |
231 | 232 | |
233 | + doc_count = 0 | |
232 | 234 | for root, dirs, files in os.walk(self.source_dir): |
235 | + if doc_count == self.max_popcon: | |
236 | + break | |
233 | 237 | for popcon_file in files: |
238 | + if doc_count == self.max_popcon: | |
239 | + break | |
234 | 240 | submission = PopconSubmission(os.path.join(root, popcon_file)) |
235 | 241 | doc = xapian.Document() |
236 | 242 | doc.set_data(submission.user_id) |
... | ... | @@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase): |
238 | 244 | submission.user_id) |
239 | 245 | for pkg, freq in submission.packages.items(): |
240 | 246 | doc.add_term("XP"+pkg,freq) |
241 | - if axi_search_pkg_tags(self.axi,pkg): | |
242 | - for tag in axi_search_pkg_tags(self.axi,pkg): | |
243 | - doc.add_term(tag,freq) | |
247 | + #if axi_search_pkg_tags(self.axi,pkg): | |
248 | + # for tag in axi_search_pkg_tags(self.axi,pkg): | |
249 | + # doc.add_term(tag,freq) | |
244 | 250 | doc_id = self.add_document(doc) |
251 | + doc_count += 1 | |
245 | 252 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
246 | 253 | # python garbage collector |
247 | 254 | gc.collect() | ... | ... |
src/web/survey.py
... | ... | @@ -32,7 +32,7 @@ class Thanks: |
32 | 32 | with open("./submissions/%s/ident" % user_id,'w') as ident: |
33 | 33 | for key in ["name","email","country","public","comments"]: |
34 | 34 | if web_input.has_key(key): |
35 | - ident.write("%s: %s\n" % (key,web_input[key])) | |
35 | + ident.write("%s: %s\n" % (key,web_input[key].encode("utf-8"))) | |
36 | 36 | return render.thanks_id() |
37 | 37 | |
38 | 38 | class Package: | ... | ... |