Commit 6003449181d2e86b4566e854c29c469d385163ed
Exists in master and in 1 other branch
Merge branch 'master' of github.com:tassia/AppRecommender
Showing 3 changed files with 31 additions and 12 deletions
Show diff stats
src/config.py
@@ -40,7 +40,7 @@ class Config(): | @@ -40,7 +40,7 @@ class Config(): | ||
40 | self.output = "/dev/null" | 40 | self.output = "/dev/null" |
41 | self.survey_mode = 0 | 41 | self.survey_mode = 0 |
42 | self.axi = "/var/lib/apt-xapian-index/index" | 42 | self.axi = "/var/lib/apt-xapian-index/index" |
43 | -# self.dde_url = "http://dde.debian.net/dde/q/udd/packs/all/%s?t=json" | 43 | + #self.dde_url = "http://dde.debian.net/dde/q/udd/packs/all/%s?t=json" |
44 | self.dde_url = "http://46.4.235.200:8000/q/udd/packages/all/%s?t=json" | 44 | self.dde_url = "http://46.4.235.200:8000/q/udd/packages/all/%s?t=json" |
45 | self.popcon_index = os.path.expanduser("~/.app-recommender/popcon_index") | 45 | self.popcon_index = os.path.expanduser("~/.app-recommender/popcon_index") |
46 | self.popcon_dir = os.path.expanduser("~/.app-recommender/popcon_dir") | 46 | self.popcon_dir = os.path.expanduser("~/.app-recommender/popcon_dir") |
@@ -51,6 +51,9 @@ class Config(): | @@ -51,6 +51,9 @@ class Config(): | ||
51 | self.strategy = "cb" | 51 | self.strategy = "cb" |
52 | self.weight = "bm25" | 52 | self.weight = "bm25" |
53 | self.profile_size = 50 | 53 | self.profile_size = 50 |
54 | + # options: maximal, voted, desktop | ||
55 | + self.profiling = "maximal" | ||
56 | + self.k_neighbors = 100 | ||
54 | self.load_options() | 57 | self.load_options() |
55 | self.set_logger() | 58 | self.set_logger() |
56 | 59 | ||
@@ -77,7 +80,9 @@ class Config(): | @@ -77,7 +80,9 @@ class Config(): | ||
77 | print " [ recommender ]" | 80 | print " [ recommender ]" |
78 | print " -w, --weight=OPTION Search weighting scheme" | 81 | print " -w, --weight=OPTION Search weighting scheme" |
79 | print " -s, --strategy=OPTION Recommendation strategy" | 82 | print " -s, --strategy=OPTION Recommendation strategy" |
80 | - print " -z, --profile_size=SIZE Size of user profile" | 83 | + print " -z, --profilesize=k Size of user profile" |
84 | + print " -f, --profiling=OPTION Profile filter strategy" | ||
85 | + print " -n, --neighbors=k Size of neighborhood for collaboration" | ||
81 | print "" | 86 | print "" |
82 | print " [ weight options ] " | 87 | print " [ weight options ] " |
83 | print " trad = traditional probabilistic weighting" | 88 | print " trad = traditional probabilistic weighting" |
@@ -132,12 +137,15 @@ class Config(): | @@ -132,12 +137,15 @@ class Config(): | ||
132 | self.strategy = self.read_option('recommender', 'strategy') | 137 | self.strategy = self.read_option('recommender', 'strategy') |
133 | self.profile_size = int(self.read_option('recommender', | 138 | self.profile_size = int(self.read_option('recommender', |
134 | 'profile_size')) | 139 | 'profile_size')) |
140 | + self.profiling = self.read_option('recommender', 'profiling') | ||
141 | + self.k_neighbors = int(self.read_option('recommender', | ||
142 | + 'k_neighbors')) | ||
135 | 143 | ||
136 | - short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:" | 144 | + short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:f:n:" |
137 | long_options = ["help", "debug", "verbose", "output=", | 145 | long_options = ["help", "debug", "verbose", "output=", |
138 | "axi=", "dde=", "popconindex=", "popcondir=", "indexmode=", | 146 | "axi=", "dde=", "popconindex=", "popcondir=", "indexmode=", |
139 | - "clustersdir=", "kmedoids=", "max_popcon=", "weight=", "strategy=", | ||
140 | - "profile_size="] | 147 | + "clustersdir=", "kmedoids=", "maxpopcon=", "weight=", "strategy=", |
148 | + "profile_size=", "profiling=", "neighbors="] | ||
141 | try: | 149 | try: |
142 | opts, args = getopt.getopt(sys.argv[1:], short_options, | 150 | opts, args = getopt.getopt(sys.argv[1:], short_options, |
143 | long_options) | 151 | long_options) |
@@ -178,7 +186,11 @@ class Config(): | @@ -178,7 +186,11 @@ class Config(): | ||
178 | elif o in ("-s", "--strategy"): | 186 | elif o in ("-s", "--strategy"): |
179 | self.strategy = p | 187 | self.strategy = p |
180 | elif o in ("-z", "--profile_size"): | 188 | elif o in ("-z", "--profile_size"): |
181 | - self.strategy = int(p) | 189 | + self.profile_size = int(p) |
190 | + elif o in ("-z", "--profiling"): | ||
191 | + self.profiling = p | ||
192 | + elif o in ("-n", "--neighbors"): | ||
193 | + self.k_neighbors = int(p) | ||
182 | else: | 194 | else: |
183 | assert False, "unhandled option" | 195 | assert False, "unhandled option" |
184 | 196 |
src/data.py
@@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg): | @@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg): | ||
45 | enquire.set_query(xapian.Query("XP"+pkg)) | 45 | enquire.set_query(xapian.Query("XP"+pkg)) |
46 | matches = enquire.get_mset(0,1) | 46 | matches = enquire.get_mset(0,1) |
47 | if not matches: | 47 | if not matches: |
48 | - logging.debug("Package %s not found in items repository" % pkg) | 48 | + #logging.debug("Package %s not found in items repository" % pkg) |
49 | return [] | 49 | return [] |
50 | for m in matches: | 50 | for m in matches: |
51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if | 51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if |
@@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
158 | self.axi = xapian.Database(cfg.axi) | 158 | self.axi = xapian.Database(cfg.axi) |
159 | self.path = os.path.expanduser(cfg.popcon_index) | 159 | self.path = os.path.expanduser(cfg.popcon_index) |
160 | self.source_dir = os.path.expanduser(cfg.popcon_dir) | 160 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
161 | + self.max_popcon = cfg.max_popcon | ||
161 | if not cfg.index_mode == "old" or not self.load_index(): | 162 | if not cfg.index_mode == "old" or not self.load_index(): |
162 | if not os.path.exists(cfg.popcon_dir): | 163 | if not os.path.exists(cfg.popcon_dir): |
163 | os.makedirs(cfg.popcon_dir) | 164 | os.makedirs(cfg.popcon_dir) |
164 | if not os.listdir(cfg.popcon_dir): | 165 | if not os.listdir(cfg.popcon_dir): |
165 | logging.critical("Popcon dir seems to be empty.") | 166 | logging.critical("Popcon dir seems to be empty.") |
166 | raise Error | 167 | raise Error |
167 | - if cfg.index_mode == "reindex": | 168 | + if cfg.index_mode == "reindex" or cfg.index_mode == "old": |
168 | self.source_dir = os.path.expanduser(cfg.popcon_dir) | 169 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
169 | logging.debug(self.source_dir) | 170 | logging.debug(self.source_dir) |
170 | else: | 171 | else: |
@@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
229 | logging.critical(str(e)) | 230 | logging.critical(str(e)) |
230 | raise Error | 231 | raise Error |
231 | 232 | ||
233 | + doc_count = 0 | ||
232 | for root, dirs, files in os.walk(self.source_dir): | 234 | for root, dirs, files in os.walk(self.source_dir): |
235 | + if doc_count == self.max_popcon: | ||
236 | + break | ||
233 | for popcon_file in files: | 237 | for popcon_file in files: |
238 | + if doc_count == self.max_popcon: | ||
239 | + break | ||
234 | submission = PopconSubmission(os.path.join(root, popcon_file)) | 240 | submission = PopconSubmission(os.path.join(root, popcon_file)) |
235 | doc = xapian.Document() | 241 | doc = xapian.Document() |
236 | doc.set_data(submission.user_id) | 242 | doc.set_data(submission.user_id) |
@@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
238 | submission.user_id) | 244 | submission.user_id) |
239 | for pkg, freq in submission.packages.items(): | 245 | for pkg, freq in submission.packages.items(): |
240 | doc.add_term("XP"+pkg,freq) | 246 | doc.add_term("XP"+pkg,freq) |
241 | - if axi_search_pkg_tags(self.axi,pkg): | ||
242 | - for tag in axi_search_pkg_tags(self.axi,pkg): | ||
243 | - doc.add_term(tag,freq) | 247 | + #if axi_search_pkg_tags(self.axi,pkg): |
248 | + # for tag in axi_search_pkg_tags(self.axi,pkg): | ||
249 | + # doc.add_term(tag,freq) | ||
244 | doc_id = self.add_document(doc) | 250 | doc_id = self.add_document(doc) |
251 | + doc_count += 1 | ||
245 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) | 252 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
246 | # python garbage collector | 253 | # python garbage collector |
247 | gc.collect() | 254 | gc.collect() |
src/web/survey.py
@@ -32,7 +32,7 @@ class Thanks: | @@ -32,7 +32,7 @@ class Thanks: | ||
32 | with open("./submissions/%s/ident" % user_id,'w') as ident: | 32 | with open("./submissions/%s/ident" % user_id,'w') as ident: |
33 | for key in ["name","email","country","public","comments"]: | 33 | for key in ["name","email","country","public","comments"]: |
34 | if web_input.has_key(key): | 34 | if web_input.has_key(key): |
35 | - ident.write("%s: %s\n" % (key,web_input[key])) | 35 | + ident.write("%s: %s\n" % (key,web_input[key].encode("utf-8"))) |
36 | return render.thanks_id() | 36 | return render.thanks_id() |
37 | 37 | ||
38 | class Package: | 38 | class Package: |