Commit 6003449181d2e86b4566e854c29c469d385163ed
Exists in master and in 1 other branch
Merge branch 'master' of github.com:tassia/AppRecommender
Showing 3 changed files with 31 additions and 12 deletions
Show diff stats
src/config.py
| ... | ... | @@ -40,7 +40,7 @@ class Config(): |
| 40 | 40 | self.output = "/dev/null" |
| 41 | 41 | self.survey_mode = 0 |
| 42 | 42 | self.axi = "/var/lib/apt-xapian-index/index" |
| 43 | -# self.dde_url = "http://dde.debian.net/dde/q/udd/packs/all/%s?t=json" | |
| 43 | + #self.dde_url = "http://dde.debian.net/dde/q/udd/packs/all/%s?t=json" | |
| 44 | 44 | self.dde_url = "http://46.4.235.200:8000/q/udd/packages/all/%s?t=json" |
| 45 | 45 | self.popcon_index = os.path.expanduser("~/.app-recommender/popcon_index") |
| 46 | 46 | self.popcon_dir = os.path.expanduser("~/.app-recommender/popcon_dir") |
| ... | ... | @@ -51,6 +51,9 @@ class Config(): |
| 51 | 51 | self.strategy = "cb" |
| 52 | 52 | self.weight = "bm25" |
| 53 | 53 | self.profile_size = 50 |
| 54 | + # options: maximal, voted, desktop | |
| 55 | + self.profiling = "maximal" | |
| 56 | + self.k_neighbors = 100 | |
| 54 | 57 | self.load_options() |
| 55 | 58 | self.set_logger() |
| 56 | 59 | |
| ... | ... | @@ -77,7 +80,9 @@ class Config(): |
| 77 | 80 | print " [ recommender ]" |
| 78 | 81 | print " -w, --weight=OPTION Search weighting scheme" |
| 79 | 82 | print " -s, --strategy=OPTION Recommendation strategy" |
| 80 | - print " -z, --profile_size=SIZE Size of user profile" | |
| 83 | + print " -z, --profilesize=k Size of user profile" | |
| 84 | + print " -f, --profiling=OPTION Profile filter strategy" | |
| 85 | + print " -n, --neighbors=k Size of neighborhood for collaboration" | |
| 81 | 86 | print "" |
| 82 | 87 | print " [ weight options ] " |
| 83 | 88 | print " trad = traditional probabilistic weighting" |
| ... | ... | @@ -132,12 +137,15 @@ class Config(): |
| 132 | 137 | self.strategy = self.read_option('recommender', 'strategy') |
| 133 | 138 | self.profile_size = int(self.read_option('recommender', |
| 134 | 139 | 'profile_size')) |
| 140 | + self.profiling = self.read_option('recommender', 'profiling') | |
| 141 | + self.k_neighbors = int(self.read_option('recommender', | |
| 142 | + 'k_neighbors')) | |
| 135 | 143 | |
| 136 | - short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:" | |
| 144 | + short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:f:n:" | |
| 137 | 145 | long_options = ["help", "debug", "verbose", "output=", |
| 138 | 146 | "axi=", "dde=", "popconindex=", "popcondir=", "indexmode=", |
| 139 | - "clustersdir=", "kmedoids=", "max_popcon=", "weight=", "strategy=", | |
| 140 | - "profile_size="] | |
| 147 | + "clustersdir=", "kmedoids=", "maxpopcon=", "weight=", "strategy=", | |
| 148 | + "profile_size=", "profiling=", "neighbors="] | |
| 141 | 149 | try: |
| 142 | 150 | opts, args = getopt.getopt(sys.argv[1:], short_options, |
| 143 | 151 | long_options) |
| ... | ... | @@ -178,7 +186,11 @@ class Config(): |
| 178 | 186 | elif o in ("-s", "--strategy"): |
| 179 | 187 | self.strategy = p |
| 180 | 188 | elif o in ("-z", "--profile_size"): |
| 181 | - self.strategy = int(p) | |
| 189 | + self.profile_size = int(p) | |
| 190 | + elif o in ("-z", "--profiling"): | |
| 191 | + self.profiling = p | |
| 192 | + elif o in ("-n", "--neighbors"): | |
| 193 | + self.k_neighbors = int(p) | |
| 182 | 194 | else: |
| 183 | 195 | assert False, "unhandled option" |
| 184 | 196 | ... | ... |
src/data.py
| ... | ... | @@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg): |
| 45 | 45 | enquire.set_query(xapian.Query("XP"+pkg)) |
| 46 | 46 | matches = enquire.get_mset(0,1) |
| 47 | 47 | if not matches: |
| 48 | - logging.debug("Package %s not found in items repository" % pkg) | |
| 48 | + #logging.debug("Package %s not found in items repository" % pkg) | |
| 49 | 49 | return [] |
| 50 | 50 | for m in matches: |
| 51 | 51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if |
| ... | ... | @@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase): |
| 158 | 158 | self.axi = xapian.Database(cfg.axi) |
| 159 | 159 | self.path = os.path.expanduser(cfg.popcon_index) |
| 160 | 160 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
| 161 | + self.max_popcon = cfg.max_popcon | |
| 161 | 162 | if not cfg.index_mode == "old" or not self.load_index(): |
| 162 | 163 | if not os.path.exists(cfg.popcon_dir): |
| 163 | 164 | os.makedirs(cfg.popcon_dir) |
| 164 | 165 | if not os.listdir(cfg.popcon_dir): |
| 165 | 166 | logging.critical("Popcon dir seems to be empty.") |
| 166 | 167 | raise Error |
| 167 | - if cfg.index_mode == "reindex": | |
| 168 | + if cfg.index_mode == "reindex" or cfg.index_mode == "old": | |
| 168 | 169 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
| 169 | 170 | logging.debug(self.source_dir) |
| 170 | 171 | else: |
| ... | ... | @@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase): |
| 229 | 230 | logging.critical(str(e)) |
| 230 | 231 | raise Error |
| 231 | 232 | |
| 233 | + doc_count = 0 | |
| 232 | 234 | for root, dirs, files in os.walk(self.source_dir): |
| 235 | + if doc_count == self.max_popcon: | |
| 236 | + break | |
| 233 | 237 | for popcon_file in files: |
| 238 | + if doc_count == self.max_popcon: | |
| 239 | + break | |
| 234 | 240 | submission = PopconSubmission(os.path.join(root, popcon_file)) |
| 235 | 241 | doc = xapian.Document() |
| 236 | 242 | doc.set_data(submission.user_id) |
| ... | ... | @@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase): |
| 238 | 244 | submission.user_id) |
| 239 | 245 | for pkg, freq in submission.packages.items(): |
| 240 | 246 | doc.add_term("XP"+pkg,freq) |
| 241 | - if axi_search_pkg_tags(self.axi,pkg): | |
| 242 | - for tag in axi_search_pkg_tags(self.axi,pkg): | |
| 243 | - doc.add_term(tag,freq) | |
| 247 | + #if axi_search_pkg_tags(self.axi,pkg): | |
| 248 | + # for tag in axi_search_pkg_tags(self.axi,pkg): | |
| 249 | + # doc.add_term(tag,freq) | |
| 244 | 250 | doc_id = self.add_document(doc) |
| 251 | + doc_count += 1 | |
| 245 | 252 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
| 246 | 253 | # python garbage collector |
| 247 | 254 | gc.collect() | ... | ... |
src/web/survey.py
| ... | ... | @@ -32,7 +32,7 @@ class Thanks: |
| 32 | 32 | with open("./submissions/%s/ident" % user_id,'w') as ident: |
| 33 | 33 | for key in ["name","email","country","public","comments"]: |
| 34 | 34 | if web_input.has_key(key): |
| 35 | - ident.write("%s: %s\n" % (key,web_input[key])) | |
| 35 | + ident.write("%s: %s\n" % (key,web_input[key].encode("utf-8"))) | |
| 36 | 36 | return render.thanks_id() |
| 37 | 37 | |
| 38 | 38 | class Package: | ... | ... |