Commit 08c5967867d73b85832b216f4686830a4a2eb4d3
1 parent
72ac610b
Exists in
master
and in
1 other branch
Added max_popcon option to limit the overall index size and fixed a minor bug with index_mode handling.
Showing
1 changed file
with
12 additions
and
5 deletions
Show diff stats
src/data.py
| ... | ... | @@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg): |
| 45 | 45 | enquire.set_query(xapian.Query("XP"+pkg)) |
| 46 | 46 | matches = enquire.get_mset(0,1) |
| 47 | 47 | if not matches: |
| 48 | - logging.debug("Package %s not found in items repository" % pkg) | |
| 48 | + #logging.debug("Package %s not found in items repository" % pkg) | |
| 49 | 49 | return [] |
| 50 | 50 | for m in matches: |
| 51 | 51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if |
| ... | ... | @@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase): |
| 158 | 158 | self.axi = xapian.Database(cfg.axi) |
| 159 | 159 | self.path = os.path.expanduser(cfg.popcon_index) |
| 160 | 160 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
| 161 | + self.max_popcon = cfg.max_popcon | |
| 161 | 162 | if not cfg.index_mode == "old" or not self.load_index(): |
| 162 | 163 | if not os.path.exists(cfg.popcon_dir): |
| 163 | 164 | os.makedirs(cfg.popcon_dir) |
| 164 | 165 | if not os.listdir(cfg.popcon_dir): |
| 165 | 166 | logging.critical("Popcon dir seems to be empty.") |
| 166 | 167 | raise Error |
| 167 | - if cfg.index_mode == "reindex": | |
| 168 | + if cfg.index_mode == "reindex" or cfg.index_mode == "old": | |
| 168 | 169 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
| 169 | 170 | logging.debug(self.source_dir) |
| 170 | 171 | else: |
| ... | ... | @@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase): |
| 229 | 230 | logging.critical(str(e)) |
| 230 | 231 | raise Error |
| 231 | 232 | |
| 233 | + doc_count = 0 | |
| 232 | 234 | for root, dirs, files in os.walk(self.source_dir): |
| 235 | + if doc_count == self.max_popcon: | |
| 236 | + break | |
| 233 | 237 | for popcon_file in files: |
| 238 | + if doc_count == self.max_popcon: | |
| 239 | + break | |
| 234 | 240 | submission = PopconSubmission(os.path.join(root, popcon_file)) |
| 235 | 241 | doc = xapian.Document() |
| 236 | 242 | doc.set_data(submission.user_id) |
| ... | ... | @@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase): |
| 238 | 244 | submission.user_id) |
| 239 | 245 | for pkg, freq in submission.packages.items(): |
| 240 | 246 | doc.add_term("XP"+pkg,freq) |
| 241 | - if axi_search_pkg_tags(self.axi,pkg): | |
| 242 | - for tag in axi_search_pkg_tags(self.axi,pkg): | |
| 243 | - doc.add_term(tag,freq) | |
| 247 | + #if axi_search_pkg_tags(self.axi,pkg): | |
| 248 | + # for tag in axi_search_pkg_tags(self.axi,pkg): | |
| 249 | + # doc.add_term(tag,freq) | |
| 244 | 250 | doc_id = self.add_document(doc) |
| 251 | + doc_count += 1 | |
| 245 | 252 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
| 246 | 253 | # python garbage collector |
| 247 | 254 | gc.collect() | ... | ... |