Commit 08c5967867d73b85832b216f4686830a4a2eb4d3
1 parent
72ac610b
Exists in
master
and in
1 other branch
Added max_popcon option to limit the overall index size and fixed a minor bug with index_mode handling.
Showing
1 changed file
with
12 additions
and
5 deletions
Show diff stats
src/data.py
| @@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg): | @@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg): | ||
| 45 | enquire.set_query(xapian.Query("XP"+pkg)) | 45 | enquire.set_query(xapian.Query("XP"+pkg)) |
| 46 | matches = enquire.get_mset(0,1) | 46 | matches = enquire.get_mset(0,1) |
| 47 | if not matches: | 47 | if not matches: |
| 48 | - logging.debug("Package %s not found in items repository" % pkg) | 48 | + #logging.debug("Package %s not found in items repository" % pkg) |
| 49 | return [] | 49 | return [] |
| 50 | for m in matches: | 50 | for m in matches: |
| 51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if | 51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if |
| @@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
| 158 | self.axi = xapian.Database(cfg.axi) | 158 | self.axi = xapian.Database(cfg.axi) |
| 159 | self.path = os.path.expanduser(cfg.popcon_index) | 159 | self.path = os.path.expanduser(cfg.popcon_index) |
| 160 | self.source_dir = os.path.expanduser(cfg.popcon_dir) | 160 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
| 161 | + self.max_popcon = cfg.max_popcon | ||
| 161 | if not cfg.index_mode == "old" or not self.load_index(): | 162 | if not cfg.index_mode == "old" or not self.load_index(): |
| 162 | if not os.path.exists(cfg.popcon_dir): | 163 | if not os.path.exists(cfg.popcon_dir): |
| 163 | os.makedirs(cfg.popcon_dir) | 164 | os.makedirs(cfg.popcon_dir) |
| 164 | if not os.listdir(cfg.popcon_dir): | 165 | if not os.listdir(cfg.popcon_dir): |
| 165 | logging.critical("Popcon dir seems to be empty.") | 166 | logging.critical("Popcon dir seems to be empty.") |
| 166 | raise Error | 167 | raise Error |
| 167 | - if cfg.index_mode == "reindex": | 168 | + if cfg.index_mode == "reindex" or cfg.index_mode == "old": |
| 168 | self.source_dir = os.path.expanduser(cfg.popcon_dir) | 169 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
| 169 | logging.debug(self.source_dir) | 170 | logging.debug(self.source_dir) |
| 170 | else: | 171 | else: |
| @@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
| 229 | logging.critical(str(e)) | 230 | logging.critical(str(e)) |
| 230 | raise Error | 231 | raise Error |
| 231 | 232 | ||
| 233 | + doc_count = 0 | ||
| 232 | for root, dirs, files in os.walk(self.source_dir): | 234 | for root, dirs, files in os.walk(self.source_dir): |
| 235 | + if doc_count == self.max_popcon: | ||
| 236 | + break | ||
| 233 | for popcon_file in files: | 237 | for popcon_file in files: |
| 238 | + if doc_count == self.max_popcon: | ||
| 239 | + break | ||
| 234 | submission = PopconSubmission(os.path.join(root, popcon_file)) | 240 | submission = PopconSubmission(os.path.join(root, popcon_file)) |
| 235 | doc = xapian.Document() | 241 | doc = xapian.Document() |
| 236 | doc.set_data(submission.user_id) | 242 | doc.set_data(submission.user_id) |
| @@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
| 238 | submission.user_id) | 244 | submission.user_id) |
| 239 | for pkg, freq in submission.packages.items(): | 245 | for pkg, freq in submission.packages.items(): |
| 240 | doc.add_term("XP"+pkg,freq) | 246 | doc.add_term("XP"+pkg,freq) |
| 241 | - if axi_search_pkg_tags(self.axi,pkg): | ||
| 242 | - for tag in axi_search_pkg_tags(self.axi,pkg): | ||
| 243 | - doc.add_term(tag,freq) | 247 | + #if axi_search_pkg_tags(self.axi,pkg): |
| 248 | + # for tag in axi_search_pkg_tags(self.axi,pkg): | ||
| 249 | + # doc.add_term(tag,freq) | ||
| 244 | doc_id = self.add_document(doc) | 250 | doc_id = self.add_document(doc) |
| 251 | + doc_count += 1 | ||
| 245 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) | 252 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
| 246 | # python garbage collector | 253 | # python garbage collector |
| 247 | gc.collect() | 254 | gc.collect() |