Commit 08c5967867d73b85832b216f4686830a4a2eb4d3
1 parent
72ac610b
Exists in
master
and in
1 other branch
Added max_popcon option to cap the overall index size and fixed a minor bug in index_mode handling.
Showing
1 changed file
with
12 additions
and
5 deletions
Show diff stats
src/data.py
... | ... | @@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg): |
45 | 45 | enquire.set_query(xapian.Query("XP"+pkg)) |
46 | 46 | matches = enquire.get_mset(0,1) |
47 | 47 | if not matches: |
48 | - logging.debug("Package %s not found in items repository" % pkg) | |
48 | + #logging.debug("Package %s not found in items repository" % pkg) | |
49 | 49 | return [] |
50 | 50 | for m in matches: |
51 | 51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if |
... | ... | @@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase): |
158 | 158 | self.axi = xapian.Database(cfg.axi) |
159 | 159 | self.path = os.path.expanduser(cfg.popcon_index) |
160 | 160 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
161 | + self.max_popcon = cfg.max_popcon | |
161 | 162 | if not cfg.index_mode == "old" or not self.load_index(): |
162 | 163 | if not os.path.exists(cfg.popcon_dir): |
163 | 164 | os.makedirs(cfg.popcon_dir) |
164 | 165 | if not os.listdir(cfg.popcon_dir): |
165 | 166 | logging.critical("Popcon dir seems to be empty.") |
166 | 167 | raise Error |
167 | - if cfg.index_mode == "reindex": | |
168 | + if cfg.index_mode == "reindex" or cfg.index_mode == "old": | |
168 | 169 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
169 | 170 | logging.debug(self.source_dir) |
170 | 171 | else: |
... | ... | @@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase): |
229 | 230 | logging.critical(str(e)) |
230 | 231 | raise Error |
231 | 232 | |
233 | + doc_count = 0 | |
232 | 234 | for root, dirs, files in os.walk(self.source_dir): |
235 | + if doc_count == self.max_popcon: | |
236 | + break | |
233 | 237 | for popcon_file in files: |
238 | + if doc_count == self.max_popcon: | |
239 | + break | |
234 | 240 | submission = PopconSubmission(os.path.join(root, popcon_file)) |
235 | 241 | doc = xapian.Document() |
236 | 242 | doc.set_data(submission.user_id) |
... | ... | @@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase): |
238 | 244 | submission.user_id) |
239 | 245 | for pkg, freq in submission.packages.items(): |
240 | 246 | doc.add_term("XP"+pkg,freq) |
241 | - if axi_search_pkg_tags(self.axi,pkg): | |
242 | - for tag in axi_search_pkg_tags(self.axi,pkg): | |
243 | - doc.add_term(tag,freq) | |
247 | + #if axi_search_pkg_tags(self.axi,pkg): | |
248 | + # for tag in axi_search_pkg_tags(self.axi,pkg): | |
249 | + # doc.add_term(tag,freq) | |
244 | 250 | doc_id = self.add_document(doc) |
251 | + doc_count += 1 | |
245 | 252 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
246 | 253 | # python garbage collector |
247 | 254 | gc.collect() | ... | ... |