Commit 08c5967867d73b85832b216f4686830a4a2eb4d3

Authored by Tássia Camões Araújo
1 parent 72ac610b
Exists in master and in 1 other branch add_vagrant

Added max_popcon option to limit the overall index size and fixed a minor bug in index_mode handling.

Showing 1 changed file with 12 additions and 5 deletions   Show diff stats
src/data.py
... ... @@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg):
45 45 enquire.set_query(xapian.Query("XP"+pkg))
46 46 matches = enquire.get_mset(0,1)
47 47 if not matches:
48   - logging.debug("Package %s not found in items repository" % pkg)
  48 + #logging.debug("Package %s not found in items repository" % pkg)
49 49 return []
50 50 for m in matches:
51 51 tags = [term.term for term in axi.get_document(m.docid).termlist() if
... ... @@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase):
158 158 self.axi = xapian.Database(cfg.axi)
159 159 self.path = os.path.expanduser(cfg.popcon_index)
160 160 self.source_dir = os.path.expanduser(cfg.popcon_dir)
  161 + self.max_popcon = cfg.max_popcon
161 162 if not cfg.index_mode == "old" or not self.load_index():
162 163 if not os.path.exists(cfg.popcon_dir):
163 164 os.makedirs(cfg.popcon_dir)
164 165 if not os.listdir(cfg.popcon_dir):
165 166 logging.critical("Popcon dir seems to be empty.")
166 167 raise Error
167   - if cfg.index_mode == "reindex":
  168 + if cfg.index_mode == "reindex" or cfg.index_mode == "old":
168 169 self.source_dir = os.path.expanduser(cfg.popcon_dir)
169 170 logging.debug(self.source_dir)
170 171 else:
... ... @@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase):
229 230 logging.critical(str(e))
230 231 raise Error
231 232  
  233 + doc_count = 0
232 234 for root, dirs, files in os.walk(self.source_dir):
  235 + if doc_count == self.max_popcon:
  236 + break
233 237 for popcon_file in files:
  238 + if doc_count == self.max_popcon:
  239 + break
234 240 submission = PopconSubmission(os.path.join(root, popcon_file))
235 241 doc = xapian.Document()
236 242 doc.set_data(submission.user_id)
... ... @@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase):
238 244 submission.user_id)
239 245 for pkg, freq in submission.packages.items():
240 246 doc.add_term("XP"+pkg,freq)
241   - if axi_search_pkg_tags(self.axi,pkg):
242   - for tag in axi_search_pkg_tags(self.axi,pkg):
243   - doc.add_term(tag,freq)
  247 + #if axi_search_pkg_tags(self.axi,pkg):
  248 + # for tag in axi_search_pkg_tags(self.axi,pkg):
  249 + # doc.add_term(tag,freq)
244 250 doc_id = self.add_document(doc)
  251 + doc_count += 1
245 252 logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
246 253 # python garbage collector
247 254 gc.collect()
... ...