Commit 08c5967867d73b85832b216f4686830a4a2eb4d3

Authored by Tássia Camões Araújo
1 parent 72ac610b
Exists in master and in 1 other branch add_vagrant

Added max_popcon option to limit the overall index size and fixed a minor bug in index_mode handling.

Showing 1 changed file with 12 additions and 5 deletions   Show diff stats
@@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg): @@ -45,7 +45,7 @@ def axi_search_pkg_tags(axi,pkg):
45 enquire.set_query(xapian.Query("XP"+pkg)) 45 enquire.set_query(xapian.Query("XP"+pkg))
46 matches = enquire.get_mset(0,1) 46 matches = enquire.get_mset(0,1)
47 if not matches: 47 if not matches:
48 - logging.debug("Package %s not found in items repository" % pkg) 48 + #logging.debug("Package %s not found in items repository" % pkg)
49 return [] 49 return []
50 for m in matches: 50 for m in matches:
51 tags = [term.term for term in axi.get_document(m.docid).termlist() if 51 tags = [term.term for term in axi.get_document(m.docid).termlist() if
@@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -158,13 +158,14 @@ class PopconXapianIndex(xapian.WritableDatabase):
158 self.axi = xapian.Database(cfg.axi) 158 self.axi = xapian.Database(cfg.axi)
159 self.path = os.path.expanduser(cfg.popcon_index) 159 self.path = os.path.expanduser(cfg.popcon_index)
160 self.source_dir = os.path.expanduser(cfg.popcon_dir) 160 self.source_dir = os.path.expanduser(cfg.popcon_dir)
  161 + self.max_popcon = cfg.max_popcon
161 if not cfg.index_mode == "old" or not self.load_index(): 162 if not cfg.index_mode == "old" or not self.load_index():
162 if not os.path.exists(cfg.popcon_dir): 163 if not os.path.exists(cfg.popcon_dir):
163 os.makedirs(cfg.popcon_dir) 164 os.makedirs(cfg.popcon_dir)
164 if not os.listdir(cfg.popcon_dir): 165 if not os.listdir(cfg.popcon_dir):
165 logging.critical("Popcon dir seems to be empty.") 166 logging.critical("Popcon dir seems to be empty.")
166 raise Error 167 raise Error
167 - if cfg.index_mode == "reindex": 168 + if cfg.index_mode == "reindex" or cfg.index_mode == "old":
168 self.source_dir = os.path.expanduser(cfg.popcon_dir) 169 self.source_dir = os.path.expanduser(cfg.popcon_dir)
169 logging.debug(self.source_dir) 170 logging.debug(self.source_dir)
170 else: 171 else:
@@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -229,8 +230,13 @@ class PopconXapianIndex(xapian.WritableDatabase):
229 logging.critical(str(e)) 230 logging.critical(str(e))
230 raise Error 231 raise Error
231 232
  233 + doc_count = 0
232 for root, dirs, files in os.walk(self.source_dir): 234 for root, dirs, files in os.walk(self.source_dir):
  235 + if doc_count == self.max_popcon:
  236 + break
233 for popcon_file in files: 237 for popcon_file in files:
  238 + if doc_count == self.max_popcon:
  239 + break
234 submission = PopconSubmission(os.path.join(root, popcon_file)) 240 submission = PopconSubmission(os.path.join(root, popcon_file))
235 doc = xapian.Document() 241 doc = xapian.Document()
236 doc.set_data(submission.user_id) 242 doc.set_data(submission.user_id)
@@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -238,10 +244,11 @@ class PopconXapianIndex(xapian.WritableDatabase):
238 submission.user_id) 244 submission.user_id)
239 for pkg, freq in submission.packages.items(): 245 for pkg, freq in submission.packages.items():
240 doc.add_term("XP"+pkg,freq) 246 doc.add_term("XP"+pkg,freq)
241 - if axi_search_pkg_tags(self.axi,pkg):  
242 - for tag in axi_search_pkg_tags(self.axi,pkg):  
243 - doc.add_term(tag,freq) 247 + #if axi_search_pkg_tags(self.axi,pkg):
  248 + # for tag in axi_search_pkg_tags(self.axi,pkg):
  249 + # doc.add_term(tag,freq)
244 doc_id = self.add_document(doc) 250 doc_id = self.add_document(doc)
  251 + doc_count += 1
245 logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) 252 logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
246 # python garbage collector 253 # python garbage collector
247 gc.collect() 254 gc.collect()