Commit 2188f43d08d43f6781f2e5d4fbaa560e44f59af6

Authored by Tássia Camões Araújo
1 parent 8b95a69d
Exists in master and in 1 other branch add_vagrant

Discarding submissions with few valid packages in profile for popcon indexing.

Showing 1 changed file with 17 additions and 14 deletions   Show diff stats
@@ -119,13 +119,12 @@ class PopconSubmission(): @@ -119,13 +119,12 @@ class PopconSubmission():
119 output += "\n "+pkg+": "+str(weight) 119 output += "\n "+pkg+": "+str(weight)
120 return output 120 return output
121 121
122 - def apps(self,axi):  
123 - apps = {} 122 + def get_filtered(self,filter_list):
  123 + filtered = {}
124 for pkg in self.packages.keys(): 124 for pkg in self.packages.keys():
125 - tags = axi_search_pkg_tags(self.axi,pkg)  
126 - if "XTrole::program" in tags:  
127 - apps[pkg] = self.packages[pkg]  
128 - return apps 125 + if pkg in filter_list:
  126 + filtered[pkg] = self.packages[pkg]
  127 + return filtered
129 128
130 def load(self,binary=1): 129 def load(self,binary=1):
131 """ 130 """
@@ -261,11 +260,15 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -261,11 +260,15 @@ class PopconXapianIndex(xapian.WritableDatabase):
261 break 260 break
262 submission = PopconSubmission(os.path.join(root, popcon_file)) 261 submission = PopconSubmission(os.path.join(root, popcon_file))
263 doc = xapian.Document() 262 doc = xapian.Document()
264 - doc.set_data(submission.user_id)  
265 - logging.debug("Parsing popcon submission \'%s\'" %  
266 - submission.user_id)  
267 - for pkg, freq in submission.packages.items():  
268 - if pkg in self.valid_pkgs: 263 + submission_pkgs = submission.get_filtered(self.valid_pkgs)
  264 + if len(submission_pkgs) < 10:
  265 + logging.debug("Low profile popcon submission \'%s\' (%d)" %
  266 + (submission.user_id,len(submission_pkgs)))
  267 + else:
  268 + doc.set_data(submission.user_id)
  269 + logging.debug("Parsing popcon submission \'%s\'" %
  270 + submission.user_id)
  271 + for pkg,freq in submission_pkgs.items():
269 tags = axi_search_pkg_tags(self.axi,pkg) 272 tags = axi_search_pkg_tags(self.axi,pkg)
270 # if the package was found in axi 273 # if the package was found in axi
271 if tags: 274 if tags:
@@ -275,9 +278,9 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -275,9 +278,9 @@ class PopconXapianIndex(xapian.WritableDatabase):
275 for tag in tags: 278 for tag in tags:
276 if tag in self.valid_tags: 279 if tag in self.valid_tags:
277 doc.add_term(tag,freq) 280 doc.add_term(tag,freq)
278 - doc_id = self.add_document(doc)  
279 - doc_count += 1  
280 - logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) 281 + doc_id = self.add_document(doc)
  282 + doc_count += 1
  283 + logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
281 # python garbage collector 284 # python garbage collector
282 gc.collect() 285 gc.collect()
283 # flush to disk database changes 286 # flush to disk database changes