Commit 2188f43d08d43f6781f2e5d4fbaa560e44f59af6

Authored by Tássia Camões Araújo
1 parent 8b95a69d
Exists in master and in 1 other branch add_vagrant

Discarding submissions with few valid packages in profile for popcon indexing.

Showing 1 changed file with 17 additions and 14 deletions   Show diff stats
src/data.py
... ... @@ -119,13 +119,12 @@ class PopconSubmission():
119 119 output += "\n "+pkg+": "+str(weight)
120 120 return output
121 121  
122   - def apps(self,axi):
123   - apps = {}
  122 + def get_filtered(self,filter_list):
  123 + filtered = {}
124 124 for pkg in self.packages.keys():
125   - tags = axi_search_pkg_tags(self.axi,pkg)
126   - if "XTrole::program" in tags:
127   - apps[pkg] = self.packages[pkg]
128   - return apps
  125 + if pkg in filter_list:
  126 + filtered[pkg] = self.packages[pkg]
  127 + return filtered
129 128  
130 129 def load(self,binary=1):
131 130 """
... ... @@ -261,11 +260,15 @@ class PopconXapianIndex(xapian.WritableDatabase):
261 260 break
262 261 submission = PopconSubmission(os.path.join(root, popcon_file))
263 262 doc = xapian.Document()
264   - doc.set_data(submission.user_id)
265   - logging.debug("Parsing popcon submission \'%s\'" %
266   - submission.user_id)
267   - for pkg, freq in submission.packages.items():
268   - if pkg in self.valid_pkgs:
  263 + submission_pkgs = submission.get_filtered(self.valid_pkgs)
  264 + if len(submission_pkgs) < 10:
  265 + logging.debug("Low profile popcon submission \'%s\' (%d)" %
  266 + (submission.user_id,len(submission_pkgs)))
  267 + else:
  268 + doc.set_data(submission.user_id)
  269 + logging.debug("Parsing popcon submission \'%s\'" %
  270 + submission.user_id)
  271 + for pkg,freq in submission_pkgs.items():
269 272 tags = axi_search_pkg_tags(self.axi,pkg)
270 273 # if the package was found in axi
271 274 if tags:
... ... @@ -275,9 +278,9 @@ class PopconXapianIndex(xapian.WritableDatabase):
275 278 for tag in tags:
276 279 if tag in self.valid_tags:
277 280 doc.add_term(tag,freq)
278   - doc_id = self.add_document(doc)
279   - doc_count += 1
280   - logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
  281 + doc_id = self.add_document(doc)
  282 + doc_count += 1
  283 + logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
281 284 # python garbage collector
282 285 gc.collect()
283 286 # flush to disk database changes
... ...