Commit 2188f43d08d43f6781f2e5d4fbaa560e44f59af6
1 parent
8b95a69d
Exists in
master
and in
1 other branch
Discarding submissions with few valid packages in profile for popcon indexing.
Showing
1 changed file
with
17 additions
and
14 deletions
Show diff stats
src/data.py
... | ... | @@ -119,13 +119,12 @@ class PopconSubmission(): |
119 | 119 | output += "\n "+pkg+": "+str(weight) |
120 | 120 | return output |
121 | 121 | |
122 | - def apps(self,axi): | |
123 | - apps = {} | |
122 | + def get_filtered(self,filter_list): | |
123 | + filtered = {} | |
124 | 124 | for pkg in self.packages.keys(): |
125 | - tags = axi_search_pkg_tags(self.axi,pkg) | |
126 | - if "XTrole::program" in tags: | |
127 | - apps[pkg] = self.packages[pkg] | |
128 | - return apps | |
125 | + if pkg in filter_list: | |
126 | + filtered[pkg] = self.packages[pkg] | |
127 | + return filtered | |
129 | 128 | |
130 | 129 | def load(self,binary=1): |
131 | 130 | """ |
... | ... | @@ -261,11 +260,15 @@ class PopconXapianIndex(xapian.WritableDatabase): |
261 | 260 | break |
262 | 261 | submission = PopconSubmission(os.path.join(root, popcon_file)) |
263 | 262 | doc = xapian.Document() |
264 | - doc.set_data(submission.user_id) | |
265 | - logging.debug("Parsing popcon submission \'%s\'" % | |
266 | - submission.user_id) | |
267 | - for pkg, freq in submission.packages.items(): | |
268 | - if pkg in self.valid_pkgs: | |
263 | + submission_pkgs = submission.get_filtered(self.valid_pkgs) | |
264 | + if len(submission_pkgs) < 10: | |
265 | + logging.debug("Low profile popcon submission \'%s\' (%d)" % | |
266 | + (submission.user_id,len(submission_pkgs))) | |
267 | + else: | |
268 | + doc.set_data(submission.user_id) | |
269 | + logging.debug("Parsing popcon submission \'%s\'" % | |
270 | + submission.user_id) | |
271 | + for pkg,freq in submission_pkgs.items(): | |
269 | 272 | tags = axi_search_pkg_tags(self.axi,pkg) |
270 | 273 | # if the package was found in axi |
271 | 274 | if tags: |
... | ... | @@ -275,9 +278,9 @@ class PopconXapianIndex(xapian.WritableDatabase): |
275 | 278 | for tag in tags: |
276 | 279 | if tag in self.valid_tags: |
277 | 280 | doc.add_term(tag,freq) |
278 | - doc_id = self.add_document(doc) | |
279 | - doc_count += 1 | |
280 | - logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) | |
281 | + doc_id = self.add_document(doc) | |
282 | + doc_count += 1 | |
283 | + logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) | |
281 | 284 | # python garbage collector |
282 | 285 | gc.collect() |
283 | 286 | # flush to disk database changes | ... | ... |