Commit 2188f43d08d43f6781f2e5d4fbaa560e44f59af6
1 parent: 8b95a69d
Exists in master and in 1 other branch
Discarding submissions with few valid packages in profile for popcon indexing.
Showing 1 changed file with 17 additions and 14 deletions
Show diff stats
src/data.py
@@ -119,13 +119,12 @@ class PopconSubmission(): | @@ -119,13 +119,12 @@ class PopconSubmission(): | ||
119 | output += "\n "+pkg+": "+str(weight) | 119 | output += "\n "+pkg+": "+str(weight) |
120 | return output | 120 | return output |
121 | 121 | ||
122 | - def apps(self,axi): | ||
123 | - apps = {} | 122 | + def get_filtered(self,filter_list): |
123 | + filtered = {} | ||
124 | for pkg in self.packages.keys(): | 124 | for pkg in self.packages.keys(): |
125 | - tags = axi_search_pkg_tags(self.axi,pkg) | ||
126 | - if "XTrole::program" in tags: | ||
127 | - apps[pkg] = self.packages[pkg] | ||
128 | - return apps | 125 | + if pkg in filter_list: |
126 | + filtered[pkg] = self.packages[pkg] | ||
127 | + return filtered | ||
129 | 128 | ||
130 | def load(self,binary=1): | 129 | def load(self,binary=1): |
131 | """ | 130 | """ |
@@ -261,11 +260,15 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -261,11 +260,15 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
261 | break | 260 | break |
262 | submission = PopconSubmission(os.path.join(root, popcon_file)) | 261 | submission = PopconSubmission(os.path.join(root, popcon_file)) |
263 | doc = xapian.Document() | 262 | doc = xapian.Document() |
264 | - doc.set_data(submission.user_id) | ||
265 | - logging.debug("Parsing popcon submission \'%s\'" % | ||
266 | - submission.user_id) | ||
267 | - for pkg, freq in submission.packages.items(): | ||
268 | - if pkg in self.valid_pkgs: | 263 | + submission_pkgs = submission.get_filtered(self.valid_pkgs) |
264 | + if len(submission_pkgs) < 10: | ||
265 | + logging.debug("Low profile popcon submission \'%s\' (%d)" % | ||
266 | + (submission.user_id,len(submission_pkgs))) | ||
267 | + else: | ||
268 | + doc.set_data(submission.user_id) | ||
269 | + logging.debug("Parsing popcon submission \'%s\'" % | ||
270 | + submission.user_id) | ||
271 | + for pkg,freq in submission_pkgs.items(): | ||
269 | tags = axi_search_pkg_tags(self.axi,pkg) | 272 | tags = axi_search_pkg_tags(self.axi,pkg) |
270 | # if the package was foung in axi | 273 | # if the package was foung in axi |
271 | if tags: | 274 | if tags: |
@@ -275,9 +278,9 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -275,9 +278,9 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
275 | for tag in tags: | 278 | for tag in tags: |
276 | if tag in self.valid_tags: | 279 | if tag in self.valid_tags: |
277 | doc.add_term(tag,freq) | 280 | doc.add_term(tag,freq) |
278 | - doc_id = self.add_document(doc) | ||
279 | - doc_count += 1 | ||
280 | - logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) | 281 | + doc_id = self.add_document(doc) |
282 | + doc_count += 1 | ||
283 | + logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) | ||
281 | # python garbage collector | 284 | # python garbage collector |
282 | gc.collect() | 285 | gc.collect() |
283 | # flush to disk database changes | 286 | # flush to disk database changes |