Commit 8c0000f71d2d4be6877a4e3e555fd1f725a36d02 (1 parent: 32cf4ed6)
Exists in master and in 1 other branch.
Catching log value error.
Showing 1 changed file with 10 additions and 5 deletions.
Show diff stats
src/data.py
... | ... | @@ -85,10 +85,15 @@ def tfidf_weighting(index,docs,content_filter,plus=0): |
85 | 85 | # Compute sublinear tfidf for each term |
86 | 86 | weights = {} |
87 | 87 | for term in terms_doc.termlist(): |
88 | - tf = 1+math.log(term.wdf) | |
89 | - idf = math.log(index.get_doccount()/ | |
90 | - float(index.get_termfreq(term.term))) | |
91 | - weights[term.term] = tf*idf | |
88 | + try: | |
89 | + # Even if it shouldn't raise error... | |
90 | + # math.log: ValueError: math domain error | |
91 | + tf = 1+math.log(term.wdf) | |
92 | + idf = math.log(index.get_doccount()/ | |
93 | + float(index.get_termfreq(term.term))) | |
94 | + weights[term.term] = tf*idf | |
95 | + except: | |
96 | + pass | |
92 | 97 | sorted_weights = list(reversed(sorted(weights.items(), |
93 | 98 | key=operator.itemgetter(1)))) |
94 | 99 | #print sorted_weights |
... | ... | @@ -410,7 +415,7 @@ class PopconXapianIndex(xapian.WritableDatabase): |
410 | 415 | # if the package has tags associated with it |
411 | 416 | if not tags == "notags": |
412 | 417 | for tag in tags: |
413 | - if tag in self.valid_tags: | |
418 | + if tag.lstrip("XT") in self.valid_tags: | |
414 | 419 | doc.add_term(tag,freq) |
415 | 420 | doc_id = self.add_document(doc) |
416 | 421 | doc_count += 1 | ... | ... |