Commit 8c0000f71d2d4be6877a4e3e555fd1f725a36d02
1 parent
32cf4ed6
Exists in
master
and in
1 other branch
Catch the ValueError ("math domain error") raised by math.log in tfidf_weighting.
Showing 1 changed file with 10 additions and 5 deletions
Show diff stats
src/data.py
@@ -85,10 +85,15 @@ def tfidf_weighting(index,docs,content_filter,plus=0): | @@ -85,10 +85,15 @@ def tfidf_weighting(index,docs,content_filter,plus=0): | ||
85 | # Compute sublinear tfidf for each term | 85 | # Compute sublinear tfidf for each term |
86 | weights = {} | 86 | weights = {} |
87 | for term in terms_doc.termlist(): | 87 | for term in terms_doc.termlist(): |
88 | - tf = 1+math.log(term.wdf) | ||
89 | - idf = math.log(index.get_doccount()/ | ||
90 | - float(index.get_termfreq(term.term))) | ||
91 | - weights[term.term] = tf*idf | 88 | + try: |
89 | + # Even if it shouldn't raise error... | ||
90 | + # math.log: ValueError: math domain error | ||
91 | + tf = 1+math.log(term.wdf) | ||
92 | + idf = math.log(index.get_doccount()/ | ||
93 | + float(index.get_termfreq(term.term))) | ||
94 | + weights[term.term] = tf*idf | ||
95 | + except: | ||
96 | + pass | ||
92 | sorted_weights = list(reversed(sorted(weights.items(), | 97 | sorted_weights = list(reversed(sorted(weights.items(), |
93 | key=operator.itemgetter(1)))) | 98 | key=operator.itemgetter(1)))) |
94 | #print sorted_weights | 99 | #print sorted_weights |
@@ -410,7 +415,7 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -410,7 +415,7 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
410 | # if the package has tags associated with it | 415 | # if the package has tags associated with it |
411 | if not tags == "notags": | 416 | if not tags == "notags": |
412 | for tag in tags: | 417 | for tag in tags: |
413 | - if tag in self.valid_tags: | 418 | + if tag.lstrip("XT") in self.valid_tags: |
414 | doc.add_term(tag,freq) | 419 | doc.add_term(tag,freq) |
415 | doc_id = self.add_document(doc) | 420 | doc_id = self.add_document(doc) |
416 | doc_count += 1 | 421 | doc_count += 1 |