Commit 8c0000f71d2d4be6877a4e3e555fd1f725a36d02

Authored by Tássia Camões Araújo
1 parent 32cf4ed6
Exists in master and in 1 other branch add_vagrant

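Catch math.log ValueError (math domain error) in tfidf_weighting; strip leading "X"/"T" characters from tags before checking them against valid_tags.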

Showing 1 changed file with 10 additions and 5 deletions   Show diff stats
src/data.py
... ... @@ -85,10 +85,15 @@ def tfidf_weighting(index,docs,content_filter,plus=0):
85 85 # Compute sublinear tfidf for each term
86 86 weights = {}
87 87 for term in terms_doc.termlist():
88   - tf = 1+math.log(term.wdf)
89   - idf = math.log(index.get_doccount()/
90   - float(index.get_termfreq(term.term)))
91   - weights[term.term] = tf*idf
  88 + try:
  89 + # Even if it shouldn't raise error...
  90 + # math.log: ValueError: math domain error
  91 + tf = 1+math.log(term.wdf)
  92 + idf = math.log(index.get_doccount()/
  93 + float(index.get_termfreq(term.term)))
  94 + weights[term.term] = tf*idf
  95 + except:
  96 + pass
92 97 sorted_weights = list(reversed(sorted(weights.items(),
93 98 key=operator.itemgetter(1))))
94 99 #print sorted_weights
... ... @@ -410,7 +415,7 @@ class PopconXapianIndex(xapian.WritableDatabase):
410 415 # if the package has tags associated with it
411 416 if not tags == "notags":
412 417 for tag in tags:
413   - if tag in self.valid_tags:
  418 + if tag.lstrip("XT") in self.valid_tags:
414 419 doc.add_term(tag,freq)
415 420 doc_id = self.add_document(doc)
416 421 doc_count += 1
... ...