Commit 8c0000f71d2d4be6877a4e3e555fd1f725a36d02

Authored by Tássia Camões Araújo
1 parent 32cf4ed6
Exists in master and in 1 other branch add_vagrant

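Catch the ValueError ("math domain error") that math.log can raise in tfidf_weighting; terms that trigger it are skipped. Also strip leading "X"/"T" characters from each tag before checking it against valid_tags.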

Showing 1 changed file with 10 additions and 5 deletions   Show diff stats
@@ -85,10 +85,15 @@ def tfidf_weighting(index,docs,content_filter,plus=0): @@ -85,10 +85,15 @@ def tfidf_weighting(index,docs,content_filter,plus=0):
85 # Compute sublinear tfidf for each term 85 # Compute sublinear tfidf for each term
86 weights = {} 86 weights = {}
87 for term in terms_doc.termlist(): 87 for term in terms_doc.termlist():
88 - tf = 1+math.log(term.wdf)  
89 - idf = math.log(index.get_doccount()/  
90 - float(index.get_termfreq(term.term)))  
91 - weights[term.term] = tf*idf 88 + try:
  89 + # Even if it shouldn't raise error...
  90 + # math.log: ValueError: math domain error
  91 + tf = 1+math.log(term.wdf)
  92 + idf = math.log(index.get_doccount()/
  93 + float(index.get_termfreq(term.term)))
  94 + weights[term.term] = tf*idf
  95 + except:
  96 + pass
92 sorted_weights = list(reversed(sorted(weights.items(), 97 sorted_weights = list(reversed(sorted(weights.items(),
93 key=operator.itemgetter(1)))) 98 key=operator.itemgetter(1))))
94 #print sorted_weights 99 #print sorted_weights
@@ -410,7 +415,7 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -410,7 +415,7 @@ class PopconXapianIndex(xapian.WritableDatabase):
410 # if the package has tags associated with it 415 # if the package has tags associated with it
411 if not tags == "notags": 416 if not tags == "notags":
412 for tag in tags: 417 for tag in tags:
413 - if tag in self.valid_tags: 418 + if tag.lstrip("XT") in self.valid_tags:
414 doc.add_term(tag,freq) 419 doc.add_term(tag,freq)
415 doc_id = self.add_document(doc) 420 doc_id = self.add_document(doc)
416 doc_count += 1 421 doc_count += 1