Commit 43742989ec7b23b05f0d73198a287523fc52cdbc

Authored by Tássia Camões Araújo
1 parent 49638292
Exists in master and in 1 other branch add_vagrant

Introduced configuration option 'reindex' that can be set using '-r' or
'--force-reindex' on the command line. If reindex is not set, it tries to reuse
an existing index and creates a new one only if the debtags database has been
modified or an error occurs when opening the index.
(close #8)
src/app_recommender.py
@@ -30,8 +30,6 @@ from strategy import * @@ -30,8 +30,6 @@ from strategy import *
30 from user import * 30 from user import *
31 31
32 def set_up_recommender(cfg): 32 def set_up_recommender(cfg):
33 - reindex = 1 #FIXME should do it only if necessary  
34 -  
35 if cfg.strategy == "cta": 33 if cfg.strategy == "cta":
36 axi_db = xapian.Database(cfg.axi) 34 axi_db = xapian.Database(cfg.axi)
37 app_rec = Recommender(axi_db) 35 app_rec = Recommender(axi_db)
@@ -43,7 +41,7 @@ def set_up_recommender(cfg): @@ -43,7 +41,7 @@ def set_up_recommender(cfg):
43 logging.error("Could not load DebtagsDB from %s." % cfg.tags_db) 41 logging.error("Could not load DebtagsDB from %s." % cfg.tags_db)
44 sys.exit(1) 42 sys.exit(1)
45 debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index)) 43 debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index))
46 - debtags_index.load(debtags_db,reindex) 44 + debtags_index.load(debtags_db,cfg.reindex)
47 app_rec = Recommender(debtags_index) 45 app_rec = Recommender(debtags_index)
48 app_rec.set_strategy(ContentBasedStrategy()) 46 app_rec.set_strategy(ContentBasedStrategy())
49 47
@@ -42,6 +42,7 @@ class Config(): @@ -42,6 +42,7 @@ class Config():
42 self.axi = "/var/lib/apt-xapian-index/index" 42 self.axi = "/var/lib/apt-xapian-index/index"
43 self.axi_values = "/var/lib/apt-xapian-index/values" 43 self.axi_values = "/var/lib/apt-xapian-index/values"
44 self.strategy = "ct" # defaults to the cheapest one 44 self.strategy = "ct" # defaults to the cheapest one
  45 + self.reindex = 0
45 46
46 def usage(self): 47 def usage(self):
47 """ 48 """
@@ -57,6 +58,7 @@ class Config(): @@ -57,6 +58,7 @@ class Config():
57 print " [ recommender ]" 58 print " [ recommender ]"
58 print " -t, --tagsdb=PATH Path to debtags database." 59 print " -t, --tagsdb=PATH Path to debtags database."
59 print " -i, --tagsindex=PATH Path to debtags dedicated index." 60 print " -i, --tagsindex=PATH Path to debtags dedicated index."
  61 + print " -r, --force-reindex Force reindexing debtags database."
60 print " -a, --axi=PATH Path to Apt-xapian-index." 62 print " -a, --axi=PATH Path to Apt-xapian-index."
61 print " -s, --strategy=OPTION Recommendation strategy." 63 print " -s, --strategy=OPTION Recommendation strategy."
62 print "" 64 print ""
@@ -98,17 +100,18 @@ class Config(): @@ -98,17 +100,18 @@ class Config():
98 100
99 self.tags_db = self.read_option('recommender', 'tags_db') 101 self.tags_db = self.read_option('recommender', 'tags_db')
100 self.tags_index = self.read_option('recommender', 'tags_index') 102 self.tags_index = self.read_option('recommender', 'tags_index')
  103 + self.reindex = self.read_option('recommender', 'reindex')
101 self.axi = self.read_option('recommender', 'axi') 104 self.axi = self.read_option('recommender', 'axi')
102 105
103 - short_options = "hdvo:c:t:i:a:s:" 106 + short_options = "hdvo:c:t:i:ra:s:"
104 long_options = ["help", "debug", "verbose", "output=", "config=", 107 long_options = ["help", "debug", "verbose", "output=", "config=",
105 - "tagsdb=", "tagsindex=", "axi=", "strategy="] 108 + "tagsdb=", "tagsindex=", "reindex", "axi=", "strategy="]
106 try: 109 try:
107 opts, args = getopt.getopt(sys.argv[1:], short_options, 110 opts, args = getopt.getopt(sys.argv[1:], short_options,
108 long_options) 111 long_options)
109 - except getopt.GetoptError, err:  
110 - logging.error("Error parsing args: %s", str(err))  
111 - print "Syntax error" 112 + except getopt.GetoptError as error:
  113 + self.set_logger()
  114 + logging.error("Bad syntax: %s" % str(error))
112 self.usage() 115 self.usage()
113 sys.exit() 116 sys.exit()
114 117
@@ -128,6 +131,8 @@ class Config(): @@ -128,6 +131,8 @@ class Config():
128 self.tagsdb = p 131 self.tagsdb = p
129 elif o in ("-i", "--tagsindex"): 132 elif o in ("-i", "--tagsindex"):
130 self.tagsindex = p 133 self.tagsindex = p
  134 + elif o in ("-r", "--force-reindex"):
  135 + self.reindex = 1
131 elif o in ("-a", "--axi"): 136 elif o in ("-a", "--axi"):
132 self.axi = p + "/index" 137 self.axi = p + "/index"
133 self.axi_values = p + "/values" 138 self.axi_values = p + "/values"
@@ -24,6 +24,7 @@ import xapian @@ -24,6 +24,7 @@ import xapian
24 import axi 24 import axi
25 from debian import debtags 25 from debian import debtags
26 import logging 26 import logging
  27 +import hashlib
27 28
28 class Item: 29 class Item:
29 """ """ 30 """ """
@@ -74,33 +75,52 @@ class DebtagsDB(debtags.DB): @@ -74,33 +75,52 @@ class DebtagsDB(debtags.DB):
74 class DebtagsIndex(xapian.WritableDatabase): 75 class DebtagsIndex(xapian.WritableDatabase):
75 def __init__(self,path): 76 def __init__(self,path):
76 self.path = path 77 self.path = path
  78 + self.db_md5 = 0
77 79
78 - def load(self,debtags_db,reindex): 80 + def load(self,debtags_db,reindex=0):
79 """ 81 """
80 Load an existing debtags index. 82 Load an existing debtags index.
81 """ 83 """
82 self.debtags_db = debtags_db 84 self.debtags_db = debtags_db
  85 + db = open(debtags_db.path)
  86 + md5 = hashlib.md5()
  87 + md5.update(db.read())
  88 + self.db_md5 = md5.hexdigest()
  89 +
83 if not reindex: 90 if not reindex:
84 try: 91 try:
85 logging.info("Opening existing debtags xapian index at \'%s\'" 92 logging.info("Opening existing debtags xapian index at \'%s\'"
86 % self.path) 93 % self.path)
87 xapian.Database.__init__(self,self.path) 94 xapian.Database.__init__(self,self.path)
  95 + md5 = self.get_metadata("md5")
  96 + if not md5 == self.db_md5:
  97 + logging.info("Index must be updated.")
  98 + reindex = 1
88 except xapian.DatabaseError: 99 except xapian.DatabaseError:
89 - logging.error("Could not open debtags xapian index") 100 + logging.info("Could not open index.")
90 reindex =1 101 reindex =1
  102 +
91 if reindex: 103 if reindex:
92 - self.reindex(debtags_db) 104 + self.create_index(debtags_db)
93 105
94 - def reindex(self,debtags_db): 106 + def create_index(self,debtags_db):
95 """ 107 """
96 Create a xapian index for debtags info based on file 'debtags_db' and 108 Create a xapian index for debtags info based on file 'debtags_db' and
97 place it at 'index_path'. 109 place it at 'index_path'.
98 """ 110 """
99 if not os.path.exists(self.path): 111 if not os.path.exists(self.path):
100 os.makedirs(self.path) 112 os.makedirs(self.path)
101 - logging.info("Creating new debtags xapian index at \'%s\'" % self.path)  
102 - xapian.WritableDatabase.__init__(self,self.path,  
103 - xapian.DB_CREATE_OR_OVERWRITE) 113 +
  114 + try:
  115 + logging.info("Creating new xapian index for debtags at \'%s\'" %
  116 + self.path)
  117 + xapian.WritableDatabase.__init__(self,self.path,
  118 + xapian.DB_CREATE_OR_OVERWRITE)
  119 + except xapian.DatabaseError:
  120 + logging.critical("Could not create xapian index.")
  121 + exit(1)
  122 +
  123 + self.set_metadata("md5",self.db_md5)
104 for pkg,tags in debtags_db.iter_packages_tags(): 124 for pkg,tags in debtags_db.iter_packages_tags():
105 doc = xapian.Document() 125 doc = xapian.Document()
106 doc.set_data(pkg) 126 doc.set_data(pkg)
src/strategy.py
@@ -85,7 +85,12 @@ class ContentBasedStrategy(RecommendationStrategy): @@ -85,7 +85,12 @@ class ContentBasedStrategy(RecommendationStrategy):
85 enquire = xapian.Enquire(recommender.items_repository) 85 enquire = xapian.Enquire(recommender.items_repository)
86 enquire.set_query(query) 86 enquire.set_query(query)
87 87
88 - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) 88 + try:
  89 + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
  90 + except xapian.DatabaseError as error:
  91 + logging.critical(error.get_msg())
  92 + exit(1)
  93 +
89 item_score = {} 94 item_score = {}
90 for m in mset: 95 for m in mset:
91 item_score[m.document.get_data()] = m.rank 96 item_score[m.document.get_data()] = m.rank
@@ -104,7 +109,12 @@ class AxiContentBasedStrategy(RecommendationStrategy): @@ -104,7 +109,12 @@ class AxiContentBasedStrategy(RecommendationStrategy):
104 enquire = xapian.Enquire(recommender.items_repository) 109 enquire = xapian.Enquire(recommender.items_repository)
105 enquire.set_query(query) 110 enquire.set_query(query)
106 111
107 - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) 112 + try:
  113 + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
  114 + except xapian.DatabaseError as error:
  115 + logging.critical(error.get_msg())
  116 + exit(1)
  117 +
108 item_score = {} 118 item_score = {}
109 for m in mset: 119 for m in mset:
110 item_score[m.document.get_data()] = m.rank 120 item_score[m.document.get_data()] = m.rank