diff --git a/src/app_recommender.py b/src/app_recommender.py index 792dceb..4f02618 100755 --- a/src/app_recommender.py +++ b/src/app_recommender.py @@ -30,8 +30,6 @@ from strategy import * from user import * def set_up_recommender(cfg): - reindex = 1 #FIXME should do it only if necessary - if cfg.strategy == "cta": axi_db = xapian.Database(cfg.axi) app_rec = Recommender(axi_db) @@ -43,7 +41,7 @@ def set_up_recommender(cfg): logging.error("Could not load DebtagsDB from %s." % cfg.tags_db) sys.exit(1) debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index)) - debtags_index.load(debtags_db,reindex) + debtags_index.load(debtags_db,cfg.reindex) app_rec = Recommender(debtags_index) app_rec.set_strategy(ContentBasedStrategy()) diff --git a/src/config.py b/src/config.py index 1a28006..1cef015 100644 --- a/src/config.py +++ b/src/config.py @@ -42,6 +42,7 @@ class Config(): self.axi = "/var/lib/apt-xapian-index/index" self.axi_values = "/var/lib/apt-xapian-index/values" self.strategy = "ct" # defaults to the cheapest one + self.reindex = 0 def usage(self): """ @@ -57,6 +58,7 @@ class Config(): print " [ recommender ]" print " -t, --tagsdb=PATH Path to debtags database." print " -i, --tagsindex=PATH Path to debtags dedicated index." + print " -r, --force-reindex Force reindexing debtags database." print " -a, --axi=PATH Path to Apt-xapian-index." print " -s, --strategy=OPTION Recommendation strategy." print "" @@ -98,17 +100,18 @@ class Config(): self.tags_db = self.read_option('recommender', 'tags_db') self.tags_index = self.read_option('recommender', 'tags_index') + self.reindex = self.read_option('recommender', 'reindex') self.axi = self.read_option('recommender', 'axi') - short_options = "hdvo:c:t:i:a:s:" + short_options = "hdvo:c:t:i:ra:s:" long_options = ["help", "debug", "verbose", "output=", "config=", - "tagsdb=", "tagsindex=", "axi=", "strategy="] + "tagsdb=", "tagsindex=", "reindex", "axi=", "strategy="] try: opts, args = getopt.getopt(sys.argv[1:], short_options, long_options) - except getopt.GetoptError, err: - logging.error("Error parsing args: %s", str(err)) - print "Syntax error" + except getopt.GetoptError as error: + self.set_logger() + logging.error("Bad syntax: %s" % str(error)) self.usage() sys.exit() @@ -128,6 +131,8 @@ class Config(): self.tagsdb = p elif o in ("-i", "--tagsindex"): self.tagsindex = p + elif o in ("-r", "--force-reindex"): + self.reindex = 1 elif o in ("-a", "--axi"): self.axi = p + "/index" self.axi_values = p + "/values" diff --git a/src/data.py b/src/data.py index 94cc705..16c2b22 100644 --- a/src/data.py +++ b/src/data.py @@ -24,6 +24,7 @@ import xapian import axi from debian import debtags import logging +import hashlib class Item: """ """ @@ -74,33 +75,52 @@ class DebtagsDB(debtags.DB): class DebtagsIndex(xapian.WritableDatabase): def __init__(self,path): self.path = path + self.db_md5 = 0 - def load(self,debtags_db,reindex): + def load(self,debtags_db,reindex=0): """ Load an existing debtags index. """ self.debtags_db = debtags_db + db = open(debtags_db.path) + md5 = hashlib.md5() + md5.update(db.read()) + self.db_md5 = md5.hexdigest() + if not reindex: try: logging.info("Opening existing debtags xapian index at \'%s\'" % self.path) xapian.Database.__init__(self,self.path) + md5 = self.get_metadata("md5") + if not md5 == self.db_md5: + logging.info("Index must be updated.") + reindex = 1 except xapian.DatabaseError: - logging.error("Could not open debtags xapian index") + logging.info("Could not open index.") reindex =1 + if reindex: - self.reindex(debtags_db) + self.create_index(debtags_db) - def reindex(self,debtags_db): + def create_index(self,debtags_db): """ Create a xapian index for debtags info based on file 'debtags_db' and place it at 'index_path'. """ if not os.path.exists(self.path): os.makedirs(self.path) - logging.info("Creating new debtags xapian index at \'%s\'" % self.path) - xapian.WritableDatabase.__init__(self,self.path, - xapian.DB_CREATE_OR_OVERWRITE) + + try: + logging.info("Creating new xapian index for debtags at \'%s\'" % + self.path) + xapian.WritableDatabase.__init__(self,self.path, + xapian.DB_CREATE_OR_OVERWRITE) + except xapian.DatabaseError: + logging.critical("Could not create xapian index.") + exit(1) + + self.set_metadata("md5",self.db_md5) for pkg,tags in debtags_db.iter_packages_tags(): doc = xapian.Document() doc.set_data(pkg) diff --git a/src/strategy.py b/src/strategy.py index af82018..1aa7510 100644 --- a/src/strategy.py +++ b/src/strategy.py @@ -85,7 +85,12 @@ class ContentBasedStrategy(RecommendationStrategy): enquire = xapian.Enquire(recommender.items_repository) enquire.set_query(query) - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) + try: + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) + except xapian.DatabaseError as error: + logging.critical(error.get_msg()) + exit(1) + item_score = {} for m in mset: item_score[m.document.get_data()] = m.rank @@ -104,7 +109,12 @@ class AxiContentBasedStrategy(RecommendationStrategy): enquire = xapian.Enquire(recommender.items_repository) enquire.set_query(query) - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) + try: + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) + except xapian.DatabaseError as error: + logging.critical(error.get_msg()) + exit(1) + item_score = {} for m in mset: item_score[m.document.get_data()] = m.rank -- libgit2 0.21.2