Commit 43742989ec7b23b05f0d73198a287523fc52cdbc
1 parent
49638292
Exists in
master
and in
1 other branch
Introduced configuration option 'reindex', which can be set using '-r' or
'--force-reindex' on the command line. If reindex is not set, the program tries to reuse an existing index and creates a new one only if the debtags database has been modified or an error occurs when opening the index. (close #8)
Showing
4 changed files
with
50 additions
and
17 deletions
Show diff stats
src/app_recommender.py
| ... | ... | @@ -30,8 +30,6 @@ from strategy import * |
| 30 | 30 | from user import * |
| 31 | 31 | |
| 32 | 32 | def set_up_recommender(cfg): |
| 33 | - reindex = 1 #FIXME should do it only if necessary | |
| 34 | - | |
| 35 | 33 | if cfg.strategy == "cta": |
| 36 | 34 | axi_db = xapian.Database(cfg.axi) |
| 37 | 35 | app_rec = Recommender(axi_db) |
| ... | ... | @@ -43,7 +41,7 @@ def set_up_recommender(cfg): |
| 43 | 41 | logging.error("Could not load DebtagsDB from %s." % cfg.tags_db) |
| 44 | 42 | sys.exit(1) |
| 45 | 43 | debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index)) |
| 46 | - debtags_index.load(debtags_db,reindex) | |
| 44 | + debtags_index.load(debtags_db,cfg.reindex) | |
| 47 | 45 | app_rec = Recommender(debtags_index) |
| 48 | 46 | app_rec.set_strategy(ContentBasedStrategy()) |
| 49 | 47 | ... | ... |
src/config.py
| ... | ... | @@ -42,6 +42,7 @@ class Config(): |
| 42 | 42 | self.axi = "/var/lib/apt-xapian-index/index" |
| 43 | 43 | self.axi_values = "/var/lib/apt-xapian-index/values" |
| 44 | 44 | self.strategy = "ct" # defaults to the cheapest one |
| 45 | + self.reindex = 0 | |
| 45 | 46 | |
| 46 | 47 | def usage(self): |
| 47 | 48 | """ |
| ... | ... | @@ -57,6 +58,7 @@ class Config(): |
| 57 | 58 | print " [ recommender ]" |
| 58 | 59 | print " -t, --tagsdb=PATH Path to debtags database." |
| 59 | 60 | print " -i, --tagsindex=PATH Path to debtags dedicated index." |
| 61 | + print " -r, --force-reindex Force reindexing debtags database." | |
| 60 | 62 | print " -a, --axi=PATH Path to Apt-xapian-index." |
| 61 | 63 | print " -s, --strategy=OPTION Recommendation strategy." |
| 62 | 64 | print "" |
| ... | ... | @@ -98,17 +100,18 @@ class Config(): |
| 98 | 100 | |
| 99 | 101 | self.tags_db = self.read_option('recommender', 'tags_db') |
| 100 | 102 | self.tags_index = self.read_option('recommender', 'tags_index') |
| 103 | + self.reindex = self.read_option('recommender', 'reindex') | |
| 101 | 104 | self.axi = self.read_option('recommender', 'axi') |
| 102 | 105 | |
| 103 | - short_options = "hdvo:c:t:i:a:s:" | |
| 106 | + short_options = "hdvo:c:t:i:ra:s:" | |
| 104 | 107 | long_options = ["help", "debug", "verbose", "output=", "config=", |
| 105 | - "tagsdb=", "tagsindex=", "axi=", "strategy="] | |
| 108 | + "tagsdb=", "tagsindex=", "reindex", "axi=", "strategy="] | |
| 106 | 109 | try: |
| 107 | 110 | opts, args = getopt.getopt(sys.argv[1:], short_options, |
| 108 | 111 | long_options) |
| 109 | - except getopt.GetoptError, err: | |
| 110 | - logging.error("Error parsing args: %s", str(err)) | |
| 111 | - print "Syntax error" | |
| 112 | + except getopt.GetoptError as error: | |
| 113 | + self.set_logger() | |
| 114 | + logging.error("Bad syntax: %s" % str(error)) | |
| 112 | 115 | self.usage() |
| 113 | 116 | sys.exit() |
| 114 | 117 | |
| ... | ... | @@ -128,6 +131,8 @@ class Config(): |
| 128 | 131 | self.tagsdb = p |
| 129 | 132 | elif o in ("-i", "--tagsindex"): |
| 130 | 133 | self.tagsindex = p |
| 134 | + elif o in ("-r", "--force-reindex"): | |
| 135 | + self.reindex = 1 | |
| 131 | 136 | elif o in ("-a", "--axi"): |
| 132 | 137 | self.axi = p + "/index" |
| 133 | 138 | self.axi_values = p + "/values" | ... | ... |
src/data.py
| ... | ... | @@ -24,6 +24,7 @@ import xapian |
| 24 | 24 | import axi |
| 25 | 25 | from debian import debtags |
| 26 | 26 | import logging |
| 27 | +import hashlib | |
| 27 | 28 | |
| 28 | 29 | class Item: |
| 29 | 30 | """ """ |
| ... | ... | @@ -74,33 +75,52 @@ class DebtagsDB(debtags.DB): |
| 74 | 75 | class DebtagsIndex(xapian.WritableDatabase): |
| 75 | 76 | def __init__(self,path): |
| 76 | 77 | self.path = path |
| 78 | + self.db_md5 = 0 | |
| 77 | 79 | |
| 78 | - def load(self,debtags_db,reindex): | |
| 80 | + def load(self,debtags_db,reindex=0): | |
| 79 | 81 | """ |
| 80 | 82 | Load an existing debtags index. |
| 81 | 83 | """ |
| 82 | 84 | self.debtags_db = debtags_db |
| 85 | + db = open(debtags_db.path) | |
| 86 | + md5 = hashlib.md5() | |
| 87 | + md5.update(db.read()) | |
| 88 | + self.db_md5 = md5.hexdigest() | |
| 89 | + | |
| 83 | 90 | if not reindex: |
| 84 | 91 | try: |
| 85 | 92 | logging.info("Opening existing debtags xapian index at \'%s\'" |
| 86 | 93 | % self.path) |
| 87 | 94 | xapian.Database.__init__(self,self.path) |
| 95 | + md5 = self.get_metadata("md5") | |
| 96 | + if not md5 == self.db_md5: | |
| 97 | + logging.info("Index must be updated.") | |
| 98 | + reindex = 1 | |
| 88 | 99 | except xapian.DatabaseError: |
| 89 | - logging.error("Could not open debtags xapian index") | |
| 100 | + logging.info("Could not open index.") | |
| 90 | 101 | reindex =1 |
| 102 | + | |
| 91 | 103 | if reindex: |
| 92 | - self.reindex(debtags_db) | |
| 104 | + self.create_index(debtags_db) | |
| 93 | 105 | |
| 94 | - def reindex(self,debtags_db): | |
| 106 | + def create_index(self,debtags_db): | |
| 95 | 107 | """ |
| 96 | 108 | Create a xapian index for debtags info based on file 'debtags_db' and |
| 97 | 109 | place it at 'index_path'. |
| 98 | 110 | """ |
| 99 | 111 | if not os.path.exists(self.path): |
| 100 | 112 | os.makedirs(self.path) |
| 101 | - logging.info("Creating new debtags xapian index at \'%s\'" % self.path) | |
| 102 | - xapian.WritableDatabase.__init__(self,self.path, | |
| 103 | - xapian.DB_CREATE_OR_OVERWRITE) | |
| 113 | + | |
| 114 | + try: | |
| 115 | + logging.info("Creating new xapian index for debtags at \'%s\'" % | |
| 116 | + self.path) | |
| 117 | + xapian.WritableDatabase.__init__(self,self.path, | |
| 118 | + xapian.DB_CREATE_OR_OVERWRITE) | |
| 119 | + except xapian.DatabaseError: | |
| 120 | + logging.critical("Could not create xapian index.") | |
| 121 | + exit(1) | |
| 122 | + | |
| 123 | + self.set_metadata("md5",self.db_md5) | |
| 104 | 124 | for pkg,tags in debtags_db.iter_packages_tags(): |
| 105 | 125 | doc = xapian.Document() |
| 106 | 126 | doc.set_data(pkg) | ... | ... |
src/strategy.py
| ... | ... | @@ -85,7 +85,12 @@ class ContentBasedStrategy(RecommendationStrategy): |
| 85 | 85 | enquire = xapian.Enquire(recommender.items_repository) |
| 86 | 86 | enquire.set_query(query) |
| 87 | 87 | |
| 88 | - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
| 88 | + try: | |
| 89 | + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
| 90 | + except xapian.DatabaseError as error: | |
| 91 | + logging.critical(error.get_msg()) | |
| 92 | + exit(1) | |
| 93 | + | |
| 89 | 94 | item_score = {} |
| 90 | 95 | for m in mset: |
| 91 | 96 | item_score[m.document.get_data()] = m.rank |
| ... | ... | @@ -104,7 +109,12 @@ class AxiContentBasedStrategy(RecommendationStrategy): |
| 104 | 109 | enquire = xapian.Enquire(recommender.items_repository) |
| 105 | 110 | enquire.set_query(query) |
| 106 | 111 | |
| 107 | - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
| 112 | + try: | |
| 113 | + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
| 114 | + except xapian.DatabaseError as error: | |
| 115 | + logging.critical(error.get_msg()) | |
| 116 | + exit(1) | |
| 117 | + | |
| 108 | 118 | item_score = {} |
| 109 | 119 | for m in mset: |
| 110 | 120 | item_score[m.document.get_data()] = m.rank | ... | ... |