Commit 43742989ec7b23b05f0d73198a287523fc52cdbc
1 parent
49638292
Exists in
master
and in
1 other branch
Introduced configuration option 'reindex', which can be set using '-r' or
'--force-reindex' on the command line. If reindex is not set, the program tries to reuse an existing index and creates a new one only if the debtags database has been modified or an error occurs when opening the index. (close #8)
Showing
4 changed files
with
50 additions
and
17 deletions
Show diff stats
src/app_recommender.py
... | ... | @@ -30,8 +30,6 @@ from strategy import * |
30 | 30 | from user import * |
31 | 31 | |
32 | 32 | def set_up_recommender(cfg): |
33 | - reindex = 1 #FIXME should do it only if necessary | |
34 | - | |
35 | 33 | if cfg.strategy == "cta": |
36 | 34 | axi_db = xapian.Database(cfg.axi) |
37 | 35 | app_rec = Recommender(axi_db) |
... | ... | @@ -43,7 +41,7 @@ def set_up_recommender(cfg): |
43 | 41 | logging.error("Could not load DebtagsDB from %s." % cfg.tags_db) |
44 | 42 | sys.exit(1) |
45 | 43 | debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index)) |
46 | - debtags_index.load(debtags_db,reindex) | |
44 | + debtags_index.load(debtags_db,cfg.reindex) | |
47 | 45 | app_rec = Recommender(debtags_index) |
48 | 46 | app_rec.set_strategy(ContentBasedStrategy()) |
49 | 47 | ... | ... |
src/config.py
... | ... | @@ -42,6 +42,7 @@ class Config(): |
42 | 42 | self.axi = "/var/lib/apt-xapian-index/index" |
43 | 43 | self.axi_values = "/var/lib/apt-xapian-index/values" |
44 | 44 | self.strategy = "ct" # defaults to the cheapest one |
45 | + self.reindex = 0 | |
45 | 46 | |
46 | 47 | def usage(self): |
47 | 48 | """ |
... | ... | @@ -57,6 +58,7 @@ class Config(): |
57 | 58 | print " [ recommender ]" |
58 | 59 | print " -t, --tagsdb=PATH Path to debtags database." |
59 | 60 | print " -i, --tagsindex=PATH Path to debtags dedicated index." |
61 | + print " -r, --force-reindex Force reindexing debtags database." | |
60 | 62 | print " -a, --axi=PATH Path to Apt-xapian-index." |
61 | 63 | print " -s, --strategy=OPTION Recommendation strategy." |
62 | 64 | print "" |
... | ... | @@ -98,17 +100,18 @@ class Config(): |
98 | 100 | |
99 | 101 | self.tags_db = self.read_option('recommender', 'tags_db') |
100 | 102 | self.tags_index = self.read_option('recommender', 'tags_index') |
103 | + self.reindex = self.read_option('recommender', 'reindex') | |
101 | 104 | self.axi = self.read_option('recommender', 'axi') |
102 | 105 | |
103 | - short_options = "hdvo:c:t:i:a:s:" | |
106 | + short_options = "hdvo:c:t:i:ra:s:" | |
104 | 107 | long_options = ["help", "debug", "verbose", "output=", "config=", |
105 | - "tagsdb=", "tagsindex=", "axi=", "strategy="] | |
108 | + "tagsdb=", "tagsindex=", "reindex", "axi=", "strategy="] | |
106 | 109 | try: |
107 | 110 | opts, args = getopt.getopt(sys.argv[1:], short_options, |
108 | 111 | long_options) |
109 | - except getopt.GetoptError, err: | |
110 | - logging.error("Error parsing args: %s", str(err)) | |
111 | - print "Syntax error" | |
112 | + except getopt.GetoptError as error: | |
113 | + self.set_logger() | |
114 | + logging.error("Bad syntax: %s" % str(error)) | |
112 | 115 | self.usage() |
113 | 116 | sys.exit() |
114 | 117 | |
... | ... | @@ -128,6 +131,8 @@ class Config(): |
128 | 131 | self.tagsdb = p |
129 | 132 | elif o in ("-i", "--tagsindex"): |
130 | 133 | self.tagsindex = p |
134 | + elif o in ("-r", "--force-reindex"): | |
135 | + self.reindex = 1 | |
131 | 136 | elif o in ("-a", "--axi"): |
132 | 137 | self.axi = p + "/index" |
133 | 138 | self.axi_values = p + "/values" | ... | ... |
src/data.py
... | ... | @@ -24,6 +24,7 @@ import xapian |
24 | 24 | import axi |
25 | 25 | from debian import debtags |
26 | 26 | import logging |
27 | +import hashlib | |
27 | 28 | |
28 | 29 | class Item: |
29 | 30 | """ """ |
... | ... | @@ -74,33 +75,52 @@ class DebtagsDB(debtags.DB): |
74 | 75 | class DebtagsIndex(xapian.WritableDatabase): |
75 | 76 | def __init__(self,path): |
76 | 77 | self.path = path |
78 | + self.db_md5 = 0 | |
77 | 79 | |
78 | - def load(self,debtags_db,reindex): | |
80 | + def load(self,debtags_db,reindex=0): | |
79 | 81 | """ |
80 | 82 | Load an existing debtags index. |
81 | 83 | """ |
82 | 84 | self.debtags_db = debtags_db |
85 | + db = open(debtags_db.path) | |
86 | + md5 = hashlib.md5() | |
87 | + md5.update(db.read()) | |
88 | + self.db_md5 = md5.hexdigest() | |
89 | + | |
83 | 90 | if not reindex: |
84 | 91 | try: |
85 | 92 | logging.info("Opening existing debtags xapian index at \'%s\'" |
86 | 93 | % self.path) |
87 | 94 | xapian.Database.__init__(self,self.path) |
95 | + md5 = self.get_metadata("md5") | |
96 | + if not md5 == self.db_md5: | |
97 | + logging.info("Index must be updated.") | |
98 | + reindex = 1 | |
88 | 99 | except xapian.DatabaseError: |
89 | - logging.error("Could not open debtags xapian index") | |
100 | + logging.info("Could not open index.") | |
90 | 101 | reindex =1 |
102 | + | |
91 | 103 | if reindex: |
92 | - self.reindex(debtags_db) | |
104 | + self.create_index(debtags_db) | |
93 | 105 | |
94 | - def reindex(self,debtags_db): | |
106 | + def create_index(self,debtags_db): | |
95 | 107 | """ |
96 | 108 | Create a xapian index for debtags info based on file 'debtags_db' and |
97 | 109 | place it at 'index_path'. |
98 | 110 | """ |
99 | 111 | if not os.path.exists(self.path): |
100 | 112 | os.makedirs(self.path) |
101 | - logging.info("Creating new debtags xapian index at \'%s\'" % self.path) | |
102 | - xapian.WritableDatabase.__init__(self,self.path, | |
103 | - xapian.DB_CREATE_OR_OVERWRITE) | |
113 | + | |
114 | + try: | |
115 | + logging.info("Creating new xapian index for debtags at \'%s\'" % | |
116 | + self.path) | |
117 | + xapian.WritableDatabase.__init__(self,self.path, | |
118 | + xapian.DB_CREATE_OR_OVERWRITE) | |
119 | + except xapian.DatabaseError: | |
120 | + logging.critical("Could not create xapian index.") | |
121 | + exit(1) | |
122 | + | |
123 | + self.set_metadata("md5",self.db_md5) | |
104 | 124 | for pkg,tags in debtags_db.iter_packages_tags(): |
105 | 125 | doc = xapian.Document() |
106 | 126 | doc.set_data(pkg) | ... | ... |
src/strategy.py
... | ... | @@ -85,7 +85,12 @@ class ContentBasedStrategy(RecommendationStrategy): |
85 | 85 | enquire = xapian.Enquire(recommender.items_repository) |
86 | 86 | enquire.set_query(query) |
87 | 87 | |
88 | - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
88 | + try: | |
89 | + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
90 | + except xapian.DatabaseError as error: | |
91 | + logging.critical(error.get_msg()) | |
92 | + exit(1) | |
93 | + | |
89 | 94 | item_score = {} |
90 | 95 | for m in mset: |
91 | 96 | item_score[m.document.get_data()] = m.rank |
... | ... | @@ -104,7 +109,12 @@ class AxiContentBasedStrategy(RecommendationStrategy): |
104 | 109 | enquire = xapian.Enquire(recommender.items_repository) |
105 | 110 | enquire.set_query(query) |
106 | 111 | |
107 | - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
112 | + try: | |
113 | + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
114 | + except xapian.DatabaseError as error: | |
115 | + logging.critical(error.get_msg()) | |
116 | + exit(1) | |
117 | + | |
108 | 118 | item_score = {} |
109 | 119 | for m in mset: |
110 | 120 | item_score[m.document.get_data()] = m.rank | ... | ... |