Commit 43742989ec7b23b05f0d73198a287523fc52cdbc

Authored by Tássia Camões Araújo
1 parent 49638292
Exists in master and in 1 other branch add_vagrant

Introduced configuration option 'reindex' that can be set using '-r' or

'--force-reindex' on the command line. If reindex is not set, the existing
index is reused and a new one is created only if the debtags database has been
modified or an error occurs when opening the index.
(close #8)
src/app_recommender.py
... ... @@ -30,8 +30,6 @@ from strategy import *
30 30 from user import *
31 31  
32 32 def set_up_recommender(cfg):
33   - reindex = 1 #FIXME should do it only if necessary
34   -
35 33 if cfg.strategy == "cta":
36 34 axi_db = xapian.Database(cfg.axi)
37 35 app_rec = Recommender(axi_db)
... ... @@ -43,7 +41,7 @@ def set_up_recommender(cfg):
43 41 logging.error("Could not load DebtagsDB from %s." % cfg.tags_db)
44 42 sys.exit(1)
45 43 debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index))
46   - debtags_index.load(debtags_db,reindex)
  44 + debtags_index.load(debtags_db,cfg.reindex)
47 45 app_rec = Recommender(debtags_index)
48 46 app_rec.set_strategy(ContentBasedStrategy())
49 47  
... ...
src/config.py
... ... @@ -42,6 +42,7 @@ class Config():
42 42 self.axi = "/var/lib/apt-xapian-index/index"
43 43 self.axi_values = "/var/lib/apt-xapian-index/values"
44 44 self.strategy = "ct" # defaults to the cheapest one
  45 + self.reindex = 0
45 46  
46 47 def usage(self):
47 48 """
... ... @@ -57,6 +58,7 @@ class Config():
57 58 print " [ recommender ]"
58 59 print " -t, --tagsdb=PATH Path to debtags database."
59 60 print " -i, --tagsindex=PATH Path to debtags dedicated index."
  61 + print " -r, --force-reindex Force reindexing debtags database."
60 62 print " -a, --axi=PATH Path to Apt-xapian-index."
61 63 print " -s, --strategy=OPTION Recommendation strategy."
62 64 print ""
... ... @@ -98,17 +100,18 @@ class Config():
98 100  
99 101 self.tags_db = self.read_option('recommender', 'tags_db')
100 102 self.tags_index = self.read_option('recommender', 'tags_index')
  103 + self.reindex = self.read_option('recommender', 'reindex')
101 104 self.axi = self.read_option('recommender', 'axi')
102 105  
103   - short_options = "hdvo:c:t:i:a:s:"
  106 + short_options = "hdvo:c:t:i:ra:s:"
104 107 long_options = ["help", "debug", "verbose", "output=", "config=",
105   - "tagsdb=", "tagsindex=", "axi=", "strategy="]
  108 + "tagsdb=", "tagsindex=", "reindex", "axi=", "strategy="]
106 109 try:
107 110 opts, args = getopt.getopt(sys.argv[1:], short_options,
108 111 long_options)
109   - except getopt.GetoptError, err:
110   - logging.error("Error parsing args: %s", str(err))
111   - print "Syntax error"
  112 + except getopt.GetoptError as error:
  113 + self.set_logger()
  114 + logging.error("Bad syntax: %s" % str(error))
112 115 self.usage()
113 116 sys.exit()
114 117  
... ... @@ -128,6 +131,8 @@ class Config():
128 131 self.tagsdb = p
129 132 elif o in ("-i", "--tagsindex"):
130 133 self.tagsindex = p
  134 + elif o in ("-r", "--force-reindex"):
  135 + self.reindex = 1
131 136 elif o in ("-a", "--axi"):
132 137 self.axi = p + "/index"
133 138 self.axi_values = p + "/values"
... ...
src/data.py
... ... @@ -24,6 +24,7 @@ import xapian
24 24 import axi
25 25 from debian import debtags
26 26 import logging
  27 +import hashlib
27 28  
28 29 class Item:
29 30 """ """
... ... @@ -74,33 +75,52 @@ class DebtagsDB(debtags.DB):
74 75 class DebtagsIndex(xapian.WritableDatabase):
75 76 def __init__(self,path):
76 77 self.path = path
  78 + self.db_md5 = 0
77 79  
78   - def load(self,debtags_db,reindex):
  80 + def load(self,debtags_db,reindex=0):
79 81 """
80 82 Load an existing debtags index.
81 83 """
82 84 self.debtags_db = debtags_db
  85 + db = open(debtags_db.path)
  86 + md5 = hashlib.md5()
  87 + md5.update(db.read())
  88 + self.db_md5 = md5.hexdigest()
  89 +
83 90 if not reindex:
84 91 try:
85 92 logging.info("Opening existing debtags xapian index at \'%s\'"
86 93 % self.path)
87 94 xapian.Database.__init__(self,self.path)
  95 + md5 = self.get_metadata("md5")
  96 + if not md5 == self.db_md5:
  97 + logging.info("Index must be updated.")
  98 + reindex = 1
88 99 except xapian.DatabaseError:
89   - logging.error("Could not open debtags xapian index")
  100 + logging.info("Could not open index.")
90 101 reindex =1
  102 +
91 103 if reindex:
92   - self.reindex(debtags_db)
  104 + self.create_index(debtags_db)
93 105  
94   - def reindex(self,debtags_db):
  106 + def create_index(self,debtags_db):
95 107 """
96 108 Create a xapian index for debtags info based on file 'debtags_db' and
97 109 place it at 'index_path'.
98 110 """
99 111 if not os.path.exists(self.path):
100 112 os.makedirs(self.path)
101   - logging.info("Creating new debtags xapian index at \'%s\'" % self.path)
102   - xapian.WritableDatabase.__init__(self,self.path,
103   - xapian.DB_CREATE_OR_OVERWRITE)
  113 +
  114 + try:
  115 + logging.info("Creating new xapian index for debtags at \'%s\'" %
  116 + self.path)
  117 + xapian.WritableDatabase.__init__(self,self.path,
  118 + xapian.DB_CREATE_OR_OVERWRITE)
  119 + except xapian.DatabaseError:
  120 + logging.critical("Could not create xapian index.")
  121 + exit(1)
  122 +
  123 + self.set_metadata("md5",self.db_md5)
104 124 for pkg,tags in debtags_db.iter_packages_tags():
105 125 doc = xapian.Document()
106 126 doc.set_data(pkg)
... ...
src/strategy.py
... ... @@ -85,7 +85,12 @@ class ContentBasedStrategy(RecommendationStrategy):
85 85 enquire = xapian.Enquire(recommender.items_repository)
86 86 enquire.set_query(query)
87 87  
88   - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
  88 + try:
  89 + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
  90 + except xapian.DatabaseError as error:
  91 + logging.critical(error.get_msg())
  92 + exit(1)
  93 +
89 94 item_score = {}
90 95 for m in mset:
91 96 item_score[m.document.get_data()] = m.rank
... ... @@ -104,7 +109,12 @@ class AxiContentBasedStrategy(RecommendationStrategy):
104 109 enquire = xapian.Enquire(recommender.items_repository)
105 110 enquire.set_query(query)
106 111  
107   - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
  112 + try:
  113 + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
  114 + except xapian.DatabaseError as error:
  115 + logging.critical(error.get_msg())
  116 + exit(1)
  117 +
108 118 item_score = {}
109 119 for m in mset:
110 120 item_score[m.document.get_data()] = m.rank
... ...