Commit 11dce5d56c8d60e16748c35ba01a9cd28b7504b2

Authored by Tássia Camões Araújo
1 parent 8859fa78
Exists in master and in 1 other branch add_vagrant

Code refactoring and amenities

- Merged DebtagsDB and DebtagsIndex into TagsXapianIndex
- Moved recommender setup to class initialization
- Handling errors with try, except and raise statements
- Saving and logging computation time
src/app_recommender.py
... ... @@ -20,6 +20,8 @@
20 20 import os
21 21 import sys
22 22 import logging
  23 +import datetime
  24 +from datetime import timedelta
23 25  
24 26 from config import *
25 27 from data import *
... ... @@ -28,27 +30,24 @@ from similarity_measure import *
28 30 from recommender import *
29 31 from strategy import *
30 32 from user import *
  33 +from error import Error
31 34  
32   -def set_up_recommender(cfg):
33   - if cfg.strategy == "cta":
34   - axi_db = xapian.Database(cfg.axi)
35   - app_rec = Recommender(axi_db)
36   - app_rec.set_strategy(AxiContentBasedStrategy())
  35 +if __name__ == '__main__':
  36 + try:
  37 + cfg = Config()
  38 + rec = Recommender(cfg)
  39 + user = LocalSystem()
37 40  
38   - elif cfg.strategy == "ct":
39   - debtags_db = DebtagsDB(cfg.tags_db)
40   - if not debtags_db.load():
41   - logging.error("Could not load DebtagsDB from %s." % cfg.tags_db)
42   - sys.exit(1)
43   - debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index))
44   - debtags_index.load(debtags_db,cfg.reindex)
45   - app_rec = Recommender(debtags_index)
46   - app_rec.set_strategy(ContentBasedStrategy())
  41 + begin_time = datetime.datetime.now()
  42 + logging.debug("Recommendation computation started at %s" % begin_time)
47 43  
48   - return app_rec
  44 + print rec.get_recommendation(user)
  45 +
  46 + end_time = datetime.datetime.now()
  47 + logging.debug("Recommendation computation completed at %s" % end_time)
  48 + delta = end_time - begin_time
  49 + logging.info("Time elapsed: %d seconds." % delta.seconds)
  50 +
  51 + except Error:
  52 + logging.critical("Aborting proccess. Use '--debug' for more details.")
49 53  
50   -if __name__ == '__main__':
51   - cfg = Config()
52   - rec = set_up_recommender(cfg)
53   - user = LocalSystem()
54   - print rec.get_recommendation(user)
... ...
src/config.py
... ... @@ -50,7 +50,7 @@ class Config():
50 50 """
51 51 Print usage help.
52 52 """
53   - print " [ general ]"
  53 + print "\n [ general ]"
54 54 print " -h, --help Print this help"
55 55 print " -d, --debug Set logging level to debug."
56 56 print " -v, --verbose Set logging level to verbose."
... ... @@ -130,9 +130,9 @@ class Config():
130 130 elif o in ("-c", "--config"):
131 131 self.config = p
132 132 elif o in ("-t", "--tagsdb"):
133   - self.tagsdb = p
  133 + self.tags_db = p
134 134 elif o in ("-i", "--tagsindex"):
135   - self.tagsindex = p
  135 + self.tags_index = p
136 136 elif o in ("-r", "--force-reindex"):
137 137 self.reindex = 1
138 138 elif o in ("-a", "--axi"):
... ...
src/cross_validation.py
... ... @@ -39,7 +39,7 @@ def set_up_recommender(cfg):
39 39 debtags_db = DebtagsDB(cfg.tags_db)
40 40 if not debtags_db.load():
41 41 logging.error("Could not load DebtagsDB from %s." % cfg.tags_db)
42   - sys.exit(1)
  42 + raise Error
43 43 debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index))
44 44 debtags_index.load(debtags_db,cfg.reindex)
45 45 app_rec = Recommender(debtags_index)
... ...
src/data.py
... ... @@ -26,6 +26,8 @@ from debian import debtags
26 26 import logging
27 27 import hashlib
28 28  
  29 +from error import Error
  30 +
29 31 class Item:
30 32 """ """
31 33  
... ... @@ -51,46 +53,46 @@ class Singleton(object):
51 53 cls._inst = object.__new__(cls)
52 54 return cls._inst
53 55  
54   -class DebtagsDB(debtags.DB,Singleton):
55   - def __init__(self,path):
56   - self.path = path
  56 +class TagsXapianIndex(xapian.WritableDatabase,Singleton):
  57 + def __init__(self,cfg):
  58 + self.path = os.path.expanduser(cfg.tags_index)
  59 + self.db_path = os.path.expanduser(cfg.tags_db)
  60 + self.debtags_db = debtags.DB()
57 61  
58   - def load(self):
  62 + db = open(self.db_path)
  63 + md5 = hashlib.md5()
  64 + md5.update(db.read())
  65 + self.db_md5 = md5.hexdigest()
  66 +
  67 + self.load_index(cfg.reindex)
  68 +
  69 + def load_db(self):
59 70 tag_filter = re.compile(r"^special::.+$|^.+::TODO$")
60 71 try:
61   - self.read(open(self.path, "r"), lambda x: not tag_filter.match(x))
62   - return 1
63   - except IOError:
64   - logging.error("IOError: could not open debtags file \'%s\'" %
65   - self.path)
66   - return 0
67   -
68   - def get_relevant_tags(self,pkgs_list,qtd_of_tags):
  72 + db_file = open(self.db_path, "r")
  73 + self.debtags_db.read(db_file,lambda x: not tag_filter.match(x))
  74 + except IOError: #FIXME try is not catching this
  75 + logging.error("Could not load DebtagsDB from %s." % self.db_path)
  76 + raise Error
  77 +
  78 + def relevant_tags_from_db(self,pkgs_list,qtd_of_tags):
69 79 """
70 80 Return most relevant tags considering a list of packages.
71 81 """
72   - relevant_db = self.choose_packages(pkgs_list)
73   - relevance_index = debtags.relevance_index_function(self,relevant_db)
  82 + if not self.debtags_db.package_count():
  83 + self.load_db()
  84 + relevant_db = self.debtags_db.choose_packages(pkgs_list)
  85 + relevance_index = debtags.relevance_index_function(self.debtags_db,
  86 + relevant_db)
74 87 sorted_relevant_tags = sorted(relevant_db.iter_tags(),
75 88 lambda a, b: cmp(relevance_index(a),
76 89 relevance_index(b)))
77 90 return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:]))
78 91  
79   -class DebtagsIndex(xapian.WritableDatabase,Singleton):
80   - def __init__(self,path):
81   - self.path = path
82   - self.db_md5 = 0
83   -
84   - def load(self,debtags_db,reindex=0):
  92 + def load_index(self,reindex):
85 93 """
86 94 Load an existing debtags index.
87 95 """
88   - self.debtags_db = debtags_db
89   - db = open(debtags_db.path)
90   - md5 = hashlib.md5()
91   - md5.update(db.read())
92   - self.db_md5 = md5.hexdigest()
93   -
94 96 if not reindex:
95 97 try:
96 98 logging.info("Opening existing debtags xapian index at \'%s\'"
... ... @@ -105,11 +107,11 @@ class DebtagsIndex(xapian.WritableDatabase,Singleton):
105 107 reindex =1
106 108  
107 109 if reindex:
108   - self.create_index(debtags_db)
  110 + self.new_index()
109 111  
110   - def create_index(self,debtags_db):
  112 + def new_index(self):
111 113 """
112   - Create a xapian index for debtags info based on file 'debtags_db' and
  114 + Create a xapian index for debtags info based on 'debtags_db' and
113 115 place it at 'index_path'.
114 116 """
115 117 if not os.path.exists(self.path):
... ... @@ -122,10 +124,12 @@ class DebtagsIndex(xapian.WritableDatabase,Singleton):
122 124 xapian.DB_CREATE_OR_OVERWRITE)
123 125 except xapian.DatabaseError:
124 126 logging.critical("Could not create xapian index.")
125   - exit(1)
  127 + raise Error
126 128  
  129 + self.load_db()
127 130 self.set_metadata("md5",self.db_md5)
128   - for pkg,tags in debtags_db.iter_packages_tags():
  131 +
  132 + for pkg,tags in self.debtags_db.iter_packages_tags():
129 133 doc = xapian.Document()
130 134 doc.set_data(pkg)
131 135 for tag in tags:
... ...
src/error.py 0 → 100644
... ... @@ -0,0 +1,3 @@
  1 +class Error(Exception):
  2 + """Base class for exceptions."""
  3 + pass
... ...
src/evaluation.py
... ... @@ -105,7 +105,7 @@ class CrossValidation:
105 105 self.partition_proportion = partition_proportion
106 106 else:
107 107 logging.critical("A proporcao de particao deve ser um avalor ente 0 e 1.")
108   - exit(1)
  108 + raise Error
109 109 self.rounds = rounds
110 110 self.recommender = rec
111 111 self.metrics_list = metrics_list
... ... @@ -143,7 +143,7 @@ class CrossValidation:
143 143 random_key = random.choice(cross_item_score.keys())
144 144 else:
145 145 logging.critical("cross_item_score vazio")
146   - exit(1)
  146 + raise Error
147 147 round_partition[random_key] = cross_item_score.pop(random_key)
148 148 round_user = User(cross_item_score)
149 149 predicted_result = self.recommender.get_recommendation(round_user)
... ...
src/recommender.py
... ... @@ -18,6 +18,9 @@
18 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19  
20 20 from operator import itemgetter
  21 +from data import *
  22 +from strategy import *
  23 +from error import Error
21 24  
22 25 class RecommendationResult:
23 26 def __init__(self,item_score,size):
... ... @@ -37,11 +40,22 @@ class RecommendationResult:
37 40  
38 41 class Recommender:
39 42 """ """
40   - def __init__(self,items_repository,users_repository=None,
41   - knowledge_repository=None):
42   - self.items_repository = items_repository
43   - self.users_repository = users_repository
44   - self.knowledge_repository = knowledge_repository
  43 + def __init__(self,cfg):
  44 + try:
  45 + strategy = "self."+cfg.strategy+"(cfg)"
  46 + exec(strategy)
  47 + except (NameError, AttributeError, SyntaxError):
  48 + logging.critical("Could not perform recommendation strategy '%s'" %
  49 + cfg.strategy)
  50 + raise Error
  51 +
  52 + def ct(self,cfg):
  53 + self.items_repository = TagsXapianIndex(cfg)
  54 + self.strategy = ContentBasedStrategy()
  55 +
  56 + def cta(self,cfg):
  57 + self.items_repository = xapian.Database(cfg.axi)
  58 + self.strategy = AxiContentBasedStrategy()
45 59  
46 60 def set_strategy(self,strategy):
47 61 """ """
... ...
src/strategy.py
... ... @@ -20,7 +20,7 @@
20 20 import os, re
21 21 import xapian
22 22 from data import *
23   -from recommender import *
  23 +import recommender
24 24  
25 25 class ReputationHeuristic:
26 26 """
... ... @@ -75,50 +75,50 @@ class ContentBasedStrategy(RecommendationStrategy):
75 75 """
76 76 Content-based recommendation strategy.
77 77 """
78   - def run(self,recommender,user):
  78 + def run(self,rec,user):
79 79 """
80 80 Perform recommendation strategy.
81 81 """
82   - profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50)
  82 + profile = user.txi_tag_profile(rec.items_repository,50)
83 83 qp = xapian.QueryParser()
84 84 query = qp.parse_query(profile)
85   - enquire = xapian.Enquire(recommender.items_repository)
  85 + enquire = xapian.Enquire(rec.items_repository)
86 86 enquire.set_query(query)
87 87  
88 88 try:
89 89 mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
90 90 except xapian.DatabaseError as error:
91 91 logging.critical(error.get_msg())
92   - exit(1)
  92 + raise Error
93 93  
94 94 item_score = {}
95 95 for m in mset:
96 96 item_score[m.document.get_data()] = m.rank
97   - return RecommendationResult(item_score,20)
  97 + return recommender.RecommendationResult(item_score,20)
98 98  
99 99 class AxiContentBasedStrategy(RecommendationStrategy):
100 100 """
101 101 Content-based recommendation strategy based on Apt-xapian-index.
102 102 """
103   - def run(self,recommender,user):
  103 + def run(self,rec,user):
104 104 """
105 105 Perform recommendation strategy.
106 106 """
107   - profile = user.axi_tag_profile(recommender.items_repository,50)
  107 + profile = user.axi_tag_profile(rec.items_repository,50)
108 108 query = xapian.Query(xapian.Query.OP_OR,profile)
109   - enquire = xapian.Enquire(recommender.items_repository)
  109 + enquire = xapian.Enquire(rec.items_repository)
110 110 enquire.set_query(query)
111 111  
112 112 try:
113 113 mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
114 114 except xapian.DatabaseError as error:
115 115 logging.critical(error.get_msg())
116   - exit(1)
  116 + raise Error
117 117  
118 118 item_score = {}
119 119 for m in mset:
120 120 item_score[m.document.get_data()] = m.rank
121   - return RecommendationResult(item_score,20)
  121 + return recommender.RecommendationResult(item_score,20)
122 122  
123 123 class ColaborativeStrategy(RecommendationStrategy):
124 124 """
... ...
src/user.py
... ... @@ -39,12 +39,12 @@ class User:
39 39 def items(self):
40 40 return self.item_score.keys()
41 41  
42   - def axi_tag_profile(self,xapian_db,profile_size):
  42 + def axi_tag_profile(self,apt_xapian_index,profile_size):
43 43 terms = []
44 44 for item in self.items():
45 45 terms.append("XP"+item)
46 46 query = xapian.Query(xapian.Query.OP_OR, terms)
47   - enquire = xapian.Enquire(xapian_db)
  47 + enquire = xapian.Enquire(apt_xapian_index)
48 48 enquire.set_query(query)
49 49 rset = xapian.RSet()
50 50 for m in enquire.get_mset(0,30000): #consider all matches
... ... @@ -56,8 +56,9 @@ class User:
56 56 logging.debug("%.2f %s" % (res.weight,res.term[2:]))
57 57 return profile
58 58  
59   - def debtags_tag_profile(self,debtags_db,profile_size):
60   - return debtags_db.get_relevant_tags(self.items(),profile_size)
  59 + def txi_tag_profile(self,tags_xapian_index,profile_size):
  60 + return tags_xapian_index.relevant_tags_from_db(self.items(),
  61 + profile_size)
61 62  
62 63 class LocalSystem(User):
63 64 """ """
... ...