Commit b88924a2567cad4aa1b74c779e354ee43cab44db

Authored by Tássia Camões Araújo
1 parent 03c81458
Exists in master and in 1 other branch add_vagrant

Code refactoring and amenities

- Merged DebtagsDB and DebtagsIndex into TagsXapianIndex
- Moved recommender setup to class initialization
- Handling errors with try, except and raise statements
- Saving and logging computation time
src/app_recommender.py
@@ -20,6 +20,8 @@ @@ -20,6 +20,8 @@
20 import os 20 import os
21 import sys 21 import sys
22 import logging 22 import logging
  23 +import datetime
  24 +from datetime import timedelta
23 25
24 from config import * 26 from config import *
25 from data import * 27 from data import *
@@ -28,27 +30,24 @@ from similarity_measure import * @@ -28,27 +30,24 @@ from similarity_measure import *
28 from recommender import * 30 from recommender import *
29 from strategy import * 31 from strategy import *
30 from user import * 32 from user import *
  33 +from error import Error
31 34
32 -def set_up_recommender(cfg):  
33 - if cfg.strategy == "cta":  
34 - axi_db = xapian.Database(cfg.axi)  
35 - app_rec = Recommender(axi_db)  
36 - app_rec.set_strategy(AxiContentBasedStrategy()) 35 +if __name__ == '__main__':
  36 + try:
  37 + cfg = Config()
  38 + rec = Recommender(cfg)
  39 + user = LocalSystem()
37 40
38 - elif cfg.strategy == "ct":  
39 - debtags_db = DebtagsDB(cfg.tags_db)  
40 - if not debtags_db.load():  
41 - logging.error("Could not load DebtagsDB from %s." % cfg.tags_db)  
42 - sys.exit(1)  
43 - debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index))  
44 - debtags_index.load(debtags_db,cfg.reindex)  
45 - app_rec = Recommender(debtags_index)  
46 - app_rec.set_strategy(ContentBasedStrategy()) 41 + begin_time = datetime.datetime.now()
  42 + logging.debug("Recommendation computation started at %s" % begin_time)
47 43
48 - return app_rec 44 + print rec.get_recommendation(user)
  45 +
  46 + end_time = datetime.datetime.now()
  47 + logging.debug("Recommendation computation completed at %s" % end_time)
  48 + delta = end_time - begin_time
  49 + logging.info("Time elapsed: %d seconds." % delta.seconds)
  50 +
  51 + except Error:
  52 + logging.critical("Aborting proccess. Use '--debug' for more details.")
49 53
50 -if __name__ == '__main__':  
51 - cfg = Config()  
52 - rec = set_up_recommender(cfg)  
53 - user = LocalSystem()  
54 - print rec.get_recommendation(user)  
@@ -50,7 +50,7 @@ class Config(): @@ -50,7 +50,7 @@ class Config():
50 """ 50 """
51 Print usage help. 51 Print usage help.
52 """ 52 """
53 - print " [ general ]" 53 + print "\n [ general ]"
54 print " -h, --help Print this help" 54 print " -h, --help Print this help"
55 print " -d, --debug Set logging level to debug." 55 print " -d, --debug Set logging level to debug."
56 print " -v, --verbose Set logging level to verbose." 56 print " -v, --verbose Set logging level to verbose."
@@ -130,9 +130,9 @@ class Config(): @@ -130,9 +130,9 @@ class Config():
130 elif o in ("-c", "--config"): 130 elif o in ("-c", "--config"):
131 self.config = p 131 self.config = p
132 elif o in ("-t", "--tagsdb"): 132 elif o in ("-t", "--tagsdb"):
133 - self.tagsdb = p 133 + self.tags_db = p
134 elif o in ("-i", "--tagsindex"): 134 elif o in ("-i", "--tagsindex"):
135 - self.tagsindex = p 135 + self.tags_index = p
136 elif o in ("-r", "--force-reindex"): 136 elif o in ("-r", "--force-reindex"):
137 self.reindex = 1 137 self.reindex = 1
138 elif o in ("-a", "--axi"): 138 elif o in ("-a", "--axi"):
src/cross_validation.py
@@ -39,7 +39,7 @@ def set_up_recommender(cfg): @@ -39,7 +39,7 @@ def set_up_recommender(cfg):
39 debtags_db = DebtagsDB(cfg.tags_db) 39 debtags_db = DebtagsDB(cfg.tags_db)
40 if not debtags_db.load(): 40 if not debtags_db.load():
41 logging.error("Could not load DebtagsDB from %s." % cfg.tags_db) 41 logging.error("Could not load DebtagsDB from %s." % cfg.tags_db)
42 - sys.exit(1) 42 + raise Error
43 debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index)) 43 debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index))
44 debtags_index.load(debtags_db,cfg.reindex) 44 debtags_index.load(debtags_db,cfg.reindex)
45 app_rec = Recommender(debtags_index) 45 app_rec = Recommender(debtags_index)
@@ -26,6 +26,8 @@ from debian import debtags @@ -26,6 +26,8 @@ from debian import debtags
26 import logging 26 import logging
27 import hashlib 27 import hashlib
28 28
  29 +from error import Error
  30 +
29 class Item: 31 class Item:
30 """ """ 32 """ """
31 33
@@ -51,46 +53,46 @@ class Singleton(object): @@ -51,46 +53,46 @@ class Singleton(object):
51 cls._inst = object.__new__(cls) 53 cls._inst = object.__new__(cls)
52 return cls._inst 54 return cls._inst
53 55
54 -class DebtagsDB(debtags.DB,Singleton):  
55 - def __init__(self,path):  
56 - self.path = path 56 +class TagsXapianIndex(xapian.WritableDatabase,Singleton):
  57 + def __init__(self,cfg):
  58 + self.path = os.path.expanduser(cfg.tags_index)
  59 + self.db_path = os.path.expanduser(cfg.tags_db)
  60 + self.debtags_db = debtags.DB()
57 61
58 - def load(self): 62 + db = open(self.db_path)
  63 + md5 = hashlib.md5()
  64 + md5.update(db.read())
  65 + self.db_md5 = md5.hexdigest()
  66 +
  67 + self.load_index(cfg.reindex)
  68 +
  69 + def load_db(self):
59 tag_filter = re.compile(r"^special::.+$|^.+::TODO$") 70 tag_filter = re.compile(r"^special::.+$|^.+::TODO$")
60 try: 71 try:
61 - self.read(open(self.path, "r"), lambda x: not tag_filter.match(x))  
62 - return 1  
63 - except IOError:  
64 - logging.error("IOError: could not open debtags file \'%s\'" %  
65 - self.path)  
66 - return 0  
67 -  
68 - def get_relevant_tags(self,pkgs_list,qtd_of_tags): 72 + db_file = open(self.db_path, "r")
  73 + self.debtags_db.read(db_file,lambda x: not tag_filter.match(x))
  74 + except IOError: #FIXME try is not catching this
  75 + logging.error("Could not load DebtagsDB from %s." % self.db_path)
  76 + raise Error
  77 +
  78 + def relevant_tags_from_db(self,pkgs_list,qtd_of_tags):
69 """ 79 """
70 Return most relevant tags considering a list of packages. 80 Return most relevant tags considering a list of packages.
71 """ 81 """
72 - relevant_db = self.choose_packages(pkgs_list)  
73 - relevance_index = debtags.relevance_index_function(self,relevant_db) 82 + if not self.debtags_db.package_count():
  83 + self.load_db()
  84 + relevant_db = self.debtags_db.choose_packages(pkgs_list)
  85 + relevance_index = debtags.relevance_index_function(self.debtags_db,
  86 + relevant_db)
74 sorted_relevant_tags = sorted(relevant_db.iter_tags(), 87 sorted_relevant_tags = sorted(relevant_db.iter_tags(),
75 lambda a, b: cmp(relevance_index(a), 88 lambda a, b: cmp(relevance_index(a),
76 relevance_index(b))) 89 relevance_index(b)))
77 return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:])) 90 return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:]))
78 91
79 -class DebtagsIndex(xapian.WritableDatabase,Singleton):  
80 - def __init__(self,path):  
81 - self.path = path  
82 - self.db_md5 = 0  
83 -  
84 - def load(self,debtags_db,reindex=0): 92 + def load_index(self,reindex):
85 """ 93 """
86 Load an existing debtags index. 94 Load an existing debtags index.
87 """ 95 """
88 - self.debtags_db = debtags_db  
89 - db = open(debtags_db.path)  
90 - md5 = hashlib.md5()  
91 - md5.update(db.read())  
92 - self.db_md5 = md5.hexdigest()  
93 -  
94 if not reindex: 96 if not reindex:
95 try: 97 try:
96 logging.info("Opening existing debtags xapian index at \'%s\'" 98 logging.info("Opening existing debtags xapian index at \'%s\'"
@@ -105,11 +107,11 @@ class DebtagsIndex(xapian.WritableDatabase,Singleton): @@ -105,11 +107,11 @@ class DebtagsIndex(xapian.WritableDatabase,Singleton):
105 reindex =1 107 reindex =1
106 108
107 if reindex: 109 if reindex:
108 - self.create_index(debtags_db) 110 + self.new_index()
109 111
110 - def create_index(self,debtags_db): 112 + def new_index(self):
111 """ 113 """
112 - Create a xapian index for debtags info based on file 'debtags_db' and 114 + Create a xapian index for debtags info based on 'debtags_db' and
113 place it at 'index_path'. 115 place it at 'index_path'.
114 """ 116 """
115 if not os.path.exists(self.path): 117 if not os.path.exists(self.path):
@@ -122,10 +124,12 @@ class DebtagsIndex(xapian.WritableDatabase,Singleton): @@ -122,10 +124,12 @@ class DebtagsIndex(xapian.WritableDatabase,Singleton):
122 xapian.DB_CREATE_OR_OVERWRITE) 124 xapian.DB_CREATE_OR_OVERWRITE)
123 except xapian.DatabaseError: 125 except xapian.DatabaseError:
124 logging.critical("Could not create xapian index.") 126 logging.critical("Could not create xapian index.")
125 - exit(1) 127 + raise Error
126 128
  129 + self.load_db()
127 self.set_metadata("md5",self.db_md5) 130 self.set_metadata("md5",self.db_md5)
128 - for pkg,tags in debtags_db.iter_packages_tags(): 131 +
  132 + for pkg,tags in self.debtags_db.iter_packages_tags():
129 doc = xapian.Document() 133 doc = xapian.Document()
130 doc.set_data(pkg) 134 doc.set_data(pkg)
131 for tag in tags: 135 for tag in tags:
src/error.py 0 → 100644
@@ -0,0 +1,3 @@ @@ -0,0 +1,3 @@
  1 +class Error(Exception):
  2 + """Base class for exceptions."""
  3 + pass
src/evaluation.py
@@ -105,7 +105,7 @@ class CrossValidation: @@ -105,7 +105,7 @@ class CrossValidation:
105 self.partition_proportion = partition_proportion 105 self.partition_proportion = partition_proportion
106 else: 106 else:
107 logging.critical("A proporcao de particao deve ser um avalor ente 0 e 1.") 107 logging.critical("A proporcao de particao deve ser um avalor ente 0 e 1.")
108 - exit(1) 108 + raise Error
109 self.rounds = rounds 109 self.rounds = rounds
110 self.recommender = rec 110 self.recommender = rec
111 self.metrics_list = metrics_list 111 self.metrics_list = metrics_list
@@ -143,7 +143,7 @@ class CrossValidation: @@ -143,7 +143,7 @@ class CrossValidation:
143 random_key = random.choice(cross_item_score.keys()) 143 random_key = random.choice(cross_item_score.keys())
144 else: 144 else:
145 logging.critical("cross_item_score vazio") 145 logging.critical("cross_item_score vazio")
146 - exit(1) 146 + raise Error
147 round_partition[random_key] = cross_item_score.pop(random_key) 147 round_partition[random_key] = cross_item_score.pop(random_key)
148 round_user = User(cross_item_score) 148 round_user = User(cross_item_score)
149 predicted_result = self.recommender.get_recommendation(round_user) 149 predicted_result = self.recommender.get_recommendation(round_user)
src/recommender.py
@@ -18,6 +18,9 @@ @@ -18,6 +18,9 @@
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19
20 from operator import itemgetter 20 from operator import itemgetter
  21 +from data import *
  22 +from strategy import *
  23 +from error import Error
21 24
22 class RecommendationResult: 25 class RecommendationResult:
23 def __init__(self,item_score,size): 26 def __init__(self,item_score,size):
@@ -37,11 +40,22 @@ class RecommendationResult: @@ -37,11 +40,22 @@ class RecommendationResult:
37 40
38 class Recommender: 41 class Recommender:
39 """ """ 42 """ """
40 - def __init__(self,items_repository,users_repository=None,  
41 - knowledge_repository=None):  
42 - self.items_repository = items_repository  
43 - self.users_repository = users_repository  
44 - self.knowledge_repository = knowledge_repository 43 + def __init__(self,cfg):
  44 + try:
  45 + strategy = "self."+cfg.strategy+"(cfg)"
  46 + exec(strategy)
  47 + except (NameError, AttributeError, SyntaxError):
  48 + logging.critical("Could not perform recommendation strategy '%s'" %
  49 + cfg.strategy)
  50 + raise Error
  51 +
  52 + def ct(self,cfg):
  53 + self.items_repository = TagsXapianIndex(cfg)
  54 + self.strategy = ContentBasedStrategy()
  55 +
  56 + def cta(self,cfg):
  57 + self.items_repository = xapian.Database(cfg.axi)
  58 + self.strategy = AxiContentBasedStrategy()
45 59
46 def set_strategy(self,strategy): 60 def set_strategy(self,strategy):
47 """ """ 61 """ """
src/strategy.py
@@ -20,7 +20,7 @@ @@ -20,7 +20,7 @@
20 import os, re 20 import os, re
21 import xapian 21 import xapian
22 from data import * 22 from data import *
23 -from recommender import * 23 +import recommender
24 24
25 class ReputationHeuristic: 25 class ReputationHeuristic:
26 """ 26 """
@@ -75,50 +75,50 @@ class ContentBasedStrategy(RecommendationStrategy): @@ -75,50 +75,50 @@ class ContentBasedStrategy(RecommendationStrategy):
75 """ 75 """
76 Content-based recommendation strategy. 76 Content-based recommendation strategy.
77 """ 77 """
78 - def run(self,recommender,user): 78 + def run(self,rec,user):
79 """ 79 """
80 Perform recommendation strategy. 80 Perform recommendation strategy.
81 """ 81 """
82 - profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50) 82 + profile = user.txi_tag_profile(rec.items_repository,50)
83 qp = xapian.QueryParser() 83 qp = xapian.QueryParser()
84 query = qp.parse_query(profile) 84 query = qp.parse_query(profile)
85 - enquire = xapian.Enquire(recommender.items_repository) 85 + enquire = xapian.Enquire(rec.items_repository)
86 enquire.set_query(query) 86 enquire.set_query(query)
87 87
88 try: 88 try:
89 mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) 89 mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
90 except xapian.DatabaseError as error: 90 except xapian.DatabaseError as error:
91 logging.critical(error.get_msg()) 91 logging.critical(error.get_msg())
92 - exit(1) 92 + raise Error
93 93
94 item_score = {} 94 item_score = {}
95 for m in mset: 95 for m in mset:
96 item_score[m.document.get_data()] = m.rank 96 item_score[m.document.get_data()] = m.rank
97 - return RecommendationResult(item_score,20) 97 + return recommender.RecommendationResult(item_score,20)
98 98
99 class AxiContentBasedStrategy(RecommendationStrategy): 99 class AxiContentBasedStrategy(RecommendationStrategy):
100 """ 100 """
101 Content-based recommendation strategy based on Apt-xapian-index. 101 Content-based recommendation strategy based on Apt-xapian-index.
102 """ 102 """
103 - def run(self,recommender,user): 103 + def run(self,rec,user):
104 """ 104 """
105 Perform recommendation strategy. 105 Perform recommendation strategy.
106 """ 106 """
107 - profile = user.axi_tag_profile(recommender.items_repository,50) 107 + profile = user.axi_tag_profile(rec.items_repository,50)
108 query = xapian.Query(xapian.Query.OP_OR,profile) 108 query = xapian.Query(xapian.Query.OP_OR,profile)
109 - enquire = xapian.Enquire(recommender.items_repository) 109 + enquire = xapian.Enquire(rec.items_repository)
110 enquire.set_query(query) 110 enquire.set_query(query)
111 111
112 try: 112 try:
113 mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) 113 mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
114 except xapian.DatabaseError as error: 114 except xapian.DatabaseError as error:
115 logging.critical(error.get_msg()) 115 logging.critical(error.get_msg())
116 - exit(1) 116 + raise Error
117 117
118 item_score = {} 118 item_score = {}
119 for m in mset: 119 for m in mset:
120 item_score[m.document.get_data()] = m.rank 120 item_score[m.document.get_data()] = m.rank
121 - return RecommendationResult(item_score,20) 121 + return recommender.RecommendationResult(item_score,20)
122 122
123 class ColaborativeStrategy(RecommendationStrategy): 123 class ColaborativeStrategy(RecommendationStrategy):
124 """ 124 """
@@ -39,12 +39,12 @@ class User: @@ -39,12 +39,12 @@ class User:
39 def items(self): 39 def items(self):
40 return self.item_score.keys() 40 return self.item_score.keys()
41 41
42 - def axi_tag_profile(self,xapian_db,profile_size): 42 + def axi_tag_profile(self,apt_xapian_index,profile_size):
43 terms = [] 43 terms = []
44 for item in self.items(): 44 for item in self.items():
45 terms.append("XP"+item) 45 terms.append("XP"+item)
46 query = xapian.Query(xapian.Query.OP_OR, terms) 46 query = xapian.Query(xapian.Query.OP_OR, terms)
47 - enquire = xapian.Enquire(xapian_db) 47 + enquire = xapian.Enquire(apt_xapian_index)
48 enquire.set_query(query) 48 enquire.set_query(query)
49 rset = xapian.RSet() 49 rset = xapian.RSet()
50 for m in enquire.get_mset(0,30000): #consider all matches 50 for m in enquire.get_mset(0,30000): #consider all matches
@@ -56,8 +56,9 @@ class User: @@ -56,8 +56,9 @@ class User:
56 logging.debug("%.2f %s" % (res.weight,res.term[2:])) 56 logging.debug("%.2f %s" % (res.weight,res.term[2:]))
57 return profile 57 return profile
58 58
59 - def debtags_tag_profile(self,debtags_db,profile_size):  
60 - return debtags_db.get_relevant_tags(self.items(),profile_size) 59 + def txi_tag_profile(self,tags_xapian_index,profile_size):
  60 + return tags_xapian_index.relevant_tags_from_db(self.items(),
  61 + profile_size)
61 62
62 class LocalSystem(User): 63 class LocalSystem(User):
63 """ """ 64 """ """