From b88924a2567cad4aa1b74c779e354ee43cab44db Mon Sep 17 00:00:00 2001 From: Tássia Camões Araújo Date: Thu, 10 Mar 2011 19:27:14 -0300 Subject: [PATCH] Code refactoring and amenities --- src/app_recommender.py | 39 +++++++++++++++++++-------------------- src/config.py | 6 +++--- src/cross_validation.py | 2 +- src/data.py | 66 +++++++++++++++++++++++++++++++++++------------------------------- src/error.py | 3 +++ src/evaluation.py | 4 ++-- src/recommender.py | 24 +++++++++++++++++++----- src/strategy.py | 22 +++++++++++----------- src/user.py | 9 +++++---- 9 files changed, 98 insertions(+), 77 deletions(-) create mode 100644 src/error.py diff --git a/src/app_recommender.py b/src/app_recommender.py index 7319a47..29695a5 100755 --- a/src/app_recommender.py +++ b/src/app_recommender.py @@ -20,6 +20,8 @@ import os import sys import logging +import datetime +from datetime import timedelta from config import * from data import * @@ -28,27 +30,24 @@ from similarity_measure import * from recommender import * from strategy import * from user import * +from error import Error -def set_up_recommender(cfg): - if cfg.strategy == "cta": - axi_db = xapian.Database(cfg.axi) - app_rec = Recommender(axi_db) - app_rec.set_strategy(AxiContentBasedStrategy()) +if __name__ == '__main__': + try: + cfg = Config() + rec = Recommender(cfg) + user = LocalSystem() - elif cfg.strategy == "ct": - debtags_db = DebtagsDB(cfg.tags_db) - if not debtags_db.load(): - logging.error("Could not load DebtagsDB from %s." % cfg.tags_db) - sys.exit(1) - debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index)) - debtags_index.load(debtags_db,cfg.reindex) - app_rec = Recommender(debtags_index) - app_rec.set_strategy(ContentBasedStrategy()) + begin_time = datetime.datetime.now() + logging.debug("Recommendation computation started at %s" % begin_time) - return app_rec + print rec.get_recommendation(user) + + end_time = datetime.datetime.now() + logging.debug("Recommendation computation completed at %s" % end_time) + delta = end_time - begin_time + logging.info("Time elapsed: %d seconds." % delta.seconds) + + except Error: + logging.critical("Aborting proccess. Use '--debug' for more details.") -if __name__ == '__main__': - cfg = Config() - rec = set_up_recommender(cfg) - user = LocalSystem() - print rec.get_recommendation(user) diff --git a/src/config.py b/src/config.py index f90bfff..02fc300 100644 --- a/src/config.py +++ b/src/config.py @@ -50,7 +50,7 @@ class Config(): """ Print usage help. """ - print " [ general ]" + print "\n [ general ]" print " -h, --help Print this help" print " -d, --debug Set logging level to debug." print " -v, --verbose Set logging level to verbose." @@ -130,9 +130,9 @@ class Config(): elif o in ("-c", "--config"): self.config = p elif o in ("-t", "--tagsdb"): - self.tagsdb = p + self.tags_db = p elif o in ("-i", "--tagsindex"): - self.tagsindex = p + self.tags_index = p elif o in ("-r", "--force-reindex"): self.reindex = 1 elif o in ("-a", "--axi"): diff --git a/src/cross_validation.py b/src/cross_validation.py index 02c8bf8..0aa8708 100755 --- a/src/cross_validation.py +++ b/src/cross_validation.py @@ -39,7 +39,7 @@ def set_up_recommender(cfg): debtags_db = DebtagsDB(cfg.tags_db) if not debtags_db.load(): logging.error("Could not load DebtagsDB from %s." % cfg.tags_db) - sys.exit(1) + raise Error debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index)) debtags_index.load(debtags_db,cfg.reindex) app_rec = Recommender(debtags_index) diff --git a/src/data.py b/src/data.py index a7156af..fcca39c 100644 --- a/src/data.py +++ b/src/data.py @@ -26,6 +26,8 @@ from debian import debtags import logging import hashlib +from error import Error + class Item: """ """ @@ -51,46 +53,46 @@ class Singleton(object): cls._inst = object.__new__(cls) return cls._inst -class DebtagsDB(debtags.DB,Singleton): - def __init__(self,path): - self.path = path +class TagsXapianIndex(xapian.WritableDatabase,Singleton): + def __init__(self,cfg): + self.path = os.path.expanduser(cfg.tags_index) + self.db_path = os.path.expanduser(cfg.tags_db) + self.debtags_db = debtags.DB() - def load(self): + db = open(self.db_path) + md5 = hashlib.md5() + md5.update(db.read()) + self.db_md5 = md5.hexdigest() + + self.load_index(cfg.reindex) + + def load_db(self): tag_filter = re.compile(r"^special::.+$|^.+::TODO$") try: - self.read(open(self.path, "r"), lambda x: not tag_filter.match(x)) - return 1 - except IOError: - logging.error("IOError: could not open debtags file \'%s\'" % - self.path) - return 0 - - def get_relevant_tags(self,pkgs_list,qtd_of_tags): + db_file = open(self.db_path, "r") + self.debtags_db.read(db_file,lambda x: not tag_filter.match(x)) + except IOError: #FIXME try is not catching this + logging.error("Could not load DebtagsDB from %s." % self.db_path) + raise Error + + def relevant_tags_from_db(self,pkgs_list,qtd_of_tags): """ Return most relevant tags considering a list of packages. """ - relevant_db = self.choose_packages(pkgs_list) - relevance_index = debtags.relevance_index_function(self,relevant_db) + if not self.debtags_db.package_count(): + self.load_db() + relevant_db = self.debtags_db.choose_packages(pkgs_list) + relevance_index = debtags.relevance_index_function(self.debtags_db, + relevant_db) sorted_relevant_tags = sorted(relevant_db.iter_tags(), lambda a, b: cmp(relevance_index(a), relevance_index(b))) return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:])) -class DebtagsIndex(xapian.WritableDatabase,Singleton): - def __init__(self,path): - self.path = path - self.db_md5 = 0 - - def load(self,debtags_db,reindex=0): + def load_index(self,reindex): """ Load an existing debtags index. """ - self.debtags_db = debtags_db - db = open(debtags_db.path) - md5 = hashlib.md5() - md5.update(db.read()) - self.db_md5 = md5.hexdigest() - if not reindex: try: logging.info("Opening existing debtags xapian index at \'%s\'" @@ -105,11 +107,11 @@ class DebtagsIndex(xapian.WritableDatabase,Singleton): reindex =1 if reindex: - self.create_index(debtags_db) + self.new_index() - def create_index(self,debtags_db): + def new_index(self): """ - Create a xapian index for debtags info based on file 'debtags_db' and + Create a xapian index for debtags info based on 'debtags_db' and place it at 'index_path'. """ if not os.path.exists(self.path): @@ -122,10 +124,12 @@ class DebtagsIndex(xapian.WritableDatabase,Singleton): xapian.DB_CREATE_OR_OVERWRITE) except xapian.DatabaseError: logging.critical("Could not create xapian index.") - exit(1) + raise Error + self.load_db() self.set_metadata("md5",self.db_md5) - for pkg,tags in debtags_db.iter_packages_tags(): + + for pkg,tags in self.debtags_db.iter_packages_tags(): doc = xapian.Document() doc.set_data(pkg) for tag in tags: diff --git a/src/error.py b/src/error.py new file mode 100644 index 0000000..70e455f --- /dev/null +++ b/src/error.py @@ -0,0 +1,3 @@ +class Error(Exception): + """Base class for exceptions.""" + pass diff --git a/src/evaluation.py b/src/evaluation.py index 8874ec2..6e39d61 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -105,7 +105,7 @@ class CrossValidation: self.partition_proportion = partition_proportion else: logging.critical("A proporcao de particao deve ser um avalor ente 0 e 1.") - exit(1) + raise Error self.rounds = rounds self.recommender = rec self.metrics_list = metrics_list @@ -143,7 +143,7 @@ class CrossValidation: random_key = random.choice(cross_item_score.keys()) else: logging.critical("cross_item_score vazio") - exit(1) + raise Error round_partition[random_key] = cross_item_score.pop(random_key) round_user = User(cross_item_score) predicted_result = self.recommender.get_recommendation(round_user) diff --git a/src/recommender.py b/src/recommender.py index e445c85..416886c 100644 --- a/src/recommender.py +++ b/src/recommender.py @@ -18,6 +18,9 @@ # along with this program. If not, see . from operator import itemgetter +from data import * +from strategy import * +from error import Error class RecommendationResult: def __init__(self,item_score,size): @@ -37,11 +40,22 @@ class RecommendationResult: class Recommender: """ """ - def __init__(self,items_repository,users_repository=None, - knowledge_repository=None): - self.items_repository = items_repository - self.users_repository = users_repository - self.knowledge_repository = knowledge_repository + def __init__(self,cfg): + try: + strategy = "self."+cfg.strategy+"(cfg)" + exec(strategy) + except (NameError, AttributeError, SyntaxError): + logging.critical("Could not perform recommendation strategy '%s'" % + cfg.strategy) + raise Error + + def ct(self,cfg): + self.items_repository = TagsXapianIndex(cfg) + self.strategy = ContentBasedStrategy() + + def cta(self,cfg): + self.items_repository = xapian.Database(cfg.axi) + self.strategy = AxiContentBasedStrategy() def set_strategy(self,strategy): """ """ diff --git a/src/strategy.py b/src/strategy.py index 1aa7510..22d6f49 100644 --- a/src/strategy.py +++ b/src/strategy.py @@ -20,7 +20,7 @@ import os, re import xapian from data import * -from recommender import * +import recommender class ReputationHeuristic: """ @@ -75,50 +75,50 @@ class ContentBasedStrategy(RecommendationStrategy): """ Content-based recommendation strategy. """ - def run(self,recommender,user): + def run(self,rec,user): """ Perform recommendation strategy. """ - profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50) + profile = user.txi_tag_profile(rec.items_repository,50) qp = xapian.QueryParser() query = qp.parse_query(profile) - enquire = xapian.Enquire(recommender.items_repository) + enquire = xapian.Enquire(rec.items_repository) enquire.set_query(query) try: mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) except xapian.DatabaseError as error: logging.critical(error.get_msg()) - exit(1) + raise Error item_score = {} for m in mset: item_score[m.document.get_data()] = m.rank - return RecommendationResult(item_score,20) + return recommender.RecommendationResult(item_score,20) class AxiContentBasedStrategy(RecommendationStrategy): """ Content-based recommendation strategy based on Apt-xapian-index. """ - def run(self,recommender,user): + def run(self,rec,user): """ Perform recommendation strategy. """ - profile = user.axi_tag_profile(recommender.items_repository,50) + profile = user.axi_tag_profile(rec.items_repository,50) query = xapian.Query(xapian.Query.OP_OR,profile) - enquire = xapian.Enquire(recommender.items_repository) + enquire = xapian.Enquire(rec.items_repository) enquire.set_query(query) try: mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) except xapian.DatabaseError as error: logging.critical(error.get_msg()) - exit(1) + raise Error item_score = {} for m in mset: item_score[m.document.get_data()] = m.rank - return RecommendationResult(item_score,20) + return recommender.RecommendationResult(item_score,20) class ColaborativeStrategy(RecommendationStrategy): """ diff --git a/src/user.py b/src/user.py index 41dfc6e..57a92f8 100644 --- a/src/user.py +++ b/src/user.py @@ -39,12 +39,12 @@ class User: def items(self): return self.item_score.keys() - def axi_tag_profile(self,xapian_db,profile_size): + def axi_tag_profile(self,apt_xapian_index,profile_size): terms = [] for item in self.items(): terms.append("XP"+item) query = xapian.Query(xapian.Query.OP_OR, terms) - enquire = xapian.Enquire(xapian_db) + enquire = xapian.Enquire(apt_xapian_index) enquire.set_query(query) rset = xapian.RSet() for m in enquire.get_mset(0,30000): #consider all matches @@ -56,8 +56,9 @@ class User: logging.debug("%.2f %s" % (res.weight,res.term[2:])) return profile - def debtags_tag_profile(self,debtags_db,profile_size): - return debtags_db.get_relevant_tags(self.items(),profile_size) + def txi_tag_profile(self,tags_xapian_index,profile_size): + return tags_xapian_index.relevant_tags_from_db(self.items(), + profile_size) class LocalSystem(User): """ """ -- libgit2 0.21.2