diff --git a/src/app_recommender.py b/src/app_recommender.py index 8064a0e..792dceb 100755 --- a/src/app_recommender.py +++ b/src/app_recommender.py @@ -19,6 +19,7 @@ import os import sys +import logging from config import * from data import * @@ -28,41 +29,8 @@ from recommender import * from strategy import * from user import * -# Setup configuration -#DB_PATH = "/var/lib/debtags/package-tags" -#INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index") -# -#XAPIANDBPATH = os.environ.get("AXI_DB_PATH", "/var/lib/apt-xapian-index") -#XAPIANDB = XAPIANDBPATH + "/index" -#XAPIANDBVALUES = XAPIANDBPATH + "/values" - -def set_up_logger(cfg): - log_format = '%(asctime)s AppRecommender %(levelname)s: %(message)s' - log_level = logging.INFO - if cfg.debug is 1: - log_level = logging.DEBUG - logging.basicConfig(level=log_level,format=log_format,filename=cfg.output) - console = logging.StreamHandler(sys.stdout) - console.setLevel(log_level) - formatter = logging.Formatter('%(levelname)s: %(message)s') - console.setFormatter(formatter) - logging.getLogger('').addHandler(console) - def set_up_recommender(cfg): -# reindex = 0 -# axi = 0 -# if len(sys.argv) == 2: -# if sys.argv[1] == "axi": -# axi = 1 -# else: -# DB_PATH = sys.argv[1] -# reindex = 1 -# elif len(sys.argv) > 2: -# print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % -# sys.argv[0]) -# sys.exit(1) - - reindex = 0 + reindex = 1 #FIXME should do it only if necessary if cfg.strategy == "cta": axi_db = xapian.Database(cfg.axi) @@ -72,12 +40,13 @@ def set_up_recommender(cfg): elif cfg.strategy == "ct": debtags_db = DebtagsDB(cfg.tags_db) if not debtags_db.load(): - print >> sys.stderr,("Could not load DebtagsDB from %s." % DB_PATH) + logging.error("Could not load DebtagsDB from %s." 
% cfg.tags_db) sys.exit(1) debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index)) debtags_index.load(debtags_db,reindex) app_rec = Recommender(debtags_index) app_rec.set_strategy(ContentBasedStrategy()) + return app_rec def cross_validation(recommender): @@ -90,7 +59,7 @@ def cross_validation(recommender): if __name__ == '__main__': cfg = Config() cfg.load_options() - set_up_logger(cfg) + cfg.set_logger() rec = set_up_recommender(cfg) user = LocalSystem() result = rec.get_recommendation(user) diff --git a/src/config.py b/src/config.py index aac568d..1a28006 100644 --- a/src/config.py +++ b/src/config.py @@ -20,7 +20,8 @@ import getopt import sys import os -import logging +from logging import * +import logging.handlers from ConfigParser import * @@ -33,6 +34,7 @@ class Config(): Set default configuration options. """ self.debug = 0 + self.verbose = 0 self.output = "/dev/null" self.config = None self.tags_db = "/var/lib/debtags/package-tags" @@ -47,7 +49,8 @@ class Config(): """ print " [ general ]" print " -h, --help Print this help" - print " -d, --debug Set debug to true. Default is false." + print " -d, --debug Set logging level to debug." + print " -v, --verbose Set logging level to verbose." print " -o, --output=PATH Path to file to save output." print " -c, --config=PATH Path to configuration file." 
print "" @@ -89,6 +92,7 @@ class Config(): os.abort() self.debug = self.read_option('general', 'debug') + self.verbose = self.read_option('general', 'verbose') self.output_filename = self.read_option('general', 'output') self.config = self.read_option('general', 'config') @@ -96,8 +100,8 @@ class Config(): self.tags_index = self.read_option('recommender', 'tags_index') self.axi = self.read_option('recommender', 'axi') - short_options = "hdo:c:t:i:a:s:" - long_options = ["help", "debug", "output=", "config=", + short_options = "hdvo:c:t:i:a:s:" + long_options = ["help", "debug", "verbose", "output=", "config=", "tagsdb=", "tagsindex=", "axi=", "strategy="] try: opts, args = getopt.getopt(sys.argv[1:], short_options, @@ -114,6 +118,8 @@ class Config(): sys.exit() elif o in ("-d", "--debug"): self.debug = 1 + elif o in ("-v", "--verbose"): + self.verbose = 1 elif o in ("-o", "--output"): self.output = p elif o in ("-c", "--config"): @@ -129,3 +135,27 @@ class Config(): self.strategy = p else: assert False, "unhandled option" + + def set_logger(self): + self.logger = getLogger('') # root logger is used by default + self.logger.setLevel(DEBUG) + + if self.debug == 1: + log_level = DEBUG + elif self.verbose == 1: + log_level = INFO + else: + log_level = WARNING + + console_handler = StreamHandler(sys.stdout) + console_handler.setFormatter(Formatter('%(levelname)s: %(message)s')) + console_handler.setLevel(log_level) + self.logger.addHandler(console_handler) + + file_handler = logging.handlers.RotatingFileHandler(self.output, + maxBytes=5000, + backupCount=5) + log_format = '%(asctime)s AppRecommender %(levelname)-8s %(message)s' + file_handler.setFormatter(Formatter(log_format)) + file_handler.setLevel(log_level) + self.logger.addHandler(file_handler) diff --git a/src/data.py b/src/data.py index 2ece072..94cc705 100644 --- a/src/data.py +++ b/src/data.py @@ -23,6 +23,7 @@ import re import xapian import axi from debian import debtags +import logging class Item: """ """ @@
-38,7 +39,9 @@ class Package(Item): print "debian pkg",self.id def normalize_tags(string): - """ Normalize tag string so that it can be indexed and retrieved. """ + """ + Normalize tag string so that it can be indexed and retrieved. + """ return string.replace(':','_').replace('-','\'') # FIXME Data repositories should be singleton @@ -53,12 +56,14 @@ class DebtagsDB(debtags.DB): self.read(open(self.path, "r"), lambda x: not tag_filter.match(x)) return 1 except IOError: - print >> sys.stderr, ("IOError: could not open debtags file \'%s\'" - % self.path) + logging.error("IOError: could not open debtags file \'%s\'" % + self.path) return 0 def get_relevant_tags(self,pkgs_list,qtd_of_tags): - """ Return most relevant tags considering a list of packages. """ + """ + Return most relevant tags considering a list of packages. + """ relevant_db = self.choose_packages(pkgs_list) relevance_index = debtags.relevance_index_function(self,relevant_db) sorted_relevant_tags = sorted(relevant_db.iter_tags(), @@ -71,31 +76,35 @@ class DebtagsIndex(xapian.WritableDatabase): self.path = path def load(self,debtags_db,reindex): - """ Load an existing debtags index. """ + """ + Load an existing debtags index. + """ self.debtags_db = debtags_db if not reindex: try: - print ("Opening existing debtags xapian index at \'%s\'" % - self.path) + logging.info("Opening existing debtags xapian index at \'%s\'" + % self.path) xapian.Database.__init__(self,self.path) except xapian.DatabaseError: - print "Could not open debtags xapian index" + logging.error("Could not open debtags xapian index") reindex =1 if reindex: self.reindex(debtags_db) def reindex(self,debtags_db): - """ Create a xapian index for debtags info based on file 'debtags_db' - and place it at 'index_path'. + """ + Create a xapian index for debtags info based on file 'debtags_db' and + place it at 'index_path'. 
""" if not os.path.exists(self.path): os.makedirs(self.path) - print "Creating new debtags xapian index at \'%s\'" % self.path + logging.info("Creating new debtags xapian index at \'%s\'" % self.path) xapian.WritableDatabase.__init__(self,self.path, - xapian.DB_CREATE_OR_OVERWRITE) + xapian.DB_CREATE_OR_OVERWRITE) for pkg,tags in debtags_db.iter_packages_tags(): doc = xapian.Document() doc.set_data(pkg) for tag in tags: doc.add_term(normalize_tags(tag)) - print "indexing ",self.add_document(doc) + doc_id = self.add_document(doc) + logging.debug("Indexing doc %d",doc_id) diff --git a/src/evaluation.py b/src/evaluation.py index 8a01603..8874ec2 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -19,6 +19,8 @@ import random from collections import defaultdict +import logging + from user import * from recommender import * @@ -92,14 +94,17 @@ class Evaluation: return metric.run(self) class CrossValidation: - """ Cross-validation method """ + """ + Cross-validation method + """ def __init__(self,partition_proportion,rounds,rec,metrics_list): - """ Set parameters: partition_size, rounds, recommender and - metrics_list """ + """ + Set defaults: partition_size, rounds, recommender and metrics_list + """ if partition_proportion<1 and partition_proportion>0: self.partition_proportion = partition_proportion else: - print "A proporcao de particao deve ser um avalor ente 0 e 1." + logging.critical("A proporcao de particao deve ser um avalor ente 0 e 1.") exit(1) self.rounds = rounds self.recommender = rec @@ -126,7 +131,9 @@ class CrossValidation: print "| Mean |%s" % (metrics_mean) def run(self,user): - """ Perform cross-validation. """ + """ + Perform cross-validation. 
+ """ partition_size = int(len(user.item_score)*self.partition_proportion) cross_item_score = user.item_score.copy() for r in range(self.rounds): @@ -135,7 +142,7 @@ class CrossValidation: if len(cross_item_score)>0: random_key = random.choice(cross_item_score.keys()) else: - print "cross_item_score vazio" + logging.critical("cross_item_score vazio") exit(1) round_partition[random_key] = cross_item_score.pop(random_key) round_user = User(cross_item_score) diff --git a/src/similarity_measure.py b/src/similarity_measure.py index 092464d..10a9ec5 100644 --- a/src/similarity_measure.py +++ b/src/similarity_measure.py @@ -21,44 +21,68 @@ import math import stats def norm(x): - """ Return vector norm. """ + """ + Return vector norm. + """ return math.sqrt(sum([x_i**2 for x_i in x])) def dot_product(x,y): - """ Return dot product of vectors 'x' and 'y'. """ + """ + Return dot product of vectors 'x' and 'y'. + """ return sum([(x[i] * y[i]) for i in range(len(x))]) class SimilarityMeasure: - """ Abstraction for diferent similarity measure approaches. """ + """ + Abstraction for different similarity measure approaches. + """ class Distance(SimilarityMeasure): - """ Euclidian distance measure. """ + """ + Euclidean distance measure. + """ def __call__(self,x,y): - """ Return euclidian distance between vectors 'x' and 'y'. """ + """ + Return Euclidean distance between vectors 'x' and 'y'. + """ sum_pow = sum([((x[i] - y[i]) ** 2) for i in range(len(x))]) return math.sqrt(sum_pow) class Cosine(SimilarityMeasure): - """ Cosine similarity measure. """ + """ + Cosine similarity measure. + """ def __call__(self,x,y): - """ Return cosine of angle between vectors 'x' and 'y'. """ + """ + Return cosine of angle between vectors 'x' and 'y'. + """ return float(dot_product(x,y)/(norm(x)*norm(y))) class Pearson(SimilarityMeasure): - """ Pearson coeficient measure. """ # FIXME: ZeroDivisionError + """ + Pearson coefficient measure.
+ """ def __call__(self,x,y): """ Return Pearson coeficient between vectors 'x' and 'y'. """ - return stats.pearsonr(x,y) + return stats.pearsonr(x,y) # FIXME: ZeroDivisionError class Spearman(SimilarityMeasure): - """ Spearman correlation measure. """ # FIXME: ZeroDivisionError + """ + Spearman correlation measure. + """ def __call__(self,x,y): - """ Return Spearman correlation between vectors 'x' and 'y'. """ - return stats.spearmanr(x,y) + """ + Return Spearman correlation between vectors 'x' and 'y'. + """ + return stats.spearmanr(x,y) # FIXME: ZeroDivisionError class Tanimoto(SimilarityMeasure): - " Tanimoto coeficient measure. """ + """ + Tanimoto coefficient measure. + """ def __call__(self,x,y): - """ Return Tanimoto coeficient between vectors 'x' and 'y'. """ + """ + Return Tanimoto coefficient between vectors 'x' and 'y'. + """ z = [v for v in x if v in y] return float(len(z))/(len(x)+len(y)-len(z)) diff --git a/src/strategy.py b/src/strategy.py index 73d8344..af82018 100644 --- a/src/strategy.py +++ b/src/strategy.py @@ -23,20 +23,30 @@ from data import * from recommender import * class ReputationHeuristic: - """ Abstraction for diferent reputation heuristics. """ + """ + Abstraction for different reputation heuristics. + """ class BugsHeuristic(ReputationHeuristic): - """ Reputation heuristic based on quantity of open bugs. """ + """ + Reputation heuristic based on quantity of open bugs. + """ class RCBugsHeuristic(ReputationHeuristic): - """ Reputation heuristic based on quantity of RC bugs. """ + """ + Reputation heuristic based on quantity of RC bugs. + """ class PopularityHeuristic(ReputationHeuristic): - """ Reputation heuristic based on popularity of packages. """ + """ + Reputation heuristic based on popularity of packages. + """ class PkgMatchDecider(xapian.MatchDecider): - """ Extends xapian.MatchDecider to disconsider installed packages. """ + """ + Extends xapian.MatchDecider to disregard installed packages.
+ """ def __init__(self, installed_pkgs): xapian.MatchDecider.__init__(self) @@ -47,18 +57,28 @@ class PkgMatchDecider(xapian.MatchDecider): class RecommendationStrategy: - """ Abstraction for diferent recommendation strategy. """ + """ + Abstraction for different recommendation strategies. + """ class ItemReputationStrategy(RecommendationStrategy): - """ Recommendation strategy based on items reputation. """ + """ + Recommendation strategy based on items reputation. + """ def run(self,items_list,heuristic): - """ Perform recommendation strategy """ + """ + Perform recommendation strategy. + """ return RecomendationResult() class ContentBasedStrategy(RecommendationStrategy): - """ Content-based recommendation strategy. """ + """ + Content-based recommendation strategy. + """ def run(self,recommender,user): - """ Perform recommendation strategy """ + """ + Perform recommendation strategy. + """ profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50) qp = xapian.QueryParser() query = qp.parse_query(profile) @@ -72,9 +92,13 @@ class ContentBasedStrategy(RecommendationStrategy): return RecommendationResult(item_score,20) class AxiContentBasedStrategy(RecommendationStrategy): - """ Content-based recommendation strategy based on Apt-xapian-index. """ + """ + Content-based recommendation strategy based on Apt-xapian-index. + """ def run(self,recommender,user): - """ Perform recommendation strategy """ + """ + Perform recommendation strategy. + """ profile = user.axi_tag_profile(recommender.items_repository,50) query = xapian.Query(xapian.Query.OP_OR,profile) enquire = xapian.Enquire(recommender.items_repository) @@ -87,19 +111,31 @@ class AxiContentBasedStrategy(RecommendationStrategy): return RecommendationResult(item_score,20) class ColaborativeStrategy(RecommendationStrategy): - """ Colaborative recommendation strategy. """ + """ + Collaborative recommendation strategy.
+ """ def run(self,user,users_repository,similarity_measure): - """ Perform recommendation strategy """ + """ + Perform recommendation strategy. + """ return RecomendationResult() class KnowledgeBasedStrategy(RecommendationStrategy): - """ Knowledge-based recommendation strategy. """ + """ + Knowledge-based recommendation strategy. + """ def run(self,user,knowledge_repository): - """ Perform recommendation strategy """ + """ + Perform recommendation strategy. + """ return RecomendationResult() class DemographicStrategy(RecommendationStrategy): - """ Recommendation strategy based on demographic data. """ + """ + Recommendation strategy based on demographic data. + """ def run(self,user,items_repository): - """ Perform recommendation strategy """ + """ + Perform recommendation strategy. + """ return RecomendationResult() diff --git a/src/user.py b/src/user.py index 6849173..41dfc6e 100644 --- a/src/user.py +++ b/src/user.py @@ -19,11 +19,12 @@ import commands import xapian +import logging class FilterTag(xapian.ExpandDecider): def __call__(self, term): """ - Return true if the term is a tag, else false + Return true if the term is a tag, else false. """ return term[:2] == "XT" @@ -52,7 +53,7 @@ class User: profile = [] for res in eset: profile.append(res.term) - #print "%.2f %s" % (res.weight,res.term[2:]) + logging.debug("%.2f %s" % (res.weight,res.term[2:])) return profile def debtags_tag_profile(self,debtags_db,profile_size): -- libgit2 0.21.2