Commit 49638292ffb84578ac81aede125bf088bf376146

Authored by Tássia Camões Araújo
1 parent f53e9576
Exists in master and in 1 other branch add_vagrant

Handle log messages at different levels:

- DEBUG: emitted with the --debug option
- INFO: emitted with the --verbose or --debug option
- WARNING, ERROR, CRITICAL: emitted by default
(close #9)
src/app_recommender.py
... ... @@ -19,6 +19,7 @@
19 19  
20 20 import os
21 21 import sys
  22 +import logging
22 23  
23 24 from config import *
24 25 from data import *
... ... @@ -28,41 +29,8 @@ from recommender import *
28 29 from strategy import *
29 30 from user import *
30 31  
31   -# Setup configuration
32   -#DB_PATH = "/var/lib/debtags/package-tags"
33   -#INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index")
34   -#
35   -#XAPIANDBPATH = os.environ.get("AXI_DB_PATH", "/var/lib/apt-xapian-index")
36   -#XAPIANDB = XAPIANDBPATH + "/index"
37   -#XAPIANDBVALUES = XAPIANDBPATH + "/values"
38   -
39   -def set_up_logger(cfg):
40   - log_format = '%(asctime)s AppRecommender %(levelname)s: %(message)s'
41   - log_level = logging.INFO
42   - if cfg.debug is 1:
43   - log_level = logging.DEBUG
44   - logging.basicConfig(level=log_level,format=log_format,filename=cfg.output)
45   - console = logging.StreamHandler(sys.stdout)
46   - console.setLevel(log_level)
47   - formatter = logging.Formatter('%(levelname)s: %(message)s')
48   - console.setFormatter(formatter)
49   - logging.getLogger('').addHandler(console)
50   -
51 32 def set_up_recommender(cfg):
52   -# reindex = 0
53   -# axi = 0
54   -# if len(sys.argv) == 2:
55   -# if sys.argv[1] == "axi":
56   -# axi = 1
57   -# else:
58   -# DB_PATH = sys.argv[1]
59   -# reindex = 1
60   -# elif len(sys.argv) > 2:
61   -# print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" %
62   -# sys.argv[0])
63   -# sys.exit(1)
64   -
65   - reindex = 0
  33 + reindex = 1 #FIXME should do it only if necessary
66 34  
67 35 if cfg.strategy == "cta":
68 36 axi_db = xapian.Database(cfg.axi)
... ... @@ -72,12 +40,13 @@ def set_up_recommender(cfg):
72 40 elif cfg.strategy == "ct":
73 41 debtags_db = DebtagsDB(cfg.tags_db)
74 42 if not debtags_db.load():
75   - print >> sys.stderr,("Could not load DebtagsDB from %s." % DB_PATH)
  43 + logging.error("Could not load DebtagsDB from %s." % cfg.tags_db)
76 44 sys.exit(1)
77 45 debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index))
78 46 debtags_index.load(debtags_db,reindex)
79 47 app_rec = Recommender(debtags_index)
80 48 app_rec.set_strategy(ContentBasedStrategy())
  49 +
81 50 return app_rec
82 51  
83 52 def cross_validation(recommender):
... ... @@ -90,7 +59,7 @@ def cross_validation(recommender):
90 59 if __name__ == '__main__':
91 60 cfg = Config()
92 61 cfg.load_options()
93   - set_up_logger(cfg)
  62 + cfg.set_logger()
94 63 rec = set_up_recommender(cfg)
95 64 user = LocalSystem()
96 65 result = rec.get_recommendation(user)
... ...
src/config.py
... ... @@ -20,7 +20,8 @@
20 20 import getopt
21 21 import sys
22 22 import os
23   -import logging
  23 +from logging import *
  24 +import logging.handlers
24 25  
25 26 from ConfigParser import *
26 27  
... ... @@ -33,6 +34,7 @@ class Config():
33 34 Set default configuration options.
34 35 """
35 36 self.debug = 0
  37 + self.verbose = 0
36 38 self.output = "/dev/null"
37 39 self.config = None
38 40 self.tags_db = "/var/lib/debtags/package-tags"
... ... @@ -47,7 +49,8 @@ class Config():
47 49 """
48 50 print " [ general ]"
49 51 print " -h, --help Print this help"
50   - print " -d, --debug Set debug to true. Default is false."
  52 + print " -d, --debug Set logging level to debug."
  53 + print " -v, --verbose Set logging level to verbose."
51 54 print " -o, --output=PATH Path to file to save output."
52 55 print " -c, --config=PATH Path to configuration file."
53 56 print ""
... ... @@ -89,6 +92,7 @@ class Config():
89 92 os.abort()
90 93  
91 94 self.debug = self.read_option('general', 'debug')
  95 + self.debug = self.read_option('general', 'verbose')
92 96 self.output_filename = self.read_option('general', 'output')
93 97 self.config = self.read_option('general', 'config')
94 98  
... ... @@ -96,8 +100,8 @@ class Config():
96 100 self.tags_index = self.read_option('recommender', 'tags_index')
97 101 self.axi = self.read_option('recommender', 'axi')
98 102  
99   - short_options = "hdo:c:t:i:a:s:"
100   - long_options = ["help", "debug", "output=", "config=",
  103 + short_options = "hdvo:c:t:i:a:s:"
  104 + long_options = ["help", "debug", "verbose", "output=", "config=",
101 105 "tagsdb=", "tagsindex=", "axi=", "strategy="]
102 106 try:
103 107 opts, args = getopt.getopt(sys.argv[1:], short_options,
... ... @@ -114,6 +118,8 @@ class Config():
114 118 sys.exit()
115 119 elif o in ("-d", "--debug"):
116 120 self.debug = 1
  121 + elif o in ("-v", "--verbose"):
  122 + self.verbose = 1
117 123 elif o in ("-o", "--output"):
118 124 self.output = p
119 125 elif o in ("-c", "--config"):
... ... @@ -129,3 +135,27 @@ class Config():
129 135 self.strategy = p
130 136 else:
131 137 assert False, "unhandled option"
  138 +
  139 + def set_logger(self):
  140 + self.logger = getLogger('') # root logger is used by default
  141 + self.logger.setLevel(DEBUG)
  142 +
  143 + if self.debug == 1:
  144 + log_level = DEBUG
  145 + elif self.verbose == 1:
  146 + log_level = INFO
  147 + else:
  148 + log_level = WARNING
  149 +
  150 + console_handler = StreamHandler(sys.stdout)
  151 + console_handler.setFormatter(Formatter('%(levelname)s: %(message)s'))
  152 + console_handler.setLevel(log_level)
  153 + self.logger.addHandler(console_handler)
  154 +
  155 + file_handler = logging.handlers.RotatingFileHandler(self.output,
  156 + maxBytes=5000,
  157 + backupCount=5)
  158 + log_format = '%(asctime)s AppRecommender %(levelname)-8s %(message)s'
  159 + file_handler.setFormatter(Formatter(log_format))
  160 + file_handler.setLevel(log_level)
  161 + self.logger.addHandler(file_handler)
... ...
src/data.py
... ... @@ -23,6 +23,7 @@ import re
23 23 import xapian
24 24 import axi
25 25 from debian import debtags
  26 +import logging
26 27  
27 28 class Item:
28 29 """ """
... ... @@ -38,7 +39,9 @@ class Package(Item):
38 39 print "debian pkg",self.id
39 40  
40 41 def normalize_tags(string):
41   - """ Normalize tag string so that it can be indexed and retrieved. """
  42 + """
  43 + Normalize tag string so that it can be indexed and retrieved.
  44 + """
42 45 return string.replace(':','_').replace('-','\'')
43 46  
44 47 # FIXME Data repositories should be singleton
... ... @@ -53,12 +56,14 @@ class DebtagsDB(debtags.DB):
53 56 self.read(open(self.path, "r"), lambda x: not tag_filter.match(x))
54 57 return 1
55 58 except IOError:
56   - print >> sys.stderr, ("IOError: could not open debtags file \'%s\'"
57   - % self.path)
  59 + logging.error("IOError: could not open debtags file \'%s\'" %
  60 + self.path)
58 61 return 0
59 62  
60 63 def get_relevant_tags(self,pkgs_list,qtd_of_tags):
61   - """ Return most relevant tags considering a list of packages. """
  64 + """
  65 + Return most relevant tags considering a list of packages.
  66 + """
62 67 relevant_db = self.choose_packages(pkgs_list)
63 68 relevance_index = debtags.relevance_index_function(self,relevant_db)
64 69 sorted_relevant_tags = sorted(relevant_db.iter_tags(),
... ... @@ -71,31 +76,35 @@ class DebtagsIndex(xapian.WritableDatabase):
71 76 self.path = path
72 77  
73 78 def load(self,debtags_db,reindex):
74   - """ Load an existing debtags index. """
  79 + """
  80 + Load an existing debtags index.
  81 + """
75 82 self.debtags_db = debtags_db
76 83 if not reindex:
77 84 try:
78   - print ("Opening existing debtags xapian index at \'%s\'" %
79   - self.path)
  85 + logging.info("Opening existing debtags xapian index at \'%s\'"
  86 + % self.path)
80 87 xapian.Database.__init__(self,self.path)
81 88 except xapian.DatabaseError:
82   - print "Could not open debtags xapian index"
  89 + logging.error("Could not open debtags xapian index")
83 90 reindex =1
84 91 if reindex:
85 92 self.reindex(debtags_db)
86 93  
87 94 def reindex(self,debtags_db):
88   - """ Create a xapian index for debtags info based on file 'debtags_db'
89   - and place it at 'index_path'.
  95 + """
  96 + Create a xapian index for debtags info based on file 'debtags_db' and
  97 + place it at 'index_path'.
90 98 """
91 99 if not os.path.exists(self.path):
92 100 os.makedirs(self.path)
93   - print "Creating new debtags xapian index at \'%s\'" % self.path
  101 + logging.info("Creating new debtags xapian index at \'%s\'" % self.path)
94 102 xapian.WritableDatabase.__init__(self,self.path,
95   - xapian.DB_CREATE_OR_OVERWRITE)
  103 + xapian.DB_CREATE_OR_OVERWRITE)
96 104 for pkg,tags in debtags_db.iter_packages_tags():
97 105 doc = xapian.Document()
98 106 doc.set_data(pkg)
99 107 for tag in tags:
100 108 doc.add_term(normalize_tags(tag))
101   - print "indexing ",self.add_document(doc)
  109 + doc_id = self.add_document(doc)
  110 + logging.debug("Indexing doc %d",doc_id)
... ...
src/evaluation.py
... ... @@ -19,6 +19,8 @@
19 19  
20 20 import random
21 21 from collections import defaultdict
  22 +import logging
  23 +
22 24 from user import *
23 25 from recommender import *
24 26  
... ... @@ -92,14 +94,17 @@ class Evaluation:
92 94 return metric.run(self)
93 95  
94 96 class CrossValidation:
95   - """ Cross-validation method """
  97 + """
  98 + Cross-validation method
  99 + """
96 100 def __init__(self,partition_proportion,rounds,rec,metrics_list):
97   - """ Set parameters: partition_size, rounds, recommender and
98   - metrics_list """
  101 + """
  102 + Set defaults: partition_size, rounds, recommender and metrics_list
  103 + """
99 104 if partition_proportion<1 and partition_proportion>0:
100 105 self.partition_proportion = partition_proportion
101 106 else:
102   - print "A proporcao de particao deve ser um avalor ente 0 e 1."
  107 + logging.critical("A proporcao de particao deve ser um avalor ente 0 e 1.")
103 108 exit(1)
104 109 self.rounds = rounds
105 110 self.recommender = rec
... ... @@ -126,7 +131,9 @@ class CrossValidation:
126 131 print "| Mean |%s" % (metrics_mean)
127 132  
128 133 def run(self,user):
129   - """ Perform cross-validation. """
  134 + """
  135 + Perform cross-validation.
  136 + """
130 137 partition_size = int(len(user.item_score)*self.partition_proportion)
131 138 cross_item_score = user.item_score.copy()
132 139 for r in range(self.rounds):
... ... @@ -135,7 +142,7 @@ class CrossValidation:
135 142 if len(cross_item_score)>0:
136 143 random_key = random.choice(cross_item_score.keys())
137 144 else:
138   - print "cross_item_score vazio"
  145 + logging.critical("cross_item_score vazio")
139 146 exit(1)
140 147 round_partition[random_key] = cross_item_score.pop(random_key)
141 148 round_user = User(cross_item_score)
... ...
src/similarity_measure.py
... ... @@ -21,44 +21,68 @@ import math
21 21 import stats
22 22  
23 23 def norm(x):
24   - """ Return vector norm. """
  24 + """
  25 + Return vector norm.
  26 + """
25 27 return math.sqrt(sum([x_i**2 for x_i in x]))
26 28  
27 29 def dot_product(x,y):
28   - """ Return dot product of vectors 'x' and 'y'. """
  30 + """
  31 + Return dot product of vectors 'x' and 'y'.
  32 + """
29 33 return sum([(x[i] * y[i]) for i in range(len(x))])
30 34  
31 35 class SimilarityMeasure:
32   - """ Abstraction for diferent similarity measure approaches. """
  36 + """
  37 + Abstraction for diferent similarity measure approaches.
  38 + """
33 39  
34 40 class Distance(SimilarityMeasure):
35   - """ Euclidian distance measure. """
  41 + """
  42 + Euclidian distance measure.
  43 + """
36 44 def __call__(self,x,y):
37   - """ Return euclidian distance between vectors 'x' and 'y'. """
  45 + """
  46 + Return euclidian distance between vectors 'x' and 'y'.
  47 + """
38 48 sum_pow = sum([((x[i] - y[i]) ** 2) for i in range(len(x))])
39 49 return math.sqrt(sum_pow)
40 50  
41 51 class Cosine(SimilarityMeasure):
42   - """ Cosine similarity measure. """
  52 + """
  53 + Cosine similarity measure.
  54 + """
43 55 def __call__(self,x,y):
44   - """ Return cosine of angle between vectors 'x' and 'y'. """
  56 + """
  57 + Return cosine of angle between vectors 'x' and 'y'.
  58 + """
45 59 return float(dot_product(x,y)/(norm(x)*norm(y)))
46 60  
47 61 class Pearson(SimilarityMeasure):
48   - """ Pearson coeficient measure. """ # FIXME: ZeroDivisionError
  62 + """
  63 + Pearson coeficient measure.
  64 + """
49 65 def __call__(self,x,y):
50 66 """ Return Pearson coeficient between vectors 'x' and 'y'. """
51   - return stats.pearsonr(x,y)
  67 + return stats.pearsonr(x,y) # FIXME: ZeroDivisionError
52 68  
53 69 class Spearman(SimilarityMeasure):
54   - """ Spearman correlation measure. """ # FIXME: ZeroDivisionError
  70 + """
  71 + Spearman correlation measure.
  72 + """
55 73 def __call__(self,x,y):
56   - """ Return Spearman correlation between vectors 'x' and 'y'. """
57   - return stats.spearmanr(x,y)
  74 + """
  75 + Return Spearman correlation between vectors 'x' and 'y'.
  76 + """
  77 + return stats.spearmanr(x,y) # FIXME: ZeroDivisionError
58 78  
59 79 class Tanimoto(SimilarityMeasure):
60   - " Tanimoto coeficient measure. """
  80 + """
  81 + Tanimoto coeficient measure.
  82 + """
61 83 def __call__(self,x,y):
62   - """ Return Tanimoto coeficient between vectors 'x' and 'y'. """
  84 + """
  85 + Return Tanimoto coeficient between vectors 'x' and 'y'.
  86 + """
63 87 z = [v for v in x if v in y]
64 88 return float(len(z))/(len(x)+len(y)-len(z))
... ...
src/strategy.py
... ... @@ -23,20 +23,30 @@ from data import *
23 23 from recommender import *
24 24  
25 25 class ReputationHeuristic:
26   - """ Abstraction for diferent reputation heuristics. """
  26 + """
  27 + Abstraction for diferent reputation heuristics.
  28 + """
27 29  
28 30 class BugsHeuristic(ReputationHeuristic):
29   - """ Reputation heuristic based on quantity of open bugs. """
  31 + """
  32 + Reputation heuristic based on quantity of open bugs.
  33 + """
30 34  
31 35 class RCBugsHeuristic(ReputationHeuristic):
32   - """ Reputation heuristic based on quantity of RC bugs. """
  36 + """
  37 + Reputation heuristic based on quantity of RC bugs.
  38 + """
33 39  
34 40 class PopularityHeuristic(ReputationHeuristic):
35   - """ Reputation heuristic based on popularity of packages. """
  41 + """
  42 + Reputation heuristic based on popularity of packages.
  43 + """
36 44  
37 45  
38 46 class PkgMatchDecider(xapian.MatchDecider):
39   - """ Extends xapian.MatchDecider to disconsider installed packages. """
  47 + """
  48 + Extends xapian.MatchDecider to disconsider installed packages.
  49 + """
40 50  
41 51 def __init__(self, installed_pkgs):
42 52 xapian.MatchDecider.__init__(self)
... ... @@ -47,18 +57,28 @@ class PkgMatchDecider(xapian.MatchDecider):
47 57  
48 58  
49 59 class RecommendationStrategy:
50   - """ Abstraction for diferent recommendation strategy. """
  60 + """
  61 + Abstraction for diferent recommendation strategy.
  62 + """
51 63  
52 64 class ItemReputationStrategy(RecommendationStrategy):
53   - """ Recommendation strategy based on items reputation. """
  65 + """
  66 + Recommendation strategy based on items reputation.
  67 + """
54 68 def run(self,items_list,heuristic):
55   - """ Perform recommendation strategy """
  69 + """
  70 + Perform recommendation strategy.
  71 + """
56 72 return RecomendationResult()
57 73  
58 74 class ContentBasedStrategy(RecommendationStrategy):
59   - """ Content-based recommendation strategy. """
  75 + """
  76 + Content-based recommendation strategy.
  77 + """
60 78 def run(self,recommender,user):
61   - """ Perform recommendation strategy """
  79 + """
  80 + Perform recommendation strategy.
  81 + """
62 82 profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50)
63 83 qp = xapian.QueryParser()
64 84 query = qp.parse_query(profile)
... ... @@ -72,9 +92,13 @@ class ContentBasedStrategy(RecommendationStrategy):
72 92 return RecommendationResult(item_score,20)
73 93  
74 94 class AxiContentBasedStrategy(RecommendationStrategy):
75   - """ Content-based recommendation strategy based on Apt-xapian-index. """
  95 + """
  96 + Content-based recommendation strategy based on Apt-xapian-index.
  97 + """
76 98 def run(self,recommender,user):
77   - """ Perform recommendation strategy """
  99 + """
  100 + Perform recommendation strategy.
  101 + """
78 102 profile = user.axi_tag_profile(recommender.items_repository,50)
79 103 query = xapian.Query(xapian.Query.OP_OR,profile)
80 104 enquire = xapian.Enquire(recommender.items_repository)
... ... @@ -87,19 +111,31 @@ class AxiContentBasedStrategy(RecommendationStrategy):
87 111 return RecommendationResult(item_score,20)
88 112  
89 113 class ColaborativeStrategy(RecommendationStrategy):
90   - """ Colaborative recommendation strategy. """
  114 + """
  115 + Colaborative recommendation strategy.
  116 + """
91 117 def run(self,user,users_repository,similarity_measure):
92   - """ Perform recommendation strategy """
  118 + """
  119 + Perform recommendation strategy.
  120 + """
93 121 return RecomendationResult()
94 122  
95 123 class KnowledgeBasedStrategy(RecommendationStrategy):
96   - """ Knowledge-based recommendation strategy. """
  124 + """
  125 + Knowledge-based recommendation strategy.
  126 + """
97 127 def run(self,user,knowledge_repository):
98   - """ Perform recommendation strategy """
  128 + """
  129 + Perform recommendation strategy.
  130 + """
99 131 return RecomendationResult()
100 132  
101 133 class DemographicStrategy(RecommendationStrategy):
102   - """ Recommendation strategy based on demographic data. """
  134 + """
  135 + Recommendation strategy based on demographic data.
  136 + """
103 137 def run(self,user,items_repository):
104   - """ Perform recommendation strategy """
  138 + """
  139 + Perform recommendation strategy.
  140 + """
105 141 return RecomendationResult()
... ...
src/user.py
... ... @@ -19,11 +19,12 @@
19 19  
20 20 import commands
21 21 import xapian
  22 +import logging
22 23  
23 24 class FilterTag(xapian.ExpandDecider):
24 25 def __call__(self, term):
25 26 """
26   - Return true if the term is a tag, else false
  27 + Return true if the term is a tag, else false.
27 28 """
28 29 return term[:2] == "XT"
29 30  
... ... @@ -52,7 +53,7 @@ class User:
52 53 profile = []
53 54 for res in eset:
54 55 profile.append(res.term)
55   - #print "%.2f %s" % (res.weight,res.term[2:])
  56 + logging.debug("%.2f %s" % (res.weight,res.term[2:]))
56 57 return profile
57 58  
58 59 def debtags_tag_profile(self,debtags_db,profile_size):
... ...