Commit 49638292ffb84578ac81aede125bf088bf376146
1 parent: f53e9576
Exists in master and in 1 other branch
Handling log messages at different levels:
- DEBUG: caught with the --debug option
- INFO: caught with the --verbose or --debug option
- WARNING, ERROR, CRITICAL: caught by default
(close #9)
Showing 7 changed files with 169 additions and 93 deletions
Show diff stats
src/app_recommender.py
| @@ -19,6 +19,7 @@ | @@ -19,6 +19,7 @@ | ||
| 19 | 19 | ||
| 20 | import os | 20 | import os |
| 21 | import sys | 21 | import sys |
| 22 | +import logging | ||
| 22 | 23 | ||
| 23 | from config import * | 24 | from config import * |
| 24 | from data import * | 25 | from data import * |
| @@ -28,41 +29,8 @@ from recommender import * | @@ -28,41 +29,8 @@ from recommender import * | ||
| 28 | from strategy import * | 29 | from strategy import * |
| 29 | from user import * | 30 | from user import * |
| 30 | 31 | ||
| 31 | -# Setup configuration | ||
| 32 | -#DB_PATH = "/var/lib/debtags/package-tags" | ||
| 33 | -#INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index") | ||
| 34 | -# | ||
| 35 | -#XAPIANDBPATH = os.environ.get("AXI_DB_PATH", "/var/lib/apt-xapian-index") | ||
| 36 | -#XAPIANDB = XAPIANDBPATH + "/index" | ||
| 37 | -#XAPIANDBVALUES = XAPIANDBPATH + "/values" | ||
| 38 | - | ||
| 39 | -def set_up_logger(cfg): | ||
| 40 | - log_format = '%(asctime)s AppRecommender %(levelname)s: %(message)s' | ||
| 41 | - log_level = logging.INFO | ||
| 42 | - if cfg.debug is 1: | ||
| 43 | - log_level = logging.DEBUG | ||
| 44 | - logging.basicConfig(level=log_level,format=log_format,filename=cfg.output) | ||
| 45 | - console = logging.StreamHandler(sys.stdout) | ||
| 46 | - console.setLevel(log_level) | ||
| 47 | - formatter = logging.Formatter('%(levelname)s: %(message)s') | ||
| 48 | - console.setFormatter(formatter) | ||
| 49 | - logging.getLogger('').addHandler(console) | ||
| 50 | - | ||
| 51 | def set_up_recommender(cfg): | 32 | def set_up_recommender(cfg): |
| 52 | -# reindex = 0 | ||
| 53 | -# axi = 0 | ||
| 54 | -# if len(sys.argv) == 2: | ||
| 55 | -# if sys.argv[1] == "axi": | ||
| 56 | -# axi = 1 | ||
| 57 | -# else: | ||
| 58 | -# DB_PATH = sys.argv[1] | ||
| 59 | -# reindex = 1 | ||
| 60 | -# elif len(sys.argv) > 2: | ||
| 61 | -# print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % | ||
| 62 | -# sys.argv[0]) | ||
| 63 | -# sys.exit(1) | ||
| 64 | - | ||
| 65 | - reindex = 0 | 33 | + reindex = 1 #FIXME should do it only if necessary |
| 66 | 34 | ||
| 67 | if cfg.strategy == "cta": | 35 | if cfg.strategy == "cta": |
| 68 | axi_db = xapian.Database(cfg.axi) | 36 | axi_db = xapian.Database(cfg.axi) |
| @@ -72,12 +40,13 @@ def set_up_recommender(cfg): | @@ -72,12 +40,13 @@ def set_up_recommender(cfg): | ||
| 72 | elif cfg.strategy == "ct": | 40 | elif cfg.strategy == "ct": |
| 73 | debtags_db = DebtagsDB(cfg.tags_db) | 41 | debtags_db = DebtagsDB(cfg.tags_db) |
| 74 | if not debtags_db.load(): | 42 | if not debtags_db.load(): |
| 75 | - print >> sys.stderr,("Could not load DebtagsDB from %s." % DB_PATH) | 43 | + logging.error("Could not load DebtagsDB from %s." % cfg.tags_db) |
| 76 | sys.exit(1) | 44 | sys.exit(1) |
| 77 | debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index)) | 45 | debtags_index = DebtagsIndex(os.path.expanduser(cfg.tags_index)) |
| 78 | debtags_index.load(debtags_db,reindex) | 46 | debtags_index.load(debtags_db,reindex) |
| 79 | app_rec = Recommender(debtags_index) | 47 | app_rec = Recommender(debtags_index) |
| 80 | app_rec.set_strategy(ContentBasedStrategy()) | 48 | app_rec.set_strategy(ContentBasedStrategy()) |
| 49 | + | ||
| 81 | return app_rec | 50 | return app_rec |
| 82 | 51 | ||
| 83 | def cross_validation(recommender): | 52 | def cross_validation(recommender): |
| @@ -90,7 +59,7 @@ def cross_validation(recommender): | @@ -90,7 +59,7 @@ def cross_validation(recommender): | ||
| 90 | if __name__ == '__main__': | 59 | if __name__ == '__main__': |
| 91 | cfg = Config() | 60 | cfg = Config() |
| 92 | cfg.load_options() | 61 | cfg.load_options() |
| 93 | - set_up_logger(cfg) | 62 | + cfg.set_logger() |
| 94 | rec = set_up_recommender(cfg) | 63 | rec = set_up_recommender(cfg) |
| 95 | user = LocalSystem() | 64 | user = LocalSystem() |
| 96 | result = rec.get_recommendation(user) | 65 | result = rec.get_recommendation(user) |
src/config.py
| @@ -20,7 +20,8 @@ | @@ -20,7 +20,8 @@ | ||
| 20 | import getopt | 20 | import getopt |
| 21 | import sys | 21 | import sys |
| 22 | import os | 22 | import os |
| 23 | -import logging | 23 | +from logging import * |
| 24 | +import logging.handlers | ||
| 24 | 25 | ||
| 25 | from ConfigParser import * | 26 | from ConfigParser import * |
| 26 | 27 | ||
| @@ -33,6 +34,7 @@ class Config(): | @@ -33,6 +34,7 @@ class Config(): | ||
| 33 | Set default configuration options. | 34 | Set default configuration options. |
| 34 | """ | 35 | """ |
| 35 | self.debug = 0 | 36 | self.debug = 0 |
| 37 | + self.verbose = 0 | ||
| 36 | self.output = "/dev/null" | 38 | self.output = "/dev/null" |
| 37 | self.config = None | 39 | self.config = None |
| 38 | self.tags_db = "/var/lib/debtags/package-tags" | 40 | self.tags_db = "/var/lib/debtags/package-tags" |
| @@ -47,7 +49,8 @@ class Config(): | @@ -47,7 +49,8 @@ class Config(): | ||
| 47 | """ | 49 | """ |
| 48 | print " [ general ]" | 50 | print " [ general ]" |
| 49 | print " -h, --help Print this help" | 51 | print " -h, --help Print this help" |
| 50 | - print " -d, --debug Set debug to true. Default is false." | 52 | + print " -d, --debug Set logging level to debug." |
| 53 | + print " -v, --verbose Set logging level to verbose." | ||
| 51 | print " -o, --output=PATH Path to file to save output." | 54 | print " -o, --output=PATH Path to file to save output." |
| 52 | print " -c, --config=PATH Path to configuration file." | 55 | print " -c, --config=PATH Path to configuration file." |
| 53 | print "" | 56 | print "" |
| @@ -89,6 +92,7 @@ class Config(): | @@ -89,6 +92,7 @@ class Config(): | ||
| 89 | os.abort() | 92 | os.abort() |
| 90 | 93 | ||
| 91 | self.debug = self.read_option('general', 'debug') | 94 | self.debug = self.read_option('general', 'debug') |
| 95 | + self.debug = self.read_option('general', 'verbose') | ||
| 92 | self.output_filename = self.read_option('general', 'output') | 96 | self.output_filename = self.read_option('general', 'output') |
| 93 | self.config = self.read_option('general', 'config') | 97 | self.config = self.read_option('general', 'config') |
| 94 | 98 | ||
| @@ -96,8 +100,8 @@ class Config(): | @@ -96,8 +100,8 @@ class Config(): | ||
| 96 | self.tags_index = self.read_option('recommender', 'tags_index') | 100 | self.tags_index = self.read_option('recommender', 'tags_index') |
| 97 | self.axi = self.read_option('recommender', 'axi') | 101 | self.axi = self.read_option('recommender', 'axi') |
| 98 | 102 | ||
| 99 | - short_options = "hdo:c:t:i:a:s:" | ||
| 100 | - long_options = ["help", "debug", "output=", "config=", | 103 | + short_options = "hdvo:c:t:i:a:s:" |
| 104 | + long_options = ["help", "debug", "verbose", "output=", "config=", | ||
| 101 | "tagsdb=", "tagsindex=", "axi=", "strategy="] | 105 | "tagsdb=", "tagsindex=", "axi=", "strategy="] |
| 102 | try: | 106 | try: |
| 103 | opts, args = getopt.getopt(sys.argv[1:], short_options, | 107 | opts, args = getopt.getopt(sys.argv[1:], short_options, |
| @@ -114,6 +118,8 @@ class Config(): | @@ -114,6 +118,8 @@ class Config(): | ||
| 114 | sys.exit() | 118 | sys.exit() |
| 115 | elif o in ("-d", "--debug"): | 119 | elif o in ("-d", "--debug"): |
| 116 | self.debug = 1 | 120 | self.debug = 1 |
| 121 | + elif o in ("-v", "--verbose"): | ||
| 122 | + self.verbose = 1 | ||
| 117 | elif o in ("-o", "--output"): | 123 | elif o in ("-o", "--output"): |
| 118 | self.output = p | 124 | self.output = p |
| 119 | elif o in ("-c", "--config"): | 125 | elif o in ("-c", "--config"): |
| @@ -129,3 +135,27 @@ class Config(): | @@ -129,3 +135,27 @@ class Config(): | ||
| 129 | self.strategy = p | 135 | self.strategy = p |
| 130 | else: | 136 | else: |
| 131 | assert False, "unhandled option" | 137 | assert False, "unhandled option" |
| 138 | + | ||
| 139 | + def set_logger(self): | ||
| 140 | + self.logger = getLogger('') # root logger is used by default | ||
| 141 | + self.logger.setLevel(DEBUG) | ||
| 142 | + | ||
| 143 | + if self.debug == 1: | ||
| 144 | + log_level = DEBUG | ||
| 145 | + elif self.verbose == 1: | ||
| 146 | + log_level = INFO | ||
| 147 | + else: | ||
| 148 | + log_level = WARNING | ||
| 149 | + | ||
| 150 | + console_handler = StreamHandler(sys.stdout) | ||
| 151 | + console_handler.setFormatter(Formatter('%(levelname)s: %(message)s')) | ||
| 152 | + console_handler.setLevel(log_level) | ||
| 153 | + self.logger.addHandler(console_handler) | ||
| 154 | + | ||
| 155 | + file_handler = logging.handlers.RotatingFileHandler(self.output, | ||
| 156 | + maxBytes=5000, | ||
| 157 | + backupCount=5) | ||
| 158 | + log_format = '%(asctime)s AppRecommender %(levelname)-8s %(message)s' | ||
| 159 | + file_handler.setFormatter(Formatter(log_format)) | ||
| 160 | + file_handler.setLevel(log_level) | ||
| 161 | + self.logger.addHandler(file_handler) |
src/data.py
| @@ -23,6 +23,7 @@ import re | @@ -23,6 +23,7 @@ import re | ||
| 23 | import xapian | 23 | import xapian |
| 24 | import axi | 24 | import axi |
| 25 | from debian import debtags | 25 | from debian import debtags |
| 26 | +import logging | ||
| 26 | 27 | ||
| 27 | class Item: | 28 | class Item: |
| 28 | """ """ | 29 | """ """ |
| @@ -38,7 +39,9 @@ class Package(Item): | @@ -38,7 +39,9 @@ class Package(Item): | ||
| 38 | print "debian pkg",self.id | 39 | print "debian pkg",self.id |
| 39 | 40 | ||
| 40 | def normalize_tags(string): | 41 | def normalize_tags(string): |
| 41 | - """ Normalize tag string so that it can be indexed and retrieved. """ | 42 | + """ |
| 43 | + Normalize tag string so that it can be indexed and retrieved. | ||
| 44 | + """ | ||
| 42 | return string.replace(':','_').replace('-','\'') | 45 | return string.replace(':','_').replace('-','\'') |
| 43 | 46 | ||
| 44 | # FIXME Data repositories should be singleton | 47 | # FIXME Data repositories should be singleton |
| @@ -53,12 +56,14 @@ class DebtagsDB(debtags.DB): | @@ -53,12 +56,14 @@ class DebtagsDB(debtags.DB): | ||
| 53 | self.read(open(self.path, "r"), lambda x: not tag_filter.match(x)) | 56 | self.read(open(self.path, "r"), lambda x: not tag_filter.match(x)) |
| 54 | return 1 | 57 | return 1 |
| 55 | except IOError: | 58 | except IOError: |
| 56 | - print >> sys.stderr, ("IOError: could not open debtags file \'%s\'" | ||
| 57 | - % self.path) | 59 | + logging.error("IOError: could not open debtags file \'%s\'" % |
| 60 | + self.path) | ||
| 58 | return 0 | 61 | return 0 |
| 59 | 62 | ||
| 60 | def get_relevant_tags(self,pkgs_list,qtd_of_tags): | 63 | def get_relevant_tags(self,pkgs_list,qtd_of_tags): |
| 61 | - """ Return most relevant tags considering a list of packages. """ | 64 | + """ |
| 65 | + Return most relevant tags considering a list of packages. | ||
| 66 | + """ | ||
| 62 | relevant_db = self.choose_packages(pkgs_list) | 67 | relevant_db = self.choose_packages(pkgs_list) |
| 63 | relevance_index = debtags.relevance_index_function(self,relevant_db) | 68 | relevance_index = debtags.relevance_index_function(self,relevant_db) |
| 64 | sorted_relevant_tags = sorted(relevant_db.iter_tags(), | 69 | sorted_relevant_tags = sorted(relevant_db.iter_tags(), |
| @@ -71,31 +76,35 @@ class DebtagsIndex(xapian.WritableDatabase): | @@ -71,31 +76,35 @@ class DebtagsIndex(xapian.WritableDatabase): | ||
| 71 | self.path = path | 76 | self.path = path |
| 72 | 77 | ||
| 73 | def load(self,debtags_db,reindex): | 78 | def load(self,debtags_db,reindex): |
| 74 | - """ Load an existing debtags index. """ | 79 | + """ |
| 80 | + Load an existing debtags index. | ||
| 81 | + """ | ||
| 75 | self.debtags_db = debtags_db | 82 | self.debtags_db = debtags_db |
| 76 | if not reindex: | 83 | if not reindex: |
| 77 | try: | 84 | try: |
| 78 | - print ("Opening existing debtags xapian index at \'%s\'" % | ||
| 79 | - self.path) | 85 | + logging.info("Opening existing debtags xapian index at \'%s\'" |
| 86 | + % self.path) | ||
| 80 | xapian.Database.__init__(self,self.path) | 87 | xapian.Database.__init__(self,self.path) |
| 81 | except xapian.DatabaseError: | 88 | except xapian.DatabaseError: |
| 82 | - print "Could not open debtags xapian index" | 89 | + logging.error("Could not open debtags xapian index") |
| 83 | reindex =1 | 90 | reindex =1 |
| 84 | if reindex: | 91 | if reindex: |
| 85 | self.reindex(debtags_db) | 92 | self.reindex(debtags_db) |
| 86 | 93 | ||
| 87 | def reindex(self,debtags_db): | 94 | def reindex(self,debtags_db): |
| 88 | - """ Create a xapian index for debtags info based on file 'debtags_db' | ||
| 89 | - and place it at 'index_path'. | 95 | + """ |
| 96 | + Create a xapian index for debtags info based on file 'debtags_db' and | ||
| 97 | + place it at 'index_path'. | ||
| 90 | """ | 98 | """ |
| 91 | if not os.path.exists(self.path): | 99 | if not os.path.exists(self.path): |
| 92 | os.makedirs(self.path) | 100 | os.makedirs(self.path) |
| 93 | - print "Creating new debtags xapian index at \'%s\'" % self.path | 101 | + logging.info("Creating new debtags xapian index at \'%s\'" % self.path) |
| 94 | xapian.WritableDatabase.__init__(self,self.path, | 102 | xapian.WritableDatabase.__init__(self,self.path, |
| 95 | - xapian.DB_CREATE_OR_OVERWRITE) | 103 | + xapian.DB_CREATE_OR_OVERWRITE) |
| 96 | for pkg,tags in debtags_db.iter_packages_tags(): | 104 | for pkg,tags in debtags_db.iter_packages_tags(): |
| 97 | doc = xapian.Document() | 105 | doc = xapian.Document() |
| 98 | doc.set_data(pkg) | 106 | doc.set_data(pkg) |
| 99 | for tag in tags: | 107 | for tag in tags: |
| 100 | doc.add_term(normalize_tags(tag)) | 108 | doc.add_term(normalize_tags(tag)) |
| 101 | - print "indexing ",self.add_document(doc) | 109 | + doc_id = self.add_document(doc) |
| 110 | + logging.debug("Indexing doc %d",doc_id) |
src/evaluation.py
| @@ -19,6 +19,8 @@ | @@ -19,6 +19,8 @@ | ||
| 19 | 19 | ||
| 20 | import random | 20 | import random |
| 21 | from collections import defaultdict | 21 | from collections import defaultdict |
| 22 | +import logging | ||
| 23 | + | ||
| 22 | from user import * | 24 | from user import * |
| 23 | from recommender import * | 25 | from recommender import * |
| 24 | 26 | ||
| @@ -92,14 +94,17 @@ class Evaluation: | @@ -92,14 +94,17 @@ class Evaluation: | ||
| 92 | return metric.run(self) | 94 | return metric.run(self) |
| 93 | 95 | ||
| 94 | class CrossValidation: | 96 | class CrossValidation: |
| 95 | - """ Cross-validation method """ | 97 | + """ |
| 98 | + Cross-validation method | ||
| 99 | + """ | ||
| 96 | def __init__(self,partition_proportion,rounds,rec,metrics_list): | 100 | def __init__(self,partition_proportion,rounds,rec,metrics_list): |
| 97 | - """ Set parameters: partition_size, rounds, recommender and | ||
| 98 | - metrics_list """ | 101 | + """ |
| 102 | + Set defaults: partition_size, rounds, recommender and metrics_list | ||
| 103 | + """ | ||
| 99 | if partition_proportion<1 and partition_proportion>0: | 104 | if partition_proportion<1 and partition_proportion>0: |
| 100 | self.partition_proportion = partition_proportion | 105 | self.partition_proportion = partition_proportion |
| 101 | else: | 106 | else: |
| 102 | - print "A proporcao de particao deve ser um avalor ente 0 e 1." | 107 | + logging.critical("A proporcao de particao deve ser um avalor ente 0 e 1.") |
| 103 | exit(1) | 108 | exit(1) |
| 104 | self.rounds = rounds | 109 | self.rounds = rounds |
| 105 | self.recommender = rec | 110 | self.recommender = rec |
| @@ -126,7 +131,9 @@ class CrossValidation: | @@ -126,7 +131,9 @@ class CrossValidation: | ||
| 126 | print "| Mean |%s" % (metrics_mean) | 131 | print "| Mean |%s" % (metrics_mean) |
| 127 | 132 | ||
| 128 | def run(self,user): | 133 | def run(self,user): |
| 129 | - """ Perform cross-validation. """ | 134 | + """ |
| 135 | + Perform cross-validation. | ||
| 136 | + """ | ||
| 130 | partition_size = int(len(user.item_score)*self.partition_proportion) | 137 | partition_size = int(len(user.item_score)*self.partition_proportion) |
| 131 | cross_item_score = user.item_score.copy() | 138 | cross_item_score = user.item_score.copy() |
| 132 | for r in range(self.rounds): | 139 | for r in range(self.rounds): |
| @@ -135,7 +142,7 @@ class CrossValidation: | @@ -135,7 +142,7 @@ class CrossValidation: | ||
| 135 | if len(cross_item_score)>0: | 142 | if len(cross_item_score)>0: |
| 136 | random_key = random.choice(cross_item_score.keys()) | 143 | random_key = random.choice(cross_item_score.keys()) |
| 137 | else: | 144 | else: |
| 138 | - print "cross_item_score vazio" | 145 | + logging.critical("cross_item_score vazio") |
| 139 | exit(1) | 146 | exit(1) |
| 140 | round_partition[random_key] = cross_item_score.pop(random_key) | 147 | round_partition[random_key] = cross_item_score.pop(random_key) |
| 141 | round_user = User(cross_item_score) | 148 | round_user = User(cross_item_score) |
src/similarity_measure.py
| @@ -21,44 +21,68 @@ import math | @@ -21,44 +21,68 @@ import math | ||
| 21 | import stats | 21 | import stats |
| 22 | 22 | ||
| 23 | def norm(x): | 23 | def norm(x): |
| 24 | - """ Return vector norm. """ | 24 | + """ |
| 25 | + Return vector norm. | ||
| 26 | + """ | ||
| 25 | return math.sqrt(sum([x_i**2 for x_i in x])) | 27 | return math.sqrt(sum([x_i**2 for x_i in x])) |
| 26 | 28 | ||
| 27 | def dot_product(x,y): | 29 | def dot_product(x,y): |
| 28 | - """ Return dot product of vectors 'x' and 'y'. """ | 30 | + """ |
| 31 | + Return dot product of vectors 'x' and 'y'. | ||
| 32 | + """ | ||
| 29 | return sum([(x[i] * y[i]) for i in range(len(x))]) | 33 | return sum([(x[i] * y[i]) for i in range(len(x))]) |
| 30 | 34 | ||
| 31 | class SimilarityMeasure: | 35 | class SimilarityMeasure: |
| 32 | - """ Abstraction for diferent similarity measure approaches. """ | 36 | + """ |
| 37 | + Abstraction for diferent similarity measure approaches. | ||
| 38 | + """ | ||
| 33 | 39 | ||
| 34 | class Distance(SimilarityMeasure): | 40 | class Distance(SimilarityMeasure): |
| 35 | - """ Euclidian distance measure. """ | 41 | + """ |
| 42 | + Euclidian distance measure. | ||
| 43 | + """ | ||
| 36 | def __call__(self,x,y): | 44 | def __call__(self,x,y): |
| 37 | - """ Return euclidian distance between vectors 'x' and 'y'. """ | 45 | + """ |
| 46 | + Return euclidian distance between vectors 'x' and 'y'. | ||
| 47 | + """ | ||
| 38 | sum_pow = sum([((x[i] - y[i]) ** 2) for i in range(len(x))]) | 48 | sum_pow = sum([((x[i] - y[i]) ** 2) for i in range(len(x))]) |
| 39 | return math.sqrt(sum_pow) | 49 | return math.sqrt(sum_pow) |
| 40 | 50 | ||
| 41 | class Cosine(SimilarityMeasure): | 51 | class Cosine(SimilarityMeasure): |
| 42 | - """ Cosine similarity measure. """ | 52 | + """ |
| 53 | + Cosine similarity measure. | ||
| 54 | + """ | ||
| 43 | def __call__(self,x,y): | 55 | def __call__(self,x,y): |
| 44 | - """ Return cosine of angle between vectors 'x' and 'y'. """ | 56 | + """ |
| 57 | + Return cosine of angle between vectors 'x' and 'y'. | ||
| 58 | + """ | ||
| 45 | return float(dot_product(x,y)/(norm(x)*norm(y))) | 59 | return float(dot_product(x,y)/(norm(x)*norm(y))) |
| 46 | 60 | ||
| 47 | class Pearson(SimilarityMeasure): | 61 | class Pearson(SimilarityMeasure): |
| 48 | - """ Pearson coeficient measure. """ # FIXME: ZeroDivisionError | 62 | + """ |
| 63 | + Pearson coeficient measure. | ||
| 64 | + """ | ||
| 49 | def __call__(self,x,y): | 65 | def __call__(self,x,y): |
| 50 | """ Return Pearson coeficient between vectors 'x' and 'y'. """ | 66 | """ Return Pearson coeficient between vectors 'x' and 'y'. """ |
| 51 | - return stats.pearsonr(x,y) | 67 | + return stats.pearsonr(x,y) # FIXME: ZeroDivisionError |
| 52 | 68 | ||
| 53 | class Spearman(SimilarityMeasure): | 69 | class Spearman(SimilarityMeasure): |
| 54 | - """ Spearman correlation measure. """ # FIXME: ZeroDivisionError | 70 | + """ |
| 71 | + Spearman correlation measure. | ||
| 72 | + """ | ||
| 55 | def __call__(self,x,y): | 73 | def __call__(self,x,y): |
| 56 | - """ Return Spearman correlation between vectors 'x' and 'y'. """ | ||
| 57 | - return stats.spearmanr(x,y) | 74 | + """ |
| 75 | + Return Spearman correlation between vectors 'x' and 'y'. | ||
| 76 | + """ | ||
| 77 | + return stats.spearmanr(x,y) # FIXME: ZeroDivisionError | ||
| 58 | 78 | ||
| 59 | class Tanimoto(SimilarityMeasure): | 79 | class Tanimoto(SimilarityMeasure): |
| 60 | - " Tanimoto coeficient measure. """ | 80 | + """ |
| 81 | + Tanimoto coeficient measure. | ||
| 82 | + """ | ||
| 61 | def __call__(self,x,y): | 83 | def __call__(self,x,y): |
| 62 | - """ Return Tanimoto coeficient between vectors 'x' and 'y'. """ | 84 | + """ |
| 85 | + Return Tanimoto coeficient between vectors 'x' and 'y'. | ||
| 86 | + """ | ||
| 63 | z = [v for v in x if v in y] | 87 | z = [v for v in x if v in y] |
| 64 | return float(len(z))/(len(x)+len(y)-len(z)) | 88 | return float(len(z))/(len(x)+len(y)-len(z)) |
src/strategy.py
| @@ -23,20 +23,30 @@ from data import * | @@ -23,20 +23,30 @@ from data import * | ||
| 23 | from recommender import * | 23 | from recommender import * |
| 24 | 24 | ||
| 25 | class ReputationHeuristic: | 25 | class ReputationHeuristic: |
| 26 | - """ Abstraction for diferent reputation heuristics. """ | 26 | + """ |
| 27 | + Abstraction for diferent reputation heuristics. | ||
| 28 | + """ | ||
| 27 | 29 | ||
| 28 | class BugsHeuristic(ReputationHeuristic): | 30 | class BugsHeuristic(ReputationHeuristic): |
| 29 | - """ Reputation heuristic based on quantity of open bugs. """ | 31 | + """ |
| 32 | + Reputation heuristic based on quantity of open bugs. | ||
| 33 | + """ | ||
| 30 | 34 | ||
| 31 | class RCBugsHeuristic(ReputationHeuristic): | 35 | class RCBugsHeuristic(ReputationHeuristic): |
| 32 | - """ Reputation heuristic based on quantity of RC bugs. """ | 36 | + """ |
| 37 | + Reputation heuristic based on quantity of RC bugs. | ||
| 38 | + """ | ||
| 33 | 39 | ||
| 34 | class PopularityHeuristic(ReputationHeuristic): | 40 | class PopularityHeuristic(ReputationHeuristic): |
| 35 | - """ Reputation heuristic based on popularity of packages. """ | 41 | + """ |
| 42 | + Reputation heuristic based on popularity of packages. | ||
| 43 | + """ | ||
| 36 | 44 | ||
| 37 | 45 | ||
| 38 | class PkgMatchDecider(xapian.MatchDecider): | 46 | class PkgMatchDecider(xapian.MatchDecider): |
| 39 | - """ Extends xapian.MatchDecider to disconsider installed packages. """ | 47 | + """ |
| 48 | + Extends xapian.MatchDecider to disconsider installed packages. | ||
| 49 | + """ | ||
| 40 | 50 | ||
| 41 | def __init__(self, installed_pkgs): | 51 | def __init__(self, installed_pkgs): |
| 42 | xapian.MatchDecider.__init__(self) | 52 | xapian.MatchDecider.__init__(self) |
| @@ -47,18 +57,28 @@ class PkgMatchDecider(xapian.MatchDecider): | @@ -47,18 +57,28 @@ class PkgMatchDecider(xapian.MatchDecider): | ||
| 47 | 57 | ||
| 48 | 58 | ||
| 49 | class RecommendationStrategy: | 59 | class RecommendationStrategy: |
| 50 | - """ Abstraction for diferent recommendation strategy. """ | 60 | + """ |
| 61 | + Abstraction for diferent recommendation strategy. | ||
| 62 | + """ | ||
| 51 | 63 | ||
| 52 | class ItemReputationStrategy(RecommendationStrategy): | 64 | class ItemReputationStrategy(RecommendationStrategy): |
| 53 | - """ Recommendation strategy based on items reputation. """ | 65 | + """ |
| 66 | + Recommendation strategy based on items reputation. | ||
| 67 | + """ | ||
| 54 | def run(self,items_list,heuristic): | 68 | def run(self,items_list,heuristic): |
| 55 | - """ Perform recommendation strategy """ | 69 | + """ |
| 70 | + Perform recommendation strategy. | ||
| 71 | + """ | ||
| 56 | return RecomendationResult() | 72 | return RecomendationResult() |
| 57 | 73 | ||
| 58 | class ContentBasedStrategy(RecommendationStrategy): | 74 | class ContentBasedStrategy(RecommendationStrategy): |
| 59 | - """ Content-based recommendation strategy. """ | 75 | + """ |
| 76 | + Content-based recommendation strategy. | ||
| 77 | + """ | ||
| 60 | def run(self,recommender,user): | 78 | def run(self,recommender,user): |
| 61 | - """ Perform recommendation strategy """ | 79 | + """ |
| 80 | + Perform recommendation strategy. | ||
| 81 | + """ | ||
| 62 | profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50) | 82 | profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50) |
| 63 | qp = xapian.QueryParser() | 83 | qp = xapian.QueryParser() |
| 64 | query = qp.parse_query(profile) | 84 | query = qp.parse_query(profile) |
| @@ -72,9 +92,13 @@ class ContentBasedStrategy(RecommendationStrategy): | @@ -72,9 +92,13 @@ class ContentBasedStrategy(RecommendationStrategy): | ||
| 72 | return RecommendationResult(item_score,20) | 92 | return RecommendationResult(item_score,20) |
| 73 | 93 | ||
| 74 | class AxiContentBasedStrategy(RecommendationStrategy): | 94 | class AxiContentBasedStrategy(RecommendationStrategy): |
| 75 | - """ Content-based recommendation strategy based on Apt-xapian-index. """ | 95 | + """ |
| 96 | + Content-based recommendation strategy based on Apt-xapian-index. | ||
| 97 | + """ | ||
| 76 | def run(self,recommender,user): | 98 | def run(self,recommender,user): |
| 77 | - """ Perform recommendation strategy """ | 99 | + """ |
| 100 | + Perform recommendation strategy. | ||
| 101 | + """ | ||
| 78 | profile = user.axi_tag_profile(recommender.items_repository,50) | 102 | profile = user.axi_tag_profile(recommender.items_repository,50) |
| 79 | query = xapian.Query(xapian.Query.OP_OR,profile) | 103 | query = xapian.Query(xapian.Query.OP_OR,profile) |
| 80 | enquire = xapian.Enquire(recommender.items_repository) | 104 | enquire = xapian.Enquire(recommender.items_repository) |
| @@ -87,19 +111,31 @@ class AxiContentBasedStrategy(RecommendationStrategy): | @@ -87,19 +111,31 @@ class AxiContentBasedStrategy(RecommendationStrategy): | ||
| 87 | return RecommendationResult(item_score,20) | 111 | return RecommendationResult(item_score,20) |
| 88 | 112 | ||
| 89 | class ColaborativeStrategy(RecommendationStrategy): | 113 | class ColaborativeStrategy(RecommendationStrategy): |
| 90 | - """ Colaborative recommendation strategy. """ | 114 | + """ |
| 115 | + Colaborative recommendation strategy. | ||
| 116 | + """ | ||
| 91 | def run(self,user,users_repository,similarity_measure): | 117 | def run(self,user,users_repository,similarity_measure): |
| 92 | - """ Perform recommendation strategy """ | 118 | + """ |
| 119 | + Perform recommendation strategy. | ||
| 120 | + """ | ||
| 93 | return RecomendationResult() | 121 | return RecomendationResult() |
| 94 | 122 | ||
| 95 | class KnowledgeBasedStrategy(RecommendationStrategy): | 123 | class KnowledgeBasedStrategy(RecommendationStrategy): |
| 96 | - """ Knowledge-based recommendation strategy. """ | 124 | + """ |
| 125 | + Knowledge-based recommendation strategy. | ||
| 126 | + """ | ||
| 97 | def run(self,user,knowledge_repository): | 127 | def run(self,user,knowledge_repository): |
| 98 | - """ Perform recommendation strategy """ | 128 | + """ |
| 129 | + Perform recommendation strategy. | ||
| 130 | + """ | ||
| 99 | return RecomendationResult() | 131 | return RecomendationResult() |
| 100 | 132 | ||
| 101 | class DemographicStrategy(RecommendationStrategy): | 133 | class DemographicStrategy(RecommendationStrategy): |
| 102 | - """ Recommendation strategy based on demographic data. """ | 134 | + """ |
| 135 | + Recommendation strategy based on demographic data. | ||
| 136 | + """ | ||
| 103 | def run(self,user,items_repository): | 137 | def run(self,user,items_repository): |
| 104 | - """ Perform recommendation strategy """ | 138 | + """ |
| 139 | + Perform recommendation strategy. | ||
| 140 | + """ | ||
| 105 | return RecomendationResult() | 141 | return RecomendationResult() |
src/user.py
| @@ -19,11 +19,12 @@ | @@ -19,11 +19,12 @@ | ||
| 19 | 19 | ||
| 20 | import commands | 20 | import commands |
| 21 | import xapian | 21 | import xapian |
| 22 | +import logging | ||
| 22 | 23 | ||
| 23 | class FilterTag(xapian.ExpandDecider): | 24 | class FilterTag(xapian.ExpandDecider): |
| 24 | def __call__(self, term): | 25 | def __call__(self, term): |
| 25 | """ | 26 | """ |
| 26 | - Return true if the term is a tag, else false | 27 | + Return true if the term is a tag, else false. |
| 27 | """ | 28 | """ |
| 28 | return term[:2] == "XT" | 29 | return term[:2] == "XT" |
| 29 | 30 | ||
| @@ -52,7 +53,7 @@ class User: | @@ -52,7 +53,7 @@ class User: | ||
| 52 | profile = [] | 53 | profile = [] |
| 53 | for res in eset: | 54 | for res in eset: |
| 54 | profile.append(res.term) | 55 | profile.append(res.term) |
| 55 | - #print "%.2f %s" % (res.weight,res.term[2:]) | 56 | + logging.debug("%.2f %s" % (res.weight,res.term[2:])) |
| 56 | return profile | 57 | return profile |
| 57 | 58 | ||
| 58 | def debtags_tag_profile(self,debtags_db,profile_size): | 59 | def debtags_tag_profile(self,debtags_db,profile_size): |