diff --git a/src/app_recommender.py b/src/app_recommender.py index 4c48ddd..1cd62a7 100755 --- a/src/app_recommender.py +++ b/src/app_recommender.py @@ -1,6 +1,6 @@ #!/usr/bin/python -# AppRecommender - A GNU/Linux application recommender +# AppRecommender - a GNU/Linux application recommender. # # Copyright (C) 2010 Tassia Camoes # diff --git a/src/config.py b/src/config.py index 02fc300..8183079 100644 --- a/src/config.py +++ b/src/config.py @@ -1,6 +1,6 @@ #!/usr/bin/python -# AppRecommender - A GNU/Linux application recommender +# config - python module for configuration options. # # Copyright (C) 2010 Tassia Camoes # @@ -144,6 +144,9 @@ class Config(): assert False, "unhandled option" def set_logger(self): + """ + Configure application logger and log level. + """ self.logger = getLogger('') # root logger is used by default self.logger.setLevel(DEBUG) diff --git a/src/cross_validation.py b/src/cross_validation.py index 97dbd93..ea571d6 100755 --- a/src/cross_validation.py +++ b/src/cross_validation.py @@ -1,6 +1,7 @@ #!/usr/bin/python -# AppRecommender - A GNU/Linux application recommender +# CrossValidation - python module for classes and methods related to +# recommenders evaluation. # # Copyright (C) 2010 Tassia Camoes # @@ -47,6 +48,7 @@ if __name__ == '__main__': metrics.append(Recall()) validation = CrossValidation(0.3,10,rec,metrics) validation.run(user) + print validation end_time = datetime.datetime.now() logging.debug("Cross-validation completed at %s" % end_time) diff --git a/src/data.py b/src/data.py index fcca39c..6097d3b 100644 --- a/src/data.py +++ b/src/data.py @@ -1,6 +1,6 @@ #!/usr/bin/python -# AppRecommender - A GNU/Linux application recommender +# data - python module for data sources classes and methods. # # Copyright (C) 2010 Tassia Camoes # @@ -29,32 +29,50 @@ import hashlib from error import Error class Item: - """ """ + """ + Generic item definition. 
+ """ class Package(Item): - """ """ + """ + Definition of a GNU/Linux application as a recommender item. + """ def __init__(self,package_name): - """ """ + """ + Set initial attributes. + """ self.package_name = package_name - def load_package_info(self): - """ """ - print "debian pkg",self.id - def normalize_tags(string): """ - Normalize tag string so that it can be indexed and retrieved. + Substitute string characters : by _ and - by '. + Examples: + admin::package-management -> admin__package'management + implemented-in::c++ -> implemented'in__c++ """ return string.replace(':','_').replace('-','\'') class Singleton(object): + """ + Base class for inheritance of only-one-instance classes. + Singleton design pattern. + """ def __new__(cls, *args, **kwargs): + """ + Creates a new instance of the class only if none already exists. + """ if '_inst' not in vars(cls): cls._inst = object.__new__(cls) return cls._inst class TagsXapianIndex(xapian.WritableDatabase,Singleton): + """ + Data source for tags info defined as a singleton xapian database. + """ def __init__(self,cfg): + """ + Set initial attributes. + """ self.path = os.path.expanduser(cfg.tags_index) self.db_path = os.path.expanduser(cfg.tags_db) self.debtags_db = debtags.DB() @@ -67,6 +85,9 @@ class TagsXapianIndex(xapian.WritableDatabase,Singleton): self.load_index(cfg.reindex) def load_db(self): + """ + Load debtags database from the source file. + """ tag_filter = re.compile(r"^special::.+$|^.+::TODO$") try: db_file = open(self.db_path, "r") diff --git a/src/demo_rec.py b/src/demo_rec.py index f69e1b8..ea6802d 100755 --- a/src/demo_rec.py +++ b/src/demo_rec.py @@ -1,6 +1,6 @@ #!/usr/bin/python -# AppRecommender - A GNU/Linux application recommender +# DemoRecommender - demonstration of a GNU/Linux application recommender.
# # Copyright (C) 2010 Tassia Camoes # diff --git a/src/error.py b/src/error.py index 70e455f..8a38e00 100644 --- a/src/error.py +++ b/src/error.py @@ -1,3 +1,24 @@ +#!/usr/bin/python + +# error.py - python module for error definition. +# +# Copyright (C) 2010 Tassia Camoes +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + class Error(Exception): - """Base class for exceptions.""" + """ + Base class for exceptions. + """ pass diff --git a/src/evaluation.py b/src/evaluation.py index 2c55f17..001da4a 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -1,6 +1,7 @@ #!/usr/bin/python -# AppRecommender - A GNU/Linux application recommender +# evaluation - python module for classes and methods related to recommenders +# evaluation. # # Copyright (C) 2010 Tassia Camoes # @@ -25,30 +26,57 @@ from user import * from recommender import * class Metric: - """ """ + """ + Base class for metrics. Strategy design pattern. + """ + pass class Precision(Metric): - """ """ + """ + Accuracy evaluation metric defined as the percentage of relevant items + among the predicted ones. + """ def __init__(self): + """ + Set metric description. + """ self.desc = " Precision " def run(self,evaluation): + """ + Compute metric.
+ """ return float(len(evaluation.predicted_real))/len(evaluation.predicted_relevant) class Recall(Metric): - """ """ + """ + Accuracy evaluation metric defined as the percentage of relevant items + which were predicted as such. + """ def __init__(self): + """ + Set metric description. + """ self.desc = " Recall " def run(self,evaluation): + """ + Compute metric. + """ return float(len(evaluation.predicted_real))/len(evaluation.real_relevant) class F1(Metric): """ """ def __init__(self): + """ + Set metric description. + """ self.desc = " F1 " def run(self,evaluation): + """ + Compute metric. + """ p = Precision().run(evaluation) r = Recall().run(evaluation) return float((2*p*r)/(p+r)) @@ -56,80 +84,110 @@ class F1(Metric): class MAE(Metric): """ """ def __init__(self): + """ + Set metric description. + """ self.desc = " MAE " def run(self,evaluation): - print "run" + """ + Compute metric. + """ + print "---" #FIXME class MSE(Metric): """ """ def __init__(self): + """ + Set metric description. + """ self.desc = " MSE " def run(self,evaluation): - print "run" + """ + Compute metric. + """ + print "---" #FIXME class Coverage(Metric): """ """ def __init__(self): + """ + Set metric description. + """ self.desc = " Coverage " def run(self,evaluation): - print "run" + """ + Compute metric. + """ + print "---" #FIXME class Evaluation: - """ """ + """ + Class designed to perform prediction evaluation, given data and metric. + """ def __init__(self,predicted_result,real_result): - """ """ + """ + Set initial parameters.
+ """ self.predicted_item_scores = predicted_result.item_score self.predicted_relevant = predicted_result.get_prediction() self.real_item_scores = real_result.item_score self.real_relevant = real_result.get_prediction() self.predicted_real = [v for v in self.predicted_relevant if v in self.real_relevant] - print len(self.predicted_relevant) - print len(self.real_relevant) - print len(self.predicted_real) + #print len(self.predicted_relevant) + #print len(self.real_relevant) + #print len(self.predicted_real) def run(self,metric): + """ + Perform the evaluation with the given metric. + """ return metric.run(self) class CrossValidation: """ - Cross-validation method + Class designed to perform cross-validation process. """ def __init__(self,partition_proportion,rounds,rec,metrics_list): """ - Set defaults: partition_size, rounds, recommender and metrics_list + Set initial parameters. """ if partition_proportion<1 and partition_proportion>0: self.partition_proportion = partition_proportion else: - logging.critical("A proporcao de particao deve ser um avalor ente 0 e 1.") + logging.critical("Partition proportion must be a value in the + interval [0,1].") raise Error self.rounds = rounds self.recommender = rec self.metrics_list = metrics_list self.cross_results = defaultdict(list) - def print_result(self): - print "" + def __str__(self): + """ + String representation of the object. 
+ """ + str = "\n" metrics_desc = "" for metric in self.metrics_list: metrics_desc += "%s|" % (metric.desc) - print "| Round |%s" % metrics_desc + str += "| Round |%s\n" % metrics_desc for r in range(self.rounds): metrics_result = "" for metric in self.metrics_list: metrics_result += (" %.2f |" % (self.cross_results[metric.desc][r])) - print "| %d |%s" % (r,metrics_result) + str += "| %d |%s\n" % (r,metrics_result) metrics_mean = "" for metric in self.metrics_list: mean = float(sum(self.cross_results[metric.desc]) / len(self.cross_results[metric.desc])) metrics_mean += " %.2f |" % (mean) - print "| Mean |%s" % (metrics_mean) + str += "| Mean |%s\n" % (metrics_mean) + return str def run(self,user): """ @@ -144,7 +202,7 @@ class CrossValidation: if len(cross_item_score)>0: random_key = random.choice(cross_item_score.keys()) else: - logging.critical("cross_item_score vazio") + logging.critical("Empty cross_item_score.") raise Error round_partition[random_key] = cross_item_score.pop(random_key) round_user = User(cross_item_score) @@ -157,5 +215,4 @@ class CrossValidation: while len(round_partition)>0: item,score = round_partition.popitem() cross_item_score[item] = score - self.print_result() diff --git a/src/generate_doc.sh b/src/generate_doc.sh index 06d42b0..8f22842 100755 --- a/src/generate_doc.sh +++ b/src/generate_doc.sh @@ -1,5 +1,23 @@ #!/bin/bash +# +# generate_doc.sh - shell script to generate documentation using doxygen. +# +# Copyright (C) 2010 Tassia Camoes +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# Get project version from git repository TAG=$(git describe --tags --abbrev=0) sed -i "s/^PROJECT_NUMBER.*$/PROJECT_NUMBER\t\t= $TAG/" ../doc/doxy_config rm -Rf ../doc/html diff --git a/src/recommender.py b/src/recommender.py index 416886c..6e392e4 100644 --- a/src/recommender.py +++ b/src/recommender.py @@ -1,6 +1,6 @@ #!/usr/bin/python -# AppRecommender - A GNU/Linux application recommender +# recommender - python module for classes related to recommenders. # # Copyright (C) 2010 Tassia Camoes # @@ -23,11 +23,20 @@ from strategy import * from error import Error class RecommendationResult: + """ + Class designed to describe a recommendation result: items and scores. + """ def __init__(self,item_score,size): + """ + Set initial parameters. + """ self.item_score = item_score self.size = size def __str__(self): + """ + String representation of the object. + """ result = self.get_prediction() str = "\n" for i in range(len(result)): @@ -35,12 +44,20 @@ class RecommendationResult: return str def get_prediction(self): + """ + Return prediction based on recommendation size (number of items). + """ sorted_result = sorted(self.item_score.items(), key=itemgetter(1)) return sorted_result[:self.size] class Recommender: - """ """ + """ + Class designed to play the role of recommender. + """ def __init__(self,cfg): + """ + Set initial parameters. + """ try: strategy = "self."+cfg.strategy+"(cfg)" exec(strategy) @@ -50,17 +67,28 @@ class Recommender: raise Error def ct(self,cfg): + """ + Perform content-based recommendation using tags index as source data. + """ self.items_repository = TagsXapianIndex(cfg) self.strategy = ContentBasedStrategy() def cta(self,cfg): + """ + Perform content-based recommendation using apt-xapian-index as source + data.
+ """ self.items_repository = xapian.Database(cfg.axi) self.strategy = AxiContentBasedStrategy() def set_strategy(self,strategy): - """ """ + """ + Set the recommendation strategy. + """ self.strategy = strategy def get_recommendation(self,user): - """ """ + """ + Produces recommendation using previously loaded strategy. + """ return self.strategy.run(self,user) diff --git a/src/similarity_measure.py b/src/similarity_measure.py index 10a9ec5..2bbec38 100644 --- a/src/similarity_measure.py +++ b/src/similarity_measure.py @@ -1,6 +1,7 @@ #!/usr/bin/python -# AppRecommender - A GNU/Linux application recommender +# similarity-measure - python module for classes and methods related to +# measuring similarity between two sets of data. # # Copyright (C) 2010 Tassia Camoes # diff --git a/src/strategy.py b/src/strategy.py index 22d6f49..54d552c 100644 --- a/src/strategy.py +++ b/src/strategy.py @@ -1,6 +1,7 @@ #!/usr/bin/python -# AppRecommender - A GNU/Linux application recommender +# strategy - python module for classes and methods related to recommendation +# strategies. # # Copyright (C) 2010 Tassia Camoes # @@ -26,40 +27,51 @@ class ReputationHeuristic: """ Abstraction for diferent reputation heuristics. """ + pass class BugsHeuristic(ReputationHeuristic): """ Reputation heuristic based on quantity of open bugs. """ + pass class RCBugsHeuristic(ReputationHeuristic): """ Reputation heuristic based on quantity of RC bugs. """ + pass class PopularityHeuristic(ReputationHeuristic): """ Reputation heuristic based on popularity of packages. """ + pass class PkgMatchDecider(xapian.MatchDecider): """ - Extends xapian.MatchDecider to disconsider installed packages. + Extend xapian.MatchDecider to not consider installed packages. """ def __init__(self, installed_pkgs): + """ + Set initial parameters. + """ xapian.MatchDecider.__init__(self) self.installed_pkgs = installed_pkgs def __call__(self, doc): + """ + True if the package is not already installed. 
+ """ return doc.get_data() not in self.installed_pkgs class RecommendationStrategy: """ - Abstraction for diferent recommendation strategy. + Base class for recommendation strategies. """ + pass class ItemReputationStrategy(RecommendationStrategy): """ diff --git a/src/user.py b/src/user.py index 86c6c6d..e0394dc 100644 --- a/src/user.py +++ b/src/user.py @@ -1,6 +1,6 @@ #!/usr/bin/python -# AppRecommender - A GNU/Linux application recommender +# user - python module for classes and methods related to recommenders' users. # # Copyright (C) 2010 Tassia Camoes # @@ -23,6 +23,9 @@ import logging import apt class FilterTag(xapian.ExpandDecider): + """ + Extend xapian.ExpandDecider to consider only tag terms. + """ def __call__(self, term): """ Return true if the term is a tag, else false. @@ -30,29 +33,28 @@ class FilterTag(xapian.ExpandDecider): return term[:2] == "XT" class User: - """ """ + """ + Define a user of a recommender. + """ def __init__(self,item_score,user_id=0,demographic_profile=0): - """ """ + """ + Set initial parameters. + """ self.id = user_id self.item_score = item_score self.pkg_profile = self.item_score.keys() self.demographic_profile = demographic_profile def items(self): + """ + Return the items (keys of the item/score dictionary), without scores. + """ return self.item_score.keys() - def maximal_pkg_profile(self): - cache = apt.Cache() - old_profile_size = len(self.pkg_profile) - for p in self.pkg_profile[:]: #iterate list copy - pkg = cache[p] - if pkg.is_auto_installed: - self.pkg_profile.remove(p) - profile_size = len(self.pkg_profile) - logging.info("Reduced packages profile size from %d to %d." % - (old_profile_size, profile_size)) - def axi_tag_profile(self,apt_xapian_index,profile_size): + """ + Return most relevant tags for a list of packages based on axi.
+ """ terms = [] for item in self.pkg_profile: terms.append("XP"+item) @@ -70,15 +72,38 @@ class User: return profile def txi_tag_profile(self,tags_xapian_index,profile_size): + """ + Return most relevant tags for a list of packages based on tags index. + """ return tags_xapian_index.relevant_tags_from_db(self.pkg_profile, profile_size) class LocalSystem(User): - """ """ + """ + Extend the class User to consider the packages installed on the local + system as the set of selected items. + """ def __init__(self): + """ + Set initial parameters. + """ item_score = {} dpkg_output = commands.getoutput('/usr/bin/dpkg --get-selections') for line in dpkg_output.splitlines(): pkg = line.split('\t')[0] item_score[pkg] = 1 User.__init__(self,item_score) + + def maximal_pkg_profile(self): + """ + Remove automatically installed packages from the package profile, in place. + """ + cache = apt.Cache() + old_profile_size = len(self.pkg_profile) + for p in self.pkg_profile[:]: #iterate list copy + pkg = cache[p] + if pkg.is_auto_installed: + self.pkg_profile.remove(p) + profile_size = len(self.pkg_profile) + logging.info("Reduced packages profile size from %d to %d." % + (old_profile_size, profile_size)) -- libgit2 0.21.2