Commit 2255aea0def90c40a6512e86637e1559d65711fc

Authored by Tássia Camões Araújo
1 parent 7c99a2c6
Exists in master and in 1 other branch add_vagrant

Documentation improved (using python docstring).

src/app_recommender.py
1 1 #!/usr/bin/python
2 2  
3   -# AppRecommender - A GNU/Linux application recommender
  3 +# AppRecommender - a GNU/Linux application recommender.
4 4 #
5 5 # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6 6 #
... ...
src/config.py
1 1 #!/usr/bin/python
2 2  
3   -# AppRecommender - A GNU/Linux application recommender
  3 +# config - python module for configuration options.
4 4 #
5 5 # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6 6 #
... ... @@ -144,6 +144,9 @@ class Config():
144 144 assert False, "unhandled option"
145 145  
146 146 def set_logger(self):
  147 + """
  148 + Configure application logger and log level.
  149 + """
147 150 self.logger = getLogger('') # root logger is used by default
148 151 self.logger.setLevel(DEBUG)
149 152  
... ...
src/cross_validation.py
1 1 #!/usr/bin/python
2 2  
3   -# AppRecommender - A GNU/Linux application recommender
  3 +# CrossValidation - python module for classes and methods related to
  4 +# recommenders evaluation.
4 5 #
5 6 # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6 7 #
... ... @@ -47,6 +48,7 @@ if __name__ == '__main__':
47 48 metrics.append(Recall())
48 49 validation = CrossValidation(0.3,10,rec,metrics)
49 50 validation.run(user)
  51 + print validation
50 52  
51 53 end_time = datetime.datetime.now()
52 54 logging.debug("Cross-validation completed at %s" % end_time)
... ...
src/data.py
1 1 #!/usr/bin/python
2 2  
3   -# AppRecommender - A GNU/Linux application recommender
  3 +# data - python module for data sources classes and methods.
4 4 #
5 5 # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6 6 #
... ... @@ -29,32 +29,50 @@ import hashlib
29 29 from error import Error
30 30  
31 31 class Item:
32   - """ """
  32 + """
  33 + Generic item definition.
  34 + """
33 35  
34 36 class Package(Item):
35   - """ """
  37 + """
  38 + Definition of a GNU/Linux application as a recommender item.
  39 + """
36 40 def __init__(self,package_name):
37   - """ """
  41 + """
  42 + Set initial attributes.
  43 + """
38 44 self.package_name = package_name
39 45  
40   - def load_package_info(self):
41   - """ """
42   - print "debian pkg",self.id
43   -
44 46 def normalize_tags(string):
45 47 """
46   - Normalize tag string so that it can be indexed and retrieved.
  48 + Substitute string characters : by _ and - by '.
  49 + Examples:
  50 + admin::package-management -> admin__package'management
  51 + implemented-in::c++ -> implemented-in__c++
47 52 """
48 53 return string.replace(':','_').replace('-','\'')
49 54  
50 55 class Singleton(object):
  56 + """
  57 + Base class for inheritance of only-one-instance classes.
  58 + Singleton design pattern.
  59 + """
51 60 def __new__(cls, *args, **kwargs):
  61 + """
  62 + Creates a new instance of the class only if none already exists.
  63 + """
52 64 if '_inst' not in vars(cls):
53 65 cls._inst = object.__new__(cls)
54 66 return cls._inst
55 67  
56 68 class TagsXapianIndex(xapian.WritableDatabase,Singleton):
  69 + """
  70 + Data source for tags info defined as a singleton xapian database.
  71 + """
57 72 def __init__(self,cfg):
  73 + """
  74 + Set initial attributes.
  75 + """
58 76 self.path = os.path.expanduser(cfg.tags_index)
59 77 self.db_path = os.path.expanduser(cfg.tags_db)
60 78 self.debtags_db = debtags.DB()
... ... @@ -67,6 +85,9 @@ class TagsXapianIndex(xapian.WritableDatabase,Singleton):
67 85 self.load_index(cfg.reindex)
68 86  
69 87 def load_db(self):
  88 + """
  89 + Load debtags database from the source file.
  90 + """
70 91 tag_filter = re.compile(r"^special::.+$|^.+::TODO$")
71 92 try:
72 93 db_file = open(self.db_path, "r")
... ...
src/demo_rec.py
1 1 #!/usr/bin/python
2 2  
3   -# AppRecommender - A GNU/Linux application recommender
  3 +# DemoRecommender - demonstration of a GNU/Linux application recommender.
4 4 #
5 5 # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6 6 #
... ...
src/error.py
  1 +#!/usr/bin/python
  2 +
  3 +# error.py - python module for error definition.
  4 +#
  5 +# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
  6 +#
  7 +# This program is free software: you can redistribute it and/or modify
  8 +# it under the terms of the GNU General Public License as published by
  9 +# the Free Software Foundation, either version 3 of the License, or
  10 +# (at your option) any later version.
  11 +#
  12 +# This program is distributed in the hope that it will be useful,
  13 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 +# GNU General Public License for more details.
  16 +#
  17 +# You should have received a copy of the GNU General Public License
  18 +# along with this program. If not, see <http://www.gnu.org/licenses/>.
  19 +
1 20 class Error(Exception):
2   - """Base class for exceptions."""
  21 + """
  22 + Base class for exceptions.
  23 + """
3 24 pass
... ...
src/evaluation.py
1 1 #!/usr/bin/python
2 2  
3   -# AppRecommender - A GNU/Linux application recommender
  3 +# evaluation - python module for classes and methods related to recommenders
  4 +# evaluation.
4 5 #
5 6 # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6 7 #
... ... @@ -25,30 +26,57 @@ from user import *
25 26 from recommender import *
26 27  
27 28 class Metric:
28   - """ """
  29 + """
  30 + Base class for metrics. Strategy design pattern.
  31 + """
  32 + pass
29 33  
30 34 class Precision(Metric):
31   - """ """
  35 + """
  36 + Accuracy evaluation metric defined as the percentage of relevant items
  37 + among the predicted ones.
  38 + """
32 39 def __init__(self):
  40 + """
  41 + Set metric description.
  42 + """
33 43 self.desc = " Precision "
34 44  
35 45 def run(self,evaluation):
  46 + """
  47 + Compute metric.
  48 + """
36 49 return float(len(evaluation.predicted_real))/len(evaluation.predicted_relevant)
37 50  
38 51 class Recall(Metric):
39   - """ """
  52 + """
  53 + Accuracy evaluation metric defined as the percentage of relevant items
  54 + which were predicted as so.
  55 + """
40 56 def __init__(self):
  57 + """
  58 + Set metric description.
  59 + """
41 60 self.desc = " Recall "
42 61  
43 62 def run(self,evaluation):
  63 + """
  64 + Compute metric.
  65 + """
44 66 return float(len(evaluation.predicted_real))/len(evaluation.real_relevant)
45 67  
46 68 class F1(Metric):
47 69 """ """
48 70 def __init__(self):
  71 + """
  72 + Set metric description.
  73 + """
49 74 self.desc = " F1 "
50 75  
51 76 def run(self,evaluation):
  77 + """
  78 + Compute metric.
  79 + """
52 80 p = Precision().run(evaluation)
53 81 r = Recall().run(evaluation)
54 82 return float((2*p*r)/(p+r))
... ... @@ -56,80 +84,110 @@ class F1(Metric):
56 84 class MAE(Metric):
57 85 """ """
58 86 def __init__(self):
  87 + """
  88 + Set metric description.
  89 + """
59 90 self.desc = " MAE "
60 91  
61 92 def run(self,evaluation):
62   - print "run"
  93 + """
  94 + Compute metric.
  95 + """
  96 + print "---" #FIXME
63 97  
64 98 class MSE(Metric):
65 99 """ """
66 100 def __init__(self):
  101 + """
  102 + Set metric description.
  103 + """
67 104 self.desc = " MSE "
68 105  
69 106 def run(self,evaluation):
70   - print "run"
  107 + """
  108 + Compute metric.
  109 + """
  110 + print "---" #FIXME
71 111  
72 112 class Coverage(Metric):
73 113 """ """
74 114 def __init__(self):
  115 + """
  116 + Set metric description.
  117 + """
75 118 self.desc = " Coverage "
76 119  
77 120 def run(self,evaluation):
78   - print "run"
  121 + """
  122 + Compute metric.
  123 + """
  124 + print "---" #FIXME
79 125  
80 126 class Evaluation:
81   - """ """
  127 + """
  128 + Class designed to perform prediction evaluation, given data and metric.
  129 + """
82 130 def __init__(self,predicted_result,real_result):
83   - """ """
  131 + """
  132 + Set initial parameters.
  133 + """
84 134 self.predicted_item_scores = predicted_result.item_score
85 135 self.predicted_relevant = predicted_result.get_prediction()
86 136 self.real_item_scores = real_result.item_score
87 137 self.real_relevant = real_result.get_prediction()
88 138 self.predicted_real = [v for v in self.predicted_relevant if v in
89 139 self.real_relevant]
90   - print len(self.predicted_relevant)
91   - print len(self.real_relevant)
92   - print len(self.predicted_real)
  140 + #print len(self.predicted_relevant)
  141 + #print len(self.real_relevant)
  142 + #print len(self.predicted_real)
93 143  
94 144 def run(self,metric):
  145 + """
  146 + Perform the evaluation with the given metric.
  147 + """
95 148 return metric.run(self)
96 149  
97 150 class CrossValidation:
98 151 """
99   - Cross-validation method
  152 + Class designed to perform cross-validation process.
100 153 """
101 154 def __init__(self,partition_proportion,rounds,rec,metrics_list):
102 155 """
103   - Set defaults: partition_size, rounds, recommender and metrics_list
  156 + Set initial parameters.
104 157 """
105 158 if partition_proportion<1 and partition_proportion>0:
106 159 self.partition_proportion = partition_proportion
107 160 else:
108   - logging.critical("A proporcao de particao deve ser um avalor ente 0 e 1.")
  161 + logging.critical("Partition proportion must be a value in the
  162 + interval [0,1].")
109 163 raise Error
110 164 self.rounds = rounds
111 165 self.recommender = rec
112 166 self.metrics_list = metrics_list
113 167 self.cross_results = defaultdict(list)
114 168  
115   - def print_result(self):
116   - print ""
  169 + def __str__(self):
  170 + """
  171 + String representation of the object.
  172 + """
  173 + str = "\n"
117 174 metrics_desc = ""
118 175 for metric in self.metrics_list:
119 176 metrics_desc += "%s|" % (metric.desc)
120   - print "| Round |%s" % metrics_desc
  177 + str += "| Round |%s\n" % metrics_desc
121 178 for r in range(self.rounds):
122 179 metrics_result = ""
123 180 for metric in self.metrics_list:
124 181 metrics_result += (" %.2f |" %
125 182 (self.cross_results[metric.desc][r]))
126   - print "| %d |%s" % (r,metrics_result)
  183 + str += "| %d |%s\n" % (r,metrics_result)
127 184 metrics_mean = ""
128 185 for metric in self.metrics_list:
129 186 mean = float(sum(self.cross_results[metric.desc]) /
130 187 len(self.cross_results[metric.desc]))
131 188 metrics_mean += " %.2f |" % (mean)
132   - print "| Mean |%s" % (metrics_mean)
  189 + str += "| Mean |%s\n" % (metrics_mean)
  190 + return str
133 191  
134 192 def run(self,user):
135 193 """
... ... @@ -144,7 +202,7 @@ class CrossValidation:
144 202 if len(cross_item_score)>0:
145 203 random_key = random.choice(cross_item_score.keys())
146 204 else:
147   - logging.critical("cross_item_score vazio")
  205 + logging.critical("Empty cross_item_score.")
148 206 raise Error
149 207 round_partition[random_key] = cross_item_score.pop(random_key)
150 208 round_user = User(cross_item_score)
... ... @@ -157,5 +215,4 @@ class CrossValidation:
157 215 while len(round_partition)>0:
158 216 item,score = round_partition.popitem()
159 217 cross_item_score[item] = score
160   - self.print_result()
161 218  
... ...
src/generate_doc.sh
1 1 #!/bin/bash
  2 +#
  3 +# generate_doc.sh - shell script to generate documentation using doxygen.
  4 +#
  5 +# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
  6 +#
  7 +# This program is free software: you can redistribute it and/or modify
  8 +# it under the terms of the GNU General Public License as published by
  9 +# the Free Software Foundation, either version 3 of the License, or
  10 +# (at your option) any later version.
  11 +#
  12 +# This program is distributed in the hope that it will be useful,
  13 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 +# GNU General Public License for more details.
  16 +#
  17 +# You should have received a copy of the GNU General Public License
  18 +# along with this program. If not, see <http://www.gnu.org/licenses/>.
2 19  
  20 +# Get project version from git repository
3 21 TAG=$(git describe --tags --abbrev=0)
4 22 sed -i "s/^PROJECT_NUMBER.*$/PROJECT_NUMBER\t\t= $TAG/" ../doc/doxy_config
5 23 rm -Rf ../doc/html
... ...
src/recommender.py
1 1 #!/usr/bin/python
2 2  
3   -# AppRecommender - A GNU/Linux application recommender
  3 +# recommender - python module for classes related to recommenders.
4 4 #
5 5 # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6 6 #
... ... @@ -23,11 +23,20 @@ from strategy import *
23 23 from error import Error
24 24  
25 25 class RecommendationResult:
  26 + """
  27 + Class designed to describe a recommendation result: items and scores.
  28 + """
26 29 def __init__(self,item_score,size):
  30 + """
  31 + Set initial parameters.
  32 + """
27 33 self.item_score = item_score
28 34 self.size = size
29 35  
30 36 def __str__(self):
  37 + """
  38 + String representation of the object.
  39 + """
31 40 result = self.get_prediction()
32 41 str = "\n"
33 42 for i in range(len(result)):
... ... @@ -35,12 +44,20 @@ class RecommendationResult:
35 44 return str
36 45  
37 46 def get_prediction(self):
  47 + """
  48 + Return prediction based on recommendation size (number of items).
  49 + """
38 50 sorted_result = sorted(self.item_score.items(), key=itemgetter(1))
39 51 return sorted_result[:self.size]
40 52  
41 53 class Recommender:
42   - """ """
  54 + """
  55 + Class designed to play the role of recommender.
  56 + """
43 57 def __init__(self,cfg):
  58 + """
  59 + Set initial parameters.
  60 + """
44 61 try:
45 62 strategy = "self."+cfg.strategy+"(cfg)"
46 63 exec(strategy)
... ... @@ -50,17 +67,28 @@ class Recommender:
50 67 raise Error
51 68  
52 69 def ct(self,cfg):
  70 + """
  71 + Perform content-based recommendation using tags index as source data.
  72 + """
53 73 self.items_repository = TagsXapianIndex(cfg)
54 74 self.strategy = ContentBasedStrategy()
55 75  
56 76 def cta(self,cfg):
  77 + """
  78 + Perform content-based recommendation using apt-xapian-index as source
  79 + data.
  80 + """
57 81 self.items_repository = xapian.Database(cfg.axi)
58 82 self.strategy = AxiContentBasedStrategy()
59 83  
60 84 def set_strategy(self,strategy):
61   - """ """
  85 + """
  86 + Set the recommendation strategy.
  87 + """
62 88 self.strategy = strategy
63 89  
64 90 def get_recommendation(self,user):
65   - """ """
  91 + """
  92 + Produces recommendation using previously loaded strategy.
  93 + """
66 94 return self.strategy.run(self,user)
... ...
src/similarity_measure.py
1 1 #!/usr/bin/python
2 2  
3   -# AppRecommender - A GNU/Linux application recommender
  3 +# similarity-measure - python module for classes and methods related to
  4 +# measuring similarity between two sets of data.
4 5 #
5 6 # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6 7 #
... ...
src/strategy.py
1 1 #!/usr/bin/python
2 2  
3   -# AppRecommender - A GNU/Linux application recommender
  3 +# strategy - python module for classes and methods related to recommendation
  4 +# strategies.
4 5 #
5 6 # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6 7 #
... ... @@ -26,40 +27,51 @@ class ReputationHeuristic:
26 27 """
27 28 Abstraction for different reputation heuristics.
28 29 """
  30 + pass
29 31  
30 32 class BugsHeuristic(ReputationHeuristic):
31 33 """
32 34 Reputation heuristic based on quantity of open bugs.
33 35 """
  36 + pass
34 37  
35 38 class RCBugsHeuristic(ReputationHeuristic):
36 39 """
37 40 Reputation heuristic based on quantity of RC bugs.
38 41 """
  42 + pass
39 43  
40 44 class PopularityHeuristic(ReputationHeuristic):
41 45 """
42 46 Reputation heuristic based on popularity of packages.
43 47 """
  48 + pass
44 49  
45 50  
46 51 class PkgMatchDecider(xapian.MatchDecider):
47 52 """
48   - Extends xapian.MatchDecider to disconsider installed packages.
  53 + Extend xapian.MatchDecider to not consider installed packages.
49 54 """
50 55  
51 56 def __init__(self, installed_pkgs):
  57 + """
  58 + Set initial parameters.
  59 + """
52 60 xapian.MatchDecider.__init__(self)
53 61 self.installed_pkgs = installed_pkgs
54 62  
55 63 def __call__(self, doc):
  64 + """
  65 + True if the package is not already installed.
  66 + """
56 67 return doc.get_data() not in self.installed_pkgs
57 68  
58 69  
59 70 class RecommendationStrategy:
60 71 """
61   - Abstraction for diferent recommendation strategy.
  72 + Base class for recommendation strategies.
62 73 """
  74 + pass
63 75  
64 76 class ItemReputationStrategy(RecommendationStrategy):
65 77 """
... ...
src/user.py
1 1 #!/usr/bin/python
2 2  
3   -# AppRecommender - A GNU/Linux application recommender
  3 +# user - python module for classes and methods related to recommenders' users.
4 4 #
5 5 # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6 6 #
... ... @@ -23,6 +23,9 @@ import logging
23 23 import apt
24 24  
25 25 class FilterTag(xapian.ExpandDecider):
  26 + """
  27 + Extend xapian.ExpandDecider to consider only tag terms.
  28 + """
26 29 def __call__(self, term):
27 30 """
28 31 Return true if the term is a tag, else false.
... ... @@ -30,29 +33,28 @@ class FilterTag(xapian.ExpandDecider):
30 33 return term[:2] == "XT"
31 34  
32 35 class User:
33   - """ """
  36 + """
  37 + Define a user of a recommender.
  38 + """
34 39 def __init__(self,item_score,user_id=0,demographic_profile=0):
35   - """ """
  40 + """
  41 + Set initial parameters.
  42 + """
36 43 self.id = user_id
37 44 self.item_score = item_score
38 45 self.pkg_profile = self.item_score.keys()
39 46 self.demographic_profile = demographic_profile
40 47  
41 48 def items(self):
  49 + """
  50 + Return dictionary relating items and respective scores.
  51 + """
42 52 return self.item_score.keys()
43 53  
44   - def maximal_pkg_profile(self):
45   - cache = apt.Cache()
46   - old_profile_size = len(self.pkg_profile)
47   - for p in self.pkg_profile[:]: #iterate list copy
48   - pkg = cache[p]
49   - if pkg.is_auto_installed:
50   - self.pkg_profile.remove(p)
51   - profile_size = len(self.pkg_profile)
52   - logging.info("Reduced packages profile size from %d to %d." %
53   - (old_profile_size, profile_size))
54   -
55 54 def axi_tag_profile(self,apt_xapian_index,profile_size):
  55 + """
  56 + Return most relevant tags for a list of packages based on axi.
  57 + """
56 58 terms = []
57 59 for item in self.pkg_profile:
58 60 terms.append("XP"+item)
... ... @@ -70,15 +72,38 @@ class User:
70 72 return profile
71 73  
72 74 def txi_tag_profile(self,tags_xapian_index,profile_size):
  75 + """
  76 + Return most relevant tags for a list of packages based on tags index.
  77 + """
73 78 return tags_xapian_index.relevant_tags_from_db(self.pkg_profile,
74 79 profile_size)
75 80  
76 81 class LocalSystem(User):
77   - """ """
  82 + """
  83 + Extend the class User to consider the packages installed on the local
  84 + system as the set of selected items.
  85 + """
78 86 def __init__(self):
  87 + """
  88 + Set initial parameters.
  89 + """
79 90 item_score = {}
80 91 dpkg_output = commands.getoutput('/usr/bin/dpkg --get-selections')
81 92 for line in dpkg_output.splitlines():
82 93 pkg = line.split('\t')[0]
83 94 item_score[pkg] = 1
84 95 User.__init__(self,item_score)
  96 +
  97 + def maximal_pkg_profile(self):
  98 + """
  99 + Return list of packages voluntarily installed.
  100 + """
  101 + cache = apt.Cache()
  102 + old_profile_size = len(self.pkg_profile)
  103 + for p in self.pkg_profile[:]: #iterate list copy
  104 + pkg = cache[p]
  105 + if pkg.is_auto_installed:
  106 + self.pkg_profile.remove(p)
  107 + profile_size = len(self.pkg_profile)
  108 + logging.info("Reduced packages profile size from %d to %d." %
  109 + (old_profile_size, profile_size))
... ...