Commit 2255aea0def90c40a6512e86637e1559d65711fc
1 parent
7c99a2c6
Exists in
master
and in
1 other branch
Documentation improved (using python docstring).
Showing
12 changed files
with
247 additions
and
59 deletions
Show diff stats
src/app_recommender.py
src/config.py
1 | #!/usr/bin/python | 1 | #!/usr/bin/python |
2 | 2 | ||
3 | -# AppRecommender - A GNU/Linux application recommender | 3 | +# config - python module for configuration options. |
4 | # | 4 | # |
5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | 5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> |
6 | # | 6 | # |
@@ -144,6 +144,9 @@ class Config(): | @@ -144,6 +144,9 @@ class Config(): | ||
144 | assert False, "unhandled option" | 144 | assert False, "unhandled option" |
145 | 145 | ||
146 | def set_logger(self): | 146 | def set_logger(self): |
147 | + """ | ||
148 | + Configure application logger and log level. | ||
149 | + """ | ||
147 | self.logger = getLogger('') # root logger is used by default | 150 | self.logger = getLogger('') # root logger is used by default |
148 | self.logger.setLevel(DEBUG) | 151 | self.logger.setLevel(DEBUG) |
149 | 152 |
src/cross_validation.py
1 | #!/usr/bin/python | 1 | #!/usr/bin/python |
2 | 2 | ||
3 | -# AppRecommender - A GNU/Linux application recommender | 3 | +# CrossValidation - python module for classes and methods related to |
4 | +# recommenders evaluation. | ||
4 | # | 5 | # |
5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | 6 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> |
6 | # | 7 | # |
@@ -47,6 +48,7 @@ if __name__ == '__main__': | @@ -47,6 +48,7 @@ if __name__ == '__main__': | ||
47 | metrics.append(Recall()) | 48 | metrics.append(Recall()) |
48 | validation = CrossValidation(0.3,10,rec,metrics) | 49 | validation = CrossValidation(0.3,10,rec,metrics) |
49 | validation.run(user) | 50 | validation.run(user) |
51 | + print validation | ||
50 | 52 | ||
51 | end_time = datetime.datetime.now() | 53 | end_time = datetime.datetime.now() |
52 | logging.debug("Cross-validation completed at %s" % end_time) | 54 | logging.debug("Cross-validation completed at %s" % end_time) |
src/data.py
1 | #!/usr/bin/python | 1 | #!/usr/bin/python |
2 | 2 | ||
3 | -# AppRecommender - A GNU/Linux application recommender | 3 | +# data - python module for data sources classes and methods. |
4 | # | 4 | # |
5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | 5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> |
6 | # | 6 | # |
@@ -29,32 +29,50 @@ import hashlib | @@ -29,32 +29,50 @@ import hashlib | ||
29 | from error import Error | 29 | from error import Error |
30 | 30 | ||
31 | class Item: | 31 | class Item: |
32 | - """ """ | 32 | + """ |
33 | + Generic item definition. | ||
34 | + """ | ||
33 | 35 | ||
34 | class Package(Item): | 36 | class Package(Item): |
35 | - """ """ | 37 | + """ |
38 | + Definition of a GNU/Linux application as a recommender item. | ||
39 | + """ | ||
36 | def __init__(self,package_name): | 40 | def __init__(self,package_name): |
37 | - """ """ | 41 | + """ |
42 | + Set initial attributes. | ||
43 | + """ | ||
38 | self.package_name = package_name | 44 | self.package_name = package_name |
39 | 45 | ||
40 | - def load_package_info(self): | ||
41 | - """ """ | ||
42 | - print "debian pkg",self.id | ||
43 | - | ||
44 | def normalize_tags(string): | 46 | def normalize_tags(string): |
45 | """ | 47 | """ |
46 | - Normalize tag string so that it can be indexed and retrieved. | 48 | + Substitute string characters : by _ and - by '. |
49 | + Examples: | ||
50 | + admin::package-management -> admin__package'management | ||
51 | + implemented-in::c++ -> implemented'in__c++ | ||
47 | """ | 52 | """ |
48 | return string.replace(':','_').replace('-','\'') | 53 | return string.replace(':','_').replace('-','\'') |
49 | 54 | ||
50 | class Singleton(object): | 55 | class Singleton(object): |
56 | + """ | ||
57 | + Base class for inheritance of only-one-instance classes. | ||
58 | + Singleton design pattern. | ||
59 | + """ | ||
51 | def __new__(cls, *args, **kwargs): | 60 | def __new__(cls, *args, **kwargs): |
61 | + """ | ||
62 | + Creates a new instance of the class only if none already exists. | ||
63 | + """ | ||
52 | if '_inst' not in vars(cls): | 64 | if '_inst' not in vars(cls): |
53 | cls._inst = object.__new__(cls) | 65 | cls._inst = object.__new__(cls) |
54 | return cls._inst | 66 | return cls._inst |
55 | 67 | ||
56 | class TagsXapianIndex(xapian.WritableDatabase,Singleton): | 68 | class TagsXapianIndex(xapian.WritableDatabase,Singleton): |
69 | + """ | ||
70 | + Data source for tags info defined as a singleton xapian database. | ||
71 | + """ | ||
57 | def __init__(self,cfg): | 72 | def __init__(self,cfg): |
73 | + """ | ||
74 | + Set initial attributes. | ||
75 | + """ | ||
58 | self.path = os.path.expanduser(cfg.tags_index) | 76 | self.path = os.path.expanduser(cfg.tags_index) |
59 | self.db_path = os.path.expanduser(cfg.tags_db) | 77 | self.db_path = os.path.expanduser(cfg.tags_db) |
60 | self.debtags_db = debtags.DB() | 78 | self.debtags_db = debtags.DB() |
@@ -67,6 +85,9 @@ class TagsXapianIndex(xapian.WritableDatabase,Singleton): | @@ -67,6 +85,9 @@ class TagsXapianIndex(xapian.WritableDatabase,Singleton): | ||
67 | self.load_index(cfg.reindex) | 85 | self.load_index(cfg.reindex) |
68 | 86 | ||
69 | def load_db(self): | 87 | def load_db(self): |
88 | + """ | ||
89 | + Load debtags database from the source file. | ||
90 | + """ | ||
70 | tag_filter = re.compile(r"^special::.+$|^.+::TODO$") | 91 | tag_filter = re.compile(r"^special::.+$|^.+::TODO$") |
71 | try: | 92 | try: |
72 | db_file = open(self.db_path, "r") | 93 | db_file = open(self.db_path, "r") |
src/demo_rec.py
1 | #!/usr/bin/python | 1 | #!/usr/bin/python |
2 | 2 | ||
3 | -# AppRecommender - A GNU/Linux application recommender | 3 | +# DemoRecommender - demonstration of a GNU/Linux application recommender. |
4 | # | 4 | # |
5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | 5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> |
6 | # | 6 | # |
src/error.py
1 | +#!/usr/bin/python | ||
2 | + | ||
3 | +# error.py - python module for error definition. | ||
4 | +# | ||
5 | +# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | ||
6 | +# | ||
7 | +# This program is free software: you can redistribute it and/or modify | ||
8 | +# it under the terms of the GNU General Public License as published by | ||
9 | +# the Free Software Foundation, either version 3 of the License, or | ||
10 | +# (at your option) any later version. | ||
11 | +# | ||
12 | +# This program is distributed in the hope that it will be useful, | ||
13 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | +# GNU General Public License for more details. | ||
16 | +# | ||
17 | +# You should have received a copy of the GNU General Public License | ||
18 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | + | ||
1 | class Error(Exception): | 20 | class Error(Exception): |
2 | - """Base class for exceptions.""" | 21 | + """ |
22 | + Base class for exceptions. | ||
23 | + """ | ||
3 | pass | 24 | pass |
src/evaluation.py
1 | #!/usr/bin/python | 1 | #!/usr/bin/python |
2 | 2 | ||
3 | -# AppRecommender - A GNU/Linux application recommender | 3 | +# evaluation - python module for classes and methods related to recommenders |
4 | +# evaluation. | ||
4 | # | 5 | # |
5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | 6 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> |
6 | # | 7 | # |
@@ -25,30 +26,57 @@ from user import * | @@ -25,30 +26,57 @@ from user import * | ||
25 | from recommender import * | 26 | from recommender import * |
26 | 27 | ||
27 | class Metric: | 28 | class Metric: |
28 | - """ """ | 29 | + """ |
30 | + Base class for metrics. Strategy design pattern. | ||
31 | + """ | ||
32 | + pass | ||
29 | 33 | ||
30 | class Precision(Metric): | 34 | class Precision(Metric): |
31 | - """ """ | 35 | + """ |
36 | + Accuracy evaluation metric defined as the percentage of relevant items | ||
37 | + among the predicted ones. | ||
38 | + """ | ||
32 | def __init__(self): | 39 | def __init__(self): |
40 | + """ | ||
41 | + Set metric description. | ||
42 | + """ | ||
33 | self.desc = " Precision " | 43 | self.desc = " Precision " |
34 | 44 | ||
35 | def run(self,evaluation): | 45 | def run(self,evaluation): |
46 | + """ | ||
47 | + Compute metric. | ||
48 | + """ | ||
36 | return float(len(evaluation.predicted_real))/len(evaluation.predicted_relevant) | 49 | return float(len(evaluation.predicted_real))/len(evaluation.predicted_relevant) |
37 | 50 | ||
38 | class Recall(Metric): | 51 | class Recall(Metric): |
39 | - """ """ | 52 | + """ |
53 | + Accuracy evaluation metric defined as the percentage of relevant items | ||
54 | + which were predicted as so. | ||
55 | + """ | ||
40 | def __init__(self): | 56 | def __init__(self): |
57 | + """ | ||
58 | + Set metric description. | ||
59 | + """ | ||
41 | self.desc = " Recall " | 60 | self.desc = " Recall " |
42 | 61 | ||
43 | def run(self,evaluation): | 62 | def run(self,evaluation): |
63 | + """ | ||
64 | + Compute metric. | ||
65 | + """ | ||
44 | return float(len(evaluation.predicted_real))/len(evaluation.real_relevant) | 66 | return float(len(evaluation.predicted_real))/len(evaluation.real_relevant) |
45 | 67 | ||
46 | class F1(Metric): | 68 | class F1(Metric): |
47 | """ """ | 69 | """ """ |
48 | def __init__(self): | 70 | def __init__(self): |
71 | + """ | ||
72 | + Set metric description. | ||
73 | + """ | ||
49 | self.desc = " F1 " | 74 | self.desc = " F1 " |
50 | 75 | ||
51 | def run(self,evaluation): | 76 | def run(self,evaluation): |
77 | + """ | ||
78 | + Compute metric. | ||
79 | + """ | ||
52 | p = Precision().run(evaluation) | 80 | p = Precision().run(evaluation) |
53 | r = Recall().run(evaluation) | 81 | r = Recall().run(evaluation) |
54 | return float((2*p*r)/(p+r)) | 82 | return float((2*p*r)/(p+r)) |
@@ -56,80 +84,110 @@ class F1(Metric): | @@ -56,80 +84,110 @@ class F1(Metric): | ||
56 | class MAE(Metric): | 84 | class MAE(Metric): |
57 | """ """ | 85 | """ """ |
58 | def __init__(self): | 86 | def __init__(self): |
87 | + """ | ||
88 | + Set metric description. | ||
89 | + """ | ||
59 | self.desc = " MAE " | 90 | self.desc = " MAE " |
60 | 91 | ||
61 | def run(self,evaluation): | 92 | def run(self,evaluation): |
62 | - print "run" | 93 | + """ |
94 | + Compute metric. | ||
95 | + """ | ||
96 | + print "---" #FIXME | ||
63 | 97 | ||
64 | class MSE(Metric): | 98 | class MSE(Metric): |
65 | """ """ | 99 | """ """ |
66 | def __init__(self): | 100 | def __init__(self): |
101 | + """ | ||
102 | + Set metric description. | ||
103 | + """ | ||
67 | self.desc = " MSE " | 104 | self.desc = " MSE " |
68 | 105 | ||
69 | def run(self,evaluation): | 106 | def run(self,evaluation): |
70 | - print "run" | 107 | + """ |
108 | + Compute metric. | ||
109 | + """ | ||
110 | + print "---" #FIXME | ||
71 | 111 | ||
72 | class Coverage(Metric): | 112 | class Coverage(Metric): |
73 | """ """ | 113 | """ """ |
74 | def __init__(self): | 114 | def __init__(self): |
115 | + """ | ||
116 | + Set metric description. | ||
117 | + """ | ||
75 | self.desc = " Coverage " | 118 | self.desc = " Coverage " |
76 | 119 | ||
77 | def run(self,evaluation): | 120 | def run(self,evaluation): |
78 | - print "run" | 121 | + """ |
122 | + Compute metric. | ||
123 | + """ | ||
124 | + print "---" #FIXME | ||
79 | 125 | ||
80 | class Evaluation: | 126 | class Evaluation: |
81 | - """ """ | 127 | + """ |
128 | + Class designed to perform prediction evaluation, given data and metric. | ||
129 | + """ | ||
82 | def __init__(self,predicted_result,real_result): | 130 | def __init__(self,predicted_result,real_result): |
83 | - """ """ | 131 | + """ |
132 | + Set initial parameters. | ||
133 | + """ | ||
84 | self.predicted_item_scores = predicted_result.item_score | 134 | self.predicted_item_scores = predicted_result.item_score |
85 | self.predicted_relevant = predicted_result.get_prediction() | 135 | self.predicted_relevant = predicted_result.get_prediction() |
86 | self.real_item_scores = real_result.item_score | 136 | self.real_item_scores = real_result.item_score |
87 | self.real_relevant = real_result.get_prediction() | 137 | self.real_relevant = real_result.get_prediction() |
88 | self.predicted_real = [v for v in self.predicted_relevant if v in | 138 | self.predicted_real = [v for v in self.predicted_relevant if v in |
89 | self.real_relevant] | 139 | self.real_relevant] |
90 | - print len(self.predicted_relevant) | ||
91 | - print len(self.real_relevant) | ||
92 | - print len(self.predicted_real) | 140 | + #print len(self.predicted_relevant) |
141 | + #print len(self.real_relevant) | ||
142 | + #print len(self.predicted_real) | ||
93 | 143 | ||
94 | def run(self,metric): | 144 | def run(self,metric): |
145 | + """ | ||
146 | + Perform the evaluation with the given metric. | ||
147 | + """ | ||
95 | return metric.run(self) | 148 | return metric.run(self) |
96 | 149 | ||
97 | class CrossValidation: | 150 | class CrossValidation: |
98 | """ | 151 | """ |
99 | - Cross-validation method | 152 | + Class designed to perform cross-validation process. |
100 | """ | 153 | """ |
101 | def __init__(self,partition_proportion,rounds,rec,metrics_list): | 154 | def __init__(self,partition_proportion,rounds,rec,metrics_list): |
102 | """ | 155 | """ |
103 | - Set defaults: partition_size, rounds, recommender and metrics_list | 156 | + Set initial parameters. |
104 | """ | 157 | """ |
105 | if partition_proportion<1 and partition_proportion>0: | 158 | if partition_proportion<1 and partition_proportion>0: |
106 | self.partition_proportion = partition_proportion | 159 | self.partition_proportion = partition_proportion |
107 | else: | 160 | else: |
108 | - logging.critical("A proporcao de particao deve ser um avalor ente 0 e 1.") | 161 | + logging.critical("Partition proportion must be a value in the |
162 | + interval [0,1].") | ||
109 | raise Error | 163 | raise Error |
110 | self.rounds = rounds | 164 | self.rounds = rounds |
111 | self.recommender = rec | 165 | self.recommender = rec |
112 | self.metrics_list = metrics_list | 166 | self.metrics_list = metrics_list |
113 | self.cross_results = defaultdict(list) | 167 | self.cross_results = defaultdict(list) |
114 | 168 | ||
115 | - def print_result(self): | ||
116 | - print "" | 169 | + def __str__(self): |
170 | + """ | ||
171 | + String representation of the object. | ||
172 | + """ | ||
173 | + str = "\n" | ||
117 | metrics_desc = "" | 174 | metrics_desc = "" |
118 | for metric in self.metrics_list: | 175 | for metric in self.metrics_list: |
119 | metrics_desc += "%s|" % (metric.desc) | 176 | metrics_desc += "%s|" % (metric.desc) |
120 | - print "| Round |%s" % metrics_desc | 177 | + str += "| Round |%s\n" % metrics_desc |
121 | for r in range(self.rounds): | 178 | for r in range(self.rounds): |
122 | metrics_result = "" | 179 | metrics_result = "" |
123 | for metric in self.metrics_list: | 180 | for metric in self.metrics_list: |
124 | metrics_result += (" %.2f |" % | 181 | metrics_result += (" %.2f |" % |
125 | (self.cross_results[metric.desc][r])) | 182 | (self.cross_results[metric.desc][r])) |
126 | - print "| %d |%s" % (r,metrics_result) | 183 | + str += "| %d |%s\n" % (r,metrics_result) |
127 | metrics_mean = "" | 184 | metrics_mean = "" |
128 | for metric in self.metrics_list: | 185 | for metric in self.metrics_list: |
129 | mean = float(sum(self.cross_results[metric.desc]) / | 186 | mean = float(sum(self.cross_results[metric.desc]) / |
130 | len(self.cross_results[metric.desc])) | 187 | len(self.cross_results[metric.desc])) |
131 | metrics_mean += " %.2f |" % (mean) | 188 | metrics_mean += " %.2f |" % (mean) |
132 | - print "| Mean |%s" % (metrics_mean) | 189 | + str += "| Mean |%s\n" % (metrics_mean) |
190 | + return str | ||
133 | 191 | ||
134 | def run(self,user): | 192 | def run(self,user): |
135 | """ | 193 | """ |
@@ -144,7 +202,7 @@ class CrossValidation: | @@ -144,7 +202,7 @@ class CrossValidation: | ||
144 | if len(cross_item_score)>0: | 202 | if len(cross_item_score)>0: |
145 | random_key = random.choice(cross_item_score.keys()) | 203 | random_key = random.choice(cross_item_score.keys()) |
146 | else: | 204 | else: |
147 | - logging.critical("cross_item_score vazio") | 205 | + logging.critical("Empty cross_item_score.") |
148 | raise Error | 206 | raise Error |
149 | round_partition[random_key] = cross_item_score.pop(random_key) | 207 | round_partition[random_key] = cross_item_score.pop(random_key) |
150 | round_user = User(cross_item_score) | 208 | round_user = User(cross_item_score) |
@@ -157,5 +215,4 @@ class CrossValidation: | @@ -157,5 +215,4 @@ class CrossValidation: | ||
157 | while len(round_partition)>0: | 215 | while len(round_partition)>0: |
158 | item,score = round_partition.popitem() | 216 | item,score = round_partition.popitem() |
159 | cross_item_score[item] = score | 217 | cross_item_score[item] = score |
160 | - self.print_result() | ||
161 | 218 |
src/generate_doc.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | +# | ||
3 | +# generate_doc.sh - shell script to generate documentation using doxygen. | ||
4 | +# | ||
5 | +# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | ||
6 | +# | ||
7 | +# This program is free software: you can redistribute it and/or modify | ||
8 | +# it under the terms of the GNU General Public License as published by | ||
9 | +# the Free Software Foundation, either version 3 of the License, or | ||
10 | +# (at your option) any later version. | ||
11 | +# | ||
12 | +# This program is distributed in the hope that it will be useful, | ||
13 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | +# GNU General Public License for more details. | ||
16 | +# | ||
17 | +# You should have received a copy of the GNU General Public License | ||
18 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
2 | 19 | ||
20 | +# Get project version from git repository | ||
3 | TAG=$(git describe --tags --abbrev=0) | 21 | TAG=$(git describe --tags --abbrev=0) |
4 | sed -i "s/^PROJECT_NUMBER.*$/PROJECT_NUMBER\t\t= $TAG/" ../doc/doxy_config | 22 | sed -i "s/^PROJECT_NUMBER.*$/PROJECT_NUMBER\t\t= $TAG/" ../doc/doxy_config |
5 | rm -Rf ../doc/html | 23 | rm -Rf ../doc/html |
src/recommender.py
1 | #!/usr/bin/python | 1 | #!/usr/bin/python |
2 | 2 | ||
3 | -# AppRecommender - A GNU/Linux application recommender | 3 | +# recommender - python module for classes related to recommenders. |
4 | # | 4 | # |
5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | 5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> |
6 | # | 6 | # |
@@ -23,11 +23,20 @@ from strategy import * | @@ -23,11 +23,20 @@ from strategy import * | ||
23 | from error import Error | 23 | from error import Error |
24 | 24 | ||
25 | class RecommendationResult: | 25 | class RecommendationResult: |
26 | + """ | ||
27 | + Class designed to describe a recommendation result: items and scores. | ||
28 | + """ | ||
26 | def __init__(self,item_score,size): | 29 | def __init__(self,item_score,size): |
30 | + """ | ||
31 | + Set initial parameters. | ||
32 | + """ | ||
27 | self.item_score = item_score | 33 | self.item_score = item_score |
28 | self.size = size | 34 | self.size = size |
29 | 35 | ||
30 | def __str__(self): | 36 | def __str__(self): |
37 | + """ | ||
38 | + String representation of the object. | ||
39 | + """ | ||
31 | result = self.get_prediction() | 40 | result = self.get_prediction() |
32 | str = "\n" | 41 | str = "\n" |
33 | for i in range(len(result)): | 42 | for i in range(len(result)): |
@@ -35,12 +44,20 @@ class RecommendationResult: | @@ -35,12 +44,20 @@ class RecommendationResult: | ||
35 | return str | 44 | return str |
36 | 45 | ||
37 | def get_prediction(self): | 46 | def get_prediction(self): |
47 | + """ | ||
48 | + Return prediction based on recommendation size (number of items). | ||
49 | + """ | ||
38 | sorted_result = sorted(self.item_score.items(), key=itemgetter(1)) | 50 | sorted_result = sorted(self.item_score.items(), key=itemgetter(1)) |
39 | return sorted_result[:self.size] | 51 | return sorted_result[:self.size] |
40 | 52 | ||
41 | class Recommender: | 53 | class Recommender: |
42 | - """ """ | 54 | + """ |
55 | + Class designed to play the role of recommender. | ||
56 | + """ | ||
43 | def __init__(self,cfg): | 57 | def __init__(self,cfg): |
58 | + """ | ||
59 | + Set initial parameters. | ||
60 | + """ | ||
44 | try: | 61 | try: |
45 | strategy = "self."+cfg.strategy+"(cfg)" | 62 | strategy = "self."+cfg.strategy+"(cfg)" |
46 | exec(strategy) | 63 | exec(strategy) |
@@ -50,17 +67,28 @@ class Recommender: | @@ -50,17 +67,28 @@ class Recommender: | ||
50 | raise Error | 67 | raise Error |
51 | 68 | ||
52 | def ct(self,cfg): | 69 | def ct(self,cfg): |
70 | + """ | ||
71 | + Perform content-based recommendation using tags index as source data. | ||
72 | + """ | ||
53 | self.items_repository = TagsXapianIndex(cfg) | 73 | self.items_repository = TagsXapianIndex(cfg) |
54 | self.strategy = ContentBasedStrategy() | 74 | self.strategy = ContentBasedStrategy() |
55 | 75 | ||
56 | def cta(self,cfg): | 76 | def cta(self,cfg): |
77 | + """ | ||
78 | + Perform content-based recommendation using apt-xapian-index as source | ||
79 | + data. | ||
80 | + """ | ||
57 | self.items_repository = xapian.Database(cfg.axi) | 81 | self.items_repository = xapian.Database(cfg.axi) |
58 | self.strategy = AxiContentBasedStrategy() | 82 | self.strategy = AxiContentBasedStrategy() |
59 | 83 | ||
60 | def set_strategy(self,strategy): | 84 | def set_strategy(self,strategy): |
61 | - """ """ | 85 | + """ |
86 | + Set the recommendation strategy. | ||
87 | + """ | ||
62 | self.strategy = strategy | 88 | self.strategy = strategy |
63 | 89 | ||
64 | def get_recommendation(self,user): | 90 | def get_recommendation(self,user): |
65 | - """ """ | 91 | + """ |
92 | + Produces recommendation using previously loaded strategy. | ||
93 | + """ | ||
66 | return self.strategy.run(self,user) | 94 | return self.strategy.run(self,user) |
src/similarity_measure.py
1 | #!/usr/bin/python | 1 | #!/usr/bin/python |
2 | 2 | ||
3 | -# AppRecommender - A GNU/Linux application recommender | 3 | +# similarity-measure - python module for classes and methods related to |
4 | +# measuring similarity between two sets of data. | ||
4 | # | 5 | # |
5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | 6 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> |
6 | # | 7 | # |
src/strategy.py
1 | #!/usr/bin/python | 1 | #!/usr/bin/python |
2 | 2 | ||
3 | -# AppRecommender - A GNU/Linux application recommender | 3 | +# strategy - python module for classes and methods related to recommendation |
4 | +# strategies. | ||
4 | # | 5 | # |
5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | 6 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> |
6 | # | 7 | # |
@@ -26,40 +27,51 @@ class ReputationHeuristic: | @@ -26,40 +27,51 @@ class ReputationHeuristic: | ||
26 | """ | 27 | """ |
27 | Abstraction for different reputation heuristics. | 28 | Abstraction for different reputation heuristics. |
28 | """ | 29 | """ |
30 | + pass | ||
29 | 31 | ||
30 | class BugsHeuristic(ReputationHeuristic): | 32 | class BugsHeuristic(ReputationHeuristic): |
31 | """ | 33 | """ |
32 | Reputation heuristic based on quantity of open bugs. | 34 | Reputation heuristic based on quantity of open bugs. |
33 | """ | 35 | """ |
36 | + pass | ||
34 | 37 | ||
35 | class RCBugsHeuristic(ReputationHeuristic): | 38 | class RCBugsHeuristic(ReputationHeuristic): |
36 | """ | 39 | """ |
37 | Reputation heuristic based on quantity of RC bugs. | 40 | Reputation heuristic based on quantity of RC bugs. |
38 | """ | 41 | """ |
42 | + pass | ||
39 | 43 | ||
40 | class PopularityHeuristic(ReputationHeuristic): | 44 | class PopularityHeuristic(ReputationHeuristic): |
41 | """ | 45 | """ |
42 | Reputation heuristic based on popularity of packages. | 46 | Reputation heuristic based on popularity of packages. |
43 | """ | 47 | """ |
48 | + pass | ||
44 | 49 | ||
45 | 50 | ||
46 | class PkgMatchDecider(xapian.MatchDecider): | 51 | class PkgMatchDecider(xapian.MatchDecider): |
47 | """ | 52 | """ |
48 | - Extends xapian.MatchDecider to disconsider installed packages. | 53 | + Extend xapian.MatchDecider to not consider installed packages. |
49 | """ | 54 | """ |
50 | 55 | ||
51 | def __init__(self, installed_pkgs): | 56 | def __init__(self, installed_pkgs): |
57 | + """ | ||
58 | + Set initial parameters. | ||
59 | + """ | ||
52 | xapian.MatchDecider.__init__(self) | 60 | xapian.MatchDecider.__init__(self) |
53 | self.installed_pkgs = installed_pkgs | 61 | self.installed_pkgs = installed_pkgs |
54 | 62 | ||
55 | def __call__(self, doc): | 63 | def __call__(self, doc): |
64 | + """ | ||
65 | + True if the package is not already installed. | ||
66 | + """ | ||
56 | return doc.get_data() not in self.installed_pkgs | 67 | return doc.get_data() not in self.installed_pkgs |
57 | 68 | ||
58 | 69 | ||
59 | class RecommendationStrategy: | 70 | class RecommendationStrategy: |
60 | """ | 71 | """ |
61 | - Abstraction for diferent recommendation strategy. | 72 | + Base class for recommendation strategies. |
62 | """ | 73 | """ |
74 | + pass | ||
63 | 75 | ||
64 | class ItemReputationStrategy(RecommendationStrategy): | 76 | class ItemReputationStrategy(RecommendationStrategy): |
65 | """ | 77 | """ |
src/user.py
1 | #!/usr/bin/python | 1 | #!/usr/bin/python |
2 | 2 | ||
3 | -# AppRecommender - A GNU/Linux application recommender | 3 | +# user - python module for classes and methods related to recommenders' users. |
4 | # | 4 | # |
5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | 5 | # Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> |
6 | # | 6 | # |
@@ -23,6 +23,9 @@ import logging | @@ -23,6 +23,9 @@ import logging | ||
23 | import apt | 23 | import apt |
24 | 24 | ||
25 | class FilterTag(xapian.ExpandDecider): | 25 | class FilterTag(xapian.ExpandDecider): |
26 | + """ | ||
27 | + Extend xapian.ExpandDecider to consider only tag terms. | ||
28 | + """ | ||
26 | def __call__(self, term): | 29 | def __call__(self, term): |
27 | """ | 30 | """ |
28 | Return true if the term is a tag, else false. | 31 | Return true if the term is a tag, else false. |
@@ -30,29 +33,28 @@ class FilterTag(xapian.ExpandDecider): | @@ -30,29 +33,28 @@ class FilterTag(xapian.ExpandDecider): | ||
30 | return term[:2] == "XT" | 33 | return term[:2] == "XT" |
31 | 34 | ||
32 | class User: | 35 | class User: |
33 | - """ """ | 36 | + """ |
37 | + Define a user of a recommender. | ||
38 | + """ | ||
34 | def __init__(self,item_score,user_id=0,demographic_profile=0): | 39 | def __init__(self,item_score,user_id=0,demographic_profile=0): |
35 | - """ """ | 40 | + """ |
41 | + Set initial parameters. | ||
42 | + """ | ||
36 | self.id = user_id | 43 | self.id = user_id |
37 | self.item_score = item_score | 44 | self.item_score = item_score |
38 | self.pkg_profile = self.item_score.keys() | 45 | self.pkg_profile = self.item_score.keys() |
39 | self.demographic_profile = demographic_profile | 46 | self.demographic_profile = demographic_profile |
40 | 47 | ||
41 | def items(self): | 48 | def items(self): |
49 | + """ | ||
50 | + Return the items that have scores (keys of the item/score dictionary). | ||
51 | + """ | ||
42 | return self.item_score.keys() | 52 | return self.item_score.keys() |
43 | 53 | ||
44 | - def maximal_pkg_profile(self): | ||
45 | - cache = apt.Cache() | ||
46 | - old_profile_size = len(self.pkg_profile) | ||
47 | - for p in self.pkg_profile[:]: #iterate list copy | ||
48 | - pkg = cache[p] | ||
49 | - if pkg.is_auto_installed: | ||
50 | - self.pkg_profile.remove(p) | ||
51 | - profile_size = len(self.pkg_profile) | ||
52 | - logging.info("Reduced packages profile size from %d to %d." % | ||
53 | - (old_profile_size, profile_size)) | ||
54 | - | ||
55 | def axi_tag_profile(self,apt_xapian_index,profile_size): | 54 | def axi_tag_profile(self,apt_xapian_index,profile_size): |
55 | + """ | ||
56 | + Return most relevant tags for a list of packages based on axi. | ||
57 | + """ | ||
56 | terms = [] | 58 | terms = [] |
57 | for item in self.pkg_profile: | 59 | for item in self.pkg_profile: |
58 | terms.append("XP"+item) | 60 | terms.append("XP"+item) |
@@ -70,15 +72,38 @@ class User: | @@ -70,15 +72,38 @@ class User: | ||
70 | return profile | 72 | return profile |
71 | 73 | ||
72 | def txi_tag_profile(self,tags_xapian_index,profile_size): | 74 | def txi_tag_profile(self,tags_xapian_index,profile_size): |
75 | + """ | ||
76 | + Return most relevant tags for a list of packages based on tags index. | ||
77 | + """ | ||
73 | return tags_xapian_index.relevant_tags_from_db(self.pkg_profile, | 78 | return tags_xapian_index.relevant_tags_from_db(self.pkg_profile, |
74 | profile_size) | 79 | profile_size) |
75 | 80 | ||
76 | class LocalSystem(User): | 81 | class LocalSystem(User): |
77 | - """ """ | 82 | + """ |
83 | + Extend the class User to consider the packages installed on the local | ||
84 | + system as the set of selected items. | ||
85 | + """ | ||
78 | def __init__(self): | 86 | def __init__(self): |
87 | + """ | ||
88 | + Set initial parameters. | ||
89 | + """ | ||
79 | item_score = {} | 90 | item_score = {} |
80 | dpkg_output = commands.getoutput('/usr/bin/dpkg --get-selections') | 91 | dpkg_output = commands.getoutput('/usr/bin/dpkg --get-selections') |
81 | for line in dpkg_output.splitlines(): | 92 | for line in dpkg_output.splitlines(): |
82 | pkg = line.split('\t')[0] | 93 | pkg = line.split('\t')[0] |
83 | item_score[pkg] = 1 | 94 | item_score[pkg] = 1 |
84 | User.__init__(self,item_score) | 95 | User.__init__(self,item_score) |
96 | + | ||
97 | + def maximal_pkg_profile(self): | ||
98 | + """ | ||
99 | + Reduce the package profile to packages voluntarily installed (drop auto-installed ones). | ||
100 | + """ | ||
101 | + cache = apt.Cache() | ||
102 | + old_profile_size = len(self.pkg_profile) | ||
103 | + for p in self.pkg_profile[:]: #iterate list copy | ||
104 | + pkg = cache[p] | ||
105 | + if pkg.is_auto_installed: | ||
106 | + self.pkg_profile.remove(p) | ||
107 | + profile_size = len(self.pkg_profile) | ||
108 | + logging.info("Reduced packages profile size from %d to %d." % | ||
109 | + (old_profile_size, profile_size)) |