Commit bb8d206b3a8d722c6c54f32f50f50059417f8dc3
1 parent 20875f7d
Exists in master and in 1 other branch
Implementation of cross-validation completed. The result is printed in a matrix format. (closes #3)
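For reference, print_result() in src/evaluation.py below renders one row per round and one column per metric, plus a final mean row; with the Precision and Recall metrics registered in src/app_recommender.py, the output looks roughly like this (the numeric values are placeholders for illustration, not results from an actual run):

| Round | Precision | Recall |
| 0 | 0.42 | 0.31 |
| 1 | 0.40 | 0.35 |
...
| Mean | 0.41 | 0.33 |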
Showing 3 changed files with 75 additions and 31 deletions
src/app_recommender.py
@@ -50,3 +50,9 @@ if __name__ == '__main__':
 
     result = recommender.generate_recommendation(user)
     result.print_result()
+
+    metrics = []
+    metrics.append(Precision())
+    metrics.append(Recall())
+    validation = CrossValidation(0.1,10,recommender,metrics)
+    validation.run(user)
src/data.py
@@ -76,8 +76,8 @@ class DebtagsIndex:
         """ Load an existing debtags index. """
         if not reindex:
             try:
-                print ("Opening existing debtags xapian index at \'%s\'" %
-                       self.path)
+                #print ("Opening existing debtags xapian index at \'%s\'" %
+                #       self.path)
                 self.index = xapian.Database(self.path)
             except DatabaseError:
                 print "Could not open debtags xapian index"
src/evaluation.py
@@ -17,13 +17,18 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
+import random
+from collections import defaultdict
+from user import *
+from recommender import *
+
 class Metric:
     """ """
 
 class Precision(Metric):
     """ """
-    def __init_(self):
-        self.desc = "Precision"
+    def __init__(self):
+        self.desc = " Precision "
 
     def run(self,evaluation):
         return float(len(evaluation.predicted_real) /
@@ -31,8 +36,8 @@ class Precision(Metric):
 
 class Recall(Metric):
     """ """
-    def __init_(self):
-        self.desc = "Recall"
+    def __init__(self):
+        self.desc = " Recall "
 
     def run(self,evaluation):
         return float(len(evaluation.predicted_real) /
@@ -40,8 +45,8 @@ class Recall(Metric):
 
 class F1(Metric):
     """ """
-    def __init_(self):
-        self.desc = "F1"
+    def __init__(self):
+        self.desc = " F1 "
 
     def run(self,evaluation):
         p = Precision().run(evaluation)
@@ -50,24 +55,24 @@ class F1(Metric):
 
 class MAE(Metric):
     """ """
-    def __init_(self):
-        self.desc = "MAE"
+    def __init__(self):
+        self.desc = " MAE "
 
     def run(self,evaluation):
         print "run"
 
 class MSE(Metric):
     """ """
-    def __init_(self):
-        self.desc = "MSE"
+    def __init__(self):
+        self.desc = " MSE "
 
     def run(self,evaluation):
         print "run"
 
 class Coverage(Metric):
     """ """
-    def __init_(self):
-        self.desc = "Coverage"
+    def __init__(self):
+        self.desc = " Coverage "
 
     def run(self,evaluation):
         print "run"
@@ -77,9 +82,9 @@ class Evaluation:
     def __init__(self,predicted_result,real_result):
         """ """
         self.predicted_item_scores = predicted_result.item_score
-        self.predicted_relevant = predicted_result.get_prediction.keys()
+        self.predicted_relevant = predicted_result.get_prediction()
         self.real_item_scores = real_result.item_score
-        self.real_relevant = real_result.get_prediction.keys()
+        self.real_relevant = real_result.get_prediction()
         self.predicted_real = [v for v in self.predicted_relevant if v in
                                self.real_relevant]
 
@@ -88,27 +93,60 @@
 
 class CrossValidation:
     """ Cross-validation method """
-    def __init__(self,partition_size,rounds,rec,metrics_list):
+    def __init__(self,partition_proportion,rounds,rec,metrics_list):
         """ Set parameters: partition_size, rounds, recommender and
             metrics_list """
-        self.partition_size = partition_size
+        if partition_proportion<1 and partition_proportion>0:
+            self.partition_proportion = partition_proportion
+        else:
+            print "The partition proportion must be a value between 0 and 1."
+            exit(1)
         self.rounds = rounds
         self.recommender = rec
-        self.metrics_list = self.metrics_list
+        self.metrics_list = metrics_list
+        self.cross_results = defaultdict(list)
+
+    def print_result(self):
+        print ""
+        metrics_desc = ""
+        for metric in self.metrics_list:
+            metrics_desc += "%s|" % (metric.desc)
+        print "| Round |%s" % metrics_desc
+        for r in range(self.rounds):
+            metrics_result = ""
+            for metric in self.metrics_list:
+                metrics_result += (" %.2f |" %
+                                   (self.cross_results[metric.desc][r]))
+            print "| %d |%s" % (r,metrics_result)
+        metrics_mean = ""
+        for metric in self.metrics_list:
+            mean = float(sum(self.cross_results[metric.desc]) /
+                         len(self.cross_results[metric.desc]))
+            metrics_mean += " %.2f |" % (mean)
+        print "| Mean |%s" % (metrics_mean)
 
     def run(self,user):
         """ Perform cross-validation. """
-        for i in rounds:
-            cross_result = {}
-            for metric in self.metrics_list:
-                cross_results[metric.desc] = []
-            cross_user = User(user.item_score) # FIXME: choose subset
-            predicted_result = self.recommender.gererateRecommendation()
-            evaluation = Evaluation(predicted_result,user.item_score)
+        partition_size = int(len(user.item_score)*self.partition_proportion)
+        cross_item_score = user.item_score.copy()
+        for r in range(self.rounds):
+            round_partition = {}
+            for j in range(partition_size):
+                if len(cross_item_score)>0:
+                    random_key = random.choice(cross_item_score.keys())
+                else:
+                    print "cross_item_score is empty"
+                    exit(1)
+                round_partition[random_key] = cross_item_score.pop(random_key)
+            round_user = User(cross_item_score)
+            predicted_result = self.recommender.generate_recommendation(round_user)
+            real_result = RecommendationResult(round_partition,len(round_partition))
+            evaluation = Evaluation(predicted_result,real_result)
             for metric in self.metrics_list:
-                cross_results[metric.desc].append(evaluation.run(metric))
-            for metric in self.metrics_list:
-                mean = (sum(cross_result[metric.desc]) /
-                        len(cross_result[metric.desc]))
-                print "Mean %d: %2f" % (metric.desc,mean)
+                result = evaluation.run(metric)
+                self.cross_results[metric.desc].append(result)
+            while len(round_partition)>0:
+                item,score = round_partition.popitem()
+                cross_item_score[item] = score
+        self.print_result()
 
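The bookkeeping in CrossValidation.run() above amounts to round-based hold-out sampling without replacement: each round pops partition_size random entries out of a working copy of the user's item scores, evaluates the recommender against that held-out partition, then pushes the entries back so later rounds can draw them again. A minimal standalone sketch of just the sampling loop (plain dicts with illustrative toy data, no recommender involved, Python 2 like the rest of the codebase):

import random

# toy item scores standing in for user.item_score
item_score = {'vim': 1.0, 'mutt': 0.8, 'gimp': 0.6, 'wesnoth': 0.4}

partition_size = int(len(item_score) * 0.5)   # partition_proportion = 0.5
cross_item_score = item_score.copy()          # working copy, shrinks then regrows

for r in range(2):                            # rounds
    round_partition = {}
    for j in range(partition_size):
        # move a random item from the working copy into the hold-out set
        random_key = random.choice(cross_item_score.keys())
        round_partition[random_key] = cross_item_score.pop(random_key)
    # ...train on cross_item_score and evaluate against round_partition here...
    print "round %d held out: %s" % (r, sorted(round_partition.keys()))
    # restore the held-out items before the next round
    while len(round_partition) > 0:
        item, score = round_partition.popitem()
        cross_item_score[item] = score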