Commit bb8d206b3a8d722c6c54f32f50f50059417f8dc3
1 parent
20875f7d
Exists in
master
and in
1 other branch
Implementation of cross-validation completed. The result is printed in a
matrix format. (closes #3)
Showing
3 changed files
with
75 additions
and
31 deletions
Show diff stats
src/app_recommender.py
| ... | ... | @@ -50,3 +50,9 @@ if __name__ == '__main__': |
| 50 | 50 | |
| 51 | 51 | result = recommender.generate_recommendation(user) |
| 52 | 52 | result.print_result() |
| 53 | + | |
| 54 | + metrics = [] | |
| 55 | + metrics.append(Precision()) | |
| 56 | + metrics.append(Recall()) | |
| 57 | + validation = CrossValidation(0.1,10,recommender,metrics) | |
| 58 | + validation.run(user) | ... | ... |
src/data.py
| ... | ... | @@ -76,8 +76,8 @@ class DebtagsIndex: |
| 76 | 76 | """ Load an existing debtags index. """ |
| 77 | 77 | if not reindex: |
| 78 | 78 | try: |
| 79 | - print ("Opening existing debtags xapian index at \'%s\'" % | |
| 80 | - self.path) | |
| 79 | + #print ("Opening existing debtags xapian index at \'%s\'" % | |
| 80 | + # self.path) | |
| 81 | 81 | self.index = xapian.Database(self.path) |
| 82 | 82 | except DatabaseError: |
| 83 | 83 | print "Could not open debtags xapian index" | ... | ... |
src/evaluation.py
| ... | ... | @@ -17,13 +17,18 @@ |
| 17 | 17 | # You should have received a copy of the GNU General Public License |
| 18 | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 19 | 19 | |
| 20 | +import random | |
| 21 | +from collections import defaultdict | |
| 22 | +from user import * | |
| 23 | +from recommender import * | |
| 24 | + | |
| 20 | 25 | class Metric: |
| 21 | 26 | """ """ |
| 22 | 27 | |
| 23 | 28 | class Precision(Metric): |
| 24 | 29 | """ """ |
| 25 | - def __init_(self): | |
| 26 | - self.desc = "Precision" | |
| 30 | + def __init__(self): | |
| 31 | + self.desc = " Precision " | |
| 27 | 32 | |
| 28 | 33 | def run(self,evaluation): |
| 29 | 34 | return float(len(evaluation.predicted_real) / |
| ... | ... | @@ -31,8 +36,8 @@ class Precision(Metric): |
| 31 | 36 | |
| 32 | 37 | class Recall(Metric): |
| 33 | 38 | """ """ |
| 34 | - def __init_(self): | |
| 35 | - self.desc = "Recall" | |
| 39 | + def __init__(self): | |
| 40 | + self.desc = " Recall " | |
| 36 | 41 | |
| 37 | 42 | def run(self,evaluation): |
| 38 | 43 | return float(len(evaluation.predicted_real) / |
| ... | ... | @@ -40,8 +45,8 @@ class Recall(Metric): |
| 40 | 45 | |
| 41 | 46 | class F1(Metric): |
| 42 | 47 | """ """ |
| 43 | - def __init_(self): | |
| 44 | - self.desc = "F1" | |
| 48 | + def __init__(self): | |
| 49 | + self.desc = " F1 " | |
| 45 | 50 | |
| 46 | 51 | def run(self,evaluation): |
| 47 | 52 | p = Precision().run(evaluation) |
| ... | ... | @@ -50,24 +55,24 @@ class F1(Metric): |
| 50 | 55 | |
| 51 | 56 | class MAE(Metric): |
| 52 | 57 | """ """ |
| 53 | - def __init_(self): | |
| 54 | - self.desc = "MAE" | |
| 58 | + def __init__(self): | |
| 59 | + self.desc = " MAE " | |
| 55 | 60 | |
| 56 | 61 | def run(self,evaluation): |
| 57 | 62 | print "run" |
| 58 | 63 | |
| 59 | 64 | class MSE(Metric): |
| 60 | 65 | """ """ |
| 61 | - def __init_(self): | |
| 62 | - self.desc = "MSE" | |
| 66 | + def __init__(self): | |
| 67 | + self.desc = " MSE " | |
| 63 | 68 | |
| 64 | 69 | def run(self,evaluation): |
| 65 | 70 | print "run" |
| 66 | 71 | |
| 67 | 72 | class Coverage(Metric): |
| 68 | 73 | """ """ |
| 69 | - def __init_(self): | |
| 70 | - self.desc = "Coverage" | |
| 74 | + def __init__(self): | |
| 75 | + self.desc = " Coverage " | |
| 71 | 76 | |
| 72 | 77 | def run(self,evaluation): |
| 73 | 78 | print "run" |
| ... | ... | @@ -77,9 +82,9 @@ class Evaluation: |
| 77 | 82 | def __init__(self,predicted_result,real_result): |
| 78 | 83 | """ """ |
| 79 | 84 | self.predicted_item_scores = predicted_result.item_score |
| 80 | - self.predicted_relevant = predicted_result.get_prediction.keys() | |
| 85 | + self.predicted_relevant = predicted_result.get_prediction() | |
| 81 | 86 | self.real_item_scores = real_result.item_score |
| 82 | - self.real_relevant = real_result.get_prediction.keys() | |
| 87 | + self.real_relevant = real_result.get_prediction() | |
| 83 | 88 | self.predicted_real = [v for v in self.predicted_relevant if v in |
| 84 | 89 | self.real_relevant] |
| 85 | 90 | |
| ... | ... | @@ -88,27 +93,60 @@ class Evaluation: |
| 88 | 93 | |
| 89 | 94 | class CrossValidation: |
| 90 | 95 | """ Cross-validation method """ |
| 91 | - def __init__(self,partition_size,rounds,rec,metrics_list): | |
| 96 | + def __init__(self,partition_proportion,rounds,rec,metrics_list): | |
| 92 | 97 | """ Set parameters: partition_size, rounds, recommender and |
| 93 | 98 | metrics_list """ |
| 94 | - self.partition_size = partition_size | |
| 99 | + if partition_proportion<1 and partition_proportion>0: | |
| 100 | + self.partition_proportion = partition_proportion | |
| 101 | + else: | |
| 102 | + print "A proporcao de particao deve ser um valor entre 0 e 1." | |
| 103 | + exit(1) | |
| 95 | 104 | self.rounds = rounds |
| 96 | 105 | self.recommender = rec |
| 97 | - self.metrics_list = self.metrics_list | |
| 106 | + self.metrics_list = metrics_list | |
| 107 | + self.cross_results = defaultdict(list) | |
| 108 | + | |
| 109 | + def print_result(self): | |
| 110 | + print "" | |
| 111 | + metrics_desc = "" | |
| 112 | + for metric in self.metrics_list: | |
| 113 | + metrics_desc += "%s|" % (metric.desc) | |
| 114 | + print "| Round |%s" % metrics_desc | |
| 115 | + for r in range(self.rounds): | |
| 116 | + metrics_result = "" | |
| 117 | + for metric in self.metrics_list: | |
| 118 | + metrics_result += (" %.2f |" % | |
| 119 | + (self.cross_results[metric.desc][r])) | |
| 120 | + print "| %d |%s" % (r,metrics_result) | |
| 121 | + metrics_mean = "" | |
| 122 | + for metric in self.metrics_list: | |
| 123 | + mean = float(sum(self.cross_results[metric.desc]) / | |
| 124 | + len(self.cross_results[metric.desc])) | |
| 125 | + metrics_mean += " %.2f |" % (mean) | |
| 126 | + print "| Mean |%s" % (metrics_mean) | |
| 98 | 127 | |
| 99 | 128 | def run(self,user): |
| 100 | 129 | """ Perform cross-validation. """ |
| 101 | - for i in rounds: | |
| 102 | - cross_result = {} | |
| 103 | - for metric in self.metrics_list: | |
| 104 | - cross_results[metric.desc] = [] | |
| 105 | - cross_user = User(user.item_score) # FIXME: choose subset | |
| 106 | - predicted_result = self.recommender.gererateRecommendation() | |
| 107 | - evaluation = Evaluation(predicted_result,user.item_score) | |
| 130 | + partition_size = int(len(user.item_score)*self.partition_proportion) | |
| 131 | + cross_item_score = user.item_score.copy() | |
| 132 | + for r in range(self.rounds): | |
| 133 | + round_partition = {} | |
| 134 | + for j in range(partition_size): | |
| 135 | + if len(cross_item_score)>0: | |
| 136 | + random_key = random.choice(cross_item_score.keys()) | |
| 137 | + else: | |
| 138 | + print "cross_item_score vazio" | |
| 139 | + exit(1) | |
| 140 | + round_partition[random_key] = cross_item_score.pop(random_key) | |
| 141 | + round_user = User(cross_item_score) | |
| 142 | + predicted_result = self.recommender.generate_recommendation(round_user) | |
| 143 | + real_result = RecommendationResult(round_partition,len(round_partition)) | |
| 144 | + evaluation = Evaluation(predicted_result,real_result) | |
| 108 | 145 | for metric in self.metrics_list: |
| 109 | - cross_results[metric.desc].append(evaluation.run(metric)) | |
| 110 | - for metric in self.metrics_list: | |
| 111 | - mean = (sum(cross_result[metric.desc]) / | |
| 112 | - len(cross_result[metric.desc])) | |
| 113 | - print "Mean %d: %2f" % (metric.desc,mean) | |
| 146 | + result = evaluation.run(metric) | |
| 147 | + self.cross_results[metric.desc].append(result) | |
| 148 | + while len(round_partition)>0: | |
| 149 | + item,score = round_partition.popitem() | |
| 150 | + cross_item_score[item] = score | |
| 151 | + self.print_result() | |
| 114 | 152 | ... | ... |