Commit bb8d206b3a8d722c6c54f32f50f50059417f8dc3

Authored by Tássia Camões Araújo
1 parent 20875f7d
Exists in master and in 1 other branch: add_vagrant

Implementation of cross-validation completed. The result is printed in a
matrix format. (closes #3)
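
For reference, the matrix emitted by the new CrossValidation.print_result (see the src/evaluation.py hunks below) has one row per round plus a final mean row, and one column per metric. With the Precision and Recall metrics configured in src/app_recommender.py, the output would look roughly as follows; the numbers are illustrative placeholders, not real results:

    | Round | Precision | Recall |
    | 0 | 0.40 | 0.25 |
    | 1 | 0.35 | 0.20 |
    ...
    | Mean | 0.38 | 0.23 |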
src/app_recommender.py
... ... @@ -50,3 +50,9 @@ if __name__ == '__main__':
50 50  
51 51     result = recommender.generate_recommendation(user)
52 52     result.print_result()
  53 +
  54 +    metrics = []
  55 +    metrics.append(Precision())
  56 +    metrics.append(Recall())
  57 +    validation = CrossValidation(0.1,10,recommender,metrics)
  58 +    validation.run(user)
... ...
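
In the call above, the first argument (0.1) is the proportion of the user's items held out as the test partition in each round, and the second (10) is the number of rounds, matching the CrossValidation constructor added in src/evaluation.py below.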
src/data.py
... ... @@ -76,8 +76,8 @@ class DebtagsIndex:
76 76         """ Load an existing debtags index. """
77 77         if not reindex:
78 78             try:
79   -                print ("Opening existing debtags xapian index at \'%s\'" %
80   -                        self.path)
  79 +                #print ("Opening existing debtags xapian index at \'%s\'" %
  80 +                #        self.path)
81 81                 self.index = xapian.Database(self.path)
82 82             except DatabaseError:
83 83                 print "Could not open debtags xapian index"
... ...
src/evaluation.py
... ... @@ -17,13 +17,18 @@
17 17 # You should have received a copy of the GNU General Public License
18 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19  
  20 +import random
  21 +from collections import defaultdict
  22 +from user import *
  23 +from recommender import *
  24 +
20 25 class Metric:
21 26     """ """
22 27  
23 28 class Precision(Metric):
24 29     """ """
25   -    def __init_(self):
26   -        self.desc = "Precision"
  30 +    def __init__(self):
  31 +        self.desc = " Precision "
27 32  
28 33     def run(self,evaluation):
29 34         return float(len(evaluation.predicted_real) /
... ... @@ -31,8 +36,8 @@ class Precision(Metric):
31 36  
32 37 class Recall(Metric):
33 38     """ """
34   -    def __init_(self):
35   -        self.desc = "Recall"
  39 +    def __init__(self):
  40 +        self.desc = " Recall "
36 41  
37 42     def run(self,evaluation):
38 43         return float(len(evaluation.predicted_real) /
... ... @@ -40,8 +45,8 @@ class Recall(Metric):
40 45  
41 46 class F1(Metric):
42 47     """ """
43   -    def __init_(self):
44   -        self.desc = "F1"
  48 +    def __init__(self):
  49 +        self.desc = " F1 "
45 50  
46 51     def run(self,evaluation):
47 52         p = Precision().run(evaluation)
... ... @@ -50,24 +55,24 @@ class F1(Metric):
50 55  
51 56 class MAE(Metric):
52 57     """ """
53   -    def __init_(self):
54   -        self.desc = "MAE"
  58 +    def __init__(self):
  59 +        self.desc = " MAE "
55 60  
56 61     def run(self,evaluation):
57 62         print "run"
58 63  
59 64 class MSE(Metric):
60 65     """ """
61   -    def __init_(self):
62   -        self.desc = "MSE"
  66 +    def __init__(self):
  67 +        self.desc = " MSE "
63 68  
64 69     def run(self,evaluation):
65 70         print "run"
66 71  
67 72 class Coverage(Metric):
68 73     """ """
69   -    def __init_(self):
70   -        self.desc = "Coverage"
  74 +    def __init__(self):
  75 +        self.desc = " Coverage "
71 76  
72 77     def run(self,evaluation):
73 78         print "run"
... ... @@ -77,9 +82,9 @@ class Evaluation:
77 82     def __init__(self,predicted_result,real_result):
78 83         """ """
79 84         self.predicted_item_scores = predicted_result.item_score
80   -        self.predicted_relevant = predicted_result.get_prediction.keys()
  85 +        self.predicted_relevant = predicted_result.get_prediction()
81 86         self.real_item_scores = real_result.item_score
82   -        self.real_relevant = real_result.get_prediction.keys()
  87 +        self.real_relevant = real_result.get_prediction()
83 88         self.predicted_real = [v for v in self.predicted_relevant if v in
84 89                                self.real_relevant]
85 90  
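
Note on the change above: predicted_real is the intersection of the recommender's predicted relevant items and the held-out relevant items. The Precision.run and Recall.run hunks are truncated here, so their denominators are not visible; a minimal self-contained sketch under the standard definitions (the package names are made up) would be:

    predicted_relevant = ['vim', 'mutt', 'gimp']     # recommender output
    real_relevant = ['vim', 'gimp', 'inkscape']      # held-out items
    predicted_real = [v for v in predicted_relevant if v in real_relevant]

    precision = float(len(predicted_real)) / len(predicted_relevant)  # 0.67
    recall = float(len(predicted_real)) / len(real_relevant)          # 0.67
    print "Precision: %.2f, Recall: %.2f" % (precision, recall)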
... ... @@ -88,27 +93,60 @@ class Evaluation:
88 93  
89 94 class CrossValidation:
90 95     """ Cross-validation method """
91   -    def __init__(self,partition_size,rounds,rec,metrics_list):
  96 +    def __init__(self,partition_proportion,rounds,rec,metrics_list):
92 97         """ Set parameters: partition_size, rounds, recommender and
93 98             metrics_list """
94   -        self.partition_size = partition_size
  99 +        if partition_proportion<1 and partition_proportion>0:
  100 +            self.partition_proportion = partition_proportion
  101 +        else:
  102 +            print "The partition proportion must be a value between 0 and 1."
  103 +            exit(1)
95 104         self.rounds = rounds
96 105         self.recommender = rec
97   -        self.metrics_list = self.metrics_list
  106 +        self.metrics_list = metrics_list
  107 +        self.cross_results = defaultdict(list)
  108 +
  109 +    def print_result(self):
  110 +        print ""
  111 +        metrics_desc = ""
  112 +        for metric in self.metrics_list:
  113 +            metrics_desc += "%s|" % (metric.desc)
  114 +        print "| Round |%s" % metrics_desc
  115 +        for r in range(self.rounds):
  116 +            metrics_result = ""
  117 +            for metric in self.metrics_list:
  118 +                metrics_result += (" %.2f |" %
  119 +                                   (self.cross_results[metric.desc][r]))
  120 +            print "| %d |%s" % (r,metrics_result)
  121 +        metrics_mean = ""
  122 +        for metric in self.metrics_list:
  123 +            mean = float(sum(self.cross_results[metric.desc]) /
  124 +                         len(self.cross_results[metric.desc]))
  125 +            metrics_mean += " %.2f |" % (mean)
  126 +        print "| Mean |%s" % (metrics_mean)
98 127  
99 128     def run(self,user):
100 129         """ Perform cross-validation. """
101   -        for i in rounds:
102   -            cross_result = {}
103   -            for metric in self.metrics_list:
104   -                cross_results[metric.desc] = []
105   -            cross_user = User(user.item_score) # FIXME: choose subset
106   -            predicted_result = self.recommender.gererateRecommendation()
107   -            evaluation = Evaluation(predicted_result,user.item_score)
  130 +        partition_size = int(len(user.item_score)*self.partition_proportion)
  131 +        cross_item_score = user.item_score.copy()
  132 +        for r in range(self.rounds):
  133 +            round_partition = {}
  134 +            for j in range(partition_size):
  135 +                if len(cross_item_score)>0:
  136 +                    random_key = random.choice(cross_item_score.keys())
  137 +                else:
  138 +                    print "cross_item_score is empty"
  139 +                    exit(1)
  140 +                round_partition[random_key] = cross_item_score.pop(random_key)
  141 +            round_user = User(cross_item_score)
  142 +            predicted_result = self.recommender.generate_recommendation(round_user)
  143 +            real_result = RecommendationResult(round_partition,len(round_partition))
  144 +            evaluation = Evaluation(predicted_result,real_result)
108 145             for metric in self.metrics_list:
109   -                cross_results[metric.desc].append(evaluation.run(metric))
110   -            for metric in self.metrics_list:
111   -                mean = (sum(cross_result[metric.desc]) /
112   -                        len(cross_result[metric.desc]))
113   -                print "Mean %d: %2f" % (metric.desc,mean)
  146 +                result = evaluation.run(metric)
  147 +                self.cross_results[metric.desc].append(result)
  148 +            while len(round_partition)>0:
  149 +                item,score = round_partition.popitem()
  150 +                cross_item_score[item] = score
  151 +        self.print_result()
114 152  
... ...
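
A final note on the sampling in CrossValidation.run: each round pops a random partition out of a copy of the user's item/score dict, evaluates against it, and then pushes the items back, so later rounds draw from the full set again and partitions may overlap across rounds (random subsampling rather than disjoint k-fold splits). A minimal sketch of that bookkeeping with plain dicts (the item names and the 0.2 proportion are made up for illustration):

    import random

    item_score = {'vim': 1, 'mutt': 1, 'gimp': 1, 'inkscape': 1, 'git': 1}
    partition_size = int(len(item_score) * 0.2)

    for r in range(3):
        round_partition = {}
        for j in range(partition_size):
            # random.choice needs a sequence; dict.keys() is a list on Python 2
            random_key = random.choice(item_score.keys())
            round_partition[random_key] = item_score.pop(random_key)
        # ...recommend from the remaining items, score against round_partition...
        while len(round_partition) > 0:
            item, score = round_partition.popitem()
            item_score[item] = score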