Commit fb71f2a3262db92476316a13eaf9583be0b192d9

Authored by Tássia Camões Araújo
1 parent b48d6eca
Exists in master and in 1 other branch: add_vagrant

Refactored evaluation classes.

Showing 1 changed file with 106 additions and 51 deletions
src/evaluation.py
... ... @@ -49,6 +49,45 @@ class Metric(Singleton):
49 49 evaluation.real_item_scores[k]))
50 50 return errors
51 51  
  52 +
  53 +class SimpleAccuracy(Metric):
  54 + """
  55 + Simple classification accuracy metric, which does not take class sizes into account.
  56 + """
  57 + def __init__(self):
  58 + """
  59 + Set metric description.
  60 + """
  61 + self.desc = " S_Accuracy "
  62 +
  63 + def run(self,evaluation):
  64 + """
  65 + Compute metric.
  66 + """
  67 + return float((evaluation.repository_size-
  68 + len(evaluation.false_positive))-
  69 + len(evaluation.false_negative))/evaluation.repository_size
  70 +
  71 +class Accuracy(Metric):
  72 + """
  73 + Classification accuracy metric which takes class sizes into account.
  74 + """
  75 + def __init__(self):
  76 + """
  77 + Set metric description.
  78 + """
  79 + self.desc = " Accuracy "
  80 +
  81 + def run(self,evaluation):
  82 + """
  83 + Compute metric.
  84 + """
  85 + error_1 = (float(len(evaluation.false_positive))/
  86 + (evaluation.repository_size-len(evaluation.real_relevant)))
  87 + error_2 = (float(len(evaluation.false_negative))/len(evaluation.real_relevant))
  88 + accuracy = 1-(float(error_1+error_2)/2)
  89 + return accuracy
  90 +
52 91 class Precision(Metric):
53 92 """
54 93 Classification accuracy metric defined as the percentage of relevant items
... ... @@ -64,7 +103,7 @@ class Precision(Metric):
64 103 """
65 104 Compute metric.
66 105 """
67   - return float(len(evaluation.predicted_real))/len(evaluation.predicted_relevant)
  106 + return float(len(evaluation.true_positive))/len(evaluation.predicted_relevant)
68 107  
69 108 class Recall(Metric):
70 109 """
... ... @@ -81,7 +120,7 @@ class Recall(Metric):
81 120 """
82 121 Compute metric.
83 122 """
84   - return float(len(evaluation.predicted_real))/len(evaluation.real_relevant)
  123 + return float(len(evaluation.true_positive))/len(evaluation.real_relevant)
85 124  
86 125 class F1(Metric):
87 126 """
... ... @@ -100,7 +139,10 @@ class F1(Metric):
100 139 """
101 140 p = Precision().run(evaluation)
102 141 r = Recall().run(evaluation)
103   - return float((2*p*r))/(p+r)
  142 + if (p+r)>0:
  143 + return float((2*p*r))/(p+r)
  144 + else:
  145 + return 0
104 146  
105 147 class MAE(Metric):
106 148 """
... ... @@ -158,43 +200,47 @@ class Coverage(Metric):
158 200 Evaluation metric defined as the percentage of items covered by the
159 201 recommender (i.e. items that have been recommended at least once).
160 202 """
161   - def __init__(self,repository_size):
  203 + def __init__(self):
162 204 """
163 205 Set initial parameters.
164 206 """
165 207 self.desc = " Coverage "
166   - self.repository_size = repository_size
167   - self.covered = set()
168   -
169   - def save_covered(self,recommended_list):
170   - """
171   - Register that a list of itens has been recommended.
172   - """
173   - self.covered.update(set(recommended_list))
174 208  
175   - def run(self,evaluation):
  209 + def run(self,evaluations_set):
176 210 """
177 211 Compute metric.
178 212 """
179   - return float(self.covered.size)/self.repository_size
  213 + covered = set()
  214 + for evaluation in evaluations_set:
  215 + covered.update(set(evaluation.predicted_relevant))
  216 + return float(len(covered))/evaluation.repository_size
180 217  
181 218 class Evaluation:
182 219 """
183 220 Class designed to perform prediction evaluation, given data and metric.
184 221 """
185   - def __init__(self,predicted_result,real_result):
  222 + def __init__(self,predicted,real,repository_size):
186 223 """
187 224 Set initial parameters.
188 225 """
189   - self.predicted_item_scores = predicted_result.item_score
190   - self.predicted_relevant = predicted_result.get_prediction()
191   - self.real_item_scores = real_result.item_score
192   - self.real_relevant = real_result.get_prediction()
193   - self.predicted_real = [v for v in self.predicted_relevant if v in
194   - self.real_relevant]
195   - #print len(self.predicted_relevant)
196   - #print len(self.real_relevant)
197   - #print len(self.predicted_real)
  226 + self.repository_size = repository_size
  227 + self.predicted_item_scores = predicted.item_score
  228 + self.predicted_relevant = predicted.get_prediction()
  229 + self.real_item_scores = real.item_score
  230 + self.real_relevant = real.get_prediction()
  231 +
  232 + self.true_positive = [v[0] for v in self.predicted_relevant if v[0] in
  233 + [w[0] for w in self.real_relevant]]
  234 + self.false_positive = [v[0] for v in self.predicted_relevant if not v[0] in
  235 + [w[0] for w in self.real_relevant]]
  236 + self.false_negative = [v[0] for v in self.real_relevant if not v[0] in
  237 + [w[0] for w in self.predicted_relevant]]
  238 +
  239 + logging.debug("TP: %d" % len(self.true_positive))
  240 + logging.debug("FP: %d" % len(self.false_positive))
  241 + logging.debug("FN: %d" % len(self.false_negative))
  242 + logging.debug("Repo_size: %d" % self.repository_size)
  243 + logging.debug("Relevant: %d" % len(self.real_relevant))
198 244  
199 245 def run(self,metric):
200 246 """
... ... @@ -206,7 +252,7 @@ class CrossValidation:
206 252 """
207 253 Class designed to perform cross-validation process.
208 254 """
209   - def __init__(self,partition_proportion,rounds,rec,metrics_list):
  255 + def __init__(self,partition_proportion,rounds,rec,metrics_list,result_proportion):
210 256 """
211 257 Set initial parameters.
212 258 """
... ... @@ -219,34 +265,13 @@ class CrossValidation:
219 265 self.recommender = rec
220 266 self.metrics_list = metrics_list
221 267 self.cross_results = defaultdict(list)
222   -
223   - def __str__(self):
224   - """
225   - String representation of the object.
226   - """
227   - str = "\n"
228   - metrics_desc = ""
229   - for metric in self.metrics_list:
230   - metrics_desc += "%s|" % (metric.desc)
231   - str += "| Round |%s\n" % metrics_desc
232   - for r in range(self.rounds):
233   - metrics_result = ""
234   - for metric in self.metrics_list:
235   - metrics_result += (" %2.1f%% |" %
236   - (self.cross_results[metric.desc][r]*100))
237   - str += "| %d |%s\n" % (r,metrics_result)
238   - metrics_mean = ""
239   - for metric in self.metrics_list:
240   - mean = float(sum(self.cross_results[metric.desc]) /
241   - len(self.cross_results[metric.desc]))
242   - metrics_mean += " %2.1f%% |" % (mean*100)
243   - str += "| Mean |%s\n" % (metrics_mean)
244   - return str
  268 + self.result_proportion = result_proportion
245 269  
246 270 def run(self,user):
247 271 """
248 272 Perform cross-validation.
249 273 """
  274 + #
250 275 cross_item_score = dict.fromkeys(user.pkg_profile,1)
251 276 partition_size = int(len(cross_item_score)*self.partition_proportion)
252 277 for r in range(self.rounds):
... ... @@ -258,10 +283,17 @@ class CrossValidation:
258 283 logging.critical("Empty cross_item_score.")
259 284 raise Error
260 285 round_partition[random_key] = cross_item_score.pop(random_key)
  286 + #logging.debug("Round partition: %s",str(round_partition))
  287 + #logging.debug("Cross item-score: %s",str(cross_item_score))
261 288 round_user = User(cross_item_score)
262   - predicted_result = self.recommender.get_recommendation(round_user)
263   - real_result = RecommendationResult(round_partition,len(round_partition))
264   - evaluation = Evaluation(predicted_result,real_result)
  289 + result_size = int(self.recommender.items_repository.get_doccount()*
  290 + self.result_proportion)
  291 + predicted_result = self.recommender.get_recommendation(round_user,result_size)
  292 + logging.debug("Round partition size: %d" % len(round_partition))
  293 + real_result = RecommendationResult(round_partition)
  294 + #logging.debug("Predicted result: %s",predicted_result)
  295 + evaluation = Evaluation(predicted_result,real_result,
  296 + self.recommender.items_repository.get_doccount())
265 297 for metric in self.metrics_list:
266 298 result = evaluation.run(metric)
267 299 self.cross_results[metric.desc].append(result)
... ... @@ -269,3 +301,26 @@ class CrossValidation:
269 301 item,score = round_partition.popitem()
270 302 cross_item_score[item] = score
271 303  
  304 + def __str__(self):
  305 + """
  306 + String representation of the object.
  307 + """
  308 + str = "\n"
  309 + metrics_desc = ""
  310 + for metric in self.metrics_list:
  311 + metrics_desc += "%s|" % (metric.desc)
  312 + str += "| Round |%s\n" % metrics_desc
  313 + for r in range(self.rounds):
  314 + metrics_result = ""
  315 + for metric in self.metrics_list:
  316 + metrics_result += (" %2.1f%% |" %
  317 + (self.cross_results[metric.desc][r]*100))
  318 + str += "| %d |%s\n" % (r,metrics_result)
  319 + metrics_mean = ""
  320 + for metric in self.metrics_list:
  321 + mean = float(sum(self.cross_results[metric.desc]) /
  322 + len(self.cross_results[metric.desc]))
  323 + metrics_mean += " %2.1f%% |" % (mean*100)
  324 + str += "| Mean |%s\n" % (metrics_mean)
  325 + return str
  326 +
... ...
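
Below is a minimal usage sketch (not part of the commit) showing how the refactored classes in src/evaluation.py fit together after this change. It assumes the module's classes are importable (the import path shown is a guess that depends on the project layout), and it uses a hypothetical FakeResult stand-in for RecommendationResult, exposing only the two attributes Evaluation relies on: item_score and get_prediction(), the latter returning (item, score) tuples since the new constructor indexes v[0]. Package names, scores, and the repository size are made up for illustration.

# Illustrative only; import path is an assumption about the project layout.
from src.evaluation import (Evaluation, Precision, Recall, F1,
                            SimpleAccuracy, Accuracy, Coverage)

class FakeResult:
    """Hypothetical stand-in for RecommendationResult (illustration only)."""
    def __init__(self, item_score):
        self.item_score = item_score

    def get_prediction(self):
        # (item, score) tuples, best score first
        return sorted(self.item_score.items(), key=lambda t: t[1], reverse=True)

predicted = FakeResult({"vim": 0.9, "mutt": 0.7, "gimp": 0.4})   # recommended items
real = FakeResult({"vim": 1.0, "mutt": 1.0, "emacs": 1.0})       # actually relevant items
repository_size = 10                                             # assumed repository size

evaluation = Evaluation(predicted, real, repository_size)
for metric in [Precision(), Recall(), F1(), SimpleAccuracy(), Accuracy()]:
    print("%s: %.2f" % (metric.desc.strip(), evaluation.run(metric)))

# Coverage now takes a collection of Evaluation objects instead of being fed
# recommended lists incrementally.
print("Coverage: %.2f" % Coverage().run([evaluation]))

# With these toy sets: TP = {vim, mutt}, FP = {gimp}, FN = {emacs}, so
#   Precision = Recall = F1 = 2/3
#   SimpleAccuracy = (10 - 1 - 1) / 10          = 0.80
#   Accuracy       = 1 - (1/(10-3) + 1/3) / 2   ~ 0.76   (mean of the two class error rates)
#   Coverage       = 3 / 10                     = 0.30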