Commit b33c0cb1890a68c50ca6511c5d63fc2ffebfa854

Authored by Tássia Camões Araújo
1 parent ccd4ef55
Exists in master and in 1 other branch add_vagrant

Up-to-date metrics experiments.

Showing 2 changed files with 106 additions and 101 deletions   Show diff stats
src/experiments/hybrid.py
@@ -31,6 +31,8 @@ import random @@ -31,6 +31,8 @@ import random
31 import Gnuplot 31 import Gnuplot
32 import numpy 32 import numpy
33 33
  34 +#hybrid_strategies = ['knnco','knnco_eset']
  35 +
34 if __name__ == '__main__': 36 if __name__ == '__main__':
35 if len(sys.argv)<2: 37 if len(sys.argv)<2:
36 print "Usage: hybrid strategy sample_file" 38 print "Usage: hybrid strategy sample_file"
@@ -38,9 +40,7 @@ if __name__ == '__main__': @@ -38,9 +40,7 @@ if __name__ == '__main__':
38 40
39 iterations = 20 41 iterations = 20
40 profile_size = [10,40,70,100,170,240] 42 profile_size = [10,40,70,100,170,240]
41 - neighbor_size = [3,10,50,100,200,400]  
42 -  
43 - #hybrid_strategies = ['knnco','knnco_eset'] 43 + neighbor_size = [3,10,50,70,100,150,200]
44 44
45 #iterations = 1 45 #iterations = 1
46 #profile_size = [10,20,30] 46 #profile_size = [10,20,30]
@@ -55,55 +55,55 @@ if __name__ == '__main__': @@ -55,55 +55,55 @@ if __name__ == '__main__':
55 for line in f.readlines(): 55 for line in f.readlines():
56 user_id = line.strip('\n') 56 user_id = line.strip('\n')
57 population_sample.append(os.path.join(cfg.popcon_dir,user_id[:2],user_id)) 57 population_sample.append(os.path.join(cfg.popcon_dir,user_id[:2],user_id))
58 - sample_dir = ("results/hybrid/%s" % sample_str) 58 + sample_dir = ("results/hybrid/%s/%s" % (sample_str,strategy))
59 if not os.path.exists(sample_dir): 59 if not os.path.exists(sample_dir):
60 os.makedirs(sample_dir) 60 os.makedirs(sample_dir)
61 61
62 cfg.strategy = strategy 62 cfg.strategy = strategy
63 - p_20_summary = {} 63 + p_10_summary = {}
64 f05_100_summary = {} 64 f05_100_summary = {}
65 - c_20 = {} 65 + c_10 = {}
66 c_100 = {} 66 c_100 = {}
67 67
68 log_file = os.path.join(sample_dir,sample_str+"-"+cfg.strategy) 68 log_file = os.path.join(sample_dir,sample_str+"-"+cfg.strategy)
69 - graph_20 = {} 69 + graph_10 = {}
70 graph_100 = {} 70 graph_100 = {}
71 - graph_20_jpg = {} 71 + graph_10_jpg = {}
72 graph_100_jpg = {} 72 graph_100_jpg = {}
73 - comment_20 = {} 73 + comment_10 = {}
74 comment_100 = {} 74 comment_100 = {}
75 for k in neighbor_size: 75 for k in neighbor_size:
76 - graph_20[k] = log_file+("-neighboorhod%.3d-020.png"%k)  
77 - graph_100[k] = log_file+("-neighboorhod%.3d-100.png"%k)  
78 - graph_20_jpg[k] = graph_20[k].strip(".png")+".jpg" 76 + graph_10[k] = log_file+("-neighborhood%.3d-010.png"%k)
  77 + graph_100[k] = log_file+("-neighborhood%.3d-100.png"%k)
  78 + graph_10_jpg[k] = graph_10[k].strip(".png")+".jpg"
79 graph_100_jpg[k] = graph_100[k].strip(".png")+".jpg" 79 graph_100_jpg[k] = graph_100[k].strip(".png")+".jpg"
80 - comment_20[k] = graph_20_jpg[k]+".comment" 80 + comment_10[k] = graph_10_jpg[k]+".comment"
81 comment_100[k] = graph_100_jpg[k]+".comment" 81 comment_100[k] = graph_100_jpg[k]+".comment"
82 82
83 - with open(comment_20[k],'w') as f: 83 + with open(comment_10[k],'w') as f:
84 f.write("# %s\n" % sample_str) 84 f.write("# %s\n" % sample_str)
85 - f.write("# strategy %s\n# threshold 20\n# iterations %d\n\n" % 85 + f.write("# strategy %s\n# threshold 10\n# iterations %d\n\n" %
86 (cfg.strategy,iterations)) 86 (cfg.strategy,iterations))
87 - f.write("# neighboorhood\tprofile\tp_20\tc_20\n\n") 87 + f.write("# neighborhood\tprofile\tmean_p_10\tdev_p_10\tc_10\n\n")
88 with open(comment_100[k],'w') as f: 88 with open(comment_100[k],'w') as f:
89 f.write("# %s\n" % sample_str) 89 f.write("# %s\n" % sample_str)
90 f.write("# strategy %s\n# threshold 100\n# iterations %d\n\n" % 90 f.write("# strategy %s\n# threshold 100\n# iterations %d\n\n" %
91 (cfg.strategy,iterations)) 91 (cfg.strategy,iterations))
92 - f.write("# neighboorhood\tprofile\tf05_100\tc_100\n\n") 92 + f.write("# neighborhood\tprofile\tmean_f05_100\tdev_f05_100\tc_100\n\n")
93 93
94 - c_20[k] = {} 94 + c_10[k] = {}
95 c_100[k] = {} 95 c_100[k] = {}
96 - p_20_summary[k] = {} 96 + p_10_summary[k] = {}
97 f05_100_summary[k] = {} 97 f05_100_summary[k] = {}
98 for size in profile_size: 98 for size in profile_size:
99 - c_20[k][size] = set() 99 + c_10[k][size] = set()
100 c_100[k][size] = set() 100 c_100[k][size] = set()
101 - p_20_summary[k][size] = [] 101 + p_10_summary[k][size] = []
102 f05_100_summary[k][size] = [] 102 f05_100_summary[k][size] = []
103 - with open(log_file+"-neighboorhood%.3d-profile%.3d"%(k,size),'w') as f: 103 + with open(log_file+"-neighborhood%.3d-profile%.3d"%(k,size),'w') as f:
104 f.write("# %s\n" % sample_str) 104 f.write("# %s\n" % sample_str)
105 - f.write("# strategy %s-neighboorhood%.3d-profile%.3d\n\n" % (cfg.strategy,k,size))  
106 - f.write("# p_20\t\tf05_100\n\n") 105 + f.write("# strategy %s-neighborhood%.3d-profile%.3d\n\n" % (cfg.strategy,k,size))
  106 + f.write("# p_10\t\tf05_100\n\n")
107 107
108 # main loop per user 108 # main loop per user
109 for submission_file in population_sample: 109 for submission_file in population_sample:
@@ -116,7 +116,7 @@ if __name__ == '__main__': @@ -116,7 +116,7 @@ if __name__ == '__main__':
116 cfg.profile_size = size 116 cfg.profile_size = size
117 rec = Recommender(cfg) 117 rec = Recommender(cfg)
118 repo_size = rec.items_repository.get_doccount() 118 repo_size = rec.items_repository.get_doccount()
119 - p_20 = [] 119 + p_10 = []
120 f05_100 = [] 120 f05_100 = []
121 for n in range(iterations): 121 for n in range(iterations):
122 # Fill sample profile 122 # Fill sample profile
@@ -134,40 +134,42 @@ if __name__ == '__main__': @@ -134,40 +134,42 @@ if __name__ == '__main__':
134 if hasattr(recommendation,"ranking"): 134 if hasattr(recommendation,"ranking"):
135 ranking = recommendation.ranking 135 ranking = recommendation.ranking
136 real = RecommendationResult(sample) 136 real = RecommendationResult(sample)
137 - predicted_20 = RecommendationResult(dict.fromkeys(ranking[:20],1))  
138 - evaluation = Evaluation(predicted_20,real,repo_size)  
139 - p_20.append(evaluation.run(Precision())) 137 + predicted_10 = RecommendationResult(dict.fromkeys(ranking[:10],1))
  138 + evaluation = Evaluation(predicted_10,real,repo_size)
  139 + p_10.append(evaluation.run(Precision()))
140 predicted_100 = RecommendationResult(dict.fromkeys(ranking[:100],1)) 140 predicted_100 = RecommendationResult(dict.fromkeys(ranking[:100],1))
141 evaluation = Evaluation(predicted_100,real,repo_size) 141 evaluation = Evaluation(predicted_100,real,repo_size)
142 f05_100.append(evaluation.run(F_score(0.5))) 142 f05_100.append(evaluation.run(F_score(0.5)))
143 - c_20[k][size] = c_20[k][size].union(recommendation.ranking[:20]) 143 + c_10[k][size] = c_10[k][size].union(recommendation.ranking[:10])
144 c_100[k][size] = c_100[k][size].union(recommendation.ranking[:100]) 144 c_100[k][size] = c_100[k][size].union(recommendation.ranking[:100])
145 # save summary 145 # save summary
146 - if p_20:  
147 - p_20_summary[k][size].append(sum(p_20)/len(p_20)) 146 + if p_10:
  147 + p_10_summary[k][size].append(numpy.mean(p_10))
148 if f05_100: 148 if f05_100:
149 - f05_100_summary[k][size].append(sum(f05_100)/len(f05_100)) 149 + f05_100_summary[k][size].append(numpy.mean(f05_100))
150 150
151 - with open(log_file+"-neighboorhood%.3d-profile%.3d"%(k,size),'a') as f: 151 + with open(log_file+"-neighborhood%.3d-profile%.3d"%(k,size),'a') as f:
152 f.write("%.4f\t\t%.4f\n" % 152 f.write("%.4f\t\t%.4f\n" %
153 - ((sum(p_20)/len(p_20),sum(f05_100)/len(f05_100)))) 153 + (numpy.mean(p_10),numpy.mean(f05_100)))
154 154
155 # back to main flow 155 # back to main flow
156 - coverage_20 = {} 156 + coverage_10 = {}
157 coverage_100 = {} 157 coverage_100 = {}
158 for k in neighbor_size: 158 for k in neighbor_size:
159 - coverage_20[k] = {} 159 + coverage_10[k] = {}
160 coverage_100[k] = {} 160 coverage_100[k] = {}
161 - with open(comment_20[k],'a') as f: 161 + with open(comment_10[k],'a') as f:
162 for size in profile_size: 162 for size in profile_size:
163 - coverage_20[k][size] = len(c_20[k][size])/float(repo_size)  
164 - f.write("%3d\t\t%3d\t\t%.4f\t%.4f\n" %  
165 - (k,size,float(sum(p_20_summary[k][size]))/len(p_20_summary[k][size]),coverage_20[k][size])) 163 + coverage_10[k][size] = len(c_10[k][size])/float(repo_size)
  164 + f.write("%3d\t\t%3d\t\t%.4f\t%.4f\t%.4f\n" %
  165 + (k,size,numpy.mean(p_10_summary[k][size]),
  166 + numpy.std(p_10_summary[k][size]),coverage_10[k][size]))
166 with open(comment_100[k],'a') as f: 167 with open(comment_100[k],'a') as f:
167 for size in profile_size: 168 for size in profile_size:
168 coverage_100[k][size] = len(c_100[k][size])/float(repo_size) 169 coverage_100[k][size] = len(c_100[k][size])/float(repo_size)
169 - f.write("%3d\t\t%3d\t\t%.4f\t%.4f\n" %  
170 - (k,size,float(sum(f05_100_summary[k][size]))/len(f05_100_summary[k][size]),coverage_100[k][size])) 170 + f.write("%3d\t\t%3d\t\t%.4f\t%.4f\t%.4f\n" %
  171 + (k,size,numpy.mean(f05_100_summary[k][size]),
  172 + numpy.std(f05_100_summary[k][size]),coverage_100[k][size]))
171 173
172 for k in neighbor_size: 174 for k in neighbor_size:
173 # plot results summary 175 # plot results summary
@@ -175,23 +177,26 @@ if __name__ == '__main__': @@ -175,23 +177,26 @@ if __name__ == '__main__':
175 g('set style data lines') 177 g('set style data lines')
176 g('set yrange [0:1.0]') 178 g('set yrange [0:1.0]')
177 g.xlabel('Profile size') 179 g.xlabel('Profile size')
178 - g.title("Setup: %s-neighboorhood%3d (threshold 20)" % (cfg.strategy,k))  
179 - g.plot(Gnuplot.Data(sorted([[i,sum(p_20_summary[k][i])/len(p_20_summary[k][i])]  
180 - for i in p_20_summary[k].keys()]),title="Precision"),  
181 - Gnuplot.Data(sorted([[i,coverage_20[k][i]]  
182 - for i in coverage_20[k].keys()]),title="Coverage"))  
183 - g.hardcopy(graph_20[k],terminal="png")  
184 - #commands.getoutput("convert -quality 100 %s %s" %  
185 - # (graph_20[k],graph_20_jpg[k])) 180 + g.title("Setup: %s-neighborhood%3d (threshold 10)" % (cfg.strategy,k))
  181 + g.plot(Gnuplot.Data(sorted([[i,numpy.mean(p_10_summary[k][i]),numpy.std(p_10_summary[k][i])]
  182 + for i in p_10_summary[k].keys()]),title="Precision"),
  183 + Gnuplot.Data(sorted([[i,numpy.mean(p_10_summary[k][i]),numpy.std(p_10_summary[k][i])]
  184 + for i in p_10_summary[k].keys()]),title="Deviation",
  185 + with_="yerrorbar lt 2 pt 6"),
  186 + Gnuplot.Data(sorted([[i,coverage_10[k][i]]
  187 + for i in coverage_10[k].keys()]),title="Coverage"))
  188 + g.hardcopy(graph_10[k],terminal="png")
  189 +
186 g = Gnuplot.Gnuplot() 190 g = Gnuplot.Gnuplot()
187 g('set style data lines') 191 g('set style data lines')
188 g('set yrange [0:1.0]') 192 g('set yrange [0:1.0]')
189 g.xlabel('Profile size') 193 g.xlabel('Profile size')
190 - g.title("Setup: %s-neighboorhood%3d (threshold 100)" % (cfg.strategy,k))  
191 - g.plot(Gnuplot.Data(sorted([[i,sum(f05_100_summary[k][i])/len(f05_100_summary[k][i])] 194 + g.title("Setup: %s-neighborhood%3d (threshold 100)" % (cfg.strategy,k))
  195 + g.plot(Gnuplot.Data(sorted([[i,numpy.mean(f05_100_summary[k][i]),numpy.std(f05_100_summary[k][i])]
192 for i in f05_100_summary[k].keys()]),title="F05"), 196 for i in f05_100_summary[k].keys()]),title="F05"),
  197 + Gnuplot.Data(sorted([[i,numpy.mean(f05_100_summary[k][i]),numpy.std(f05_100_summary[k][i])]
  198 + for i in f05_100_summary[k].keys()]),title="Deviation",
  199 + with_="yerrorbar lt 2 pt 6"),
193 Gnuplot.Data(sorted([[i,coverage_100[k][i]] 200 Gnuplot.Data(sorted([[i,coverage_100[k][i]]
194 for i in coverage_100[k].keys()]),title="Coverage")) 201 for i in coverage_100[k].keys()]),title="Coverage"))
195 g.hardcopy(graph_100[k],terminal="png") 202 g.hardcopy(graph_100[k],terminal="png")
196 - #commands.getoutput("convert -quality 100 %s %s" %  
197 - # (graph_100[k],graph_100_jpg[k]))  
src/experiments/pure.py
@@ -33,22 +33,21 @@ import numpy @@ -33,22 +33,21 @@ import numpy
33 33
34 if __name__ == '__main__': 34 if __name__ == '__main__':
35 if len(sys.argv)<2: 35 if len(sys.argv)<2:
36 - print "Usage: profile-suite strategy_category sample_file" 36 + print "Usage: pure strategy_category sample_file"
37 exit(1) 37 exit(1)
38 38
39 iterations = 20 39 iterations = 20
40 - profile_size = [10,20,40,70,100,140,170,200,240]  
41 - neighbor_size = [3,5,10,50,100,150,200,300,400,500] 40 + profile_size = [10,20,40,60,80,100,140,170,200,240]
  41 + neighbor_size = [3,5,10,20,30,50,70,100,150,200]
42 42
43 content_strategies = ['cb','cbt','cbd','cbh','cb_eset','cbt_eset','cbd_eset','cbh_eset'] 43 content_strategies = ['cb','cbt','cbd','cbh','cb_eset','cbt_eset','cbd_eset','cbh_eset']
44 - collaborative_strategies = ['knn_eset']#,'knn_eset','knn_plus']  
45 - #collaborative_strategies = ['knn','knn_eset','knn_plus'] 44 + collaborative_strategies = ['knn_eset','knn','knn_plus']
46 45
47 #iterations = 1 46 #iterations = 1
48 #profile_size = [10,20,30] 47 #profile_size = [10,20,30]
49 - #neighbor_size = [10,20,30] 48 + #neighbor_size = [3,5,10,20,30,50]
50 #content_strategies = ['cb'] 49 #content_strategies = ['cb']
51 - #collaborative_strategies = ['knn_eset'] 50 + #collaborative_strategies = ['knn']
52 51
53 strategy_category = sys.argv[1] 52 strategy_category = sys.argv[1]
54 if strategy_category == "content": 53 if strategy_category == "content":
@@ -78,39 +77,39 @@ if __name__ == '__main__': @@ -78,39 +77,39 @@ if __name__ == '__main__':
78 77
79 for strategy in strategies: 78 for strategy in strategies:
80 cfg.strategy = strategy 79 cfg.strategy = strategy
81 - p_20_summary = {} 80 + p_10_summary = {}
82 f05_100_summary = {} 81 f05_100_summary = {}
83 - c_20 = {} 82 + c_10 = {}
84 c_100 = {} 83 c_100 = {}
85 84
86 log_file = os.path.join(sample_dir,sample_str+"-"+cfg.strategy) 85 log_file = os.path.join(sample_dir,sample_str+"-"+cfg.strategy)
87 - graph_20 = log_file+"-20.png" 86 + graph_10 = log_file+"-10.png"
88 graph_100 = log_file+"-100.png" 87 graph_100 = log_file+"-100.png"
89 - graph_20_jpg = graph_20.strip(".png")+".jpg" 88 + graph_10_jpg = graph_10.strip(".png")+".jpg"
90 graph_100_jpg = graph_100.strip(".png")+".jpg" 89 graph_100_jpg = graph_100.strip(".png")+".jpg"
91 - comment_20 = graph_20_jpg+".comment" 90 + comment_10 = graph_10_jpg+".comment"
92 comment_100 = graph_100_jpg+".comment" 91 comment_100 = graph_100_jpg+".comment"
93 92
94 - with open(comment_20,'w') as f: 93 + with open(comment_10,'w') as f:
95 f.write("# sample %s\n" % sample_str) 94 f.write("# sample %s\n" % sample_str)
96 - f.write("# strategy %s\n# threshold 20\n# iterations %d\n\n" % 95 + f.write("# strategy %s\n# threshold 10\n# iterations %d\n\n" %
97 (cfg.strategy,iterations)) 96 (cfg.strategy,iterations))
98 - f.write("# %s\tp_20\tc_20\n\n"%option_str) 97 + f.write("# %s\tmean_p_10\tdev_p_10\tc_10\n\n"%option_str)
99 with open(comment_100,'w') as f: 98 with open(comment_100,'w') as f:
100 f.write("# sample %s\n" % sample_str) 99 f.write("# sample %s\n" % sample_str)
101 f.write("# strategy %s\n# threshold 100\n# iterations %d\n\n" % 100 f.write("# strategy %s\n# threshold 100\n# iterations %d\n\n" %
102 (cfg.strategy,iterations)) 101 (cfg.strategy,iterations))
103 - f.write("# %s\t\tf05_100\t\tc_100\n\n"%option_str) 102 + f.write("# %s\t\tmean_f05_100\t\tdev_f05_100\t\tc_100\n\n"%option_str)
104 103
105 for size in sizes: 104 for size in sizes:
106 - c_20[size] = set() 105 + c_10[size] = set()
107 c_100[size] = set() 106 c_100[size] = set()
108 - p_20_summary[size] = [] 107 + p_10_summary[size] = []
109 f05_100_summary[size] = [] 108 f05_100_summary[size] = []
110 with open(log_file+"-%s%.3d"%(option_str,size),'w') as f: 109 with open(log_file+"-%s%.3d"%(option_str,size),'w') as f:
111 f.write("# sample %s\n" % sample_str) 110 f.write("# sample %s\n" % sample_str)
112 f.write("# strategy %s-%s%.3d\n\n" % (cfg.strategy,option_str,size)) 111 f.write("# strategy %s-%s%.3d\n\n" % (cfg.strategy,option_str,size))
113 - f.write("# p_20\tf05_100\n\n") 112 + f.write("# p_10\tf05_100\n\n")
114 113
115 # main loop per user 114 # main loop per user
116 for submission_file in population_sample: 115 for submission_file in population_sample:
@@ -122,7 +121,7 @@ if __name__ == '__main__': @@ -122,7 +121,7 @@ if __name__ == '__main__':
122 cfg.k_neighbors = size 121 cfg.k_neighbors = size
123 rec = Recommender(cfg) 122 rec = Recommender(cfg)
124 repo_size = rec.items_repository.get_doccount() 123 repo_size = rec.items_repository.get_doccount()
125 - p_20 = [] 124 + p_10 = []
126 f05_100 = [] 125 f05_100 = []
127 for n in range(iterations): 126 for n in range(iterations):
128 # Fill sample profile 127 # Fill sample profile
@@ -140,60 +139,61 @@ if __name__ == '__main__': @@ -140,60 +139,61 @@ if __name__ == '__main__':
140 if hasattr(recommendation,"ranking"): 139 if hasattr(recommendation,"ranking"):
141 ranking = recommendation.ranking 140 ranking = recommendation.ranking
142 real = RecommendationResult(sample) 141 real = RecommendationResult(sample)
143 - predicted_20 = RecommendationResult(dict.fromkeys(ranking[:20],1))  
144 - evaluation = Evaluation(predicted_20,real,repo_size)  
145 - p_20.append(evaluation.run(Precision())) 142 + predicted_10 = RecommendationResult(dict.fromkeys(ranking[:10],1))
  143 + evaluation = Evaluation(predicted_10,real,repo_size)
  144 + p_10.append(evaluation.run(Precision()))
146 predicted_100 = RecommendationResult(dict.fromkeys(ranking[:100],1)) 145 predicted_100 = RecommendationResult(dict.fromkeys(ranking[:100],1))
147 evaluation = Evaluation(predicted_100,real,repo_size) 146 evaluation = Evaluation(predicted_100,real,repo_size)
148 f05_100.append(evaluation.run(F_score(0.5))) 147 f05_100.append(evaluation.run(F_score(0.5)))
149 - c_20[size] = c_20[size].union(recommendation.ranking[:20]) 148 + c_10[size] = c_10[size].union(recommendation.ranking[:10])
150 c_100[size] = c_100[size].union(recommendation.ranking[:100]) 149 c_100[size] = c_100[size].union(recommendation.ranking[:100])
151 # save summary 150 # save summary
152 - if p_20:  
153 - p_20_summary[size].append(sum(p_20)/len(p_20)) 151 + if p_10:
  152 + p_10_summary[size].append(numpy.mean(p_10))
154 if f05_100: 153 if f05_100:
155 - f05_100_summary[size].append(sum(f05_100)/len(f05_100)) 154 + f05_100_summary[size].append(numpy.mean(f05_100))
156 155
157 with open(log_file+"-%s%.3d"%(option_str,size),'a') as f: 156 with open(log_file+"-%s%.3d"%(option_str,size),'a') as f:
158 - f.write("%.4f \t%.4f\n" %  
159 - ((sum(p_20)/len(p_20),sum(f05_100)/len(f05_100)))) 157 + f.write("%.4f \t%.4f\n" % (numpy.mean(p_10),numpy.mean(f05_100)))
160 158
161 # back to main flow 159 # back to main flow
162 - coverage_20 = {} 160 + coverage_10 = {}
163 coverage_100 = {} 161 coverage_100 = {}
164 - with open(comment_20,'a') as f: 162 + with open(comment_10,'a') as f:
165 for size in sizes: 163 for size in sizes:
166 - coverage_20[size] = len(c_20[size])/float(repo_size)  
167 - f.write("%3d\t\t%.4f\t\t%.4f\n" %  
168 - (size,float(sum(p_20_summary[size]))/len(p_20_summary[size]),coverage_20[size])) 164 + coverage_10[size] = len(c_10[size])/float(repo_size)
  165 + f.write("%3d\t\t%.4f\t\t%.4f\t\t%.4f\n" %
  166 + (size,numpy.mean(p_10_summary[size]),numpy.std(p_10_summary[size]),coverage_10[size]))
169 with open(comment_100,'a') as f: 167 with open(comment_100,'a') as f:
170 for size in sizes: 168 for size in sizes:
171 coverage_100[size] = len(c_100[size])/float(repo_size) 169 coverage_100[size] = len(c_100[size])/float(repo_size)
172 - f.write("%3d\t\t%.4f\t\t%.4f\n" %  
173 - (size,float(sum(f05_100_summary[size]))/len(f05_100_summary[size]),coverage_100[size])) 170 + f.write("%3d\t\t%.4f\t\t%.4f\t\t%.4f\n" %
  171 + (size,numpy.mean(f05_100_summary[size]),numpy.std(f05_100_summary[size]),coverage_100[size]))
174 172
175 # plot results summary 173 # plot results summary
176 g = Gnuplot.Gnuplot() 174 g = Gnuplot.Gnuplot()
177 g('set style data lines') 175 g('set style data lines')
178 g('set yrange [0:1.0]') 176 g('set yrange [0:1.0]')
179 g.xlabel('%s size'%option_str.capitalize()) 177 g.xlabel('%s size'%option_str.capitalize())
180 - g.title("Setup: %s (threshold 20)" % cfg.strategy)  
181 - g.plot(Gnuplot.Data(sorted([[k,sum(p_20_summary[k])/len(p_20_summary[k])]  
182 - for k in p_20_summary.keys()]),title="Precision"),  
183 - Gnuplot.Data(sorted([[k,coverage_20[k]]  
184 - for k in coverage_20.keys()]),title="Coverage"))  
185 - g.hardcopy(graph_20,terminal="png")  
186 - commands.getoutput("convert -quality 20 %s %s" %  
187 - (graph_100,graph_20_jpg)) 178 + g.title("Setup: %s (threshold 10)" % cfg.strategy)
  179 + g.plot(Gnuplot.Data(sorted([[k,numpy.mean(p_10_summary[k]),numpy.std(p_10_summary[k])]
  180 + for k in p_10_summary.keys()]),title="Precision"),
  181 + Gnuplot.Data(sorted([[k,numpy.mean(p_10_summary[k]),numpy.std(p_10_summary[k])]
  182 + for k in p_10_summary.keys()]),title="Deviation",
  183 + with_="yerrorbar lt 2 pt 6"),
  184 + Gnuplot.Data(sorted([[k,coverage_10[k]]
  185 + for k in coverage_10.keys()]),title="Coverage"))
  186 + g.hardcopy(graph_10,terminal="png")
188 g = Gnuplot.Gnuplot() 187 g = Gnuplot.Gnuplot()
189 g('set style data lines') 188 g('set style data lines')
190 g('set yrange [0:1.0]') 189 g('set yrange [0:1.0]')
191 g.xlabel('%s size'%option_str.capitalize()) 190 g.xlabel('%s size'%option_str.capitalize())
192 g.title("Setup: %s (threshold 100)" % cfg.strategy) 191 g.title("Setup: %s (threshold 100)" % cfg.strategy)
193 - g.plot(Gnuplot.Data(sorted([[k,sum(f05_100_summary[k])/len(f05_100_summary[k])] 192 + g.plot(Gnuplot.Data(sorted([[k,numpy.mean(f05_100_summary[k]),numpy.std(f05_100_summary[k])]
194 for k in f05_100_summary.keys()]),title="F05"), 193 for k in f05_100_summary.keys()]),title="F05"),
  194 + Gnuplot.Data(sorted([[k,numpy.mean(f05_100_summary[k]),numpy.std(f05_100_summary[k])]
  195 + for k in f05_100_summary.keys()]),title="Deviation",
  196 + with_="yerrorbar lt 2 pt 6"),
195 Gnuplot.Data(sorted([[k,coverage_100[k]] 197 Gnuplot.Data(sorted([[k,coverage_100[k]]
196 for k in coverage_100.keys()]),title="Coverage")) 198 for k in coverage_100.keys()]),title="Coverage"))
197 g.hardcopy(graph_100,terminal="png") 199 g.hardcopy(graph_100,terminal="png")
198 - commands.getoutput("convert -quality 100 %s %s" %  
199 - (graph_100,graph_100_jpg))