Commit b33c0cb1890a68c50ca6511c5d63fc2ffebfa854 (1 parent: ccd4ef55)
Exists in master and in 1 other branch.
Up-to-date metrics experiments.
Showing 2 changed files with 106 additions and 101 deletions.
Show diff stats
src/experiments/hybrid.py
@@ -31,6 +31,8 @@ import random | @@ -31,6 +31,8 @@ import random | ||
31 | import Gnuplot | 31 | import Gnuplot |
32 | import numpy | 32 | import numpy |
33 | 33 | ||
34 | +#hybrid_strategies = ['knnco','knnco_eset'] | ||
35 | + | ||
34 | if __name__ == '__main__': | 36 | if __name__ == '__main__': |
35 | if len(sys.argv)<2: | 37 | if len(sys.argv)<2: |
36 | print "Usage: hybrid strategy sample_file" | 38 | print "Usage: hybrid strategy sample_file" |
@@ -38,9 +40,7 @@ if __name__ == '__main__': | @@ -38,9 +40,7 @@ if __name__ == '__main__': | ||
38 | 40 | ||
39 | iterations = 20 | 41 | iterations = 20 |
40 | profile_size = [10,40,70,100,170,240] | 42 | profile_size = [10,40,70,100,170,240] |
41 | - neighbor_size = [3,10,50,100,200,400] | ||
42 | - | ||
43 | - #hybrid_strategies = ['knnco','knnco_eset'] | 43 | + neighbor_size = [3,10,50,70,100,150,200] |
44 | 44 | ||
45 | #iterations = 1 | 45 | #iterations = 1 |
46 | #profile_size = [10,20,30] | 46 | #profile_size = [10,20,30] |
@@ -55,55 +55,55 @@ if __name__ == '__main__': | @@ -55,55 +55,55 @@ if __name__ == '__main__': | ||
55 | for line in f.readlines(): | 55 | for line in f.readlines(): |
56 | user_id = line.strip('\n') | 56 | user_id = line.strip('\n') |
57 | population_sample.append(os.path.join(cfg.popcon_dir,user_id[:2],user_id)) | 57 | population_sample.append(os.path.join(cfg.popcon_dir,user_id[:2],user_id)) |
58 | - sample_dir = ("results/hybrid/%s" % sample_str) | 58 | + sample_dir = ("results/hybrid/%s/%s" % (sample_str,strategy)) |
59 | if not os.path.exists(sample_dir): | 59 | if not os.path.exists(sample_dir): |
60 | os.makedirs(sample_dir) | 60 | os.makedirs(sample_dir) |
61 | 61 | ||
62 | cfg.strategy = strategy | 62 | cfg.strategy = strategy |
63 | - p_20_summary = {} | 63 | + p_10_summary = {} |
64 | f05_100_summary = {} | 64 | f05_100_summary = {} |
65 | - c_20 = {} | 65 | + c_10 = {} |
66 | c_100 = {} | 66 | c_100 = {} |
67 | 67 | ||
68 | log_file = os.path.join(sample_dir,sample_str+"-"+cfg.strategy) | 68 | log_file = os.path.join(sample_dir,sample_str+"-"+cfg.strategy) |
69 | - graph_20 = {} | 69 | + graph_10 = {} |
70 | graph_100 = {} | 70 | graph_100 = {} |
71 | - graph_20_jpg = {} | 71 | + graph_10_jpg = {} |
72 | graph_100_jpg = {} | 72 | graph_100_jpg = {} |
73 | - comment_20 = {} | 73 | + comment_10 = {} |
74 | comment_100 = {} | 74 | comment_100 = {} |
75 | for k in neighbor_size: | 75 | for k in neighbor_size: |
76 | - graph_20[k] = log_file+("-neighboorhod%.3d-020.png"%k) | ||
77 | - graph_100[k] = log_file+("-neighboorhod%.3d-100.png"%k) | ||
78 | - graph_20_jpg[k] = graph_20[k].strip(".png")+".jpg" | 76 | + graph_10[k] = log_file+("-neighborhood%.3d-010.png"%k) |
77 | + graph_100[k] = log_file+("-neighborhood%.3d-100.png"%k) | ||
78 | + graph_10_jpg[k] = graph_10[k].strip(".png")+".jpg" | ||
79 | graph_100_jpg[k] = graph_100[k].strip(".png")+".jpg" | 79 | graph_100_jpg[k] = graph_100[k].strip(".png")+".jpg" |
80 | - comment_20[k] = graph_20_jpg[k]+".comment" | 80 | + comment_10[k] = graph_10_jpg[k]+".comment" |
81 | comment_100[k] = graph_100_jpg[k]+".comment" | 81 | comment_100[k] = graph_100_jpg[k]+".comment" |
82 | 82 | ||
83 | - with open(comment_20[k],'w') as f: | 83 | + with open(comment_10[k],'w') as f: |
84 | f.write("# %s\n" % sample_str) | 84 | f.write("# %s\n" % sample_str) |
85 | - f.write("# strategy %s\n# threshold 20\n# iterations %d\n\n" % | 85 | + f.write("# strategy %s\n# threshold 10\n# iterations %d\n\n" % |
86 | (cfg.strategy,iterations)) | 86 | (cfg.strategy,iterations)) |
87 | - f.write("# neighboorhood\tprofile\tp_20\tc_20\n\n") | 87 | + f.write("# neighborhood\tprofile\tmean_p_10\tdev_p_10\tc_10\n\n") |
88 | with open(comment_100[k],'w') as f: | 88 | with open(comment_100[k],'w') as f: |
89 | f.write("# %s\n" % sample_str) | 89 | f.write("# %s\n" % sample_str) |
90 | f.write("# strategy %s\n# threshold 100\n# iterations %d\n\n" % | 90 | f.write("# strategy %s\n# threshold 100\n# iterations %d\n\n" % |
91 | (cfg.strategy,iterations)) | 91 | (cfg.strategy,iterations)) |
92 | - f.write("# neighboorhood\tprofile\tf05_100\tc_100\n\n") | 92 | + f.write("# neighborhood\tprofile\tmean_f05_100\tdev_f05_100\tc_100\n\n") |
93 | 93 | ||
94 | - c_20[k] = {} | 94 | + c_10[k] = {} |
95 | c_100[k] = {} | 95 | c_100[k] = {} |
96 | - p_20_summary[k] = {} | 96 | + p_10_summary[k] = {} |
97 | f05_100_summary[k] = {} | 97 | f05_100_summary[k] = {} |
98 | for size in profile_size: | 98 | for size in profile_size: |
99 | - c_20[k][size] = set() | 99 | + c_10[k][size] = set() |
100 | c_100[k][size] = set() | 100 | c_100[k][size] = set() |
101 | - p_20_summary[k][size] = [] | 101 | + p_10_summary[k][size] = [] |
102 | f05_100_summary[k][size] = [] | 102 | f05_100_summary[k][size] = [] |
103 | - with open(log_file+"-neighboorhood%.3d-profile%.3d"%(k,size),'w') as f: | 103 | + with open(log_file+"-neighborhood%.3d-profile%.3d"%(k,size),'w') as f: |
104 | f.write("# %s\n" % sample_str) | 104 | f.write("# %s\n" % sample_str) |
105 | - f.write("# strategy %s-neighboorhood%.3d-profile%.3d\n\n" % (cfg.strategy,k,size)) | ||
106 | - f.write("# p_20\t\tf05_100\n\n") | 105 | + f.write("# strategy %s-neighborhood%.3d-profile%.3d\n\n" % (cfg.strategy,k,size)) |
106 | + f.write("# p_10\t\tf05_100\n\n") | ||
107 | 107 | ||
108 | # main loop per user | 108 | # main loop per user |
109 | for submission_file in population_sample: | 109 | for submission_file in population_sample: |
@@ -116,7 +116,7 @@ if __name__ == '__main__': | @@ -116,7 +116,7 @@ if __name__ == '__main__': | ||
116 | cfg.profile_size = size | 116 | cfg.profile_size = size |
117 | rec = Recommender(cfg) | 117 | rec = Recommender(cfg) |
118 | repo_size = rec.items_repository.get_doccount() | 118 | repo_size = rec.items_repository.get_doccount() |
119 | - p_20 = [] | 119 | + p_10 = [] |
120 | f05_100 = [] | 120 | f05_100 = [] |
121 | for n in range(iterations): | 121 | for n in range(iterations): |
122 | # Fill sample profile | 122 | # Fill sample profile |
@@ -134,40 +134,42 @@ if __name__ == '__main__': | @@ -134,40 +134,42 @@ if __name__ == '__main__': | ||
134 | if hasattr(recommendation,"ranking"): | 134 | if hasattr(recommendation,"ranking"): |
135 | ranking = recommendation.ranking | 135 | ranking = recommendation.ranking |
136 | real = RecommendationResult(sample) | 136 | real = RecommendationResult(sample) |
137 | - predicted_20 = RecommendationResult(dict.fromkeys(ranking[:20],1)) | ||
138 | - evaluation = Evaluation(predicted_20,real,repo_size) | ||
139 | - p_20.append(evaluation.run(Precision())) | 137 | + predicted_10 = RecommendationResult(dict.fromkeys(ranking[:10],1)) |
138 | + evaluation = Evaluation(predicted_10,real,repo_size) | ||
139 | + p_10.append(evaluation.run(Precision())) | ||
140 | predicted_100 = RecommendationResult(dict.fromkeys(ranking[:100],1)) | 140 | predicted_100 = RecommendationResult(dict.fromkeys(ranking[:100],1)) |
141 | evaluation = Evaluation(predicted_100,real,repo_size) | 141 | evaluation = Evaluation(predicted_100,real,repo_size) |
142 | f05_100.append(evaluation.run(F_score(0.5))) | 142 | f05_100.append(evaluation.run(F_score(0.5))) |
143 | - c_20[k][size] = c_20[k][size].union(recommendation.ranking[:20]) | 143 | + c_10[k][size] = c_10[k][size].union(recommendation.ranking[:10]) |
144 | c_100[k][size] = c_100[k][size].union(recommendation.ranking[:100]) | 144 | c_100[k][size] = c_100[k][size].union(recommendation.ranking[:100]) |
145 | # save summary | 145 | # save summary |
146 | - if p_20: | ||
147 | - p_20_summary[k][size].append(sum(p_20)/len(p_20)) | 146 | + if p_10: |
147 | + p_10_summary[k][size].append(numpy.mean(p_10)) | ||
148 | if f05_100: | 148 | if f05_100: |
149 | - f05_100_summary[k][size].append(sum(f05_100)/len(f05_100)) | 149 | + f05_100_summary[k][size].append(numpy.mean(f05_100)) |
150 | 150 | ||
151 | - with open(log_file+"-neighboorhood%.3d-profile%.3d"%(k,size),'a') as f: | 151 | + with open(log_file+"-neighborhood%.3d-profile%.3d"%(k,size),'a') as f: |
152 | f.write("%.4f\t\t%.4f\n" % | 152 | f.write("%.4f\t\t%.4f\n" % |
153 | - ((sum(p_20)/len(p_20),sum(f05_100)/len(f05_100)))) | 153 | + (numpy.mean(p_10),numpy.mean(f05_100))) |
154 | 154 | ||
155 | # back to main flow | 155 | # back to main flow |
156 | - coverage_20 = {} | 156 | + coverage_10 = {} |
157 | coverage_100 = {} | 157 | coverage_100 = {} |
158 | for k in neighbor_size: | 158 | for k in neighbor_size: |
159 | - coverage_20[k] = {} | 159 | + coverage_10[k] = {} |
160 | coverage_100[k] = {} | 160 | coverage_100[k] = {} |
161 | - with open(comment_20[k],'a') as f: | 161 | + with open(comment_10[k],'a') as f: |
162 | for size in profile_size: | 162 | for size in profile_size: |
163 | - coverage_20[k][size] = len(c_20[k][size])/float(repo_size) | ||
164 | - f.write("%3d\t\t%3d\t\t%.4f\t%.4f\n" % | ||
165 | - (k,size,float(sum(p_20_summary[k][size]))/len(p_20_summary[k][size]),coverage_20[k][size])) | 163 | + coverage_10[k][size] = len(c_10[k][size])/float(repo_size) |
164 | + f.write("%3d\t\t%3d\t\t%.4f\t%.4f\t%.4f\n" % | ||
165 | + (k,size,numpy.mean(p_10_summary[k][size]), | ||
166 | + numpy.std(p_10_summary[k][size]),coverage_10[k][size])) | ||
166 | with open(comment_100[k],'a') as f: | 167 | with open(comment_100[k],'a') as f: |
167 | for size in profile_size: | 168 | for size in profile_size: |
168 | coverage_100[k][size] = len(c_100[k][size])/float(repo_size) | 169 | coverage_100[k][size] = len(c_100[k][size])/float(repo_size) |
169 | - f.write("%3d\t\t%3d\t\t%.4f\t%.4f\n" % | ||
170 | - (k,size,float(sum(f05_100_summary[k][size]))/len(f05_100_summary[k][size]),coverage_100[k][size])) | 170 | + f.write("%3d\t\t%3d\t\t%.4f\t%.4f\t%.4f\n" % |
171 | + (k,size,numpy.mean(f05_100_summary[k][size]), | ||
172 | + numpy.std(f05_100_summary[k][size]),coverage_100[k][size])) | ||
171 | 173 | ||
172 | for k in neighbor_size: | 174 | for k in neighbor_size: |
173 | # plot results summary | 175 | # plot results summary |
@@ -175,23 +177,26 @@ if __name__ == '__main__': | @@ -175,23 +177,26 @@ if __name__ == '__main__': | ||
175 | g('set style data lines') | 177 | g('set style data lines') |
176 | g('set yrange [0:1.0]') | 178 | g('set yrange [0:1.0]') |
177 | g.xlabel('Profile size') | 179 | g.xlabel('Profile size') |
178 | - g.title("Setup: %s-neighboorhood%3d (threshold 20)" % (cfg.strategy,k)) | ||
179 | - g.plot(Gnuplot.Data(sorted([[i,sum(p_20_summary[k][i])/len(p_20_summary[k][i])] | ||
180 | - for i in p_20_summary[k].keys()]),title="Precision"), | ||
181 | - Gnuplot.Data(sorted([[i,coverage_20[k][i]] | ||
182 | - for i in coverage_20[k].keys()]),title="Coverage")) | ||
183 | - g.hardcopy(graph_20[k],terminal="png") | ||
184 | - #commands.getoutput("convert -quality 100 %s %s" % | ||
185 | - # (graph_20[k],graph_20_jpg[k])) | 180 | + g.title("Setup: %s-neighborhood%3d (threshold 10)" % (cfg.strategy,k)) |
181 | + g.plot(Gnuplot.Data(sorted([[i,numpy.mean(p_10_summary[k][i]),numpy.std(p_10_summary[k][i])] | ||
182 | + for i in p_10_summary[k].keys()]),title="Precision"), | ||
183 | + Gnuplot.Data(sorted([[i,numpy.mean(p_10_summary[k][i]),numpy.std(p_10_summary[k][i])] | ||
184 | + for i in p_10_summary[k].keys()]),title="Deviation", | ||
185 | + with_="yerrorbar lt 2 pt 6"), | ||
186 | + Gnuplot.Data(sorted([[i,coverage_10[k][i]] | ||
187 | + for i in coverage_10[k].keys()]),title="Coverage")) | ||
188 | + g.hardcopy(graph_10[k],terminal="png") | ||
189 | + | ||
186 | g = Gnuplot.Gnuplot() | 190 | g = Gnuplot.Gnuplot() |
187 | g('set style data lines') | 191 | g('set style data lines') |
188 | g('set yrange [0:1.0]') | 192 | g('set yrange [0:1.0]') |
189 | g.xlabel('Profile size') | 193 | g.xlabel('Profile size') |
190 | - g.title("Setup: %s-neighboorhood%3d (threshold 100)" % (cfg.strategy,k)) | ||
191 | - g.plot(Gnuplot.Data(sorted([[i,sum(f05_100_summary[k][i])/len(f05_100_summary[k][i])] | 194 | + g.title("Setup: %s-neighborhood%3d (threshold 100)" % (cfg.strategy,k)) |
195 | + g.plot(Gnuplot.Data(sorted([[i,numpy.mean(f05_100_summary[k][i]),numpy.std(f05_100_summary[k][i])] | ||
192 | for i in f05_100_summary[k].keys()]),title="F05"), | 196 | for i in f05_100_summary[k].keys()]),title="F05"), |
197 | + Gnuplot.Data(sorted([[i,numpy.mean(f05_100_summary[k][i]),numpy.std(f05_100_summary[k][i])] | ||
198 | + for i in f05_100_summary[k].keys()]),title="Deviation", | ||
199 | + with_="yerrorbar lt 2 pt 6"), | ||
193 | Gnuplot.Data(sorted([[i,coverage_100[k][i]] | 200 | Gnuplot.Data(sorted([[i,coverage_100[k][i]] |
194 | for i in coverage_100[k].keys()]),title="Coverage")) | 201 | for i in coverage_100[k].keys()]),title="Coverage")) |
195 | g.hardcopy(graph_100[k],terminal="png") | 202 | g.hardcopy(graph_100[k],terminal="png") |
196 | - #commands.getoutput("convert -quality 100 %s %s" % | ||
197 | - # (graph_100[k],graph_100_jpg[k])) |
src/experiments/pure.py
@@ -33,22 +33,21 @@ import numpy | @@ -33,22 +33,21 @@ import numpy | ||
33 | 33 | ||
34 | if __name__ == '__main__': | 34 | if __name__ == '__main__': |
35 | if len(sys.argv)<2: | 35 | if len(sys.argv)<2: |
36 | - print "Usage: profile-suite strategy_category sample_file" | 36 | + print "Usage: pure strategy_category sample_file" |
37 | exit(1) | 37 | exit(1) |
38 | 38 | ||
39 | iterations = 20 | 39 | iterations = 20 |
40 | - profile_size = [10,20,40,70,100,140,170,200,240] | ||
41 | - neighbor_size = [3,5,10,50,100,150,200,300,400,500] | 40 | + profile_size = [10,20,40,60,80,100,140,170,200,240] |
41 | + neighbor_size = [3,5,10,20,30,50,70,100,150,200] | ||
42 | 42 | ||
43 | content_strategies = ['cb','cbt','cbd','cbh','cb_eset','cbt_eset','cbd_eset','cbh_eset'] | 43 | content_strategies = ['cb','cbt','cbd','cbh','cb_eset','cbt_eset','cbd_eset','cbh_eset'] |
44 | - collaborative_strategies = ['knn_eset']#,'knn_eset','knn_plus'] | ||
45 | - #collaborative_strategies = ['knn','knn_eset','knn_plus'] | 44 | + collaborative_strategies = ['knn_eset','knn','knn_plus'] |
46 | 45 | ||
47 | #iterations = 1 | 46 | #iterations = 1 |
48 | #profile_size = [10,20,30] | 47 | #profile_size = [10,20,30] |
49 | - #neighbor_size = [10,20,30] | 48 | + #neighbor_size = [3,5,10,20,30,50] |
50 | #content_strategies = ['cb'] | 49 | #content_strategies = ['cb'] |
51 | - #collaborative_strategies = ['knn_eset'] | 50 | + #collaborative_strategies = ['knn'] |
52 | 51 | ||
53 | strategy_category = sys.argv[1] | 52 | strategy_category = sys.argv[1] |
54 | if strategy_category == "content": | 53 | if strategy_category == "content": |
@@ -78,39 +77,39 @@ if __name__ == '__main__': | @@ -78,39 +77,39 @@ if __name__ == '__main__': | ||
78 | 77 | ||
79 | for strategy in strategies: | 78 | for strategy in strategies: |
80 | cfg.strategy = strategy | 79 | cfg.strategy = strategy |
81 | - p_20_summary = {} | 80 | + p_10_summary = {} |
82 | f05_100_summary = {} | 81 | f05_100_summary = {} |
83 | - c_20 = {} | 82 | + c_10 = {} |
84 | c_100 = {} | 83 | c_100 = {} |
85 | 84 | ||
86 | log_file = os.path.join(sample_dir,sample_str+"-"+cfg.strategy) | 85 | log_file = os.path.join(sample_dir,sample_str+"-"+cfg.strategy) |
87 | - graph_20 = log_file+"-20.png" | 86 | + graph_10 = log_file+"-10.png" |
88 | graph_100 = log_file+"-100.png" | 87 | graph_100 = log_file+"-100.png" |
89 | - graph_20_jpg = graph_20.strip(".png")+".jpg" | 88 | + graph_10_jpg = graph_10.strip(".png")+".jpg" |
90 | graph_100_jpg = graph_100.strip(".png")+".jpg" | 89 | graph_100_jpg = graph_100.strip(".png")+".jpg" |
91 | - comment_20 = graph_20_jpg+".comment" | 90 | + comment_10 = graph_10_jpg+".comment" |
92 | comment_100 = graph_100_jpg+".comment" | 91 | comment_100 = graph_100_jpg+".comment" |
93 | 92 | ||
94 | - with open(comment_20,'w') as f: | 93 | + with open(comment_10,'w') as f: |
95 | f.write("# sample %s\n" % sample_str) | 94 | f.write("# sample %s\n" % sample_str) |
96 | - f.write("# strategy %s\n# threshold 20\n# iterations %d\n\n" % | 95 | + f.write("# strategy %s\n# threshold 10\n# iterations %d\n\n" % |
97 | (cfg.strategy,iterations)) | 96 | (cfg.strategy,iterations)) |
98 | - f.write("# %s\tp_20\tc_20\n\n"%option_str) | 97 | + f.write("# %s\tmean_p_10\tdev_p_10\tc_10\n\n"%option_str) |
99 | with open(comment_100,'w') as f: | 98 | with open(comment_100,'w') as f: |
100 | f.write("# sample %s\n" % sample_str) | 99 | f.write("# sample %s\n" % sample_str) |
101 | f.write("# strategy %s\n# threshold 100\n# iterations %d\n\n" % | 100 | f.write("# strategy %s\n# threshold 100\n# iterations %d\n\n" % |
102 | (cfg.strategy,iterations)) | 101 | (cfg.strategy,iterations)) |
103 | - f.write("# %s\t\tf05_100\t\tc_100\n\n"%option_str) | 102 | + f.write("# %s\t\tmean_f05_100\t\tdev_f05_100\t\tc_100\n\n"%option_str) |
104 | 103 | ||
105 | for size in sizes: | 104 | for size in sizes: |
106 | - c_20[size] = set() | 105 | + c_10[size] = set() |
107 | c_100[size] = set() | 106 | c_100[size] = set() |
108 | - p_20_summary[size] = [] | 107 | + p_10_summary[size] = [] |
109 | f05_100_summary[size] = [] | 108 | f05_100_summary[size] = [] |
110 | with open(log_file+"-%s%.3d"%(option_str,size),'w') as f: | 109 | with open(log_file+"-%s%.3d"%(option_str,size),'w') as f: |
111 | f.write("# sample %s\n" % sample_str) | 110 | f.write("# sample %s\n" % sample_str) |
112 | f.write("# strategy %s-%s%.3d\n\n" % (cfg.strategy,option_str,size)) | 111 | f.write("# strategy %s-%s%.3d\n\n" % (cfg.strategy,option_str,size)) |
113 | - f.write("# p_20\tf05_100\n\n") | 112 | + f.write("# p_10\tf05_100\n\n") |
114 | 113 | ||
115 | # main loop per user | 114 | # main loop per user |
116 | for submission_file in population_sample: | 115 | for submission_file in population_sample: |
@@ -122,7 +121,7 @@ if __name__ == '__main__': | @@ -122,7 +121,7 @@ if __name__ == '__main__': | ||
122 | cfg.k_neighbors = size | 121 | cfg.k_neighbors = size |
123 | rec = Recommender(cfg) | 122 | rec = Recommender(cfg) |
124 | repo_size = rec.items_repository.get_doccount() | 123 | repo_size = rec.items_repository.get_doccount() |
125 | - p_20 = [] | 124 | + p_10 = [] |
126 | f05_100 = [] | 125 | f05_100 = [] |
127 | for n in range(iterations): | 126 | for n in range(iterations): |
128 | # Fill sample profile | 127 | # Fill sample profile |
@@ -140,60 +139,61 @@ if __name__ == '__main__': | @@ -140,60 +139,61 @@ if __name__ == '__main__': | ||
140 | if hasattr(recommendation,"ranking"): | 139 | if hasattr(recommendation,"ranking"): |
141 | ranking = recommendation.ranking | 140 | ranking = recommendation.ranking |
142 | real = RecommendationResult(sample) | 141 | real = RecommendationResult(sample) |
143 | - predicted_20 = RecommendationResult(dict.fromkeys(ranking[:20],1)) | ||
144 | - evaluation = Evaluation(predicted_20,real,repo_size) | ||
145 | - p_20.append(evaluation.run(Precision())) | 142 | + predicted_10 = RecommendationResult(dict.fromkeys(ranking[:10],1)) |
143 | + evaluation = Evaluation(predicted_10,real,repo_size) | ||
144 | + p_10.append(evaluation.run(Precision())) | ||
146 | predicted_100 = RecommendationResult(dict.fromkeys(ranking[:100],1)) | 145 | predicted_100 = RecommendationResult(dict.fromkeys(ranking[:100],1)) |
147 | evaluation = Evaluation(predicted_100,real,repo_size) | 146 | evaluation = Evaluation(predicted_100,real,repo_size) |
148 | f05_100.append(evaluation.run(F_score(0.5))) | 147 | f05_100.append(evaluation.run(F_score(0.5))) |
149 | - c_20[size] = c_20[size].union(recommendation.ranking[:20]) | 148 | + c_10[size] = c_10[size].union(recommendation.ranking[:10]) |
150 | c_100[size] = c_100[size].union(recommendation.ranking[:100]) | 149 | c_100[size] = c_100[size].union(recommendation.ranking[:100]) |
151 | # save summary | 150 | # save summary |
152 | - if p_20: | ||
153 | - p_20_summary[size].append(sum(p_20)/len(p_20)) | 151 | + if p_10: |
152 | + p_10_summary[size].append(numpy.mean(p_10)) | ||
154 | if f05_100: | 153 | if f05_100: |
155 | - f05_100_summary[size].append(sum(f05_100)/len(f05_100)) | 154 | + f05_100_summary[size].append(numpy.mean(f05_100)) |
156 | 155 | ||
157 | with open(log_file+"-%s%.3d"%(option_str,size),'a') as f: | 156 | with open(log_file+"-%s%.3d"%(option_str,size),'a') as f: |
158 | - f.write("%.4f \t%.4f\n" % | ||
159 | - ((sum(p_20)/len(p_20),sum(f05_100)/len(f05_100)))) | 157 | + f.write("%.4f \t%.4f\n" % (numpy.mean(p_10),numpy.mean(f05_100))) |
160 | 158 | ||
161 | # back to main flow | 159 | # back to main flow |
162 | - coverage_20 = {} | 160 | + coverage_10 = {} |
163 | coverage_100 = {} | 161 | coverage_100 = {} |
164 | - with open(comment_20,'a') as f: | 162 | + with open(comment_10,'a') as f: |
165 | for size in sizes: | 163 | for size in sizes: |
166 | - coverage_20[size] = len(c_20[size])/float(repo_size) | ||
167 | - f.write("%3d\t\t%.4f\t\t%.4f\n" % | ||
168 | - (size,float(sum(p_20_summary[size]))/len(p_20_summary[size]),coverage_20[size])) | 164 | + coverage_10[size] = len(c_10[size])/float(repo_size) |
165 | + f.write("%3d\t\t%.4f\t\t%.4f\t\t%.4f\n" % | ||
166 | + (size,numpy.mean(p_10_summary[size]),numpy.std(p_10_summary[size]),coverage_10[size])) | ||
169 | with open(comment_100,'a') as f: | 167 | with open(comment_100,'a') as f: |
170 | for size in sizes: | 168 | for size in sizes: |
171 | coverage_100[size] = len(c_100[size])/float(repo_size) | 169 | coverage_100[size] = len(c_100[size])/float(repo_size) |
172 | - f.write("%3d\t\t%.4f\t\t%.4f\n" % | ||
173 | - (size,float(sum(f05_100_summary[size]))/len(f05_100_summary[size]),coverage_100[size])) | 170 | + f.write("%3d\t\t%.4f\t\t%.4f\t\t%.4f\n" % |
171 | + (size,numpy.mean(f05_100_summary[size]),numpy.std(f05_100_summary[size]),coverage_100[size])) | ||
174 | 172 | ||
175 | # plot results summary | 173 | # plot results summary |
176 | g = Gnuplot.Gnuplot() | 174 | g = Gnuplot.Gnuplot() |
177 | g('set style data lines') | 175 | g('set style data lines') |
178 | g('set yrange [0:1.0]') | 176 | g('set yrange [0:1.0]') |
179 | g.xlabel('%s size'%option_str.capitalize()) | 177 | g.xlabel('%s size'%option_str.capitalize()) |
180 | - g.title("Setup: %s (threshold 20)" % cfg.strategy) | ||
181 | - g.plot(Gnuplot.Data(sorted([[k,sum(p_20_summary[k])/len(p_20_summary[k])] | ||
182 | - for k in p_20_summary.keys()]),title="Precision"), | ||
183 | - Gnuplot.Data(sorted([[k,coverage_20[k]] | ||
184 | - for k in coverage_20.keys()]),title="Coverage")) | ||
185 | - g.hardcopy(graph_20,terminal="png") | ||
186 | - commands.getoutput("convert -quality 20 %s %s" % | ||
187 | - (graph_100,graph_20_jpg)) | 178 | + g.title("Setup: %s (threshold 10)" % cfg.strategy) |
179 | + g.plot(Gnuplot.Data(sorted([[k,numpy.mean(p_10_summary[k]),numpy.std(p_10_summary[k])] | ||
180 | + for k in p_10_summary.keys()]),title="Precision"), | ||
181 | + Gnuplot.Data(sorted([[k,numpy.mean(p_10_summary[k]),numpy.std(p_10_summary[k])] | ||
182 | + for k in p_10_summary.keys()]),title="Deviation", | ||
183 | + with_="yerrorbar lt 2 pt 6"), | ||
184 | + Gnuplot.Data(sorted([[k,coverage_10[k]] | ||
185 | + for k in coverage_10.keys()]),title="Coverage")) | ||
186 | + g.hardcopy(graph_10,terminal="png") | ||
188 | g = Gnuplot.Gnuplot() | 187 | g = Gnuplot.Gnuplot() |
189 | g('set style data lines') | 188 | g('set style data lines') |
190 | g('set yrange [0:1.0]') | 189 | g('set yrange [0:1.0]') |
191 | g.xlabel('%s size'%option_str.capitalize()) | 190 | g.xlabel('%s size'%option_str.capitalize()) |
192 | g.title("Setup: %s (threshold 100)" % cfg.strategy) | 191 | g.title("Setup: %s (threshold 100)" % cfg.strategy) |
193 | - g.plot(Gnuplot.Data(sorted([[k,sum(f05_100_summary[k])/len(f05_100_summary[k])] | 192 | + g.plot(Gnuplot.Data(sorted([[k,numpy.mean(f05_100_summary[k]),numpy.std(f05_100_summary[k])] |
194 | for k in f05_100_summary.keys()]),title="F05"), | 193 | for k in f05_100_summary.keys()]),title="F05"), |
194 | + Gnuplot.Data(sorted([[k,numpy.mean(f05_100_summary[k]),numpy.std(f05_100_summary[k])] | ||
195 | + for k in f05_100_summary.keys()]),title="Deviation", | ||
196 | + with_="yerrorbar lt 2 pt 6"), | ||
195 | Gnuplot.Data(sorted([[k,coverage_100[k]] | 197 | Gnuplot.Data(sorted([[k,coverage_100[k]] |
196 | for k in coverage_100.keys()]),title="Coverage")) | 198 | for k in coverage_100.keys()]),title="Coverage")) |
197 | g.hardcopy(graph_100,terminal="png") | 199 | g.hardcopy(graph_100,terminal="png") |
198 | - commands.getoutput("convert -quality 100 %s %s" % | ||
199 | - (graph_100,graph_100_jpg)) |