Commit 78b054a84b4e76c0df737985214929e245e2c1e2

Authored by Tássia Camões Araújo
1 parent c673b9b2
Exists in master and in 1 other branch add_vagrant

Deleted old files.

src/experiments/experiments.cfg
@@ -1,27 +0,0 @@ @@ -1,27 +0,0 @@
1 -[DEFAULT]  
2 -repetitions = 1  
3 -iterations = 10  
4 -path = 'results'  
5 -experiment = 'grid'  
6 -weight = ['bm25', 'trad']  
7 -;profile_size = range(10,100,10)  
8 -;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]  
9 -sample = [0.6, 0.7, 0.8, 0.9]  
10 -  
11 -[content]  
12 -strategy = ['cb','cbt','cbd']  
13 -  
14 -[clustering]  
15 -experiment = 'single'  
16 -;iterations = 4  
17 -;medoids = range(2,6)  
18 -iterations = 6  
19 -medoids = [100,500,1000,5000,10000,50000]  
20 -;disabled for this experiment  
21 -weight = 0  
22 -profile_size = 0  
23 -sample = 0  
24 -  
25 -[colaborative]  
26 -users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]  
27 -neighbors = range(10,1010,50)  
src/experiments/legacy/clustering-suite.py
@@ -1,51 +0,0 @@ @@ -1,51 +0,0 @@
1 -#!/usr/bin/env python  
2 -"""  
3 - recommender suite - recommender experiments suite  
4 -"""  
5 -__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"  
6 -__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"  
7 -__license__ = """  
8 - This program is free software: you can redistribute it and/or modify  
9 - it under the terms of the GNU General Public License as published by  
10 - the Free Software Foundation, either version 3 of the License, or  
11 - (at your option) any later version.  
12 -  
13 - This program is distributed in the hope that it will be useful,  
14 - but WITHOUT ANY WARRANTY; without even the implied warranty of  
15 - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
16 - GNU General Public License for more details.  
17 -  
18 - You should have received a copy of the GNU General Public License  
19 - along with this program. If not, see <http://www.gnu.org/licenses/>.  
20 -"""  
21 -  
22 -import sys  
23 -import os  
24 -sys.path.insert(0,'../')  
25 -from config import Config  
26 -from data import PopconXapianIndex, PopconSubmission  
27 -from recommender import Recommender  
28 -from user import LocalSystem, User  
29 -from evaluation import *  
30 -import logging  
31 -import random  
32 -import Gnuplot  
33 -  
if __name__ == '__main__':
    # Re-cluster the popcon submissions with the configured number of
    # medoids and record the resulting overall cluster dispersion.
    cfg = Config()
    cfg.index_mode = "recluster"
    logging.info("Starting clustering experiments")
    logging.info("Medoids: %d\t Max popcon:%d" % (cfg.k_medoids, cfg.max_popcon))
    cfg.popcon_dir = os.path.expanduser(
        "~/org/popcon.debian.org/popcon-mail/popcon-entries/")

    # Index, clusters directory and result file all carry the same tag so
    # that runs with different parameters do not overwrite each other.
    run_tag = "%dmedoids%dmax" % (cfg.k_medoids, cfg.max_popcon)
    cfg.popcon_index = cfg.popcon_index + "_" + run_tag
    cfg.clusters_dir = cfg.clusters_dir + "_" + run_tag

    pxi = PopconXapianIndex(cfg)
    logging.info("Overall dispersion: %f\n" % pxi.cluster_dispersion)

    # Write clustering log
    with open("results/clustering/" + run_tag, 'w') as output:
        output.write("# k_medoids\tmax_popcon\tdispersion\n")
        # One specifier per column of the header above; the previous
        # two-specifier format raised TypeError when given three values.
        output.write("%d %d %f\n" % (cfg.k_medoids, cfg.max_popcon,
                                     pxi.cluster_dispersion))
src/experiments/legacy/experiments.cfg
@@ -1,27 +0,0 @@ @@ -1,27 +0,0 @@
1 -[DEFAULT]  
2 -repetitions = 1  
3 -iterations = 10  
4 -path = 'results'  
5 -experiment = 'grid'  
6 -weight = ['bm25', 'trad']  
7 -;profile_size = range(10,100,10)  
8 -;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]  
9 -sample = [0.6, 0.7, 0.8, 0.9]  
10 -  
11 -[content]  
12 -strategy = ['cb','cbt','cbd']  
13 -  
14 -[clustering]  
15 -experiment = 'single'  
16 -;iterations = 4  
17 -;medoids = range(2,6)  
18 -iterations = 6  
19 -medoids = [100,500,1000,5000,10000,50000]  
20 -;disabled for this experiment  
21 -weight = 0  
22 -profile_size = 0  
23 -sample = 0  
24 -  
25 -[colaborative]  
26 -users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]  
27 -neighbors = range(10,1010,50)  
src/experiments/legacy/runner.py
@@ -1,171 +0,0 @@ @@ -1,171 +0,0 @@
1 -#!/usr/bin/env python  
2 -"""  
3 - recommender suite - recommender experiments suite  
4 -"""  
5 -__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"  
6 -__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"  
7 -__license__ = """  
8 - This program is free software: you can redistribute it and/or modify  
9 - it under the terms of the GNU General Public License as published by  
10 - the Free Software Foundation, either version 3 of the License, or  
11 - (at your option) any later version.  
12 -  
13 - This program is distributed in the hope that it will be useful,  
14 - but WITHOUT ANY WARRANTY; without even the implied warranty of  
15 - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
16 - GNU General Public License for more details.  
17 -  
18 - You should have received a copy of the GNU General Public License  
19 - along with this program. If not, see <http://www.gnu.org/licenses/>.  
20 -"""  
21 -  
22 -import expsuite  
23 -import sys  
24 -sys.path.insert(0,'../')  
25 -from config import Config  
26 -from data import PopconXapianIndex, PopconSubmission  
27 -from recommender import Recommender  
28 -from user import LocalSystem, User  
29 -from evaluation import *  
30 -import logging  
31 -import random  
32 -import Gnuplot  
33 -  
class ClusteringSuite(expsuite.PyExperimentSuite):
    """Clustering experiments: one PopconXapianIndex build per medoid count."""

    def reset(self, params, rep):
        """Create a fresh configuration pointing at the sample test data.

        params -- experiment parameters; only 'name' is read here.
        rep    -- repetition number (unused).
        """
        settings = Config()
        settings.popcon_index = "../tests/test_data/.sample_pxi"
        settings.popcon_dir = "../tests/test_data/popcon_dir"
        settings.clusters_dir = "../tests/test_data/clusters_dir"
        self.cfg = settings

        if params['name'] != "clustering":
            return
        logging.info("Starting 'clustering' experiments suite...")
        settings.index_mode = "recluster"

    def iterate(self, params, rep, n):
        """Build the index with the n-th medoid count and report dispersion.

        params -- experiment parameters; 'name' and 'medoids' are read.
        rep    -- repetition number (unused).
        n      -- iteration number, used as index into params['medoids'].

        Returns a dict with 'k_medoids' and 'dispersion', or an empty dict
        for experiments not named "clustering".
        """
        if params['name'] != "clustering":
            return {}

        medoid_count = params['medoids'][n]
        logging.info("Running iteration %d" % medoid_count)
        self.cfg.k_medoids = medoid_count
        index = PopconXapianIndex(self.cfg)
        return {'k_medoids': medoid_count,
                'dispersion': index.cluster_dispersion}
55 -  
class ContentBasedSuite(expsuite.PyExperimentSuite):
    """Content-based recommendation experiments.

    In every iteration a random part of the local user's package profile is
    held out, a recommendation is computed from the remaining packages, and
    the positions of the held-out packages in the ranking are logged and
    plotted.
    """

    # Position in the metric series reported in the iteration log.  The
    # series has one point every 100 ranking entries starting at size 1, so
    # index 20 corresponds to a recommendation of size 2001.
    SUMMARY_INDEX = 20

    def reset(self, params, rep):
        """Set up recommender and user profile for one experiment.

        params -- experiment parameters; 'name', 'weight', 'strategy' and
                  'sample' are read here.
        rep    -- repetition number (unused).
        """
        if not params['name'].startswith("content"):
            return
        cfg = Config()
        #if the index was not built yet
        #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
        cfg.axi = "data/AppAxi"
        cfg.index_mode = "old"
        cfg.weight = params['weight']
        self.rec = Recommender(cfg)
        self.rec.set_strategy(params['strategy'])
        self.repo_size = self.rec.items_repository.get_doccount()
        self.user = LocalSystem()
        self.user.app_pkg_profile(self.rec.items_repository)
        self.user.no_auto_pkg_profile()
        # Number of profile packages held out in each iteration.
        self.sample_size = int(len(self.user.pkg_profile) * params['sample'])
        # iteration should be set to 10 in config file
        #self.profile_size = range(10,101,10)

    @staticmethod
    def _summary_point(series, index):
        """Return series[index], the last point if too short, None if empty."""
        if not series:
            return None
        return series[min(index, len(series) - 1)]

    def iterate(self, params, rep, n):
        """Run one hold-out evaluation and return its summary.

        params -- experiment parameters; 'name', 'weight', 'strategy' and
                  'sample' are read here.
        rep    -- repetition number (unused).
        n      -- iteration number, used in the output file name.

        Writes the recall log to results/content/recall/ and a plot to the
        same path with "-plot.ps" appended.  Returns a dict with the
        iteration number, weight, strategy and one [size, value] point per
        metric, or an empty dict for experiments whose name does not start
        with "content".
        """
        if not params['name'].startswith("content"):
            # Same convention as ClusteringSuite.iterate; previously
            # 'result' was never bound on this path.
            return {}

        item_score = dict.fromkeys(self.user.pkg_profile, 1)
        # Prepare partition: move a random subset of the profile to 'sample'.
        # The size is capped because item_score has no duplicate keys while
        # sample_size was derived from the raw profile length.
        held_out = random.sample(list(item_score),
                                 min(self.sample_size, len(item_score)))
        sample = {}
        for key in held_out:
            sample[key] = item_score.pop(key)

        # Get full recommendation
        user = User(item_score)
        recommendation = self.rec.get_recommendation(user, self.repo_size)
        ranking = recommendation.ranking

        # First position of every package in the ranking, so that each
        # held-out package is looked up without rescanning the list.
        position = {}
        for rank, pkg in enumerate(ranking):
            position.setdefault(pkg, rank)
        ranks = sorted(position[pkg] for pkg in sample if pkg in position)
        notfound = [pkg for pkg in sample if pkg not in position]

        # Write recall log
        recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
            (params['strategy'], params['weight'], params['sample'], n)
        with open(recall_file, 'w') as output:
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" %
                         (self.repo_size, len(item_score), self.sample_size))
            for r in ranks:
                output.write(str(r) + "\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg + "\n")

        # Plot metrics summary: one point per metric for every prefix of the
        # ranking, in steps of 100 entries.
        accuracy = []
        precision = []
        recall = []
        f1 = []
        g = Gnuplot.Gnuplot()
        g('set style data lines')
        g.xlabel('Recommendation size')
        for size in range(1, len(ranking) + 1, 100):
            predicted = RecommendationResult(dict.fromkeys(ranking[:size], 1))
            real = RecommendationResult(sample)
            evaluation = Evaluation(predicted, real, self.repo_size)
            accuracy.append([size, evaluation.run(Accuracy())])
            precision.append([size, evaluation.run(Precision())])
            recall.append([size, evaluation.run(Recall())])
            f1.append([size, evaluation.run(F1())])
        g.plot(Gnuplot.Data(accuracy, title="Accuracy"),
               Gnuplot.Data(precision, title="Precision"),
               Gnuplot.Data(recall, title="Recall"),
               Gnuplot.Data(f1, title="F1"))
        g.hardcopy(recall_file + "-plot.ps", enhanced=1, color=1)

        # Iteration log.  Rankings shorter than 2001 entries fall back to
        # their last measured point instead of raising IndexError.
        at = self.SUMMARY_INDEX
        return {'iteration': n,
                'weight': params['weight'],
                'strategy': params['strategy'],
                'accuracy': self._summary_point(accuracy, at),
                'precision': self._summary_point(precision, at),
                # key used to be 'recall:' (stray colon)
                'recall': self._summary_point(recall, at),
                'f1': self._summary_point(f1, at)}
139 -  
140 -#class CollaborativeSuite(expsuite.PyExperimentSuite):  
141 -# def reset(self, params, rep):  
142 -# if params['name'].startswith("collaborative"):  
143 -#  
144 -# def iterate(self, params, rep, n):  
145 -# if params['name'].startswith("collaborative"):  
146 -# for root, dirs, files in os.walk(self.source_dir):  
147 -# for popcon_file in files:  
148 -# submission = PopconSubmission(os.path.join(root,popcon_file))  
149 -# user = User(submission.packages)  
150 -# user.maximal_pkg_profile()  
151 -# rec.get_recommendation(user)  
152 -# precision = 0  
153 -# result = {'weight': params['weight'],  
154 -# 'strategy': params['strategy'],  
155 -# 'profile_size': self.profile_size[n],  
156 -# 'accuracy': accuracy,  
157 -# 'precision': precision,  
158 -# 'recall:': recall,  
159 -# 'f1': }  
160 -# else:  
161 -# result = {}  
162 -# return result  
163 -  
if __name__ == '__main__':
    # A suite runs when its keyword appears on the command line, or when
    # fewer than three entries are present in sys.argv.
    suites = (("clustering", ClusteringSuite),
              ("content", ContentBasedSuite))
    for keyword, suite_class in suites:
        if len(sys.argv) < 3 or keyword in sys.argv:
            suite_class().start()
    #if "collaborative" in sys.argv or len(sys.argv)<3:
        #CollaborativeSuite().start()
src/experiments/runner.py
@@ -1,171 +0,0 @@ @@ -1,171 +0,0 @@
1 -#!/usr/bin/env python  
2 -"""  
3 - recommender suite - recommender experiments suite  
4 -"""  
5 -__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"  
6 -__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"  
7 -__license__ = """  
8 - This program is free software: you can redistribute it and/or modify  
9 - it under the terms of the GNU General Public License as published by  
10 - the Free Software Foundation, either version 3 of the License, or  
11 - (at your option) any later version.  
12 -  
13 - This program is distributed in the hope that it will be useful,  
14 - but WITHOUT ANY WARRANTY; without even the implied warranty of  
15 - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
16 - GNU General Public License for more details.  
17 -  
18 - You should have received a copy of the GNU General Public License  
19 - along with this program. If not, see <http://www.gnu.org/licenses/>.  
20 -"""  
21 -  
22 -import expsuite  
23 -import sys  
24 -sys.path.insert(0,'../')  
25 -from config import Config  
26 -from data import PopconXapianIndex, PopconSubmission  
27 -from recommender import Recommender  
28 -from user import LocalSystem, User  
29 -from evaluation import *  
30 -import logging  
31 -import random  
32 -import Gnuplot  
33 -  
class ClusteringSuite(expsuite.PyExperimentSuite):
    """Clustering experiments: one PopconXapianIndex build per medoid count."""

    def reset(self, params, rep):
        """Create a fresh configuration pointing at the sample test data.

        params -- experiment parameters; only 'name' is read here.
        rep    -- repetition number (unused).
        """
        settings = Config()
        settings.popcon_index = "../tests/test_data/.sample_pxi"
        settings.popcon_dir = "../tests/test_data/popcon_dir"
        settings.clusters_dir = "../tests/test_data/clusters_dir"
        self.cfg = settings

        if params['name'] != "clustering":
            return
        logging.info("Starting 'clustering' experiments suite...")
        settings.index_mode = "recluster"

    def iterate(self, params, rep, n):
        """Build the index with the n-th medoid count and report dispersion.

        params -- experiment parameters; 'name' and 'medoids' are read.
        rep    -- repetition number (unused).
        n      -- iteration number, used as index into params['medoids'].

        Returns a dict with 'k_medoids' and 'dispersion', or an empty dict
        for experiments not named "clustering".
        """
        if params['name'] != "clustering":
            return {}

        medoid_count = params['medoids'][n]
        logging.info("Running iteration %d" % medoid_count)
        self.cfg.k_medoids = medoid_count
        index = PopconXapianIndex(self.cfg)
        return {'k_medoids': medoid_count,
                'dispersion': index.cluster_dispersion}
55 -  
class ContentBasedSuite(expsuite.PyExperimentSuite):
    """Content-based recommendation experiments.

    In every iteration a random part of the local user's package profile is
    held out, a recommendation is computed from the remaining packages, and
    the positions of the held-out packages in the ranking are logged and
    plotted.
    """

    # Position in the metric series reported in the iteration log.  The
    # series has one point every 100 ranking entries starting at size 1, so
    # index 20 corresponds to a recommendation of size 2001.
    SUMMARY_INDEX = 20

    def reset(self, params, rep):
        """Set up recommender and user profile for one experiment.

        params -- experiment parameters; 'name', 'weight', 'strategy' and
                  'sample' are read here.
        rep    -- repetition number (unused).
        """
        if not params['name'].startswith("content"):
            return
        cfg = Config()
        #if the index was not built yet
        #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
        cfg.axi = "data/AppAxi"
        cfg.index_mode = "old"
        cfg.weight = params['weight']
        self.rec = Recommender(cfg)
        self.rec.set_strategy(params['strategy'])
        self.repo_size = self.rec.items_repository.get_doccount()
        self.user = LocalSystem()
        self.user.app_pkg_profile(self.rec.items_repository)
        self.user.no_auto_pkg_profile()
        # Number of profile packages held out in each iteration.
        self.sample_size = int(len(self.user.pkg_profile) * params['sample'])
        # iteration should be set to 10 in config file
        #self.profile_size = range(10,101,10)

    @staticmethod
    def _summary_point(series, index):
        """Return series[index], the last point if too short, None if empty."""
        if not series:
            return None
        return series[min(index, len(series) - 1)]

    def iterate(self, params, rep, n):
        """Run one hold-out evaluation and return its summary.

        params -- experiment parameters; 'name', 'weight', 'strategy' and
                  'sample' are read here.
        rep    -- repetition number (unused).
        n      -- iteration number, used in the output file name.

        Writes the recall log to results/content/recall/ and a plot to the
        same path with "-plot.ps" appended.  Returns a dict with the
        iteration number, weight, strategy and one [size, value] point per
        metric, or an empty dict for experiments whose name does not start
        with "content".
        """
        if not params['name'].startswith("content"):
            # Same convention as ClusteringSuite.iterate; previously
            # 'result' was never bound on this path.
            return {}

        item_score = dict.fromkeys(self.user.pkg_profile, 1)
        # Prepare partition: move a random subset of the profile to 'sample'.
        # The size is capped because item_score has no duplicate keys while
        # sample_size was derived from the raw profile length.
        held_out = random.sample(list(item_score),
                                 min(self.sample_size, len(item_score)))
        sample = {}
        for key in held_out:
            sample[key] = item_score.pop(key)

        # Get full recommendation
        user = User(item_score)
        recommendation = self.rec.get_recommendation(user, self.repo_size)
        ranking = recommendation.ranking

        # First position of every package in the ranking, so that each
        # held-out package is looked up without rescanning the list.
        position = {}
        for rank, pkg in enumerate(ranking):
            position.setdefault(pkg, rank)
        ranks = sorted(position[pkg] for pkg in sample if pkg in position)
        notfound = [pkg for pkg in sample if pkg not in position]

        # Write recall log
        recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
            (params['strategy'], params['weight'], params['sample'], n)
        with open(recall_file, 'w') as output:
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" %
                         (self.repo_size, len(item_score), self.sample_size))
            for r in ranks:
                output.write(str(r) + "\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg + "\n")

        # Plot metrics summary: one point per metric for every prefix of the
        # ranking, in steps of 100 entries.
        accuracy = []
        precision = []
        recall = []
        f1 = []
        g = Gnuplot.Gnuplot()
        g('set style data lines')
        g.xlabel('Recommendation size')
        for size in range(1, len(ranking) + 1, 100):
            predicted = RecommendationResult(dict.fromkeys(ranking[:size], 1))
            real = RecommendationResult(sample)
            evaluation = Evaluation(predicted, real, self.repo_size)
            accuracy.append([size, evaluation.run(Accuracy())])
            precision.append([size, evaluation.run(Precision())])
            recall.append([size, evaluation.run(Recall())])
            f1.append([size, evaluation.run(F1())])
        g.plot(Gnuplot.Data(accuracy, title="Accuracy"),
               Gnuplot.Data(precision, title="Precision"),
               Gnuplot.Data(recall, title="Recall"),
               Gnuplot.Data(f1, title="F1"))
        g.hardcopy(recall_file + "-plot.ps", enhanced=1, color=1)

        # Iteration log.  Rankings shorter than 2001 entries fall back to
        # their last measured point instead of raising IndexError.
        at = self.SUMMARY_INDEX
        return {'iteration': n,
                'weight': params['weight'],
                'strategy': params['strategy'],
                'accuracy': self._summary_point(accuracy, at),
                'precision': self._summary_point(precision, at),
                # key used to be 'recall:' (stray colon)
                'recall': self._summary_point(recall, at),
                'f1': self._summary_point(f1, at)}
139 -  
140 -#class CollaborativeSuite(expsuite.PyExperimentSuite):  
141 -# def reset(self, params, rep):  
142 -# if params['name'].startswith("collaborative"):  
143 -#  
144 -# def iterate(self, params, rep, n):  
145 -# if params['name'].startswith("collaborative"):  
146 -# for root, dirs, files in os.walk(self.source_dir):  
147 -# for popcon_file in files:  
148 -# submission = PopconSubmission(os.path.join(root,popcon_file))  
149 -# user = User(submission.packages)  
150 -# user.maximal_pkg_profile()  
151 -# rec.get_recommendation(user)  
152 -# precision = 0  
153 -# result = {'weight': params['weight'],  
154 -# 'strategy': params['strategy'],  
155 -# 'profile_size': self.profile_size[n],  
156 -# 'accuracy': accuracy,  
157 -# 'precision': precision,  
158 -# 'recall:': recall,  
159 -# 'f1': }  
160 -# else:  
161 -# result = {}  
162 -# return result  
163 -  
if __name__ == '__main__':
    # A suite runs when its keyword appears on the command line, or when
    # fewer than three entries are present in sys.argv.
    suites = (("clustering", ClusteringSuite),
              ("content", ContentBasedSuite))
    for keyword, suite_class in suites:
        if len(sys.argv) < 3 or keyword in sys.argv:
            suite_class().start()
    #if "collaborative" in sys.argv or len(sys.argv)<3:
        #CollaborativeSuite().start()