Commit 78b054a84b4e76c0df737985214929e245e2c1e2

Authored by Tássia Camões Araújo
1 parent c673b9b2
Exists in master and in 1 other branch add_vagrant

Deleted old files.

src/experiments/experiments.cfg
... ... @@ -1,27 +0,0 @@
1   -[DEFAULT]
2   -repetitions = 1
3   -iterations = 10
4   -path = 'results'
5   -experiment = 'grid'
6   -weight = ['bm25', 'trad']
7   -;profile_size = range(10,100,10)
8   -;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
9   -sample = [0.6, 0.7, 0.8, 0.9]
10   -
11   -[content]
12   -strategy = ['cb','cbt','cbd']
13   -
14   -[clustering]
15   -experiment = 'single'
16   -;iterations = 4
17   -;medoids = range(2,6)
18   -iterations = 6
19   -medoids = [100,500,1000,5000,10000,50000]
20   -;disabled for this experiment
21   -weight = 0
22   -profile_size = 0
23   -sample = 0
24   -
25   -[colaborative]
26   -users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
27   -neighbors = range(10,1010,50)
src/experiments/legacy/clustering-suite.py
... ... @@ -1,51 +0,0 @@
1   -#!/usr/bin/env python
2   -"""
3   - recommender suite - recommender experiments suite
4   -"""
5   -__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
6   -__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
7   -__license__ = """
8   - This program is free software: you can redistribute it and/or modify
9   - it under the terms of the GNU General Public License as published by
10   - the Free Software Foundation, either version 3 of the License, or
11   - (at your option) any later version.
12   -
13   - This program is distributed in the hope that it will be useful,
14   - but WITHOUT ANY WARRANTY; without even the implied warranty of
15   - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16   - GNU General Public License for more details.
17   -
18   - You should have received a copy of the GNU General Public License
19   - along with this program. If not, see <http://www.gnu.org/licenses/>.
20   -"""
21   -
22   -import sys
23   -import os
24   -sys.path.insert(0,'../')
25   -from config import Config
26   -from data import PopconXapianIndex, PopconSubmission
27   -from recommender import Recommender
28   -from user import LocalSystem, User
29   -from evaluation import *
30   -import logging
31   -import random
32   -import Gnuplot
33   -
def format_clustering_log(k_medoids, max_popcon, dispersion):
    """Return the clustering log text: a header line plus one data row.

    The row carries the same three columns the header announces.  The
    original script formatted three values with only two conversion
    specifiers, which raised TypeError before anything useful was written.
    """
    header = "# k_medoids\tmax_popcon\tdispersion\n"
    row = "%d %d %f\n" % (k_medoids, max_popcon, dispersion)
    return header + row


if __name__ == '__main__':

    cfg = Config()
    cfg.index_mode = "recluster"
    logging.info("Starting clustering experiments")
    logging.info("Medoids: %d\t Max popcon:%d" % (cfg.k_medoids, cfg.max_popcon))
    cfg.popcon_dir = os.path.expanduser("~/org/popcon.debian.org/popcon-mail/popcon-entries/")
    # Index and cluster directories are suffixed with the experiment
    # parameters so that runs with different settings do not share them.
    cfg.popcon_index = cfg.popcon_index + ("_%dmedoids%dmax" %
                                           (cfg.k_medoids, cfg.max_popcon))
    cfg.clusters_dir = cfg.clusters_dir + ("_%dmedoids%dmax" %
                                           (cfg.k_medoids, cfg.max_popcon))
    pxi = PopconXapianIndex(cfg)
    logging.info("Overall dispersion: %f\n" % pxi.cluster_dispersion)
    # Write clustering log; the with-block closes the file even if a write fails.
    log_path = "results/clustering/%dmedoids%dmax" % (cfg.k_medoids, cfg.max_popcon)
    with open(log_path, 'w') as output:
        output.write(format_clustering_log(cfg.k_medoids, cfg.max_popcon,
                                           pxi.cluster_dispersion))
src/experiments/legacy/experiments.cfg
... ... @@ -1,27 +0,0 @@
1   -[DEFAULT]
2   -repetitions = 1
3   -iterations = 10
4   -path = 'results'
5   -experiment = 'grid'
6   -weight = ['bm25', 'trad']
7   -;profile_size = range(10,100,10)
8   -;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
9   -sample = [0.6, 0.7, 0.8, 0.9]
10   -
11   -[content]
12   -strategy = ['cb','cbt','cbd']
13   -
14   -[clustering]
15   -experiment = 'single'
16   -;iterations = 4
17   -;medoids = range(2,6)
18   -iterations = 6
19   -medoids = [100,500,1000,5000,10000,50000]
20   -;disabled for this experiment
21   -weight = 0
22   -profile_size = 0
23   -sample = 0
24   -
25   -[colaborative]
26   -users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
27   -neighbors = range(10,1010,50)
src/experiments/legacy/runner.py
... ... @@ -1,171 +0,0 @@
1   -#!/usr/bin/env python
2   -"""
3   - recommender suite - recommender experiments suite
4   -"""
5   -__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
6   -__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
7   -__license__ = """
8   - This program is free software: you can redistribute it and/or modify
9   - it under the terms of the GNU General Public License as published by
10   - the Free Software Foundation, either version 3 of the License, or
11   - (at your option) any later version.
12   -
13   - This program is distributed in the hope that it will be useful,
14   - but WITHOUT ANY WARRANTY; without even the implied warranty of
15   - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16   - GNU General Public License for more details.
17   -
18   - You should have received a copy of the GNU General Public License
19   - along with this program. If not, see <http://www.gnu.org/licenses/>.
20   -"""
21   -
22   -import expsuite
23   -import sys
24   -sys.path.insert(0,'../')
25   -from config import Config
26   -from data import PopconXapianIndex, PopconSubmission
27   -from recommender import Recommender
28   -from user import LocalSystem, User
29   -from evaluation import *
30   -import logging
31   -import random
32   -import Gnuplot
33   -
class ClusteringSuite(expsuite.PyExperimentSuite):
    """Experiment suite that rebuilds the popcon index for several medoid counts."""

    def reset(self, params, rep):
        """Create a fresh configuration pointing at the sample test data.

        The index mode is switched to "recluster" only when the experiment
        is named exactly "clustering".
        """
        config = Config()
        config.popcon_index = "../tests/test_data/.sample_pxi"
        config.popcon_dir = "../tests/test_data/popcon_dir"
        config.clusters_dir = "../tests/test_data/clusters_dir"
        self.cfg = config

        if params['name'] == "clustering":
            logging.info("Starting 'clustering' experiments suite...")
            self.cfg.index_mode = "recluster"

    def iterate(self, params, rep, n):
        """Build the index with the n-th medoid count and report its dispersion.

        Returns an empty dict for any experiment not named "clustering".
        """
        if params['name'] != "clustering":
            return {}

        medoid_count = params['medoids'][n]
        logging.info("Running iteration %d" % medoid_count)
        self.cfg.k_medoids = medoid_count
        index = PopconXapianIndex(self.cfg)
        return {'k_medoids': medoid_count,
                'dispersion': index.cluster_dispersion}
55   -
class ContentBasedSuite(expsuite.PyExperimentSuite):
    """Experiment suite evaluating content-based recommendation strategies.

    Each iteration holds out a random part of the local user's package
    profile, asks the recommender for a full ranking based on the remaining
    packages, logs where the held-out packages landed in that ranking and
    plots accuracy/precision/recall/F1 against the recommendation size.
    """

    def reset(self, params, rep):
        """Set up recommender, user profile and sample size for one run.

        Does nothing for experiments whose name does not start with "content".
        """
        if not params['name'].startswith("content"):
            return
        cfg = Config()
        #if the index was not built yet
        #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
        cfg.axi = "data/AppAxi"
        cfg.index_mode = "old"
        cfg.weight = params['weight']
        self.rec = Recommender(cfg)
        self.rec.set_strategy(params['strategy'])
        self.repo_size = self.rec.items_repository.get_doccount()
        self.user = LocalSystem()
        self.user.app_pkg_profile(self.rec.items_repository)
        self.user.no_auto_pkg_profile()
        self.sample_size = int(len(self.user.pkg_profile) * params['sample'])
        # iteration should be set to 10 in config file
        #self.profile_size = range(10,101,10)

    def iterate(self, params, rep, n):
        """Run one hold-out evaluation and return its summary.

        Returns a dict with the iteration number, weight, strategy and one
        [size, value] point per metric.  Experiments whose name does not
        start with "content" yield an empty dict instead of failing on an
        unassigned result.
        """
        if not params['name'].startswith("content"):
            return {}

        item_score = dict.fromkeys(self.user.pkg_profile, 1)
        # Prepare partition: move a random subset of the profile into
        # `sample`.  random.sample draws without replacement in one call and
        # works on both Python 2 lists and Python 3 dict views.
        held_out = min(self.sample_size, len(item_score))
        sample = {}
        for key in random.sample(list(item_score), held_out):
            sample[key] = item_score.pop(key)

        # Get full recommendation
        user = User(item_score)
        recommendation = self.rec.get_recommendation(user, self.repo_size)
        ranking = recommendation.ranking

        recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
            (params['strategy'], params['weight'], params['sample'], n)
        self._write_recall_log(recall_file, params, ranking, sample,
                               len(item_score))

        accuracy, precision, recall, f1 = self._evaluate(ranking, sample)
        self._plot_metrics(recall_file + "-plot.ps",
                           accuracy, precision, recall, f1)

        # Iteration log.  Point 20 is reported when available; shorter
        # rankings fall back to their last sampled point instead of raising
        # IndexError.
        # NOTE(review): the 'recall:' key carries a stray colon; it is kept
        # unchanged because existing result readers may rely on it.
        return {'iteration': n,
                'weight': params['weight'],
                'strategy': params['strategy'],
                'accuracy': self._point_at(accuracy, 20),
                'precision': self._point_at(precision, 20),
                'recall:': self._point_at(recall, 20),
                'f1': self._point_at(f1, 20)}

    def _write_recall_log(self, path, params, ranking, sample, profile_size):
        """Write the ranking position of every held-out package to `path`."""
        # First position of each ranked package, so each lookup is O(1)
        # rather than a membership scan followed by an index() scan.
        position = {}
        for rank, pkg in enumerate(ranking):
            position.setdefault(pkg, rank)

        ranks = []
        notfound = []
        for pkg in sample:
            if pkg in position:
                ranks.append(position[pkg])
            else:
                notfound.append(pkg)

        # The with-block closes the log even if a write fails.
        with open(path, 'w') as output:
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" %
                         (self.repo_size, profile_size, self.sample_size))
            for r in sorted(ranks):
                output.write(str(r) + "\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg + "\n")

    def _evaluate(self, ranking, sample):
        """Compute [size, value] metric points for growing ranking prefixes."""
        accuracy = []
        precision = []
        recall = []
        f1 = []
        # Prefix sizes 1, 101, 201, ... up to the length of the ranking.
        for size in range(1, len(ranking) + 1, 100):
            predicted = RecommendationResult(dict.fromkeys(ranking[:size], 1))
            real = RecommendationResult(sample)
            evaluation = Evaluation(predicted, real, self.repo_size)
            accuracy.append([size, evaluation.run(Accuracy())])
            precision.append([size, evaluation.run(Precision())])
            recall.append([size, evaluation.run(Recall())])
            f1.append([size, evaluation.run(F1())])
        return accuracy, precision, recall, f1

    def _plot_metrics(self, path, accuracy, precision, recall, f1):
        """Plot the four metric curves and save a hardcopy at `path`."""
        g = Gnuplot.Gnuplot()
        g('set style data lines')
        g.xlabel('Recommendation size')
        g.plot(Gnuplot.Data(accuracy, title="Accuracy"),
               Gnuplot.Data(precision, title="Precision"),
               Gnuplot.Data(recall, title="Recall"),
               Gnuplot.Data(f1, title="F1"))
        g.hardcopy(path, enhanced=1, color=1)

    @staticmethod
    def _point_at(points, index):
        """Return points[index], else the last point, else None if empty."""
        if len(points) > index:
            return points[index]
        if points:
            return points[-1]
        return None
139   -
140   -#class CollaborativeSuite(expsuite.PyExperimentSuite):
141   -# def reset(self, params, rep):
142   -# if params['name'].startswith("collaborative"):
143   -#
144   -# def iterate(self, params, rep, n):
145   -# if params['name'].startswith("collaborative"):
146   -# for root, dirs, files in os.walk(self.source_dir):
147   -# for popcon_file in files:
148   -# submission = PopconSubmission(os.path.join(root,popcon_file))
149   -# user = User(submission.packages)
150   -# user.maximal_pkg_profile()
151   -# rec.get_recommendation(user)
152   -# precision = 0
153   -# result = {'weight': params['weight'],
154   -# 'strategy': params['strategy'],
155   -# 'profile_size': self.profile_size[n],
156   -# 'accuracy': accuracy,
157   -# 'precision': precision,
158   -# 'recall:': recall,
159   -# 'f1': }
160   -# else:
161   -# result = {}
162   -# return result
163   -
if __name__ == '__main__':

    # With fewer than three command-line entries every enabled suite runs;
    # otherwise only the suites named on the command line do.
    run_all = len(sys.argv) < 3

    if run_all or "clustering" in sys.argv:
        ClusteringSuite().start()
    if run_all or "content" in sys.argv:
        ContentBasedSuite().start()
    # The collaborative suite is disabled:
    #if "collaborative" in sys.argv or len(sys.argv)<3:
        #CollaborativeSuite().start()
src/experiments/runner.py
... ... @@ -1,171 +0,0 @@
1   -#!/usr/bin/env python
2   -"""
3   - recommender suite - recommender experiments suite
4   -"""
5   -__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
6   -__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
7   -__license__ = """
8   - This program is free software: you can redistribute it and/or modify
9   - it under the terms of the GNU General Public License as published by
10   - the Free Software Foundation, either version 3 of the License, or
11   - (at your option) any later version.
12   -
13   - This program is distributed in the hope that it will be useful,
14   - but WITHOUT ANY WARRANTY; without even the implied warranty of
15   - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16   - GNU General Public License for more details.
17   -
18   - You should have received a copy of the GNU General Public License
19   - along with this program. If not, see <http://www.gnu.org/licenses/>.
20   -"""
21   -
22   -import expsuite
23   -import sys
24   -sys.path.insert(0,'../')
25   -from config import Config
26   -from data import PopconXapianIndex, PopconSubmission
27   -from recommender import Recommender
28   -from user import LocalSystem, User
29   -from evaluation import *
30   -import logging
31   -import random
32   -import Gnuplot
33   -
class ClusteringSuite(expsuite.PyExperimentSuite):
    """Experiment suite that rebuilds the popcon index for several medoid counts."""

    def reset(self, params, rep):
        """Create a fresh configuration pointing at the sample test data.

        The index mode is switched to "recluster" only when the experiment
        is named exactly "clustering".
        """
        config = Config()
        config.popcon_index = "../tests/test_data/.sample_pxi"
        config.popcon_dir = "../tests/test_data/popcon_dir"
        config.clusters_dir = "../tests/test_data/clusters_dir"
        self.cfg = config

        if params['name'] == "clustering":
            logging.info("Starting 'clustering' experiments suite...")
            self.cfg.index_mode = "recluster"

    def iterate(self, params, rep, n):
        """Build the index with the n-th medoid count and report its dispersion.

        Returns an empty dict for any experiment not named "clustering".
        """
        if params['name'] != "clustering":
            return {}

        medoid_count = params['medoids'][n]
        logging.info("Running iteration %d" % medoid_count)
        self.cfg.k_medoids = medoid_count
        index = PopconXapianIndex(self.cfg)
        return {'k_medoids': medoid_count,
                'dispersion': index.cluster_dispersion}
55   -
class ContentBasedSuite(expsuite.PyExperimentSuite):
    """Experiment suite evaluating content-based recommendation strategies.

    Each iteration holds out a random part of the local user's package
    profile, asks the recommender for a full ranking based on the remaining
    packages, logs where the held-out packages landed in that ranking and
    plots accuracy/precision/recall/F1 against the recommendation size.
    """

    def reset(self, params, rep):
        """Set up recommender, user profile and sample size for one run.

        Does nothing for experiments whose name does not start with "content".
        """
        if not params['name'].startswith("content"):
            return
        cfg = Config()
        #if the index was not built yet
        #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
        cfg.axi = "data/AppAxi"
        cfg.index_mode = "old"
        cfg.weight = params['weight']
        self.rec = Recommender(cfg)
        self.rec.set_strategy(params['strategy'])
        self.repo_size = self.rec.items_repository.get_doccount()
        self.user = LocalSystem()
        self.user.app_pkg_profile(self.rec.items_repository)
        self.user.no_auto_pkg_profile()
        self.sample_size = int(len(self.user.pkg_profile) * params['sample'])
        # iteration should be set to 10 in config file
        #self.profile_size = range(10,101,10)

    def iterate(self, params, rep, n):
        """Run one hold-out evaluation and return its summary.

        Returns a dict with the iteration number, weight, strategy and one
        [size, value] point per metric.  Experiments whose name does not
        start with "content" yield an empty dict instead of failing on an
        unassigned result.
        """
        if not params['name'].startswith("content"):
            return {}

        item_score = dict.fromkeys(self.user.pkg_profile, 1)
        # Prepare partition: move a random subset of the profile into
        # `sample`.  random.sample draws without replacement in one call and
        # works on both Python 2 lists and Python 3 dict views.
        held_out = min(self.sample_size, len(item_score))
        sample = {}
        for key in random.sample(list(item_score), held_out):
            sample[key] = item_score.pop(key)

        # Get full recommendation
        user = User(item_score)
        recommendation = self.rec.get_recommendation(user, self.repo_size)
        ranking = recommendation.ranking

        recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
            (params['strategy'], params['weight'], params['sample'], n)
        self._write_recall_log(recall_file, params, ranking, sample,
                               len(item_score))

        accuracy, precision, recall, f1 = self._evaluate(ranking, sample)
        self._plot_metrics(recall_file + "-plot.ps",
                           accuracy, precision, recall, f1)

        # Iteration log.  Point 20 is reported when available; shorter
        # rankings fall back to their last sampled point instead of raising
        # IndexError.
        # NOTE(review): the 'recall:' key carries a stray colon; it is kept
        # unchanged because existing result readers may rely on it.
        return {'iteration': n,
                'weight': params['weight'],
                'strategy': params['strategy'],
                'accuracy': self._point_at(accuracy, 20),
                'precision': self._point_at(precision, 20),
                'recall:': self._point_at(recall, 20),
                'f1': self._point_at(f1, 20)}

    def _write_recall_log(self, path, params, ranking, sample, profile_size):
        """Write the ranking position of every held-out package to `path`."""
        # First position of each ranked package, so each lookup is O(1)
        # rather than a membership scan followed by an index() scan.
        position = {}
        for rank, pkg in enumerate(ranking):
            position.setdefault(pkg, rank)

        ranks = []
        notfound = []
        for pkg in sample:
            if pkg in position:
                ranks.append(position[pkg])
            else:
                notfound.append(pkg)

        # The with-block closes the log even if a write fails.
        with open(path, 'w') as output:
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" %
                         (self.repo_size, profile_size, self.sample_size))
            for r in sorted(ranks):
                output.write(str(r) + "\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg + "\n")

    def _evaluate(self, ranking, sample):
        """Compute [size, value] metric points for growing ranking prefixes."""
        accuracy = []
        precision = []
        recall = []
        f1 = []
        # Prefix sizes 1, 101, 201, ... up to the length of the ranking.
        for size in range(1, len(ranking) + 1, 100):
            predicted = RecommendationResult(dict.fromkeys(ranking[:size], 1))
            real = RecommendationResult(sample)
            evaluation = Evaluation(predicted, real, self.repo_size)
            accuracy.append([size, evaluation.run(Accuracy())])
            precision.append([size, evaluation.run(Precision())])
            recall.append([size, evaluation.run(Recall())])
            f1.append([size, evaluation.run(F1())])
        return accuracy, precision, recall, f1

    def _plot_metrics(self, path, accuracy, precision, recall, f1):
        """Plot the four metric curves and save a hardcopy at `path`."""
        g = Gnuplot.Gnuplot()
        g('set style data lines')
        g.xlabel('Recommendation size')
        g.plot(Gnuplot.Data(accuracy, title="Accuracy"),
               Gnuplot.Data(precision, title="Precision"),
               Gnuplot.Data(recall, title="Recall"),
               Gnuplot.Data(f1, title="F1"))
        g.hardcopy(path, enhanced=1, color=1)

    @staticmethod
    def _point_at(points, index):
        """Return points[index], else the last point, else None if empty."""
        if len(points) > index:
            return points[index]
        if points:
            return points[-1]
        return None
139   -
140   -#class CollaborativeSuite(expsuite.PyExperimentSuite):
141   -# def reset(self, params, rep):
142   -# if params['name'].startswith("collaborative"):
143   -#
144   -# def iterate(self, params, rep, n):
145   -# if params['name'].startswith("collaborative"):
146   -# for root, dirs, files in os.walk(self.source_dir):
147   -# for popcon_file in files:
148   -# submission = PopconSubmission(os.path.join(root,popcon_file))
149   -# user = User(submission.packages)
150   -# user.maximal_pkg_profile()
151   -# rec.get_recommendation(user)
152   -# precision = 0
153   -# result = {'weight': params['weight'],
154   -# 'strategy': params['strategy'],
155   -# 'profile_size': self.profile_size[n],
156   -# 'accuracy': accuracy,
157   -# 'precision': precision,
158   -# 'recall:': recall,
159   -# 'f1': }
160   -# else:
161   -# result = {}
162   -# return result
163   -
if __name__ == '__main__':

    # With fewer than three command-line entries every enabled suite runs;
    # otherwise only the suites named on the command line do.
    run_all = len(sys.argv) < 3

    if run_all or "clustering" in sys.argv:
        ClusteringSuite().start()
    if run_all or "content" in sys.argv:
        ContentBasedSuite().start()
    # The collaborative suite is disabled:
    #if "collaborative" in sys.argv or len(sys.argv)<3:
        #CollaborativeSuite().start()