Deleted old files.

Tássia Camões Araújo
1 parent c673b9b2
Showing 5 changed files with 0 additions and 447 deletions
src/experiments/experiments.cfg
src/experiments/legacy/clustering-suite.py
src/experiments/legacy/experiments.cfg
src/experiments/legacy/runner.py
src/experiments/runner.py
@@ -1,27 +0,0 @@
-[DEFAULT]
-repetitions = 1
-iterations = 10
-path = 'results'
-experiment = 'grid'
-weight = ['bm25', 'trad']
-;profile_size = range(10,100,10)
-;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
-sample = [0.6, 0.7, 0.8, 0.9]
-
-[content]
-strategy = ['cb','cbt','cbd']
-
-[clustering]
-experiment = 'single'
-;iterations = 4
-;medoids = range(2,6)
-iterations = 6
-medoids = [100,500,1000,5000,10000,50000]
-;disabled for this experiment
-weight = 0
-profile_size = 0
-sample = 0
-
-[colaborative]
-users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
-neighbors = range(10,1010,50)
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-"""
-    recommender suite - recommender experiments suite 
-"""
-__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
-__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
-__license__ = """
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-"""
-
-import sys
-import os
-sys.path.insert(0,'../')
-from config import Config
-from data import PopconXapianIndex, PopconSubmission
-from recommender import Recommender
-from user import LocalSystem, User
-from evaluation import *
-import logging
-import random
-import Gnuplot
-
-if __name__ == '__main__':
-
-    cfg = Config()
-    cfg.index_mode = "recluster"
-    logging.info("Starting clustering experiments")
-    logging.info("Medoids: %d\t Max popcon:%d" % (cfg.k_medoids,cfg.max_popcon))
-    cfg.popcon_dir = os.path.expanduser("~/org/popcon.debian.org/popcon-mail/popcon-entries/")
-    cfg.popcon_index = cfg.popcon_index+("_%dmedoids%dmax" %
-                                         (cfg.k_medoids,cfg.max_popcon))
-    cfg.clusters_dir = cfg.clusters_dir+("_%dmedoids%dmax" %
-                                         (cfg.k_medoids,cfg.max_popcon))
-    pxi = PopconXapianIndex(cfg)
-    logging.info("Overall dispersion: %f\n" % pxi.cluster_dispersion)
-    # Write clustering log
-    output = open(("results/clustering/%dmedoids%dmax" % (cfg.k_medoids,cfg.max_popcon)),'w')
-    output.write("# k_medoids\tmax_popcon\tdispersion\n")
-    output.write("%d %f\n" % (cfg.k_medoids,cfg.max_popcon,pxi.cluster_dispersion))
-    output.close()
@@ -1,27 +0,0 @@
-[DEFAULT]
-repetitions = 1
-iterations = 10
-path = 'results'
-experiment = 'grid'
-weight = ['bm25', 'trad']
-;profile_size = range(10,100,10)
-;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
-sample = [0.6, 0.7, 0.8, 0.9]
-
-[content]
-strategy = ['cb','cbt','cbd']
-
-[clustering]
-experiment = 'single'
-;iterations = 4
-;medoids = range(2,6)
-iterations = 6
-medoids = [100,500,1000,5000,10000,50000]
-;disabled for this experiment
-weight = 0
-profile_size = 0
-sample = 0
-
-[colaborative]
-users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
-neighbors = range(10,1010,50)
@@ -1,171 +0,0 @@
-#!/usr/bin/env python
-"""
-    recommender suite - recommender experiments suite 
-"""
-__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
-__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
-__license__ = """
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-"""
-
-import expsuite
-import sys
-sys.path.insert(0,'../')
-from config import Config
-from data import PopconXapianIndex, PopconSubmission
-from recommender import Recommender
-from user import LocalSystem, User
-from evaluation import *
-import logging
-import random
-import Gnuplot
-
-class ClusteringSuite(expsuite.PyExperimentSuite):
-    def reset(self, params, rep):
-        self.cfg = Config()
-        self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
-        self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
-        self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
-
-        if params['name'] == "clustering":
-            logging.info("Starting 'clustering' experiments suite...")
-            self.cfg.index_mode = "recluster"
-
-    def iterate(self, params, rep, n):
-        if params['name'] == "clustering":
-            logging.info("Running iteration %d" % params['medoids'][n])
-            self.cfg.k_medoids = params['medoids'][n]
-            pxi = PopconXapianIndex(self.cfg)
-            result = {'k_medoids': params['medoids'][n],
-                   'dispersion': pxi.cluster_dispersion}
-        else:
-            result = {}
-        return result
-
-class ContentBasedSuite(expsuite.PyExperimentSuite):
-    def reset(self, params, rep):
-        if params['name'].startswith("content"):
-            cfg = Config()
-            #if the index was not built yet
-            #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
-            cfg.axi = "data/AppAxi"
-            cfg.index_mode = "old"
-            cfg.weight = params['weight']
-            self.rec = Recommender(cfg)
-            self.rec.set_strategy(params['strategy'])
-            self.repo_size = self.rec.items_repository.get_doccount()
-            self.user = LocalSystem()
-            self.user.app_pkg_profile(self.rec.items_repository)
-            self.user.no_auto_pkg_profile()
-            self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
-            # iteration should be set to 10 in config file
-            #self.profile_size = range(10,101,10)
-
-    def iterate(self, params, rep, n):
-        if params['name'].startswith("content"):
-            item_score = dict.fromkeys(self.user.pkg_profile,1)
-            # Prepare partition
-            sample = {}
-            for i in range(self.sample_size):
-                 key = random.choice(item_score.keys())
-                 sample[key] = item_score.pop(key)
-            # Get full recommendation
-            user = User(item_score)
-            recommendation = self.rec.get_recommendation(user,self.repo_size)
-            # Write recall log
-            recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
-                          (params['strategy'],params['weight'],params['sample'],n)
-            output = open(recall_file,'w')
-            output.write("# weight=%s\n" % params['weight'])
-            output.write("# strategy=%s\n" % params['strategy'])
-            output.write("# sample=%f\n" % params['sample'])
-            output.write("\n%d %d %d\n" % \
-                         (self.repo_size,len(item_score),self.sample_size))
-            notfound = []
-            ranks = []
-            for pkg in sample.keys():
-                if pkg in recommendation.ranking:
-                    ranks.append(recommendation.ranking.index(pkg))
-                else:
-                    notfound.append(pkg)
-            for r in sorted(ranks):
-                output.write(str(r)+"\n")
-            if notfound:
-                output.write("Out of recommendation:\n")
-                for pkg in notfound:
-                    output.write(pkg+"\n")
-            output.close()
-            # Plot metrics summary
-            accuracy = []
-            precision = []
-            recall = []
-            f1 = []
-            g = Gnuplot.Gnuplot()
-            g('set style data lines')
-            g.xlabel('Recommendation size')
-            for size in range(1,len(recommendation.ranking)+1,100):
-                predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
-                real = RecommendationResult(sample)
-                evaluation = Evaluation(predicted,real,self.repo_size)
-                accuracy.append([size,evaluation.run(Accuracy())])
-                precision.append([size,evaluation.run(Precision())])
-                recall.append([size,evaluation.run(Recall())])
-                f1.append([size,evaluation.run(F1())])
-            g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
-                   Gnuplot.Data(precision,title="Precision"),
-                   Gnuplot.Data(recall,title="Recall"),
-                   Gnuplot.Data(f1,title="F1"))
-            g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
-            # Iteration log
-            result = {'iteration': n,
-                      'weight': params['weight'],
-                      'strategy': params['strategy'],
-                      'accuracy': accuracy[20],
-                      'precision': precision[20],
-                      'recall:': recall[20],
-                      'f1': f1[20]}
-            return result
-
-#class CollaborativeSuite(expsuite.PyExperimentSuite):
-#    def reset(self, params, rep):
-#        if params['name'].startswith("collaborative"):
-#
-#    def iterate(self, params, rep, n):
-#        if params['name'].startswith("collaborative"):
-#            for root, dirs, files in os.walk(self.source_dir):
-#                for popcon_file in files:
-#                    submission = PopconSubmission(os.path.join(root,popcon_file))
-#                    user = User(submission.packages)
-#                    user.maximal_pkg_profile()
-#                    rec.get_recommendation(user)
-#                    precision = 0
-#                    result = {'weight': params['weight'],
-#                              'strategy': params['strategy'],
-#                              'profile_size': self.profile_size[n],
-#                              'accuracy': accuracy,
-#                              'precision': precision,
-#                              'recall:': recall,
-#                              'f1': }
-#        else:
-#            result = {}
-#        return result
-
-if __name__ == '__main__':
-
-    if "clustering" in sys.argv or len(sys.argv)<3:
-        ClusteringSuite().start()
-    if "content" in sys.argv or len(sys.argv)<3:
-        ContentBasedSuite().start()
-    #if "collaborative" in sys.argv or len(sys.argv)<3:
-    #CollaborativeSuite().start()
@@ -1,171 +0,0 @@
-#!/usr/bin/env python
-"""
-    recommender suite - recommender experiments suite 
-"""
-__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
-__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
-__license__ = """
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-"""
-
-import expsuite
-import sys
-sys.path.insert(0,'../')
-from config import Config
-from data import PopconXapianIndex, PopconSubmission
-from recommender import Recommender
-from user import LocalSystem, User
-from evaluation import *
-import logging
-import random
-import Gnuplot
-
-class ClusteringSuite(expsuite.PyExperimentSuite):
-    def reset(self, params, rep):
-        self.cfg = Config()
-        self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
-        self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
-        self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
-
-        if params['name'] == "clustering":
-            logging.info("Starting 'clustering' experiments suite...")
-            self.cfg.index_mode = "recluster"
-
-    def iterate(self, params, rep, n):
-        if params['name'] == "clustering":
-            logging.info("Running iteration %d" % params['medoids'][n])
-            self.cfg.k_medoids = params['medoids'][n]
-            pxi = PopconXapianIndex(self.cfg)
-            result = {'k_medoids': params['medoids'][n],
-                   'dispersion': pxi.cluster_dispersion}
-        else:
-            result = {}
-        return result
-
-class ContentBasedSuite(expsuite.PyExperimentSuite):
-    def reset(self, params, rep):
-        if params['name'].startswith("content"):
-            cfg = Config()
-            #if the index was not built yet
-            #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
-            cfg.axi = "data/AppAxi"
-            cfg.index_mode = "old"
-            cfg.weight = params['weight']
-            self.rec = Recommender(cfg)
-            self.rec.set_strategy(params['strategy'])
-            self.repo_size = self.rec.items_repository.get_doccount()
-            self.user = LocalSystem()
-            self.user.app_pkg_profile(self.rec.items_repository)
-            self.user.no_auto_pkg_profile()
-            self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
-            # iteration should be set to 10 in config file
-            #self.profile_size = range(10,101,10)
-
-    def iterate(self, params, rep, n):
-        if params['name'].startswith("content"):
-            item_score = dict.fromkeys(self.user.pkg_profile,1)
-            # Prepare partition
-            sample = {}
-            for i in range(self.sample_size):
-                 key = random.choice(item_score.keys())
-                 sample[key] = item_score.pop(key)
-            # Get full recommendation
-            user = User(item_score)
-            recommendation = self.rec.get_recommendation(user,self.repo_size)
-            # Write recall log
-            recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
-                          (params['strategy'],params['weight'],params['sample'],n)
-            output = open(recall_file,'w')
-            output.write("# weight=%s\n" % params['weight'])
-            output.write("# strategy=%s\n" % params['strategy'])
-            output.write("# sample=%f\n" % params['sample'])
-            output.write("\n%d %d %d\n" % \
-                         (self.repo_size,len(item_score),self.sample_size))
-            notfound = []
-            ranks = []
-            for pkg in sample.keys():
-                if pkg in recommendation.ranking:
-                    ranks.append(recommendation.ranking.index(pkg))
-                else:
-                    notfound.append(pkg)
-            for r in sorted(ranks):
-                output.write(str(r)+"\n")
-            if notfound:
-                output.write("Out of recommendation:\n")
-                for pkg in notfound:
-                    output.write(pkg+"\n")
-            output.close()
-            # Plot metrics summary
-            accuracy = []
-            precision = []
-            recall = []
-            f1 = []
-            g = Gnuplot.Gnuplot()
-            g('set style data lines')
-            g.xlabel('Recommendation size')
-            for size in range(1,len(recommendation.ranking)+1,100):
-                predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
-                real = RecommendationResult(sample)
-                evaluation = Evaluation(predicted,real,self.repo_size)
-                accuracy.append([size,evaluation.run(Accuracy())])
-                precision.append([size,evaluation.run(Precision())])
-                recall.append([size,evaluation.run(Recall())])
-                f1.append([size,evaluation.run(F1())])
-            g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
-                   Gnuplot.Data(precision,title="Precision"),
-                   Gnuplot.Data(recall,title="Recall"),
-                   Gnuplot.Data(f1,title="F1"))
-            g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
-            # Iteration log
-            result = {'iteration': n,
-                      'weight': params['weight'],
-                      'strategy': params['strategy'],
-                      'accuracy': accuracy[20],
-                      'precision': precision[20],
-                      'recall:': recall[20],
-                      'f1': f1[20]}
-            return result
-
-#class CollaborativeSuite(expsuite.PyExperimentSuite):
-#    def reset(self, params, rep):
-#        if params['name'].startswith("collaborative"):
-#
-#    def iterate(self, params, rep, n):
-#        if params['name'].startswith("collaborative"):
-#            for root, dirs, files in os.walk(self.source_dir):
-#                for popcon_file in files:
-#                    submission = PopconSubmission(os.path.join(root,popcon_file))
-#                    user = User(submission.packages)
-#                    user.maximal_pkg_profile()
-#                    rec.get_recommendation(user)
-#                    precision = 0
-#                    result = {'weight': params['weight'],
-#                              'strategy': params['strategy'],
-#                              'profile_size': self.profile_size[n],
-#                              'accuracy': accuracy,
-#                              'precision': precision,
-#                              'recall:': recall,
-#                              'f1': }
-#        else:
-#            result = {}
-#        return result
-
-if __name__ == '__main__':
-
-    if "clustering" in sys.argv or len(sys.argv)<3:
-        ClusteringSuite().start()
-    if "content" in sys.argv or len(sys.argv)<3:
-        ContentBasedSuite().start()
-    #if "collaborative" in sys.argv or len(sys.argv)<3:
-    #CollaborativeSuite().start()
	@@ -1,27 +0,0 @@
1	-[DEFAULT]
2	-repetitions = 1
3	-iterations = 10
4	-path = 'results'
5	-experiment = 'grid'
6	-weight = ['bm25', 'trad']
7	-;profile_size = range(10,100,10)
8	-;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
9	-sample = [0.6, 0.7, 0.8, 0.9]
10	-
11	-[content]
12	-strategy = ['cb','cbt','cbd']
13	-
14	-[clustering]
15	-experiment = 'single'
16	-;iterations = 4
17	-;medoids = range(2,6)
18	-iterations = 6
19	-medoids = [100,500,1000,5000,10000,50000]
20	-;disabled for this experiment
21	-weight = 0
22	-profile_size = 0
23	-sample = 0
24	-
25	-[colaborative]
26	-users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
27	-neighbors = range(10,1010,50)
	@@ -1,51 +0,0 @@
1	-#!/usr/bin/env python
2	-"""
3	- recommender suite - recommender experiments suite
4	-"""
5	-__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
6	-__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
7	-__license__ = """
8	- This program is free software: you can redistribute it and/or modify
9	- it under the terms of the GNU General Public License as published by
10	- the Free Software Foundation, either version 3 of the License, or
11	- (at your option) any later version.
12	-
13	- This program is distributed in the hope that it will be useful,
14	- but WITHOUT ANY WARRANTY; without even the implied warranty of
15	- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	- GNU General Public License for more details.
17	-
18	- You should have received a copy of the GNU General Public License
19	- along with this program. If not, see <http://www.gnu.org/licenses/>.
20	-"""
21	-
22	-import sys
23	-import os
24	-sys.path.insert(0,'../')
25	-from config import Config
26	-from data import PopconXapianIndex, PopconSubmission
27	-from recommender import Recommender
28	-from user import LocalSystem, User
29	-from evaluation import *
30	-import logging
31	-import random
32	-import Gnuplot
33	-
34	-if __name__ == '__main__':
35	-
36	- cfg = Config()
37	- cfg.index_mode = "recluster"
38	- logging.info("Starting clustering experiments")
39	- logging.info("Medoids: %d\t Max popcon:%d" % (cfg.k_medoids,cfg.max_popcon))
40	- cfg.popcon_dir = os.path.expanduser("~/org/popcon.debian.org/popcon-mail/popcon-entries/")
41	- cfg.popcon_index = cfg.popcon_index+("_%dmedoids%dmax" %
42	- (cfg.k_medoids,cfg.max_popcon))
43	- cfg.clusters_dir = cfg.clusters_dir+("_%dmedoids%dmax" %
44	- (cfg.k_medoids,cfg.max_popcon))
45	- pxi = PopconXapianIndex(cfg)
46	- logging.info("Overall dispersion: %f\n" % pxi.cluster_dispersion)
47	- # Write clustering log
48	- output = open(("results/clustering/%dmedoids%dmax" % (cfg.k_medoids,cfg.max_popcon)),'w')
49	- output.write("# k_medoids\tmax_popcon\tdispersion\n")
50	- output.write("%d %f\n" % (cfg.k_medoids,cfg.max_popcon,pxi.cluster_dispersion))
51	- output.close()
	@@ -1,171 +0,0 @@
1	-#!/usr/bin/env python
2	-"""
3	- recommender suite - recommender experiments suite
4	-"""
5	-__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
6	-__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
7	-__license__ = """
8	- This program is free software: you can redistribute it and/or modify
9	- it under the terms of the GNU General Public License as published by
10	- the Free Software Foundation, either version 3 of the License, or
11	- (at your option) any later version.
12	-
13	- This program is distributed in the hope that it will be useful,
14	- but WITHOUT ANY WARRANTY; without even the implied warranty of
15	- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	- GNU General Public License for more details.
17	-
18	- You should have received a copy of the GNU General Public License
19	- along with this program. If not, see <http://www.gnu.org/licenses/>.
20	-"""
21	-
22	-import expsuite
23	-import sys
24	-sys.path.insert(0,'../')
25	-from config import Config
26	-from data import PopconXapianIndex, PopconSubmission
27	-from recommender import Recommender
28	-from user import LocalSystem, User
29	-from evaluation import *
30	-import logging
31	-import random
32	-import Gnuplot
33	-
34	-class ClusteringSuite(expsuite.PyExperimentSuite):
35	- def reset(self, params, rep):
36	- self.cfg = Config()
37	- self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
38	- self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
39	- self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
40	-
41	- if params['name'] == "clustering":
42	- logging.info("Starting 'clustering' experiments suite...")
43	- self.cfg.index_mode = "recluster"
44	-
45	- def iterate(self, params, rep, n):
46	- if params['name'] == "clustering":
47	- logging.info("Running iteration %d" % params['medoids'][n])
48	- self.cfg.k_medoids = params['medoids'][n]
49	- pxi = PopconXapianIndex(self.cfg)
50	- result = {'k_medoids': params['medoids'][n],
51	- 'dispersion': pxi.cluster_dispersion}
52	- else:
53	- result = {}
54	- return result
55	-
56	-class ContentBasedSuite(expsuite.PyExperimentSuite):
57	- def reset(self, params, rep):
58	- if params['name'].startswith("content"):
59	- cfg = Config()
60	- #if the index was not built yet
61	- #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
62	- cfg.axi = "data/AppAxi"
63	- cfg.index_mode = "old"
64	- cfg.weight = params['weight']
65	- self.rec = Recommender(cfg)
66	- self.rec.set_strategy(params['strategy'])
67	- self.repo_size = self.rec.items_repository.get_doccount()
68	- self.user = LocalSystem()
69	- self.user.app_pkg_profile(self.rec.items_repository)
70	- self.user.no_auto_pkg_profile()
71	- self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
72	- # iteration should be set to 10 in config file
73	- #self.profile_size = range(10,101,10)
74	-
75	- def iterate(self, params, rep, n):
76	- if params['name'].startswith("content"):
77	- item_score = dict.fromkeys(self.user.pkg_profile,1)
78	- # Prepare partition
79	- sample = {}
80	- for i in range(self.sample_size):
81	- key = random.choice(item_score.keys())
82	- sample[key] = item_score.pop(key)
83	- # Get full recommendation
84	- user = User(item_score)
85	- recommendation = self.rec.get_recommendation(user,self.repo_size)
86	- # Write recall log
87	- recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
88	- (params['strategy'],params['weight'],params['sample'],n)
89	- output = open(recall_file,'w')
90	- output.write("# weight=%s\n" % params['weight'])
91	- output.write("# strategy=%s\n" % params['strategy'])
92	- output.write("# sample=%f\n" % params['sample'])
93	- output.write("\n%d %d %d\n" % \
94	- (self.repo_size,len(item_score),self.sample_size))
95	- notfound = []
96	- ranks = []
97	- for pkg in sample.keys():
98	- if pkg in recommendation.ranking:
99	- ranks.append(recommendation.ranking.index(pkg))
100	- else:
101	- notfound.append(pkg)
102	- for r in sorted(ranks):
103	- output.write(str(r)+"\n")
104	- if notfound:
105	- output.write("Out of recommendation:\n")
106	- for pkg in notfound:
107	- output.write(pkg+"\n")
108	- output.close()
109	- # Plot metrics summary
110	- accuracy = []
111	- precision = []
112	- recall = []
113	- f1 = []
114	- g = Gnuplot.Gnuplot()
115	- g('set style data lines')
116	- g.xlabel('Recommendation size')
117	- for size in range(1,len(recommendation.ranking)+1,100):
118	- predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
119	- real = RecommendationResult(sample)
120	- evaluation = Evaluation(predicted,real,self.repo_size)
121	- accuracy.append([size,evaluation.run(Accuracy())])
122	- precision.append([size,evaluation.run(Precision())])
123	- recall.append([size,evaluation.run(Recall())])
124	- f1.append([size,evaluation.run(F1())])
125	- g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
126	- Gnuplot.Data(precision,title="Precision"),
127	- Gnuplot.Data(recall,title="Recall"),
128	- Gnuplot.Data(f1,title="F1"))
129	- g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
130	- # Iteration log
131	- result = {'iteration': n,
132	- 'weight': params['weight'],
133	- 'strategy': params['strategy'],
134	- 'accuracy': accuracy[20],
135	- 'precision': precision[20],
136	- 'recall:': recall[20],
137	- 'f1': f1[20]}
138	- return result
139	-
140	-#class CollaborativeSuite(expsuite.PyExperimentSuite):
141	-# def reset(self, params, rep):
142	-# if params['name'].startswith("collaborative"):
143	-#
144	-# def iterate(self, params, rep, n):
145	-# if params['name'].startswith("collaborative"):
146	-# for root, dirs, files in os.walk(self.source_dir):
147	-# for popcon_file in files:
148	-# submission = PopconSubmission(os.path.join(root,popcon_file))
149	-# user = User(submission.packages)
150	-# user.maximal_pkg_profile()
151	-# rec.get_recommendation(user)
152	-# precision = 0
153	-# result = {'weight': params['weight'],
154	-# 'strategy': params['strategy'],
155	-# 'profile_size': self.profile_size[n],
156	-# 'accuracy': accuracy,
157	-# 'precision': precision,
158	-# 'recall:': recall,
159	-# 'f1': }
160	-# else:
161	-# result = {}
162	-# return result
163	-
164	-if __name__ == '__main__':
165	-
166	- if "clustering" in sys.argv or len(sys.argv)<3:
167	- ClusteringSuite().start()
168	- if "content" in sys.argv or len(sys.argv)<3:
169	- ContentBasedSuite().start()
170	- #if "collaborative" in sys.argv or len(sys.argv)<3:
171	- #CollaborativeSuite().start()