Deleted old files.

Tássia Camões Araújo
1 parent c673b9b2
Showing 5 changed files with 0 additions and 447 deletions.
src/experiments/experiments.cfg
src/experiments/legacy/clustering-suite.py
src/experiments/legacy/experiments.cfg
src/experiments/legacy/runner.py
src/experiments/runner.py
@@ -1,27 +0,0 @@
-[DEFAULT]
-repetitions = 1
-iterations = 10
-path = 'results'
-experiment = 'grid'
-weight = ['bm25', 'trad']
-;profile_size = range(10,100,10)
-;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
-sample = [0.6, 0.7, 0.8, 0.9]
-
-[content]
-strategy = ['cb','cbt','cbd']
-
-[clustering]
-experiment = 'single'
-;iterations = 4
-;medoids = range(2,6)
-iterations = 6
-medoids = [100,500,1000,5000,10000,50000]
-;disabled for this experiment
-weight = 0
-profile_size = 0
-sample = 0
-
-[colaborative]
-users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
-neighbors = range(10,1010,50)
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-"""
-    recommender suite - recommender experiments suite 
-"""
-__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
-__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
-__license__ = """
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-"""
-
-import sys
-import os
-sys.path.insert(0,'../')
-from config import Config
-from data import PopconXapianIndex, PopconSubmission
-from recommender import Recommender
-from user import LocalSystem, User
-from evaluation import *
-import logging
-import random
-import Gnuplot
-
-if __name__ == '__main__':
-
-    cfg = Config()
-    cfg.index_mode = "recluster"
-    logging.info("Starting clustering experiments")
-    logging.info("Medoids: %d\t Max popcon:%d" % (cfg.k_medoids,cfg.max_popcon))
-    cfg.popcon_dir = os.path.expanduser("~/org/popcon.debian.org/popcon-mail/popcon-entries/")
-    cfg.popcon_index = cfg.popcon_index+("_%dmedoids%dmax" %
-                                         (cfg.k_medoids,cfg.max_popcon))
-    cfg.clusters_dir = cfg.clusters_dir+("_%dmedoids%dmax" %
-                                         (cfg.k_medoids,cfg.max_popcon))
-    pxi = PopconXapianIndex(cfg)
-    logging.info("Overall dispersion: %f\n" % pxi.cluster_dispersion)
-    # Write clustering log
-    output = open(("results/clustering/%dmedoids%dmax" % (cfg.k_medoids,cfg.max_popcon)),'w')
-    output.write("# k_medoids\tmax_popcon\tdispersion\n")
-    output.write("%d %f\n" % (cfg.k_medoids,cfg.max_popcon,pxi.cluster_dispersion))
-    output.close()
@@ -1,27 +0,0 @@
-[DEFAULT]
-repetitions = 1
-iterations = 10
-path = 'results'
-experiment = 'grid'
-weight = ['bm25', 'trad']
-;profile_size = range(10,100,10)
-;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
-sample = [0.6, 0.7, 0.8, 0.9]
-
-[content]
-strategy = ['cb','cbt','cbd']
-
-[clustering]
-experiment = 'single'
-;iterations = 4
-;medoids = range(2,6)
-iterations = 6
-medoids = [100,500,1000,5000,10000,50000]
-;disabled for this experiment
-weight = 0
-profile_size = 0
-sample = 0
-
-[colaborative]
-users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
-neighbors = range(10,1010,50)
@@ -1,171 +0,0 @@
-#!/usr/bin/env python
-"""
-    recommender suite - recommender experiments suite 
-"""
-__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
-__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
-__license__ = """
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-"""
-
-import expsuite
-import sys
-sys.path.insert(0,'../')
-from config import Config
-from data import PopconXapianIndex, PopconSubmission
-from recommender import Recommender
-from user import LocalSystem, User
-from evaluation import *
-import logging
-import random
-import Gnuplot
-
-class ClusteringSuite(expsuite.PyExperimentSuite):
-    def reset(self, params, rep):
-        self.cfg = Config()
-        self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
-        self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
-        self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
-
-        if params['name'] == "clustering":
-            logging.info("Starting 'clustering' experiments suite...")
-            self.cfg.index_mode = "recluster"
-
-    def iterate(self, params, rep, n):
-        if params['name'] == "clustering":
-            logging.info("Running iteration %d" % params['medoids'][n])
-            self.cfg.k_medoids = params['medoids'][n]
-            pxi = PopconXapianIndex(self.cfg)
-            result = {'k_medoids': params['medoids'][n],
-                   'dispersion': pxi.cluster_dispersion}
-        else:
-            result = {}
-        return result
-
-class ContentBasedSuite(expsuite.PyExperimentSuite):
-    def reset(self, params, rep):
-        if params['name'].startswith("content"):
-            cfg = Config()
-            #if the index was not built yet
-            #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
-            cfg.axi = "data/AppAxi"
-            cfg.index_mode = "old"
-            cfg.weight = params['weight']
-            self.rec = Recommender(cfg)
-            self.rec.set_strategy(params['strategy'])
-            self.repo_size = self.rec.items_repository.get_doccount()
-            self.user = LocalSystem()
-            self.user.app_pkg_profile(self.rec.items_repository)
-            self.user.no_auto_pkg_profile()
-            self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
-            # iteration should be set to 10 in config file
-            #self.profile_size = range(10,101,10)
-
-    def iterate(self, params, rep, n):
-        if params['name'].startswith("content"):
-            item_score = dict.fromkeys(self.user.pkg_profile,1)
-            # Prepare partition
-            sample = {}
-            for i in range(self.sample_size):
-                 key = random.choice(item_score.keys())
-                 sample[key] = item_score.pop(key)
-            # Get full recommendation
-            user = User(item_score)
-            recommendation = self.rec.get_recommendation(user,self.repo_size)
-            # Write recall log
-            recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
-                          (params['strategy'],params['weight'],params['sample'],n)
-            output = open(recall_file,'w')
-            output.write("# weight=%s\n" % params['weight'])
-            output.write("# strategy=%s\n" % params['strategy'])
-            output.write("# sample=%f\n" % params['sample'])
-            output.write("\n%d %d %d\n" % \
-                         (self.repo_size,len(item_score),self.sample_size))
-            notfound = []
-            ranks = []
-            for pkg in sample.keys():
-                if pkg in recommendation.ranking:
-                    ranks.append(recommendation.ranking.index(pkg))
-                else:
-                    notfound.append(pkg)
-            for r in sorted(ranks):
-                output.write(str(r)+"\n")
-            if notfound:
-                output.write("Out of recommendation:\n")
-                for pkg in notfound:
-                    output.write(pkg+"\n")
-            output.close()
-            # Plot metrics summary
-            accuracy = []
-            precision = []
-            recall = []
-            f1 = []
-            g = Gnuplot.Gnuplot()
-            g('set style data lines')
-            g.xlabel('Recommendation size')
-            for size in range(1,len(recommendation.ranking)+1,100):
-                predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
-                real = RecommendationResult(sample)
-                evaluation = Evaluation(predicted,real,self.repo_size)
-                accuracy.append([size,evaluation.run(Accuracy())])
-                precision.append([size,evaluation.run(Precision())])
-                recall.append([size,evaluation.run(Recall())])
-                f1.append([size,evaluation.run(F1())])
-            g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
-                   Gnuplot.Data(precision,title="Precision"),
-                   Gnuplot.Data(recall,title="Recall"),
-                   Gnuplot.Data(f1,title="F1"))
-            g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
-            # Iteration log
-            result = {'iteration': n,
-                      'weight': params['weight'],
-                      'strategy': params['strategy'],
-                      'accuracy': accuracy[20],
-                      'precision': precision[20],
-                      'recall:': recall[20],
-                      'f1': f1[20]}
-            return result
-
-#class CollaborativeSuite(expsuite.PyExperimentSuite):
-#    def reset(self, params, rep):
-#        if params['name'].startswith("collaborative"):
-#
-#    def iterate(self, params, rep, n):
-#        if params['name'].startswith("collaborative"):
-#            for root, dirs, files in os.walk(self.source_dir):
-#                for popcon_file in files:
-#                    submission = PopconSubmission(os.path.join(root,popcon_file))
-#                    user = User(submission.packages)
-#                    user.maximal_pkg_profile()
-#                    rec.get_recommendation(user)
-#                    precision = 0
-#                    result = {'weight': params['weight'],
-#                              'strategy': params['strategy'],
-#                              'profile_size': self.profile_size[n],
-#                              'accuracy': accuracy,
-#                              'precision': precision,
-#                              'recall:': recall,
-#                              'f1': }
-#        else:
-#            result = {}
-#        return result
-
-if __name__ == '__main__':
-
-    if "clustering" in sys.argv or len(sys.argv)<3:
-        ClusteringSuite().start()
-    if "content" in sys.argv or len(sys.argv)<3:
-        ContentBasedSuite().start()
-    #if "collaborative" in sys.argv or len(sys.argv)<3:
-    #CollaborativeSuite().start()
@@ -1,171 +0,0 @@
-#!/usr/bin/env python
-"""
-    recommender suite - recommender experiments suite 
-"""
-__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
-__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
-__license__ = """
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-"""
-
-import expsuite
-import sys
-sys.path.insert(0,'../')
-from config import Config
-from data import PopconXapianIndex, PopconSubmission
-from recommender import Recommender
-from user import LocalSystem, User
-from evaluation import *
-import logging
-import random
-import Gnuplot
-
-class ClusteringSuite(expsuite.PyExperimentSuite):
-    def reset(self, params, rep):
-        self.cfg = Config()
-        self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
-        self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
-        self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
-
-        if params['name'] == "clustering":
-            logging.info("Starting 'clustering' experiments suite...")
-            self.cfg.index_mode = "recluster"
-
-    def iterate(self, params, rep, n):
-        if params['name'] == "clustering":
-            logging.info("Running iteration %d" % params['medoids'][n])
-            self.cfg.k_medoids = params['medoids'][n]
-            pxi = PopconXapianIndex(self.cfg)
-            result = {'k_medoids': params['medoids'][n],
-                   'dispersion': pxi.cluster_dispersion}
-        else:
-            result = {}
-        return result
-
-class ContentBasedSuite(expsuite.PyExperimentSuite):
-    def reset(self, params, rep):
-        if params['name'].startswith("content"):
-            cfg = Config()
-            #if the index was not built yet
-            #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
-            cfg.axi = "data/AppAxi"
-            cfg.index_mode = "old"
-            cfg.weight = params['weight']
-            self.rec = Recommender(cfg)
-            self.rec.set_strategy(params['strategy'])
-            self.repo_size = self.rec.items_repository.get_doccount()
-            self.user = LocalSystem()
-            self.user.app_pkg_profile(self.rec.items_repository)
-            self.user.no_auto_pkg_profile()
-            self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
-            # iteration should be set to 10 in config file
-            #self.profile_size = range(10,101,10)
-
-    def iterate(self, params, rep, n):
-        if params['name'].startswith("content"):
-            item_score = dict.fromkeys(self.user.pkg_profile,1)
-            # Prepare partition
-            sample = {}
-            for i in range(self.sample_size):
-                 key = random.choice(item_score.keys())
-                 sample[key] = item_score.pop(key)
-            # Get full recommendation
-            user = User(item_score)
-            recommendation = self.rec.get_recommendation(user,self.repo_size)
-            # Write recall log
-            recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
-                          (params['strategy'],params['weight'],params['sample'],n)
-            output = open(recall_file,'w')
-            output.write("# weight=%s\n" % params['weight'])
-            output.write("# strategy=%s\n" % params['strategy'])
-            output.write("# sample=%f\n" % params['sample'])
-            output.write("\n%d %d %d\n" % \
-                         (self.repo_size,len(item_score),self.sample_size))
-            notfound = []
-            ranks = []
-            for pkg in sample.keys():
-                if pkg in recommendation.ranking:
-                    ranks.append(recommendation.ranking.index(pkg))
-                else:
-                    notfound.append(pkg)
-            for r in sorted(ranks):
-                output.write(str(r)+"\n")
-            if notfound:
-                output.write("Out of recommendation:\n")
-                for pkg in notfound:
-                    output.write(pkg+"\n")
-            output.close()
-            # Plot metrics summary
-            accuracy = []
-            precision = []
-            recall = []
-            f1 = []
-            g = Gnuplot.Gnuplot()
-            g('set style data lines')
-            g.xlabel('Recommendation size')
-            for size in range(1,len(recommendation.ranking)+1,100):
-                predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
-                real = RecommendationResult(sample)
-                evaluation = Evaluation(predicted,real,self.repo_size)
-                accuracy.append([size,evaluation.run(Accuracy())])
-                precision.append([size,evaluation.run(Precision())])
-                recall.append([size,evaluation.run(Recall())])
-                f1.append([size,evaluation.run(F1())])
-            g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
-                   Gnuplot.Data(precision,title="Precision"),
-                   Gnuplot.Data(recall,title="Recall"),
-                   Gnuplot.Data(f1,title="F1"))
-            g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
-            # Iteration log
-            result = {'iteration': n,
-                      'weight': params['weight'],
-                      'strategy': params['strategy'],
-                      'accuracy': accuracy[20],
-                      'precision': precision[20],
-                      'recall:': recall[20],
-                      'f1': f1[20]}
-            return result
-
-#class CollaborativeSuite(expsuite.PyExperimentSuite):
-#    def reset(self, params, rep):
-#        if params['name'].startswith("collaborative"):
-#
-#    def iterate(self, params, rep, n):
-#        if params['name'].startswith("collaborative"):
-#            for root, dirs, files in os.walk(self.source_dir):
-#                for popcon_file in files:
-#                    submission = PopconSubmission(os.path.join(root,popcon_file))
-#                    user = User(submission.packages)
-#                    user.maximal_pkg_profile()
-#                    rec.get_recommendation(user)
-#                    precision = 0
-#                    result = {'weight': params['weight'],
-#                              'strategy': params['strategy'],
-#                              'profile_size': self.profile_size[n],
-#                              'accuracy': accuracy,
-#                              'precision': precision,
-#                              'recall:': recall,
-#                              'f1': }
-#        else:
-#            result = {}
-#        return result
-
-if __name__ == '__main__':
-
-    if "clustering" in sys.argv or len(sys.argv)<3:
-        ClusteringSuite().start()
-    if "content" in sys.argv or len(sys.argv)<3:
-        ContentBasedSuite().start()
-    #if "collaborative" in sys.argv or len(sys.argv)<3:
-    #CollaborativeSuite().start()
...	...	@@ -1,27 +0,0 @@
1		-[DEFAULT]
2		-repetitions = 1
3		-iterations = 10
4		-path = 'results'
5		-experiment = 'grid'
6		-weight = ['bm25', 'trad']
7		-;profile_size = range(10,100,10)
8		-;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
9		-sample = [0.6, 0.7, 0.8, 0.9]
10		-
11		-[content]
12		-strategy = ['cb','cbt','cbd']
13		-
14		-[clustering]
15		-experiment = 'single'
16		-;iterations = 4
17		-;medoids = range(2,6)
18		-iterations = 6
19		-medoids = [100,500,1000,5000,10000,50000]
20		-;disabled for this experiment
21		-weight = 0
22		-profile_size = 0
23		-sample = 0
24		-
25		-[colaborative]
26		-users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
27		-neighbors = range(10,1010,50)
...	...	@@ -1,51 +0,0 @@
1		-#!/usr/bin/env python
2		-"""
3		- recommender suite - recommender experiments suite
4		-"""
5		-__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
6		-__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
7		-__license__ = """
8		- This program is free software: you can redistribute it and/or modify
9		- it under the terms of the GNU General Public License as published by
10		- the Free Software Foundation, either version 3 of the License, or
11		- (at your option) any later version.
12		-
13		- This program is distributed in the hope that it will be useful,
14		- but WITHOUT ANY WARRANTY; without even the implied warranty of
15		- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16		- GNU General Public License for more details.
17		-
18		- You should have received a copy of the GNU General Public License
19		- along with this program. If not, see <http://www.gnu.org/licenses/>.
20		-"""
21		-
22		-import sys
23		-import os
24		-sys.path.insert(0,'../')
25		-from config import Config
26		-from data import PopconXapianIndex, PopconSubmission
27		-from recommender import Recommender
28		-from user import LocalSystem, User
29		-from evaluation import *
30		-import logging
31		-import random
32		-import Gnuplot
33		-
34		-if __name__ == '__main__':
35		-
36		- cfg = Config()
37		- cfg.index_mode = "recluster"
38		- logging.info("Starting clustering experiments")
39		- logging.info("Medoids: %d\t Max popcon:%d" % (cfg.k_medoids,cfg.max_popcon))
40		- cfg.popcon_dir = os.path.expanduser("~/org/popcon.debian.org/popcon-mail/popcon-entries/")
41		- cfg.popcon_index = cfg.popcon_index+("_%dmedoids%dmax" %
42		- (cfg.k_medoids,cfg.max_popcon))
43		- cfg.clusters_dir = cfg.clusters_dir+("_%dmedoids%dmax" %
44		- (cfg.k_medoids,cfg.max_popcon))
45		- pxi = PopconXapianIndex(cfg)
46		- logging.info("Overall dispersion: %f\n" % pxi.cluster_dispersion)
47		- # Write clustering log
48		- output = open(("results/clustering/%dmedoids%dmax" % (cfg.k_medoids,cfg.max_popcon)),'w')
49		- output.write("# k_medoids\tmax_popcon\tdispersion\n")
50		- output.write("%d %f\n" % (cfg.k_medoids,cfg.max_popcon,pxi.cluster_dispersion))
51		- output.close()
...	...	@@ -1,171 +0,0 @@
1		-#!/usr/bin/env python
2		-"""
3		- recommender suite - recommender experiments suite
4		-"""
5		-__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
6		-__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
7		-__license__ = """
8		- This program is free software: you can redistribute it and/or modify
9		- it under the terms of the GNU General Public License as published by
10		- the Free Software Foundation, either version 3 of the License, or
11		- (at your option) any later version.
12		-
13		- This program is distributed in the hope that it will be useful,
14		- but WITHOUT ANY WARRANTY; without even the implied warranty of
15		- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16		- GNU General Public License for more details.
17		-
18		- You should have received a copy of the GNU General Public License
19		- along with this program. If not, see <http://www.gnu.org/licenses/>.
20		-"""
21		-
22		-import expsuite
23		-import sys
24		-sys.path.insert(0,'../')
25		-from config import Config
26		-from data import PopconXapianIndex, PopconSubmission
27		-from recommender import Recommender
28		-from user import LocalSystem, User
29		-from evaluation import *
30		-import logging
31		-import random
32		-import Gnuplot
33		-
34		-class ClusteringSuite(expsuite.PyExperimentSuite):
35		- def reset(self, params, rep):
36		- self.cfg = Config()
37		- self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
38		- self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
39		- self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
40		-
41		- if params['name'] == "clustering":
42		- logging.info("Starting 'clustering' experiments suite...")
43		- self.cfg.index_mode = "recluster"
44		-
45		- def iterate(self, params, rep, n):
46		- if params['name'] == "clustering":
47		- logging.info("Running iteration %d" % params['medoids'][n])
48		- self.cfg.k_medoids = params['medoids'][n]
49		- pxi = PopconXapianIndex(self.cfg)
50		- result = {'k_medoids': params['medoids'][n],
51		- 'dispersion': pxi.cluster_dispersion}
52		- else:
53		- result = {}
54		- return result
55		-
56		-class ContentBasedSuite(expsuite.PyExperimentSuite):
57		- def reset(self, params, rep):
58		- if params['name'].startswith("content"):
59		- cfg = Config()
60		- #if the index was not built yet
61		- #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
62		- cfg.axi = "data/AppAxi"
63		- cfg.index_mode = "old"
64		- cfg.weight = params['weight']
65		- self.rec = Recommender(cfg)
66		- self.rec.set_strategy(params['strategy'])
67		- self.repo_size = self.rec.items_repository.get_doccount()
68		- self.user = LocalSystem()
69		- self.user.app_pkg_profile(self.rec.items_repository)
70		- self.user.no_auto_pkg_profile()
71		- self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
72		- # iteration should be set to 10 in config file
73		- #self.profile_size = range(10,101,10)
74		-
75		- def iterate(self, params, rep, n):
76		- if params['name'].startswith("content"):
77		- item_score = dict.fromkeys(self.user.pkg_profile,1)
78		- # Prepare partition
79		- sample = {}
80		- for i in range(self.sample_size):
81		- key = random.choice(item_score.keys())
82		- sample[key] = item_score.pop(key)
83		- # Get full recommendation
84		- user = User(item_score)
85		- recommendation = self.rec.get_recommendation(user,self.repo_size)
86		- # Write recall log
87		- recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
88		- (params['strategy'],params['weight'],params['sample'],n)
89		- output = open(recall_file,'w')
90		- output.write("# weight=%s\n" % params['weight'])
91		- output.write("# strategy=%s\n" % params['strategy'])
92		- output.write("# sample=%f\n" % params['sample'])
93		- output.write("\n%d %d %d\n" % \
94		- (self.repo_size,len(item_score),self.sample_size))
95		- notfound = []
96		- ranks = []
97		- for pkg in sample.keys():
98		- if pkg in recommendation.ranking:
99		- ranks.append(recommendation.ranking.index(pkg))
100		- else:
101		- notfound.append(pkg)
102		- for r in sorted(ranks):
103		- output.write(str(r)+"\n")
104		- if notfound:
105		- output.write("Out of recommendation:\n")
106		- for pkg in notfound:
107		- output.write(pkg+"\n")
108		- output.close()
109		- # Plot metrics summary
110		- accuracy = []
111		- precision = []
112		- recall = []
113		- f1 = []
114		- g = Gnuplot.Gnuplot()
115		- g('set style data lines')
116		- g.xlabel('Recommendation size')
117		- for size in range(1,len(recommendation.ranking)+1,100):
118		- predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
119		- real = RecommendationResult(sample)
120		- evaluation = Evaluation(predicted,real,self.repo_size)
121		- accuracy.append([size,evaluation.run(Accuracy())])
122		- precision.append([size,evaluation.run(Precision())])
123		- recall.append([size,evaluation.run(Recall())])
124		- f1.append([size,evaluation.run(F1())])
125		- g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
126		- Gnuplot.Data(precision,title="Precision"),
127		- Gnuplot.Data(recall,title="Recall"),
128		- Gnuplot.Data(f1,title="F1"))
129		- g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
130		- # Iteration log
131		- result = {'iteration': n,
132		- 'weight': params['weight'],
133		- 'strategy': params['strategy'],
134		- 'accuracy': accuracy[20],
135		- 'precision': precision[20],
136		- 'recall:': recall[20],
137		- 'f1': f1[20]}
138		- return result
139		-
140		-#class CollaborativeSuite(expsuite.PyExperimentSuite):
141		-# def reset(self, params, rep):
142		-# if params['name'].startswith("collaborative"):
143		-#
144		-# def iterate(self, params, rep, n):
145		-# if params['name'].startswith("collaborative"):
146		-# for root, dirs, files in os.walk(self.source_dir):
147		-# for popcon_file in files:
148		-# submission = PopconSubmission(os.path.join(root,popcon_file))
149		-# user = User(submission.packages)
150		-# user.maximal_pkg_profile()
151		-# rec.get_recommendation(user)
152		-# precision = 0
153		-# result = {'weight': params['weight'],
154		-# 'strategy': params['strategy'],
155		-# 'profile_size': self.profile_size[n],
156		-# 'accuracy': accuracy,
157		-# 'precision': precision,
158		-# 'recall:': recall,
159		-# 'f1': }
160		-# else:
161		-# result = {}
162		-# return result
163		-
164		-if __name__ == '__main__':
165		-
166		- if "clustering" in sys.argv or len(sys.argv)<3:
167		- ClusteringSuite().start()
168		- if "content" in sys.argv or len(sys.argv)<3:
169		- ContentBasedSuite().start()
170		- #if "collaborative" in sys.argv or len(sys.argv)<3:
171		- #CollaborativeSuite().start()