Added experiments files

Tássia Camões Araújo
1 parent ae5b1cdb
Showing 3 changed files with 201 additions and 0 deletions Show diff stats
src/experiments/README
src/experiments/experiments.cfg
src/experiments/runner.py
@@ -0,0 +1,2 @@
+Experiments handled by expsuite:
+https://github.com/rueckstiess/expsuite
@@ -0,0 +1,26 @@
+[DEFAULT]
+repetitions = 1
+iterations = 10
+path = 'results'
+experiment = 'grid'
+weight = ['bm25', 'trad']
+;profile_size = range(10,100,10)
+sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+
+[content]
+strategy = ['cb','cbt','cbd']
+
+[clustering]
+experiment = 'single'
+;iterations = 4
+;medoids = range(2,6)
+iterations = 6
+medoids = [100,500,1000,5000,10000,50000]
+;disabled for this experiment
+weight = 0
+profile_size = 0
+sample = 0
+
+[collaborative]
+users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
+neighbors = range(10,1010,50)
@@ -0,0 +1,173 @@
+#!/usr/bin/env python
+"""
+    recommender suite - recommender experiments suite 
+"""
+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
+__license__ = """
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import expsuite
+import sys
+sys.path.insert(0,'../')
+from config import Config
+from data import PopconXapianIndex, PopconSubmission
+from recommender import Recommender
+from user import LocalSystem, User
+from evaluation import *
+import logging
+import random
+import Gnuplot
+
+class ClusteringSuite(expsuite.PyExperimentSuite):
+    def reset(self, params, rep):
+        self.cfg = Config()
+        self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
+        self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
+        self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
+
+        if params['name'] == "clustering":
+            logging.info("Starting 'clustering' experiments suite...")
+            self.cfg.index_mode = "recluster"
+
+    def iterate(self, params, rep, n):
+        if params['name'] == "clustering":
+            logging.info("Running iteration %d" % params['medoids'][n])
+            self.cfg.k_medoids = params['medoids'][n]
+            pxi = PopconXapianIndex(self.cfg)
+            result = {'k_medoids': params['medoids'][n],
+                   'dispersion': pxi.cluster_dispersion}
+        else:
+            result = {}
+        return result
+
+class ContentBasedSuite(expsuite.PyExperimentSuite):
+    def reset(self, params, rep):
+        if params['name'].startswith("content"):
+            cfg = Config()
+            #if the index was not built yet
+            #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
+            cfg.axi = "data/AppAxi"
+            cfg.index_mode = "old"
+            cfg.weight = params['weight']
+            self.rec = Recommender(cfg)
+            self.rec.set_strategy(params['strategy'])
+            self.repo_size = self.rec.items_repository.get_doccount()
+            self.user = LocalSystem()
+            self.user.app_pkg_profile(self.rec.items_repository)
+            self.user.no_auto_pkg_profile()
+            self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
+            # iteration should be set to 10 in config file
+            #self.profile_size = range(10,101,10)
+
+    def iterate(self, params, rep, n):
+        if params['name'].startswith("content"):
+            # Get full recommendation
+            item_score = dict.fromkeys(self.user.pkg_profile,1)
+            sample = {}
+            for i in range(self.sample_size):
+                 item, score = item_score.popitem()
+                 sample[item] = score
+            user = User(item_score)
+            recommendation = self.rec.get_recommendation(user,self.repo_size)
+            # Write recall log
+            recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
+                          (params['strategy'],params['weight'],params['sample'],n)
+            output = open(recall_file,'w')
+            output.write("# weight=%s\n" % params['weight'])
+            output.write("# strategy=%s\n" % params['strategy'])
+            output.write("# sample=%f\n" % params['sample'])
+            output.write("\n%d %d %d\n" % \
+                         (self.repo_size,len(item_score),self.sample_size))
+            notfound = []
+            ranks = []
+            for pkg in sample.keys():
+                if pkg in recommendation.ranking:
+                    ranks.append(recommendation.ranking.index(pkg))
+                else:
+                    notfound.append(pkg)
+            for r in sorted(ranks):
+                output.write(str(r)+"\n")
+            if notfound:
+                output.write("Out of recommendation:\n")
+                for pkg in notfound:
+                    output.write(pkg+"\n")
+            output.close()
+            # Plot metrics summary
+            g = Gnuplot.Gnuplot()
+            g('set style data lines')
+            g.xlabel('Recommendation size')
+            accuracy = []
+            precision = []
+            recall = []
+            f1 = []
+            for size in range(1,len(recommendation.ranking)+1,100):
+                predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
+                real = RecommendationResult(sample)
+                evaluation = Evaluation(predicted,real,self.repo_size)
+                accuracy.append([size,evaluation.run(Accuracy())])
+                precision.append([size,evaluation.run(Precision())])
+                recall.append([size,evaluation.run(Recall())])
+                f1.append([size,evaluation.run(F1())])
+            #print "accuracy", len(accuracy)
+            #print "precision", len(precision)
+            #print "recall", len(recall)
+            #print "f1", len(f1)
+            g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
+                   Gnuplot.Data(precision,title="Precision"),
+                   Gnuplot.Data(recall,title="Recall"),
+                   Gnuplot.Data(f1,title="F1"))
+            g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
+            result = {}
+            result = {'weight': params['weight'],
+                      'strategy': params['strategy'],
+                      'accuracy': accuracy[20],
+                      'precision': precision[20],
+                      'recall:': recall[20],
+                      'f1': f1[20]}
+            return result
+
+#class CollaborativeSuite(expsuite.PyExperimentSuite):
+#    def reset(self, params, rep):
+#        if params['name'].startswith("collaborative"):
+#
+#    def iterate(self, params, rep, n):
+#        if params['name'].startswith("collaborative"):
+#            for root, dirs, files in os.walk(self.source_dir):
+#                for popcon_file in files:
+#                    submission = PopconSubmission(os.path.join(root,popcon_file))
+#                    user = User(submission.packages)
+#                    user.maximal_pkg_profile()
+#                    rec.get_recommendation(user)
+#                    precision = 0
+#                    result = {'weight': params['weight'],
+#                              'strategy': params['strategy'],
+#                              'profile_size': self.profile_size[n],
+#                              'accuracy': accuracy,
+#                              'precision': precision,
+#                              'recall:': recall,
+#                              'f1': }
+#        else:
+#            result = {}
+#        return result
+
+if __name__ == '__main__':
+
+    if "clustering" in sys.argv or len(sys.argv)<3:
+        ClusteringSuite().start()
+    if "content" in sys.argv or len(sys.argv)<3:
+        ContentBasedSuite().start()
+    #if "collaborative" in sys.argv or len(sys.argv)<3:
+    #CollaborativeSuite().start()
...	...	@@ -0,0 +1,2 @@
	1	+Experiments handled by expsuite:
	2	+https://github.com/rueckstiess/expsuite
...	...
...	...	@@ -0,0 +1,26 @@
	1	+[DEFAULT]
	2	+repetitions = 1
	3	+iterations = 10
	4	+path = 'results'
	5	+experiment = 'grid'
	6	+weight = ['bm25', 'trad']
	7	+;profile_size = range(10,100,10)
	8	+sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
	9	+
	10	+[content]
	11	+strategy = ['cb','cbt','cbd']
	12	+
	13	+[clustering]
	14	+experiment = 'single'
	15	+;iterations = 4
	16	+;medoids = range(2,6)
	17	+iterations = 6
	18	+medoids = [100,500,1000,5000,10000,50000]
	19	+;disabled for this experiment
	20	+weight = 0
	21	+profile_size = 0
	22	+sample = 0
	23	+
	24	+[colaborative]
	25	+users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
	26	+neighbors = range(10,1010,50)
...	...
...	...	@@ -0,0 +1,173 @@
	1	+#!/usr/bin/env python
	2	+"""
	3	+ recommender suite - recommender experiments suite
	4	+"""
	5	+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
	6	+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
	7	+__license__ = """
	8	+ This program is free software: you can redistribute it and/or modify
	9	+ it under the terms of the GNU General Public License as published by
	10	+ the Free Software Foundation, either version 3 of the License, or
	11	+ (at your option) any later version.
	12	+
	13	+ This program is distributed in the hope that it will be useful,
	14	+ but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	+ GNU General Public License for more details.
	17	+
	18	+ You should have received a copy of the GNU General Public License
	19	+ along with this program. If not, see <http://www.gnu.org/licenses/>.
	20	+"""
	21	+
	22	+import expsuite
	23	+import sys
	24	+sys.path.insert(0,'../')
	25	+from config import Config
	26	+from data import PopconXapianIndex, PopconSubmission
	27	+from recommender import Recommender
	28	+from user import LocalSystem, User
	29	+from evaluation import *
	30	+import logging
	31	+import random
	32	+import Gnuplot
	33	+
	34	+class ClusteringSuite(expsuite.PyExperimentSuite):
	35	+ def reset(self, params, rep):
	36	+ self.cfg = Config()
	37	+ self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
	38	+ self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
	39	+ self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
	40	+
	41	+ if params['name'] == "clustering":
	42	+ logging.info("Starting 'clustering' experiments suite...")
	43	+ self.cfg.index_mode = "recluster"
	44	+
	45	+ def iterate(self, params, rep, n):
	46	+ if params['name'] == "clustering":
	47	+ logging.info("Running iteration %d" % params['medoids'][n])
	48	+ self.cfg.k_medoids = params['medoids'][n]
	49	+ pxi = PopconXapianIndex(self.cfg)
	50	+ result = {'k_medoids': params['medoids'][n],
	51	+ 'dispersion': pxi.cluster_dispersion}
	52	+ else:
	53	+ result = {}
	54	+ return result
	55	+
	56	+class ContentBasedSuite(expsuite.PyExperimentSuite):
	57	+ def reset(self, params, rep):
	58	+ if params['name'].startswith("content"):
	59	+ cfg = Config()
	60	+ #if the index was not built yet
	61	+ #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
	62	+ cfg.axi = "data/AppAxi"
	63	+ cfg.index_mode = "old"
	64	+ cfg.weight = params['weight']
	65	+ self.rec = Recommender(cfg)
	66	+ self.rec.set_strategy(params['strategy'])
	67	+ self.repo_size = self.rec.items_repository.get_doccount()
	68	+ self.user = LocalSystem()
	69	+ self.user.app_pkg_profile(self.rec.items_repository)
	70	+ self.user.no_auto_pkg_profile()
	71	+ self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
	72	+ # iteration should be set to 10 in config file
	73	+ #self.profile_size = range(10,101,10)
	74	+
	75	+ def iterate(self, params, rep, n):
	76	+ if params['name'].startswith("content"):
	77	+ # Get full recommendation
	78	+ item_score = dict.fromkeys(self.user.pkg_profile,1)
	79	+ sample = {}
	80	+ for i in range(self.sample_size):
	81	+ item, score = item_score.popitem()
	82	+ sample[item] = score
	83	+ user = User(item_score)
	84	+ recommendation = self.rec.get_recommendation(user,self.repo_size)
	85	+ # Write recall log
	86	+ recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
	87	+ (params['strategy'],params['weight'],params['sample'],n)
	88	+ output = open(recall_file,'w')
	89	+ output.write("# weight=%s\n" % params['weight'])
	90	+ output.write("# strategy=%s\n" % params['strategy'])
	91	+ output.write("# sample=%f\n" % params['sample'])
	92	+ output.write("\n%d %d %d\n" % \
	93	+ (self.repo_size,len(item_score),self.sample_size))
	94	+ notfound = []
	95	+ ranks = []
	96	+ for pkg in sample.keys():
	97	+ if pkg in recommendation.ranking:
	98	+ ranks.append(recommendation.ranking.index(pkg))
	99	+ else:
	100	+ notfound.append(pkg)
	101	+ for r in sorted(ranks):
	102	+ output.write(str(r)+"\n")
	103	+ if notfound:
	104	+ output.write("Out of recommendation:\n")
	105	+ for pkg in notfound:
	106	+ output.write(pkg+"\n")
	107	+ output.close()
	108	+ # Plot metrics summary
	109	+ g = Gnuplot.Gnuplot()
	110	+ g('set style data lines')
	111	+ g.xlabel('Recommendation size')
	112	+ accuracy = []
	113	+ precision = []
	114	+ recall = []
	115	+ f1 = []
	116	+ for size in range(1,len(recommendation.ranking)+1,100):
	117	+ predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
	118	+ real = RecommendationResult(sample)
	119	+ evaluation = Evaluation(predicted,real,self.repo_size)
	120	+ accuracy.append([size,evaluation.run(Accuracy())])
	121	+ precision.append([size,evaluation.run(Precision())])
	122	+ recall.append([size,evaluation.run(Recall())])
	123	+ f1.append([size,evaluation.run(F1())])
	124	+ #print "accuracy", len(accuracy)
	125	+ #print "precision", len(precision)
	126	+ #print "recall", len(recall)
	127	+ #print "f1", len(f1)
	128	+ g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
	129	+ Gnuplot.Data(precision,title="Precision"),
	130	+ Gnuplot.Data(recall,title="Recall"),
	131	+ Gnuplot.Data(f1,title="F1"))
	132	+ g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
	133	+ result = {}
	134	+ result = {'weight': params['weight'],
	135	+ 'strategy': params['strategy'],
	136	+ 'accuracy': accuracy[20],
	137	+ 'precision': precision[20],
	138	+ 'recall:': recall[20],
	139	+ 'f1': f1[20]}
	140	+ return result
	141	+
	142	+#class CollaborativeSuite(expsuite.PyExperimentSuite):
	143	+# def reset(self, params, rep):
	144	+# if params['name'].startswith("collaborative"):
	145	+#
	146	+# def iterate(self, params, rep, n):
	147	+# if params['name'].startswith("collaborative"):
	148	+# for root, dirs, files in os.walk(self.source_dir):
	149	+# for popcon_file in files:
	150	+# submission = PopconSubmission(os.path.join(root,popcon_file))
	151	+# user = User(submission.packages)
	152	+# user.maximal_pkg_profile()
	153	+# rec.get_recommendation(user)
	154	+# precision = 0
	155	+# result = {'weight': params['weight'],
	156	+# 'strategy': params['strategy'],
	157	+# 'profile_size': self.profile_size[n],
	158	+# 'accuracy': accuracy,
	159	+# 'precision': precision,
	160	+# 'recall:': recall,
	161	+# 'f1': }
	162	+# else:
	163	+# result = {}
	164	+# return result
	165	+
	166	+if __name__ == '__main__':
	167	+
	168	+ if "clustering" in sys.argv or len(sys.argv)<3:
	169	+ ClusteringSuite().start()
	170	+ if "content" in sys.argv or len(sys.argv)<3:
	171	+ ContentBasedSuite().start()
	172	+ #if "collaborative" in sys.argv or len(sys.argv)<3:
	173	+ #CollaborativeSuite().start()
...	...