Experiments suite without expsuite.

Tássia Camões Araújo
1 parent 5a8e4f02
Showing 1 changed file with 150 additions and 0 deletions Show diff stats
src/experiments/strategies-suite.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+"""
+    recommender suite - recommender experiments suite 
+"""
+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
+__license__ = """
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import sys
+sys.path.insert(0,'../')
+from config import Config
+from data import PopconXapianIndex, PopconSubmission, AppAptXapianIndex
+from recommender import Recommender
+from user import LocalSystem, User
+from evaluation import *
+import logging
+import random
+import Gnuplot
+
def run_iteration(label,cfg,sample_proportion,n):
    """
    Run one evaluation round for a single configuration.

    A Recommender is built from `cfg`, the package profile of the
    PopconSystem user is split at random into a held-out sample and a
    remainder, and rec.get_recommendation() is called with a User made of
    the remainder and the document count of the items repository.

    Two files are written, both named after label["values"] inside
    results/strategies/ (the directory is not created here):
      - the log: two '#' header lines, the sorted positions (starting at
        0) at which held-out packages appear in the ranking, and, under
        "Out of recommendation:", the held-out packages missing from it;
      - "<log>-plot.ps": accuracy, precision, recall and F1 evaluated on
        the first 1, 101, 201, ... entries of the ranking.

    label             -- dict with the keys "description" and "values"
    cfg               -- configuration object handed to Recommender
    sample_proportion -- fraction of the profile that is held out
    n                 -- number of the round; not used in here, the caller
                         is expected to encode it in label["values"] so
                         that rounds do not overwrite each other
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    # NOTE(review): PopconSystem is not among the names this file imports
    # explicitly; presumably it arrives through `from evaluation import *`
    # -- confirm.
    system_user = PopconSystem("/root/popularity-contest-tassia")
    print("profile %s" % (system_user.pkg_profile,))
    system_user.maximal_pkg_profile()
    sample_size = int(len(system_user.pkg_profile)*sample_proportion)

    # Prepare partition: every package starts in item_score and the ones
    # drawn at random are moved to the held-out sample.
    item_score = dict.fromkeys(system_user.pkg_profile,1)
    sample = {}
    for key in random.sample(list(item_score),sample_size):
        sample[key] = item_score.pop(key)

    # Get full recommendation. The User is passed inline on purpose: the
    # profile read above must not be replaced by the reduced one.
    recommendation = rec.get_recommendation(User(item_score),repo_size)
    ranking = recommendation.ranking

    # Map each package to its first position in the ranking once, rather
    # than scanning the ranking again for every held-out package.
    first_rank = {}
    for position, pkg in enumerate(ranking):
        first_rank.setdefault(pkg,position)
    ranks = sorted(first_rank[pkg] for pkg in sample if pkg in first_rank)
    notfound = [pkg for pkg in sample if pkg not in first_rank]

    # Write recall log
    log_file = "results/strategies/"+label["values"]
    with open(log_file,'w') as output:
        output.write("# %s\n" % label["description"])
        output.write("# %s\n" % label["values"])
        for r in ranks:
            output.write(str(r)+"\n")
        if notfound:
            output.write("Out of recommendation:\n")
            for pkg in notfound:
                output.write(pkg+"\n")

    # Plot metrics summary
    accuracy = []
    precision = []
    recall = []
    f1 = []
    g = Gnuplot.Gnuplot()
    g('set style data lines')
    g.xlabel('Recommendation size')
    for size in range(1,len(ranking)+1,100):
        predicted = RecommendationResult(dict.fromkeys(ranking[:size],1))
        real = RecommendationResult(sample)
        evaluation = Evaluation(predicted,real,repo_size)
        accuracy.append([size,evaluation.run(Accuracy())])
        precision.append([size,evaluation.run(Precision())])
        recall.append([size,evaluation.run(Recall())])
        f1.append([size,evaluation.run(F1())])

    g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
           Gnuplot.Data(precision,title="Precision"),
           Gnuplot.Data(recall,title="Recall"),
           Gnuplot.Data(f1,title="F1"))
    g.hardcopy(log_file+"-plot.ps", enhanced=1, color=1)
+
+
def popcon_tag(index_path):
    """
    Return the short tag that identifies a popcon index in result names.

    The tag is "pop" followed by the first of "full", "50000", "10000",
    "1000" found in `index_path`. Longer numbers are tested first because
    "10000" contains "1000": testing them independently labels the 10000
    index as "pop1000" and makes both indexes share the same result files.

    Raises ValueError when the path holds none of the known markers.
    """
    for marker in ("full","50000","10000","1000"):
        if marker in index_path:
            return "pop"+marker
    raise ValueError("unrecognized popcon index path: %s" % index_path)


if __name__ == '__main__':
    # Number of rounds run for each configuration
    iteration = 10
    samples_proportion = [0.5, 0.6, 0.7, 0.8, 0.9]
    weights = ['bm25', 'trad']
    cb_strategies = ['cb','cbt','cbd']
    #cb_strategies = []
    profile_size = range(10,100,10)
    items_repository = ["data/AppAxi","/var/lib/apt-xapian-index/index"]
    users_repository = ["data/popcon_index_full","data/popcon_index-50000",
                        "data/popcon_index_10000","data/popcon_index_1000"]
    # The list above is discarded right away, so as it stands no
    # collaborative experiment is run.
    users_repository = []
    neighbors = range(10,1010,100)

    cfg = Config()
    cfg.index_mode = "old"
    label = {}

    # Without command line arguments both families of experiments run.
    # "colaborative" is the spelling accepted so far; the correct one is
    # accepted as well.
    run_all = len(sys.argv)<2
    run_content = run_all or "content" in sys.argv
    run_collaborative = (run_all or "colaborative" in sys.argv
                         or "collaborative" in sys.argv)

    for w in weights:
        cfg.weight = w
        for items_repo in items_repository:
            cfg.axi = items_repo
            if "App" in cfg.axi:
                axi_str = "axiapp"
            else:
                axi_str = "axifull"
            for sample_proportion in samples_proportion:
                if run_content:
                    for size in profile_size:
                        cfg.profile_size = size
                        for strategy in cb_strategies:
                            cfg.strategy = strategy
                            for n in range(iteration):
                                label["description"] = "weight-axi-profile-strategy-sample-n"
                                label["values"] = ("%s-%s-%d-%s-%.2f-%d" %
                                                   (cfg.weight,axi_str,cfg.profile_size,
                                                    cfg.strategy,sample_proportion,n))
                                run_iteration(label,cfg,sample_proportion,n)
                if run_collaborative:
                    cfg.strategy = "col"
                    for users_repo in users_repository:
                        cfg.popcon_index = users_repo
                        popcon_str = popcon_tag(cfg.popcon_index)
                        for k in neighbors:
                            cfg.k_neighbors = k
                            k_str = "k"+str(cfg.k_neighbors)
                            for n in range(iteration):
                                # cfg.profile_size is not set in this
                                # branch: the label shows whatever value
                                # cfg currently holds.
                                label["description"] = "weight-axi-popcon-profile-strategy-k-sample-n"
                                label["values"] = ("%s-%s-%s-%d-%s-%s-%.2f-%d" %
                                                   (cfg.weight,axi_str,popcon_str,cfg.profile_size,
                                                    cfg.strategy,k_str,sample_proportion,n))
                                run_iteration(label,cfg,sample_proportion,n)
...	...	@@ -0,0 +1,150 @@
	1	+#!/usr/bin/env python
	2	+"""
	3	+ recommender suite - recommender experiments suite
	4	+"""
	5	+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
	6	+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
	7	+__license__ = """
	8	+ This program is free software: you can redistribute it and/or modify
	9	+ it under the terms of the GNU General Public License as published by
	10	+ the Free Software Foundation, either version 3 of the License, or
	11	+ (at your option) any later version.
	12	+
	13	+ This program is distributed in the hope that it will be useful,
	14	+ but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	+ GNU General Public License for more details.
	17	+
	18	+ You should have received a copy of the GNU General Public License
	19	+ along with this program. If not, see <http://www.gnu.org/licenses/>.
	20	+"""
	21	+
	22	+import sys
	23	+sys.path.insert(0,'../')
	24	+from config import Config
	25	+from data import PopconXapianIndex, PopconSubmission, AppAptXapianIndex
	26	+from recommender import Recommender
	27	+from user import LocalSystem, User
	28	+from evaluation import *
	29	+import logging
	30	+import random
	31	+import Gnuplot
	32	+
	33	+def run_iteration(label,cfg,sample_proportion,n):
	34	+ rec = Recommender(cfg)
	35	+ repo_size = rec.items_repository.get_doccount()
	36	+ user = PopconSystem("/root/popularity-contest-tassia")
	37	+ print "profile",user.pkg_profile
	38	+ user.maximal_pkg_profile()
	39	+ sample_size = int(len(user.pkg_profile)*sample_proportion)
	40	+ for n in range(iteration):
	41	+ item_score = dict.fromkeys(user.pkg_profile,1)
	42	+ # Prepare partition
	43	+ sample = {}
	44	+ for i in range(sample_size):
	45	+ key = random.choice(item_score.keys())
	46	+ sample[key] = item_score.pop(key)
	47	+ # Get full recommendation
	48	+ user = User(item_score)
	49	+ recommendation = rec.get_recommendation(user,repo_size)
	50	+ # Write recall log
	51	+ log_file = "results/strategies/"+label["values"]
	52	+ output = open(log_file,'w')
	53	+ output.write("# %s\n" % label["description"])
	54	+ output.write("# %s\n" % label["values"])
	55	+ notfound = []
	56	+ ranks = []
	57	+ for pkg in sample.keys():
	58	+ if pkg in recommendation.ranking:
	59	+ ranks.append(recommendation.ranking.index(pkg))
	60	+ else:
	61	+ notfound.append(pkg)
	62	+ for r in sorted(ranks):
	63	+ output.write(str(r)+"\n")
	64	+ if notfound:
	65	+ output.write("Out of recommendation:\n")
	66	+ for pkg in notfound:
	67	+ output.write(pkg+"\n")
	68	+ output.close()
	69	+ # Plot metrics summary
	70	+ accuracy = []
	71	+ precision = []
	72	+ recall = []
	73	+ f1 = []
	74	+ g = Gnuplot.Gnuplot()
	75	+ g('set style data lines')
	76	+ g.xlabel('Recommendation size')
	77	+ for size in range(1,len(recommendation.ranking)+1,100):
	78	+ predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
	79	+ real = RecommendationResult(sample)
	80	+ evaluation = Evaluation(predicted,real,repo_size)
	81	+ accuracy.append([size,evaluation.run(Accuracy())])
	82	+ precision.append([size,evaluation.run(Precision())])
	83	+ recall.append([size,evaluation.run(Recall())])
	84	+ f1.append([size,evaluation.run(F1())])
	85	+
	86	+ g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
	87	+ Gnuplot.Data(precision,title="Precision"),
	88	+ Gnuplot.Data(recall,title="Recall"),
	89	+ Gnuplot.Data(f1,title="F1"))
	90	+ g.hardcopy(log_file+"-plot.ps", enhanced=1, color=1)
	91	+
	92	+
	93	+if __name__ == '__main__':
	94	+ iteration = 10
	95	+ samples_proportion = [0.5, 0.6, 0.7, 0.8, 0.9]
	96	+ weights = ['bm25', 'trad']
	97	+ cb_strategies = ['cb','cbt','cbd']
	98	+ #cb_strategies = []
	99	+ profile_size = range(10,100,10)
	100	+ items_repository = ["data/AppAxi","/var/lib/apt-xapian-index/index"]
	101	+ users_repository = ["data/popcon_index_full","data/popcon_index-50000",
	102	+ "data/popcon_index_10000","data/popcon_index_1000"]
	103	+ users_repository = []
	104	+ neighbors = range(10,1010,100)
	105	+
	106	+ cfg = Config()
	107	+ cfg.index_mode = "old"
	108	+ label = {}
	109	+
	110	+ for w in weights:
	111	+ cfg.weight = w
	112	+ for items_repo in items_repository:
	113	+ cfg.axi = items_repo
	114	+ if "App" in cfg.axi:
	115	+ axi_str = "axiapp"
	116	+ else:
	117	+ axi_str = "axifull"
	118	+ for sample_proportion in samples_proportion:
	119	+ if "content" in sys.argv or len(sys.argv)<2:
	120	+ for size in profile_size:
	121	+ cfg.profile_size = size
	122	+ for strategy in cb_strategies:
	123	+ cfg.strategy = strategy
	124	+ for n in range(iteration):
	125	+ label["description"] = "weight-axi-profile-strategy-sample-n"
	126	+ label["values"] = ("%s-%s-%d-%s-%.2f-%d" %
	127	+ (cfg.weight,axi_str,cfg.profile_size,
	128	+ cfg.strategy,sample_proportion,n))
	129	+ run_iteration(label,cfg,sample_proportion,n)
	130	+ if "colaborative" in sys.argv or len(sys.argv)<2:
	131	+ cfg.strategy = "col"
	132	+ for users_repo in users_repository:
	133	+ cfg.popcon_index = users_repo
	134	+ for k in neighbors:
	135	+ cfg.k_neighbors = k
	136	+ for n in range(iteration):
	137	+ k_str = "k"+str(cfg.k_neighbors)
	138	+ if "full" in cfg.popcon_index:
	139	+ popcon_str = "popfull"
	140	+ if "50000" in cfg.popcon_index:
	141	+ popcon_str = "pop50000"
	142	+ if "10000" in cfg.popcon_index:
	143	+ popcon_str = "pop10000"
	144	+ if "1000" in cfg.popcon_index:
	145	+ popcon_str = "pop1000"
	146	+ label["description"] = "weight-axi-popcon-profile-strategy-k-sample-n"
	147	+ label["values"] = ("%s-%s-%s-%d-%s-%s-%.2f-%d" %
	148	+ (cfg.weight,axi_str,popcon_str,cfg.profile_size,
	149	+ cfg.strategy,k_str,sample_proportion,n))
	150	+ run_iteration(label,cfg,sample_proportion,n)
...	...