diff --git a/src/experiments/deprecated/clustering-suite.py b/src/experiments/deprecated/clustering-suite.py new file mode 100755 index 0000000..1bf3b90 --- /dev/null +++ b/src/experiments/deprecated/clustering-suite.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +""" + recommender suite - recommender experiments suite +""" +__author__ = "Tassia Camoes Araujo " +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" +__license__ = """ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
def clustering_log_row(k_medoids, max_popcon, dispersion):
    """Format one data row of the clustering log.

    The row carries the three columns announced by the log header
    (k_medoids, max_popcon, dispersion), separated by whitespace.

    The original format string had only two conversion specifiers for
    these three values, which made the write fail with a TypeError.
    """
    return "%d %d %f\n" % (k_medoids, max_popcon, dispersion)


if __name__ == '__main__':
    # Rebuild the popcon index in "recluster" mode and record the overall
    # cluster dispersion for the configured k_medoids / max_popcon pair.
    cfg = Config()
    cfg.index_mode = "recluster"
    logging.info("Starting clustering experiments")
    logging.info("Medoids: %d\t Max popcon:%d" % (cfg.k_medoids, cfg.max_popcon))
    cfg.popcon_dir = os.path.expanduser("~/org/popcon.debian.org/popcon-mail/popcon-entries/")

    # Index and cluster directories are suffixed with the experiment
    # parameters so that runs with different settings do not collide.
    suffix = "_%dmedoids%dmax" % (cfg.k_medoids, cfg.max_popcon)
    cfg.popcon_index = cfg.popcon_index + suffix
    cfg.clusters_dir = cfg.clusters_dir + suffix

    pxi = PopconXapianIndex(cfg)
    logging.info("Overall dispersion: %f\n" % pxi.cluster_dispersion)

    # Write clustering log; the context manager closes the file even if
    # a write fails.
    log_path = "results/clustering/%dmedoids%dmax" % (cfg.k_medoids, cfg.max_popcon)
    with open(log_path, 'w') as output:
        output.write("# k_medoids\tmax_popcon\tdispersion\n")
        output.write(clustering_log_row(cfg.k_medoids, cfg.max_popcon,
                                        pxi.cluster_dispersion))
+users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"] +neighbors = range(10,1010,50) diff --git a/src/experiments/deprecated/runner.py b/src/experiments/deprecated/runner.py new file mode 100755 index 0000000..bc230c9 --- /dev/null +++ b/src/experiments/deprecated/runner.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python +""" + recommender suite - recommender experiments suite +""" +__author__ = "Tassia Camoes Araujo " +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" +__license__ = """ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
# Step between the recommendation sizes at which metrics are evaluated.
_SIZE_STEP = 100
# Position, within each metric series, of the point reported in the
# iteration log (with a step of 100 this is recommendation size 2001).
_SUMMARY_INDEX = 20


class ClusteringSuite(expsuite.PyExperimentSuite):
    """Experiment suite that reclusters the popcon index for several
    medoid counts and reports the resulting cluster dispersion."""

    def reset(self, params, rep):
        """Build a fresh configuration pointing at the test data set.

        For the "clustering" experiment the index is additionally put in
        "recluster" mode.
        """
        self.cfg = Config()
        self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
        self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
        self.cfg.clusters_dir = "../tests/test_data/clusters_dir"

        if params['name'] == "clustering":
            logging.info("Starting 'clustering' experiments suite...")
            self.cfg.index_mode = "recluster"

    def iterate(self, params, rep, n):
        """Recluster with the n-th configured medoid count.

        Returns a dict with the medoid count and the measured dispersion,
        or an empty dict for any experiment other than "clustering".
        """
        if params['name'] != "clustering":
            return {}
        k_medoids = params['medoids'][n]
        logging.info("Running iteration %d" % k_medoids)
        self.cfg.k_medoids = k_medoids
        pxi = PopconXapianIndex(self.cfg)
        return {'k_medoids': k_medoids,
                'dispersion': pxi.cluster_dispersion}


class ContentBasedSuite(expsuite.PyExperimentSuite):
    """Experiment suite that evaluates content-based recommendation
    strategies against a held-out sample of the local package profile."""

    def reset(self, params, rep):
        """Prepare recommender, user profile and sample size.

        Does nothing unless the experiment name starts with "content".
        """
        if not params['name'].startswith("content"):
            return
        cfg = Config()
        #if the index was not built yet
        #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
        cfg.axi = "data/AppAxi"
        cfg.index_mode = "old"
        cfg.weight = params['weight']
        self.rec = Recommender(cfg)
        self.rec.set_strategy(params['strategy'])
        self.repo_size = self.rec.items_repository.get_doccount()
        self.user = LocalSystem()
        self.user.app_pkg_profile(self.rec.items_repository)
        self.user.no_auto_pkg_profile()
        # Number of profile packages hidden from the recommender.
        self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
        # iteration should be set to 10 in config file
        #self.profile_size = range(10,101,10)

    def iterate(self, params, rep, n):
        """Run one hold-out evaluation round.

        A random sample of the user profile is removed, a full
        recommendation is computed from the remaining packages, and the
        positions of the held-out packages in the ranking are logged and
        plotted.

        Returns the iteration summary dict, or an empty dict for
        experiments whose name does not start with "content" (mirroring
        ClusteringSuite.iterate; previously no result was defined in
        that case).
        """
        if not params['name'].startswith("content"):
            return {}

        item_score = dict.fromkeys(self.user.pkg_profile, 1)
        # Prepare partition
        sample = self._hold_out_sample(item_score)
        # Get full recommendation
        user = User(item_score)
        recommendation = self.rec.get_recommendation(user, self.repo_size)
        ranking = recommendation.ranking

        # Write recall log
        recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
            (params['strategy'], params['weight'], params['sample'], n)
        self._write_recall_log(recall_file, params, ranking, sample,
                               len(item_score))

        # Plot metrics summary
        accuracy, precision, recall, f1 = self._evaluate(ranking, sample)
        self._plot_metrics(recall_file+"-plot.ps",
                           accuracy, precision, recall, f1)

        # Iteration log. The 'recall' key used to be spelled 'recall:';
        # the stray colon looks like a typo and is presumably unsafe in a
        # "key:value" style results log - confirm against expsuite.
        return {'iteration': n,
                'weight': params['weight'],
                'strategy': params['strategy'],
                'accuracy': self._summary_point(accuracy),
                'precision': self._summary_point(precision),
                'recall': self._summary_point(recall),
                'f1': self._summary_point(f1)}

    def _hold_out_sample(self, item_score):
        """Move self.sample_size randomly chosen items out of item_score.

        item_score is modified in place; the removed items are returned
        as a new dict.
        """
        sample = {}
        # list() keeps this working where dict views are not sequences.
        for key in random.sample(list(item_score), self.sample_size):
            sample[key] = item_score.pop(key)
        return sample

    def _write_recall_log(self, recall_file, params, ranking, sample,
                          profile_size):
        """Write the rank of every held-out package to recall_file.

        Packages missing from the ranking are listed at the end of the
        file under "Out of recommendation:".
        """
        ranks = []
        notfound = []
        for pkg in sample.keys():
            if pkg in ranking:
                ranks.append(ranking.index(pkg))
            else:
                notfound.append(pkg)
        # The context manager closes the log even if a write fails.
        with open(recall_file, 'w') as output:
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" %
                         (self.repo_size, profile_size, self.sample_size))
            for r in sorted(ranks):
                output.write(str(r)+"\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg+"\n")

    def _evaluate(self, ranking, sample):
        """Compute [size, value] series for accuracy, precision, recall
        and F1 over growing prefixes of the ranking."""
        accuracy = []
        precision = []
        recall = []
        f1 = []
        for size in range(1, len(ranking)+1, _SIZE_STEP):
            predicted = RecommendationResult(dict.fromkeys(ranking[:size], 1))
            real = RecommendationResult(sample)
            evaluation = Evaluation(predicted, real, self.repo_size)
            accuracy.append([size, evaluation.run(Accuracy())])
            precision.append([size, evaluation.run(Precision())])
            recall.append([size, evaluation.run(Recall())])
            f1.append([size, evaluation.run(F1())])
        return accuracy, precision, recall, f1

    def _plot_metrics(self, plot_file, accuracy, precision, recall, f1):
        """Plot the four metric series and save them to plot_file."""
        g = Gnuplot.Gnuplot()
        g('set style data lines')
        g.xlabel('Recommendation size')
        g.plot(Gnuplot.Data(accuracy, title="Accuracy"),
               Gnuplot.Data(precision, title="Precision"),
               Gnuplot.Data(recall, title="Recall"),
               Gnuplot.Data(f1, title="F1"))
        g.hardcopy(plot_file, enhanced=1, color=1)

    def _summary_point(self, series):
        """Return the [size, value] entry reported in the iteration log.

        Normally this is entry _SUMMARY_INDEX. When the ranking was too
        short to produce that many points, the last available entry is
        used instead of raising IndexError; None is returned for an
        empty series.
        """
        if not series:
            return None
        return series[min(_SUMMARY_INDEX, len(series)-1)]

#class CollaborativeSuite(expsuite.PyExperimentSuite):
#    def reset(self, params, rep):
#        if params['name'].startswith("collaborative"):
#
#    def iterate(self, params, rep, n):
#        if params['name'].startswith("collaborative"):
#            for root, dirs, files in os.walk(self.source_dir):
#                for popcon_file in files:
#                    submission = PopconSubmission(os.path.join(root,popcon_file))
#                    user = User(submission.packages)
#                    user.maximal_pkg_profile()
#                    rec.get_recommendation(user)
#                    precision = 0
#            result = {'weight': params['weight'],
#                      'strategy': params['strategy'],
#                      'profile_size': self.profile_size[n],
#                      'accuracy': accuracy,
#                      'precision': precision,
#                      'recall:': recall,
#                      'f1': }
#        else:
#            result = {}
#        return result

if __name__ == '__main__':

    # With fewer than two command-line arguments every suite is started;
    # otherwise only the suites named on the command line.
    if "clustering" in sys.argv or len(sys.argv)<3:
        ClusteringSuite().start()
    if "content" in sys.argv or len(sys.argv)<3:
        ContentBasedSuite().start()
    #if "collaborative" in sys.argv or len(sys.argv)<3:
        #CollaborativeSuite().start()