#!/usr/bin/env python
"""
    recommender suite - recommender experiments suite
"""
__author__ = "Tassia Camoes Araujo "
__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
__license__ = """
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import logging
import random
import sys

import expsuite

# The project modules below live one directory up; the path must be
# extended before they are imported.
sys.path.insert(0, '../')

from config import Config
from data import PopconXapianIndex, PopconSubmission
from recommender import Recommender
from user import LocalSystem, User
from evaluation import *
import Gnuplot

# Step between consecutive recommendation sizes evaluated for the plot.
PLOT_STEP = 100
# Index of the evaluation point reported in the iteration log. When fewer
# points were computed, the last available one is reported instead.
SUMMARY_POINT = 20


class ClusteringSuite(expsuite.PyExperimentSuite):
    """Experiment suite measuring cluster dispersion per number of medoids."""

    def reset(self, params, rep):
        """Create a fresh Config pointing at the sample test data.

        For experiments named "clustering" the index mode is additionally
        set to "recluster".
        """
        self.cfg = Config()
        self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
        self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
        self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
        if params['name'] == "clustering":
            logging.info("Starting 'clustering' experiments suite...")
            self.cfg.index_mode = "recluster"

    def iterate(self, params, rep, n):
        """Build the popcon index with params['medoids'][n] medoids.

        Returns a dict with the number of medoids used and the resulting
        cluster dispersion, or an empty dict for experiments that are not
        named "clustering".
        """
        if params['name'] != "clustering":
            return {}
        k_medoids = params['medoids'][n]
        logging.info("Running iteration %d", k_medoids)
        self.cfg.k_medoids = k_medoids
        pxi = PopconXapianIndex(self.cfg)
        return {'k_medoids': k_medoids,
                'dispersion': pxi.cluster_dispersion}


class ContentBasedSuite(expsuite.PyExperimentSuite):
    """Experiment suite evaluating content-based recommendation strategies.

    Each iteration hides a random sample of the local user's packages,
    asks the recommender for a full ranking based on the remaining ones
    and records where the hidden packages appear in that ranking.
    """

    def reset(self, params, rep):
        """Set up the recommender and the local user profile.

        Only acts on experiments whose name starts with "content".
        """
        if not params['name'].startswith("content"):
            return
        cfg = Config()
        # If the index was not built yet:
        # app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
        cfg.axi = "data/AppAxi"
        cfg.index_mode = "old"
        cfg.weight = params['weight']
        self.rec = Recommender(cfg)
        self.rec.set_strategy(params['strategy'])
        self.repo_size = self.rec.items_repository.get_doccount()
        self.user = LocalSystem()
        self.user.app_pkg_profile(self.rec.items_repository)
        self.user.no_auto_pkg_profile()
        self.sample_size = int(len(self.user.pkg_profile) * params['sample'])
        # iteration should be set to 10 in config file
        # self.profile_size = range(10,101,10)

    def iterate(self, params, rep, n):
        """Run one hide-and-recommend round and return its summary.

        Writes a recall log (and, when there is data, a plot) under
        "results/content/recall/". Returns a dict with the iteration
        number, the weight and strategy used and one [size, value] point
        for each of accuracy, precision, recall and f1 (None when no
        point could be computed). Returns an empty dict for experiments
        whose name does not start with "content".
        """
        if not params['name'].startswith("content"):
            return {}

        # Prepare partition: `sample` is hidden, `item_score` is what the
        # recommender gets to see.
        item_score = dict.fromkeys(self.user.pkg_profile, 1)
        sample = self._pop_random_sample(item_score, self.sample_size)

        # Get full recommendation
        user = User(item_score)
        recommendation = self.rec.get_recommendation(user, self.repo_size)
        ranking = recommendation.ranking

        recall_file = "results/content/recall/%s-%s-%.2f-%d" % (
            params['strategy'], params['weight'], params['sample'], n)
        self._write_recall_log(recall_file, params, ranking, sample,
                               len(item_score))
        accuracy, precision, recall, f1 = self._plot_metrics(
            recall_file, ranking, sample)

        # Iteration log
        return {'iteration': n,
                'weight': params['weight'],
                'strategy': params['strategy'],
                'accuracy': self._summary_point(accuracy),
                'precision': self._summary_point(precision),
                'recall': self._summary_point(recall),
                'f1': self._summary_point(f1)}

    @staticmethod
    def _pop_random_sample(item_score, sample_size):
        """Move up to sample_size random entries out of item_score."""
        sample = {}
        for _ in range(sample_size):
            if not item_score:
                # Nothing left to draw from (sample_size exceeded the
                # number of distinct packages).
                break
            # list() keeps this working where keys() is a view.
            key = random.choice(list(item_score))
            sample[key] = item_score.pop(key)
        return sample

    def _write_recall_log(self, recall_file, params, ranking, sample,
                          profile_size):
        """Write the rank of every hidden package to recall_file."""
        # First position of each package, equivalent to ranking.index()
        # but computed in a single pass.
        position = {}
        for rank, pkg in enumerate(ranking):
            position.setdefault(pkg, rank)
        ranks = sorted(position[pkg] for pkg in sample if pkg in position)
        notfound = [pkg for pkg in sample if pkg not in position]

        with open(recall_file, 'w') as output:
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" %
                         (self.repo_size, profile_size, len(sample)))
            for rank in ranks:
                output.write(str(rank) + "\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg + "\n")

    def _plot_metrics(self, recall_file, ranking, sample):
        """Evaluate growing prefixes of ranking and plot the metrics.

        Returns four lists (accuracy, precision, recall, f1) of
        [size, value] points, one point every PLOT_STEP ranking entries.
        """
        accuracy = []
        precision = []
        recall = []
        f1 = []
        for size in range(1, len(ranking) + 1, PLOT_STEP):
            predicted = RecommendationResult(
                dict.fromkeys(ranking[:size], 1))
            real = RecommendationResult(sample)
            evaluation = Evaluation(predicted, real, self.repo_size)
            accuracy.append([size, evaluation.run(Accuracy())])
            precision.append([size, evaluation.run(Precision())])
            recall.append([size, evaluation.run(Recall())])
            f1.append([size, evaluation.run(F1())])

        if not accuracy:
            logging.warning("Empty recommendation, no plot written for %s",
                            recall_file)
            return accuracy, precision, recall, f1

        g = Gnuplot.Gnuplot()
        g('set style data lines')
        g.xlabel('Recommendation size')
        g.plot(Gnuplot.Data(accuracy, title="Accuracy"),
               Gnuplot.Data(precision, title="Precision"),
               Gnuplot.Data(recall, title="Recall"),
               Gnuplot.Data(f1, title="F1"))
        g.hardcopy(recall_file + "-plot.ps", enhanced=1, color=1)
        return accuracy, precision, recall, f1

    @staticmethod
    def _summary_point(points):
        """Return the point at SUMMARY_POINT, or the last one if shorter."""
        if not points:
            return None
        return points[min(SUMMARY_POINT, len(points) - 1)]


# NOTE: unfinished draft kept for reference; it is not runnable as written.
#class CollaborativeSuite(expsuite.PyExperimentSuite):
#    def reset(self, params, rep):
#        if params['name'].startswith("collaborative"):
#
#    def iterate(self, params, rep, n):
#        if params['name'].startswith("collaborative"):
#            for root, dirs, files in os.walk(self.source_dir):
#                for popcon_file in files:
#                    submission = PopconSubmission(os.path.join(root,popcon_file))
#                    user = User(submission.packages)
#                    user.maximal_pkg_profile()
#                    rec.get_recommendation(user)
#                    precision = 0
#            result = {'weight': params['weight'],
#                      'strategy': params['strategy'],
#                      'profile_size': self.profile_size[n],
#                      'accuracy': accuracy,
#                      'precision': precision,
#                      'recall:': recall,
#                      'f1': }
#        else:
#            result = {}
#        return result

if __name__ == '__main__':
    # A suite runs when it is named on the command line, or when fewer
    # than three arguments were given.
    if "clustering" in sys.argv or len(sys.argv) < 3:
        ClusteringSuite().start()
    if "content" in sys.argv or len(sys.argv) < 3:
        ContentBasedSuite().start()
    #if "collaborative" in sys.argv or len(sys.argv)<3:
        #CollaborativeSuite().start()