Commit 40247ad1e6d51d0b6907f19bc0249dbb6a496816

Authored by Tássia Camões Araújo
1 parent ae5b1cdb
Exists in master and in 1 other branch add_vagrant

Added experiments files

src/experiments/README 0 → 100644
... ... @@ -0,0 +1,2 @@
  1 +Experiments handled by expsuite:
  2 +https://github.com/rueckstiess/expsuite
... ...
src/experiments/experiments.cfg 0 → 100644
... ... @@ -0,0 +1,26 @@
  1 +[DEFAULT]
  2 +repetitions = 1
  3 +iterations = 10
  4 +path = 'results'
  5 +experiment = 'grid'
  6 +weight = ['bm25', 'trad']
  7 +;profile_size = range(10,100,10)
  8 +sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
  9 +
  10 +[content]
  11 +strategy = ['cb','cbt','cbd']
  12 +
  13 +[clustering]
  14 +experiment = 'single'
  15 +;iterations = 4
  16 +;medoids = range(2,6)
  17 +iterations = 6
  18 +medoids = [100,500,1000,5000,10000,50000]
  19 +;disabled for this experiment
  20 +weight = 0
  21 +profile_size = 0
  22 +sample = 0
  23 +
  24 +[collaborative]
  25 +users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
  26 +neighbors = range(10,1010,50)
... ...
src/experiments/runner.py 0 → 100755
... ... @@ -0,0 +1,173 @@
  1 +#!/usr/bin/env python
  2 +"""
  3 + recommender suite - recommender experiments suite
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import expsuite
  23 +import sys
  24 +sys.path.insert(0,'../')
  25 +from config import Config
  26 +from data import PopconXapianIndex, PopconSubmission
  27 +from recommender import Recommender
  28 +from user import LocalSystem, User
  29 +from evaluation import *
  30 +import logging
  31 +import random
  32 +import Gnuplot
  33 +
  34 +class ClusteringSuite(expsuite.PyExperimentSuite):
  35 + def reset(self, params, rep):
  36 + self.cfg = Config()
  37 + self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
  38 + self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
  39 + self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
  40 +
  41 + if params['name'] == "clustering":
  42 + logging.info("Starting 'clustering' experiments suite...")
  43 + self.cfg.index_mode = "recluster"
  44 +
  45 + def iterate(self, params, rep, n):
  46 + if params['name'] == "clustering":
  47 + logging.info("Running iteration %d" % params['medoids'][n])
  48 + self.cfg.k_medoids = params['medoids'][n]
  49 + pxi = PopconXapianIndex(self.cfg)
  50 + result = {'k_medoids': params['medoids'][n],
  51 + 'dispersion': pxi.cluster_dispersion}
  52 + else:
  53 + result = {}
  54 + return result
  55 +
  56 +class ContentBasedSuite(expsuite.PyExperimentSuite):
  57 + def reset(self, params, rep):
  58 + if params['name'].startswith("content"):
  59 + cfg = Config()
  60 + #if the index was not built yet
  61 + #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
  62 + cfg.axi = "data/AppAxi"
  63 + cfg.index_mode = "old"
  64 + cfg.weight = params['weight']
  65 + self.rec = Recommender(cfg)
  66 + self.rec.set_strategy(params['strategy'])
  67 + self.repo_size = self.rec.items_repository.get_doccount()
  68 + self.user = LocalSystem()
  69 + self.user.app_pkg_profile(self.rec.items_repository)
  70 + self.user.no_auto_pkg_profile()
  71 + self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
  72 + # iteration should be set to 10 in config file
  73 + #self.profile_size = range(10,101,10)
  74 +
  75 + def iterate(self, params, rep, n):
  76 + if params['name'].startswith("content"):
  77 + # Get full recommendation
  78 + item_score = dict.fromkeys(self.user.pkg_profile,1)
  79 + sample = {}
  80 + for i in range(self.sample_size):
  81 + item, score = item_score.popitem()
  82 + sample[item] = score
  83 + user = User(item_score)
  84 + recommendation = self.rec.get_recommendation(user,self.repo_size)
  85 + # Write recall log
  86 + recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
  87 + (params['strategy'],params['weight'],params['sample'],n)
  88 + output = open(recall_file,'w')
  89 + output.write("# weight=%s\n" % params['weight'])
  90 + output.write("# strategy=%s\n" % params['strategy'])
  91 + output.write("# sample=%f\n" % params['sample'])
  92 + output.write("\n%d %d %d\n" % \
  93 + (self.repo_size,len(item_score),self.sample_size))
  94 + notfound = []
  95 + ranks = []
  96 + for pkg in sample.keys():
  97 + if pkg in recommendation.ranking:
  98 + ranks.append(recommendation.ranking.index(pkg))
  99 + else:
  100 + notfound.append(pkg)
  101 + for r in sorted(ranks):
  102 + output.write(str(r)+"\n")
  103 + if notfound:
  104 + output.write("Out of recommendation:\n")
  105 + for pkg in notfound:
  106 + output.write(pkg+"\n")
  107 + output.close()
  108 + # Plot metrics summary
  109 + g = Gnuplot.Gnuplot()
  110 + g('set style data lines')
  111 + g.xlabel('Recommendation size')
  112 + accuracy = []
  113 + precision = []
  114 + recall = []
  115 + f1 = []
  116 + for size in range(1,len(recommendation.ranking)+1,100):
  117 + predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
  118 + real = RecommendationResult(sample)
  119 + evaluation = Evaluation(predicted,real,self.repo_size)
  120 + accuracy.append([size,evaluation.run(Accuracy())])
  121 + precision.append([size,evaluation.run(Precision())])
  122 + recall.append([size,evaluation.run(Recall())])
  123 + f1.append([size,evaluation.run(F1())])
  124 + #print "accuracy", len(accuracy)
  125 + #print "precision", len(precision)
  126 + #print "recall", len(recall)
  127 + #print "f1", len(f1)
  128 + g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
  129 + Gnuplot.Data(precision,title="Precision"),
  130 + Gnuplot.Data(recall,title="Recall"),
  131 + Gnuplot.Data(f1,title="F1"))
  132 + g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
  133 + result = {}
  134 + result = {'weight': params['weight'],
  135 + 'strategy': params['strategy'],
  136 + 'accuracy': accuracy[20],
  137 + 'precision': precision[20],
  138 + 'recall:': recall[20],
  139 + 'f1': f1[20]}
  140 + return result
  141 +
  142 +#class CollaborativeSuite(expsuite.PyExperimentSuite):
  143 +# def reset(self, params, rep):
  144 +# if params['name'].startswith("collaborative"):
  145 +#
  146 +# def iterate(self, params, rep, n):
  147 +# if params['name'].startswith("collaborative"):
  148 +# for root, dirs, files in os.walk(self.source_dir):
  149 +# for popcon_file in files:
  150 +# submission = PopconSubmission(os.path.join(root,popcon_file))
  151 +# user = User(submission.packages)
  152 +# user.maximal_pkg_profile()
  153 +# rec.get_recommendation(user)
  154 +# precision = 0
  155 +# result = {'weight': params['weight'],
  156 +# 'strategy': params['strategy'],
  157 +# 'profile_size': self.profile_size[n],
  158 +# 'accuracy': accuracy,
  159 +# 'precision': precision,
  160 +# 'recall:': recall,
  161 +# 'f1': }
  162 +# else:
  163 +# result = {}
  164 +# return result
  165 +
  166 +if __name__ == '__main__':
  167 +
  168 + if "clustering" in sys.argv or len(sys.argv)<3:
  169 + ClusteringSuite().start()
  170 + if "content" in sys.argv or len(sys.argv)<3:
  171 + ContentBasedSuite().start()
  172 + #if "collaborative" in sys.argv or len(sys.argv)<3:
  173 + #CollaborativeSuite().start()
... ...