Commit af4795bd544bd7f7bc4a67a8bd2204ea593a65cd

Authored by Tássia Camões Araújo
1 parent 60e25836
Exists in master and in 1 other branch add_vagrant

Renaming deprecated code dir.

src/experiments/deprecated/clustering-suite.py 0 → 100755
... ... @@ -0,0 +1,51 @@
  1 +#!/usr/bin/env python
  2 +"""
  3 + recommender suite - recommender experiments suite
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import sys
  23 +import os
  24 +sys.path.insert(0,'../')
  25 +from config import Config
  26 +from data import PopconXapianIndex, PopconSubmission
  27 +from recommender import Recommender
  28 +from user import LocalSystem, User
  29 +from evaluation import *
  30 +import logging
  31 +import random
  32 +import Gnuplot
  33 +
def format_dispersion_row(k_medoids, max_popcon, dispersion):
    """Return one data row of the clustering log, newline-terminated.

    The row has three tab-separated columns, in the order announced by the
    header line written just before it: k_medoids, max_popcon, dispersion.
    """
    return "%d\t%d\t%f\n" % (k_medoids, max_popcon, dispersion)


if __name__ == '__main__':

    cfg = Config()
    cfg.index_mode = "recluster"
    logging.info("Starting clustering experiments")
    logging.info("Medoids: %d\t Max popcon:%d" % (cfg.k_medoids, cfg.max_popcon))
    cfg.popcon_dir = os.path.expanduser(
        "~/org/popcon.debian.org/popcon-mail/popcon-entries/")
    # Index and cluster locations are made specific to this parameter pair
    # by appending the same suffix to both configured paths.
    suffix = "_%dmedoids%dmax" % (cfg.k_medoids, cfg.max_popcon)
    cfg.popcon_index = cfg.popcon_index + suffix
    cfg.clusters_dir = cfg.clusters_dir + suffix
    # NOTE(review): building the index with index_mode "recluster" is
    # presumably what performs the clustering -- confirm in data.py.
    pxi = PopconXapianIndex(cfg)
    logging.info("Overall dispersion: %f\n" % pxi.cluster_dispersion)
    # Write clustering log
    log_path = "results/clustering/%dmedoids%dmax" % (cfg.k_medoids,
                                                      cfg.max_popcon)
    # The context manager closes the file even if a write fails.
    with open(log_path, 'w') as output:
        output.write("# k_medoids\tmax_popcon\tdispersion\n")
        # The previous format string had two placeholders for three values,
        # which raised TypeError before any data row was written.
        output.write(format_dispersion_row(cfg.k_medoids, cfg.max_popcon,
                                           pxi.cluster_dispersion))
... ...
src/experiments/deprecated/experiments.cfg 0 → 100644
... ... @@ -0,0 +1,27 @@
  1 +[DEFAULT]
  2 +repetitions = 1
  3 +iterations = 10
  4 +path = 'results'
  5 +experiment = 'grid'
  6 +weight = ['bm25', 'trad']
  7 +;profile_size = range(10,100,10)
  8 +;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
  9 +sample = [0.6, 0.7, 0.8, 0.9]
  10 +
  11 +[content]
  12 +strategy = ['cb','cbt','cbd']
  13 +
  14 +[clustering]
  15 +experiment = 'single'
  16 +;iterations = 4
  17 +;medoids = range(2,6)
  18 +iterations = 6
  19 +medoids = [100,500,1000,5000,10000,50000]
  20 +; weight, profile_size and sample are disabled (set to 0) for the clustering experiment
  21 +weight = 0
  22 +profile_size = 0
  23 +sample = 0
  24 +
  25 +[colaborative]
  26 +users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
  27 +neighbors = range(10,1010,50)
... ...
src/experiments/deprecated/runner.py 0 → 100755
... ... @@ -0,0 +1,171 @@
  1 +#!/usr/bin/env python
  2 +"""
  3 + recommender suite - recommender experiments suite
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import expsuite
  23 +import sys
  24 +sys.path.insert(0,'../')
  25 +from config import Config
  26 +from data import PopconXapianIndex, PopconSubmission
  27 +from recommender import Recommender
  28 +from user import LocalSystem, User
  29 +from evaluation import *
  30 +import logging
  31 +import random
  32 +import Gnuplot
  33 +
class ClusteringSuite(expsuite.PyExperimentSuite):
    """Experiment suite that records the cluster dispersion per medoid count.

    Only the experiment named "clustering" produces results; for any other
    experiment name iterate() returns an empty dict.
    """

    def reset(self, params, rep):
        """Create a fresh configuration pointing at the sample test data.

        params -- experiment parameters; only params['name'] is read here
        rep    -- repetition number (unused)
        """
        settings = Config()
        settings.popcon_index = "../tests/test_data/.sample_pxi"
        settings.popcon_dir = "../tests/test_data/popcon_dir"
        settings.clusters_dir = "../tests/test_data/clusters_dir"
        self.cfg = settings

        if params['name'] != "clustering":
            return
        logging.info("Starting 'clustering' experiments suite...")
        settings.index_mode = "recluster"

    def iterate(self, params, rep, n):
        """Build the index with the n-th medoid count and report dispersion.

        params -- experiment parameters; reads 'name' and the 'medoids' list
        rep    -- repetition number (unused)
        n      -- iteration number, used as an index into params['medoids']

        Returns a dict with 'k_medoids' and 'dispersion', or an empty dict
        when the experiment is not "clustering".
        """
        if params['name'] != "clustering":
            return {}

        k_medoids = params['medoids'][n]
        logging.info("Running iteration %d" % k_medoids)
        self.cfg.k_medoids = k_medoids
        index = PopconXapianIndex(self.cfg)
        return {'k_medoids': k_medoids,
                'dispersion': index.cluster_dispersion}
  55 +
class ContentBasedSuite(expsuite.PyExperimentSuite):
    """Experiment suite evaluating content-based recommendation strategies.

    Part of the local system's package profile is held out as a sample, a
    recommendation is computed from the rest, and the suite then logs where
    the held-out packages appear in the ranking and plots accuracy,
    precision, recall and F1 against the recommendation size.

    Experiments whose name does not start with "content" are ignored.
    """

    def reset(self, params, rep):
        """Prepare the recommender and the local user profile.

        params -- experiment parameters; reads 'name', 'weight', 'strategy'
                  and 'sample' (fraction of the profile to hold out)
        rep    -- repetition number (unused)
        """
        if params['name'].startswith("content"):
            cfg = Config()
            #if the index was not built yet
            #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
            cfg.axi = "data/AppAxi"
            cfg.index_mode = "old"
            cfg.weight = params['weight']
            self.rec = Recommender(cfg)
            self.rec.set_strategy(params['strategy'])
            self.repo_size = self.rec.items_repository.get_doccount()
            self.user = LocalSystem()
            self.user.app_pkg_profile(self.rec.items_repository)
            self.user.no_auto_pkg_profile()
            self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
            # iteration should be set to 10 in config file
            #self.profile_size = range(10,101,10)

    @staticmethod
    def _summary_point(series, index):
        """Return series[index], clamped to the last element.

        Returns None for an empty series, so a short or empty ranking does
        not abort the iteration with an IndexError.
        """
        if not series:
            return None
        return series[min(index, len(series) - 1)]

    def iterate(self, params, rep, n):
        """Run one hold-out evaluation and write its recall log and plot.

        params -- experiment parameters; reads 'name', 'weight', 'strategy'
                  and 'sample'
        rep    -- repetition number (unused)
        n      -- iteration number, used in the output file name and result

        Returns a dict with the iteration number, weight, strategy and one
        [size, value] pair per metric, or an empty dict when the experiment
        name does not start with "content".
        """
        if not params['name'].startswith("content"):
            # Same fallback as ClusteringSuite.iterate: experiments meant
            # for other suites yield an empty result instead of failing.
            return {}

        item_score = dict.fromkeys(self.user.pkg_profile, 1)
        # Prepare partition: move a random subset of the profile into the
        # held-out sample. The size is clamped because duplicates in the
        # profile collapse when it is turned into a dict.
        held_out = min(self.sample_size, len(item_score))
        sample = {}
        for key in random.sample(list(item_score), held_out):
            sample[key] = item_score.pop(key)

        # Get full recommendation
        user = User(item_score)
        recommendation = self.rec.get_recommendation(user, self.repo_size)
        ranking = recommendation.ranking
        # First position of every recommended package, built once so each
        # held-out package is a dict lookup rather than a list scan.
        position = {}
        for rank, pkg in enumerate(ranking):
            position.setdefault(pkg, rank)

        ranks = []
        notfound = []
        for pkg in sample:
            if pkg in position:
                ranks.append(position[pkg])
            else:
                notfound.append(pkg)

        # Write recall log
        recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
            (params['strategy'], params['weight'], params['sample'], n)
        # The context manager closes the file even if a write fails.
        with open(recall_file, 'w') as output:
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" %
                         (self.repo_size, len(item_score), self.sample_size))
            for r in sorted(ranks):
                output.write(str(r)+"\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg+"\n")

        # Plot metrics summary
        accuracy = []
        precision = []
        recall = []
        f1 = []
        g = Gnuplot.Gnuplot()
        g('set style data lines')
        g.xlabel('Recommendation size')
        # Metrics are sampled every 100 ranking positions: 1, 101, 201, ...
        for size in range(1, len(ranking)+1, 100):
            predicted = RecommendationResult(dict.fromkeys(ranking[:size], 1))
            real = RecommendationResult(sample)
            evaluation = Evaluation(predicted, real, self.repo_size)
            accuracy.append([size, evaluation.run(Accuracy())])
            precision.append([size, evaluation.run(Precision())])
            recall.append([size, evaluation.run(Recall())])
            f1.append([size, evaluation.run(F1())])
        g.plot(Gnuplot.Data(accuracy, title="Accuracy"),
               Gnuplot.Data(precision, title="Precision"),
               Gnuplot.Data(recall, title="Recall"),
               Gnuplot.Data(f1, title="F1"))
        g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)

        # Iteration log: report the 21st sampled point (recommendation size
        # 2001), or the last available point for shorter rankings.
        summary_index = 20
        # The recall key used to be 'recall:' (stray colon), unlike the
        # other metric keys.
        result = {'iteration': n,
                  'weight': params['weight'],
                  'strategy': params['strategy'],
                  'accuracy': self._summary_point(accuracy, summary_index),
                  'precision': self._summary_point(precision, summary_index),
                  'recall': self._summary_point(recall, summary_index),
                  'f1': self._summary_point(f1, summary_index)}
        return result
  139 +
  140 +#class CollaborativeSuite(expsuite.PyExperimentSuite):
  141 +# def reset(self, params, rep):
  142 +# if params['name'].startswith("collaborative"):
  143 +#
  144 +# def iterate(self, params, rep, n):
  145 +# if params['name'].startswith("collaborative"):
  146 +# for root, dirs, files in os.walk(self.source_dir):
  147 +# for popcon_file in files:
  148 +# submission = PopconSubmission(os.path.join(root,popcon_file))
  149 +# user = User(submission.packages)
  150 +# user.maximal_pkg_profile()
  151 +# rec.get_recommendation(user)
  152 +# precision = 0
  153 +# result = {'weight': params['weight'],
  154 +# 'strategy': params['strategy'],
  155 +# 'profile_size': self.profile_size[n],
  156 +# 'accuracy': accuracy,
  157 +# 'precision': precision,
  158 +# 'recall:': recall,
  159 +# 'f1': }
  160 +# else:
  161 +# result = {}
  162 +# return result
  163 +
if __name__ == '__main__':

    # With fewer than three command-line tokens every suite is started;
    # otherwise only the suites whose name appears among the arguments.
    run_everything = len(sys.argv) < 3

    if run_everything or "clustering" in sys.argv:
        ClusteringSuite().start()
    if run_everything or "content" in sys.argv:
        ContentBasedSuite().start()
    #if "collaborative" in sys.argv or len(sys.argv)<3:
        #CollaborativeSuite().start()
... ...