Commit bfe34df2087d4cb2d9dbfd72c8f7d5d119d4d3d0

Authored by Tássia Camões Araújo
1 parent 5a8e4f02
Exists in master and in 1 other branch add_vagrant

Experiments suite without expsuite.

Showing 1 changed file with 150 additions and 0 deletions   Show diff stats
src/experiments/strategies-suite.py 0 → 100755
... ... @@ -0,0 +1,150 @@
  1 +#!/usr/bin/env python
  2 +"""
  3 + recommender suite - recommender experiments suite
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import sys
  23 +sys.path.insert(0,'../')
  24 +from config import Config
  25 +from data import PopconXapianIndex, PopconSubmission, AppAptXapianIndex
  26 +from recommender import Recommender
  27 +from user import LocalSystem, User
  28 +from evaluation import *
  29 +import logging
  30 +import random
  31 +import Gnuplot
  32 +
def run_iteration(label,cfg,sample_proportion,n):
    """
    Run one hold-out evaluation round and record its results.

    A random fraction of the reference user's package profile is held out,
    a recommendation covering the whole items repository is computed from
    the remaining packages, and the rank of every held-out package in that
    recommendation is written to "results/strategies/<label values>".
    Accuracy, precision, recall and F1 are then computed for growing
    recommendation sizes, plotted, and saved as "<log file>-plot.ps".

    label             -- dict with "description" and "values" strings;
                         "values" doubles as the log file name
    cfg               -- configuration object handed to Recommender
    sample_proportion -- fraction of the profile that is held out
    n                 -- repetition index chosen by the caller; it is not
                         used here (the caller encodes it in the label)
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    # NOTE(review): PopconSystem is not among the names this file imports
    # explicitly; presumably it arrives via "from evaluation import *" --
    # confirm.
    popcon_user = PopconSystem("/root/popularity-contest-tassia")
    print("profile %s" % (popcon_user.pkg_profile,))
    popcon_user.maximal_pkg_profile()
    sample_size = int(len(popcon_user.pkg_profile)*sample_proportion)

    # Exactly one partition per call: the caller drives the repetitions and
    # gives each one its own label, so looping here would only re-sample
    # from an already reduced profile and overwrite the same log file.
    item_score = dict.fromkeys(popcon_user.pkg_profile,1)
    candidates = list(item_score)
    sample = {}
    for key in random.sample(candidates,min(sample_size,len(candidates))):
        sample[key] = item_score.pop(key)

    # Get full recommendation from the packages that were not held out
    reduced_user = User(item_score)
    recommendation = rec.get_recommendation(reduced_user,repo_size)
    ranking = recommendation.ranking

    # Map each recommended package to the position of its first occurrence
    # (what list.index would return) so every lookup below is O(1).
    position = {}
    for rank, pkg in enumerate(ranking):
        position.setdefault(pkg,rank)
    ranks = sorted(position[pkg] for pkg in sample if pkg in position)
    notfound = [pkg for pkg in sample if pkg not in position]

    # Write recall log
    log_file = "results/strategies/"+label["values"]
    with open(log_file,'w') as output:
        output.write("# %s\n" % label["description"])
        output.write("# %s\n" % label["values"])
        for r in ranks:
            output.write(str(r)+"\n")
        if notfound:
            output.write("Out of recommendation:\n")
            for pkg in notfound:
                output.write(pkg+"\n")

    # Plot metrics summary, evaluating the top-`size` prefix of the ranking
    # in steps of 100 items
    accuracy = []
    precision = []
    recall = []
    f1 = []
    g = Gnuplot.Gnuplot()
    g('set style data lines')
    g.xlabel('Recommendation size')
    for size in range(1,len(ranking)+1,100):
        predicted = RecommendationResult(dict.fromkeys(ranking[:size],1))
        real = RecommendationResult(sample)
        evaluation = Evaluation(predicted,real,repo_size)
        accuracy.append([size,evaluation.run(Accuracy())])
        precision.append([size,evaluation.run(Precision())])
        recall.append([size,evaluation.run(Recall())])
        f1.append([size,evaluation.run(F1())])

    g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
           Gnuplot.Data(precision,title="Precision"),
           Gnuplot.Data(recall,title="Recall"),
           Gnuplot.Data(f1,title="F1"))
    g.hardcopy(log_file+"-plot.ps", enhanced=1, color=1)
  91 +
  92 +
def popcon_label(index_path):
    """
    Return the short tag used in result labels for a popcon index path.

    The tag is chosen by the first marker found in index_path. Markers are
    tested from most to least specific because "1000" is a substring of
    "10000"; testing them independently would tag the 10000-user index as
    "pop1000" and make its result files collide with the 1000-user run.

    Raises ValueError when no known marker occurs in index_path.
    """
    markers = (("full","popfull"),
               ("50000","pop50000"),
               ("10000","pop10000"),
               ("1000","pop1000"))
    for marker, tag in markers:
        if marker in index_path:
            return tag
    raise ValueError("unrecognized popcon index path: %s" % index_path)


if __name__ == '__main__':
    # Experiment grid
    iteration = 10
    samples_proportion = [0.5, 0.6, 0.7, 0.8, 0.9]
    weights = ['bm25', 'trad']
    cb_strategies = ['cb','cbt','cbd']
    #cb_strategies = []
    profile_size = range(10,100,10)
    items_repository = ["data/AppAxi","/var/lib/apt-xapian-index/index"]
    users_repository = ["data/popcon_index_full","data/popcon_index-50000",
                        "data/popcon_index_10000","data/popcon_index_1000"]
    # The list above is overridden with an empty one, so the collaborative
    # loop below currently performs no runs.
    users_repository = []
    neighbors = range(10,1010,100)

    cfg = Config()
    cfg.index_mode = "old"
    label = {}

    # With no command-line argument both experiment families run; otherwise
    # only the ones named. "colaborative" is the original spelling and is
    # still accepted next to the correct one.
    run_all = len(sys.argv)<2
    run_content = run_all or "content" in sys.argv
    run_collaborative = (run_all or "colaborative" in sys.argv
                         or "collaborative" in sys.argv)

    for w in weights:
        cfg.weight = w
        for items_repo in items_repository:
            cfg.axi = items_repo
            if "App" in cfg.axi:
                axi_str = "axiapp"
            else:
                axi_str = "axifull"
            for sample_proportion in samples_proportion:
                if run_content:
                    for size in profile_size:
                        cfg.profile_size = size
                        for strategy in cb_strategies:
                            cfg.strategy = strategy
                            for n in range(iteration):
                                label["description"] = "weight-axi-profile-strategy-sample-n"
                                label["values"] = ("%s-%s-%d-%s-%.2f-%d" %
                                                   (cfg.weight,axi_str,cfg.profile_size,
                                                    cfg.strategy,sample_proportion,n))
                                run_iteration(label,cfg,sample_proportion,n)
                if run_collaborative:
                    cfg.strategy = "col"
                    for users_repo in users_repository:
                        cfg.popcon_index = users_repo
                        popcon_str = popcon_label(cfg.popcon_index)
                        for k in neighbors:
                            cfg.k_neighbors = k
                            k_str = "k"+str(cfg.k_neighbors)
                            for n in range(iteration):
                                # cfg.profile_size is not set in this branch;
                                # the label records whatever value cfg holds.
                                label["description"] = "weight-axi-popcon-profile-strategy-k-sample-n"
                                label["values"] = ("%s-%s-%s-%d-%s-%s-%.2f-%d" %
                                                   (cfg.weight,axi_str,popcon_str,cfg.profile_size,
                                                    cfg.strategy,k_str,sample_proportion,n))
                                run_iteration(label,cfg,sample_proportion,n)
... ...