Commit 79e91d8c5e76016236e6af59f00a1fbbb813b390

Authored by Tássia Camões Araújo
1 parent 1aed15a5
Exists in master and in 1 other branch add_vagrant

Experiments refactoring.

src/evaluation.py
... ... @@ -294,6 +294,10 @@ class CrossValidation:
294 294 round_user = User(cross_item_score)
295 295 result_size = int(self.recommender.items_repository.get_doccount()*
296 296 self.result_proportion)
  297 + logging.debug("size %d" % result_size)
  298 + if not result_size:
  299 + logging.critical("Recommendation size is zero.")
  300 + raise Error
297 301 predicted_result = self.recommender.get_recommendation(round_user,result_size)
298 302 if not predicted_result.size:
299 303 logging.critical("No recommendation produced. Abort cross-validation.")
... ...
src/experiments/strategies-suite.py
... ... @@ -30,121 +30,117 @@ import logging
30 30 import random
31 31 import Gnuplot
32 32  
33   -def run_iteration(label,cfg,sample_proportion,n):
def write_recall_log(label,sample,recommendation,log_file):
    """Write the recall log for one experiment run to log_file.

    The file starts with two '#' header lines taken from
    label["description"] and label["values"]. It then lists, in ascending
    order and one per line, the position in recommendation.ranking of every
    sample package that was recommended. Sample packages missing from the
    ranking are listed after an "Out of recommendation:" line.

    label          -- dict with the keys "description" and "values"
    sample         -- dict whose keys are the held-out package names
    recommendation -- object exposing a 'ranking' sequence of package names
    log_file       -- path of the file to (over)write
    """
    # Map each package to its first position in the ranking once, instead of
    # scanning the ranking twice ('in' plus 'index') for every sample package.
    position = {}
    for rank, pkg in enumerate(recommendation.ranking):
        position.setdefault(pkg, rank)

    ranks = []
    notfound = []
    for pkg in sample:
        if pkg in position:
            ranks.append(position[pkg])
        else:
            notfound.append(pkg)

    # The 'with' block closes the file even if one of the writes fails.
    with open(log_file,'w') as output:
        output.write("# %s\n" % label["description"])
        output.write("# %s\n" % label["values"])
        for r in sorted(ranks):
            output.write(str(r)+"\n")
        if notfound:
            output.write("Out of recommendation:\n")
            for pkg in notfound:
                output.write(pkg+"\n")
  52 +
def plot_summary(sample,recommendation,repo_size,log_file):
    """Plot accuracy, precision, recall and F1 against recommendation size.

    For growing prefixes of recommendation.ranking (sizes 1, 101, 201, ...)
    the prefix is evaluated against the sample, and the four metric values
    returned by Evaluation.run are collected. The four curves are plotted
    together and saved as a PostScript file named log_file + "-plot.ps".

    sample         -- dict of held-out packages, used as the real result
    recommendation -- object exposing a 'ranking' sequence
    repo_size      -- value forwarded to Evaluation as its third argument
    log_file       -- path prefix of the generated plot file
    """
    accuracy = []
    precision = []
    recall = []
    f1 = []
    g = Gnuplot.Gnuplot()
    g('set style data lines')
    g.xlabel('Recommendation size')
    # Evaluate the top-'size' slice of the ranking, in steps of 100 items.
    for size in range(1,len(recommendation.ranking)+1,100):
        predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
        real = RecommendationResult(sample)
        evaluation = Evaluation(predicted,real,repo_size)
        accuracy.append([size,evaluation.run(Accuracy())])
        precision.append([size,evaluation.run(Precision())])
        recall.append([size,evaluation.run(Recall())])
        f1.append([size,evaluation.run(F1())])

    g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
           Gnuplot.Data(precision,title="Precision"),
           Gnuplot.Data(recall,title="Recall"),
           Gnuplot.Data(f1,title="F1"))
    # Write the plot once; the second identical hardcopy call was redundant.
    g.hardcopy(log_file+"-plot.ps", terminal="postscript")
  77 +
  78 +def run_iteration(user,cfg,label,sample):
34 79 rec = Recommender(cfg)
35 80 repo_size = rec.items_repository.get_doccount()
36   - user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop"))
37   - print "profile",user.pkg_profile
38   - user.maximal_pkg_profile()
39   - sample_size = int(len(user.pkg_profile)*sample_proportion)
40   - for n in range(iteration):
41   - item_score = dict.fromkeys(user.pkg_profile,1)
42   - # Prepare partition
43   - sample = {}
44   - for i in range(sample_size):
45   - key = random.choice(item_score.keys())
46   - sample[key] = item_score.pop(key)
47   - # Get full recommendation
48   - user = User(item_score)
49   - recommendation = rec.get_recommendation(user,repo_size)
50   - # Write recall log
51   - log_file = "results/strategies/"+label["values"]
52   - output = open(log_file,'w')
53   - output.write("# %s\n" % label["description"])
54   - output.write("# %s\n" % label["values"])
55   - notfound = []
56   - ranks = []
57   - for pkg in sample.keys():
58   - if pkg in recommendation.ranking:
59   - ranks.append(recommendation.ranking.index(pkg))
60   - else:
61   - notfound.append(pkg)
62   - for r in sorted(ranks):
63   - output.write(str(r)+"\n")
64   - if notfound:
65   - output.write("Out of recommendation:\n")
66   - for pkg in notfound:
67   - output.write(pkg+"\n")
68   - output.close()
69   - # Plot metrics summary
70   - accuracy = []
71   - precision = []
72   - recall = []
73   - f1 = []
74   - g = Gnuplot.Gnuplot()
75   - g('set style data lines')
76   - g.xlabel('Recommendation size')
77   - for size in range(1,len(recommendation.ranking)+1,100):
78   - predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
79   - real = RecommendationResult(sample)
80   - evaluation = Evaluation(predicted,real,repo_size)
81   - accuracy.append([size,evaluation.run(Accuracy())])
82   - precision.append([size,evaluation.run(Precision())])
83   - recall.append([size,evaluation.run(Recall())])
84   - f1.append([size,evaluation.run(F1())])
85   -
86   - g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
87   - Gnuplot.Data(precision,title="Precision"),
88   - Gnuplot.Data(recall,title="Recall"),
89   - Gnuplot.Data(f1,title="F1"))
90   - g.hardcopy(log_file+"-plot.ps", enhanced=1, color=1)
  81 + recommendation = rec.get_recommendation(user,repo_size)
  82 + log_file = "results/strategies/"+label["values"]
  83 + write_recall_log(label,sample,recommendation,log_file)
  84 + plot_summary(sample,recommendation,repo_size,log_file)
91 85  
def run_strategies(user,sample,n):
    """Run every configured strategy for one user/sample partition.

    Iterates over the module-level parameter lists (bm25_k1, profile_size,
    content_based, collaborative, popcon_size, neighbors), builds a label
    describing each combination and calls run_iteration for it.

    Content-based strategies run when "content" is given on the command
    line, collaborative ones when "collaborative" (or the historical
    spelling "colaborative") is given; both run when no argument is passed.

    user   -- user object exposing 'pkg_profile'
              (presumably the profile left after removing the sample --
              verify against the caller)
    sample -- dict of held-out packages
    n      -- iteration number, used only in the label
    """
    cfg = Config()
    # Fraction of the whole profile (remaining + held out) that was sampled.
    # float() avoids Python 2 integer division; the parentheses fix the
    # precedence so the sample size is divided by the total, not added to
    # a quotient.
    total = len(user.pkg_profile)+len(sample)
    if total:
        sample_proportion = float(len(sample))/total
    else:
        sample_proportion = 0.0

    run_all = len(sys.argv)<2
    run_content = run_all or "content" in sys.argv
    # Accept the correct spelling while staying compatible with the old one.
    run_collaborative = (run_all or "colaborative" in sys.argv
                         or "collaborative" in sys.argv)
    if run_collaborative:
        # Remember the pristine values so each size is appended to the base
        # instead of piling up on top of the previous iteration's suffix.
        # NOTE(review): assumes these are string prefixes -- confirm in Config.
        desktopapps_base = cfg.popcon_desktopapps
        programs_base = cfg.popcon_programs

    for k1 in bm25_k1:
        cfg.bm25_k1 = k1
        if run_content:
            for size in profile_size:
                cfg.profile_size = size
                for strategy in content_based:
                    cfg.strategy = strategy
                    label = {}
                    label["description"] = "k1_bm25-profile-strategy-sample-n"
                    label["values"] = ("%.2f-%d-%s-%.2f-%d" %
                                       (cfg.bm25_k1,cfg.profile_size,
                                        cfg.strategy,sample_proportion,n))
                    run_iteration(user,cfg,label,sample)
        if run_collaborative:
            for strategy in collaborative:
                cfg.strategy = strategy
                for size in popcon_size:
                    # popcon_size mixes ints and 'full', hence str().
                    cfg.popcon_desktopapps = desktopapps_base+str(size)
                    cfg.popcon_programs = programs_base+str(size)
                    for neighbors_count in neighbors:
                        cfg.k_neighbors = neighbors_count
                        k_str = "k"+str(cfg.k_neighbors)
                        label = {}
                        label["description"] = "k1_bm25-popcon-strategy-k-sample-n"
                        # Use the current size, not the whole popcon_size
                        # list, so each size gets its own log file name.
                        label["values"] = ("%.2f-%s-%s-%s-%.2f-%d" %
                                           (cfg.bm25_k1,str(size),cfg.strategy,
                                            k_str,sample_proportion,n))
                        run_iteration(user,cfg,label,sample)
92 116  
93 117 if __name__ == '__main__':
94   - iteration = 10
  118 + iterations = 10
95 119 samples_proportion = [0.5, 0.6, 0.7, 0.8, 0.9]
96 120 weights = ['bm25', 'trad']
97   - cb_strategies = ['cb','cbt','cbd']
98   - #cb_strategies = []
99   - profile_size = range(10,100,10)
100   - items_repository = ["data/AppAxi","/var/lib/apt-xapian-index/index"]
101   - users_repository = ["data/popcon_index_full","data/popcon_index-50000",
102   - "data/popcon_index_10000","data/popcon_index_1000"]
103   - users_repository = []
104   - neighbors = range(10,1010,100)
  121 + bm25_k1 = [1.0, 1.2, 1.4, 1.6, 1.8, 2.0]
  122 + content_based = ['cb','cbt','cbd','cbh',
  123 + 'cb_eset','cbt_eset','cbd_eset','cbh_eset']
  124 + collaborative = ['knn','knn_plus','knn_eset']
  125 + hybrid = ['knnco','knnco_eset']
105 126  
106   - cfg = Config()
107   - cfg.index_mode = "old"
108   - label = {}
  127 + profile_size = range(10,100,10)
  128 + popcon_size = [1000,10000,50000,'full']
  129 + neighbors = range(10,510,100)
109 130  
110   - for w in weights:
111   - cfg.weight = w
112   - for items_repo in items_repository:
113   - cfg.axi = items_repo
114   - if "App" in cfg.axi:
115   - axi_str = "axiapp"
116   - else:
117   - axi_str = "axifull"
118   - for sample_proportion in samples_proportion:
119   - if "content" in sys.argv or len(sys.argv)<2:
120   - for size in profile_size:
121   - cfg.profile_size = size
122   - for strategy in cb_strategies:
123   - cfg.strategy = strategy
124   - for n in range(iteration):
125   - label["description"] = "weight-axi-profile-strategy-sample-n"
126   - label["values"] = ("%s-%s-%d-%s-%.2f-%d" %
127   - (cfg.weight,axi_str,cfg.profile_size,
128   - cfg.strategy,sample_proportion,n))
129   - run_iteration(label,cfg,sample_proportion,n)
130   - if "colaborative" in sys.argv or len(sys.argv)<2:
131   - cfg.strategy = "col"
132   - for users_repo in users_repository:
133   - cfg.popcon_index = users_repo
134   - for k in neighbors:
135   - cfg.k_neighbors = k
136   - for n in range(iteration):
137   - k_str = "k"+str(cfg.k_neighbors)
138   - if "full" in cfg.popcon_index:
139   - popcon_str = "popfull"
140   - if "50000" in cfg.popcon_index:
141   - popcon_str = "pop50000"
142   - if "10000" in cfg.popcon_index:
143   - popcon_str = "pop10000"
144   - if "1000" in cfg.popcon_index:
145   - popcon_str = "pop1000"
146   - label["description"] = "weight-axi-popcon-profile-strategy-k-sample-n"
147   - label["values"] = ("%s-%s-%s-%d-%s-%s-%.2f-%d" %
148   - (cfg.weight,axi_str,popcon_str,cfg.profile_size,
149   - cfg.strategy,k_str,sample_proportion,n))
150   - run_iteration(label,cfg,sample_proportion,n)
  131 + user = LocalSystem()
  132 + #user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,"desktopapps"))
  133 + user.maximal_pkg_profile()
  134 + for sample_proportion in samples_proportion:
  135 + for n in range(iterations):
  136 + # Fill user profile
  137 + item_score = {}
  138 + for pkg in user.pkg_profile:
  139 + item_score[pkg] = user.item_score[pkg]
  140 + # Prepare partition sample
  141 + sample = {}
  142 + sample_size = int(len(user.pkg_profile)*sample_proportion)
  143 + for i in range(sample_size):
  144 + key = random.choice(item_score.keys())
  145 + sample[key] = item_score.pop(key)
  146 + run_strategies(User(item_score),sample,n)
... ...