Experiments refactoring.

Tássia Camões Araújo
1 parent 1aed15a5
Showing 2 changed files with 109 additions and 109 deletions Show diff stats
src/evaluation.py
src/experiments/strategies-suite.py
@@ -294,6 +294,10 @@ class CrossValidation:
             round_user = User(cross_item_score)
             result_size = int(self.recommender.items_repository.get_doccount()*
                               self.result_proportion)
+            logging.debug("size %d" % result_size)
+            if not result_size:
+                logging.critical("Recommendation size is zero.")
+                raise Error
             predicted_result = self.recommender.get_recommendation(round_user,result_size)
             if not predicted_result.size:
                 logging.critical("No recommendation produced. Abort cross-validation.")
@@ -30,121 +30,117 @@ import logging
 import random
 import Gnuplot
-def run_iteration(label,cfg,sample_proportion,n):
+def write_recall_log(label,sample,recommendation,log_file):
+    # Write recall log
+    output = open(log_file,'w')
+    output.write("# %s\n" % label["description"])
+    output.write("# %s\n" % label["values"])
+    notfound = []
+    ranks = []
+    for pkg in sample.keys():
+        if pkg in recommendation.ranking:
+            ranks.append(recommendation.ranking.index(pkg))
+        else:
+            notfound.append(pkg)
+    for r in sorted(ranks):
+        output.write(str(r)+"\n")
+    if notfound:
+        output.write("Out of recommendation:\n")
+        for pkg in notfound:
+            output.write(pkg+"\n")
+    output.close()
+
+def plot_summary(sample,recommendation,repo_size,log_file):
+    # Plot metrics summary
+    accuracy = []
+    precision = []
+    recall = []
+    f1 = []
+    g = Gnuplot.Gnuplot()
+    g('set style data lines')
+    g.xlabel('Recommendation size')
+    for size in range(1,len(recommendation.ranking)+1,100):
+        predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
+        real = RecommendationResult(sample)
+        evaluation = Evaluation(predicted,real,repo_size)
+        accuracy.append([size,evaluation.run(Accuracy())])
+        precision.append([size,evaluation.run(Precision())])
+        recall.append([size,evaluation.run(Recall())])
+        f1.append([size,evaluation.run(F1())])
+
+    g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
+           Gnuplot.Data(precision,title="Precision"),
+           Gnuplot.Data(recall,title="Recall"),
+           Gnuplot.Data(f1,title="F1"))
+    g.hardcopy(log_file+"-plot.ps", terminal="postscript")
+    g.hardcopy(log_file+"-plot.ps", terminal="postscript")
+
+def run_iteration(user,cfg,label,sample):
+    """Run one recommendation round and record its results.
+
+    Builds a Recommender from cfg, requests a recommendation for user as
+    large as the items repository, then writes the recall log and the
+    metrics plot under "results/strategies/", named after label["values"].
+    sample is passed on to both writers.
+    """
     rec = Recommender(cfg)
     repo_size = rec.items_repository.get_doccount()
-    user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop"))
-    print "profile",user.pkg_profile
-    user.maximal_pkg_profile()
-    sample_size = int(len(user.pkg_profile)*sample_proportion)
-    for n in range(iteration):
-        item_score = dict.fromkeys(user.pkg_profile,1)
-        # Prepare partition
-        sample = {}
-        for i in range(sample_size):
-             key = random.choice(item_score.keys())
-             sample[key] = item_score.pop(key)
-        # Get full recommendation
-        user = User(item_score)
-        recommendation = rec.get_recommendation(user,repo_size)
-        # Write recall log
-        log_file = "results/strategies/"+label["values"]
-        output = open(log_file,'w')
-        output.write("# %s\n" % label["description"])
-        output.write("# %s\n" % label["values"])
-        notfound = []
-        ranks = []
-        for pkg in sample.keys():
-            if pkg in recommendation.ranking:
-                ranks.append(recommendation.ranking.index(pkg))
-            else:
-                notfound.append(pkg)
-        for r in sorted(ranks):
-            output.write(str(r)+"\n")
-        if notfound:
-            output.write("Out of recommendation:\n")
-            for pkg in notfound:
-                output.write(pkg+"\n")
-        output.close()
-        # Plot metrics summary
-        accuracy = []
-        precision = []
-        recall = []
-        f1 = []
-        g = Gnuplot.Gnuplot()
-        g('set style data lines')
-        g.xlabel('Recommendation size')
-        for size in range(1,len(recommendation.ranking)+1,100):
-            predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
-            real = RecommendationResult(sample)
-            evaluation = Evaluation(predicted,real,repo_size)
-            accuracy.append([size,evaluation.run(Accuracy())])
-            precision.append([size,evaluation.run(Precision())])
-            recall.append([size,evaluation.run(Recall())])
-            f1.append([size,evaluation.run(F1())])
-
-        g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
-               Gnuplot.Data(precision,title="Precision"),
-               Gnuplot.Data(recall,title="Recall"),
-               Gnuplot.Data(f1,title="F1"))
-        g.hardcopy(log_file+"-plot.ps", enhanced=1, color=1)
+    # Request as many results as there are documents in the repository.
+    recommendation = rec.get_recommendation(user,repo_size)
+    log_file = "results/strategies/"+label["values"]
+    write_recall_log(label,sample,recommendation,log_file)
+    plot_summary(sample,recommendation,repo_size,log_file)
+def run_strategies(user,sample,n):
+    cfg = Config()
+    label = {}
+    sample_proportion = (len(sample)/len(user.pkg_profile)+len(sample))
+    for k in bm25_k1:
+        cfg.bm25_k1 = k
+        if "content" in sys.argv or len(sys.argv)<2:
+            for size in profile_size:
+                cfg.profile_size = size
+                for strategy in content_based:
+                    cfg.strategy = strategy
+                    label["description"] = "k1_bm25-profile-strategy-sample-n"
+                    label["values"] = ("%.2f-%d-%s-%.2f-%d" %
+                                       (cfg.bm25_k1,cfg.profile_size,
+                                        cfg.strategy,sample_proportion,n))
+                    run_iteration(user,cfg,label,sample)
+        if "colaborative" in sys.argv or len(sys.argv)<2:
+            for strategy in collaborative:
+                cfg.strategy = strategy
+                for size in popcon_size:
+                    cfg.popcon_desktopapps = cfg.popcon_desktopapps+size
+                    cfg.popcon_programs = cfg.popcon_programs+size
+                    for k in neighbors:
+                        cfg.k_neighbors = k
+                        k_str = "k"+str(cfg.k_neighbors)
+                        label["description"] = "k1_bm25-popcon-strategy-k-sample-n"
+                        label["values"] = ("%.2f-%s-%s-%s-%.2f-%d" %
+                                           (cfg.bm25_k1,str(popcon_size),cfg.strategy,
+                                            k_str,sample_proportion,n))
+                        run_iteration(user,cfg,label,sample)
 if __name__ == '__main__':
-    iteration = 10
+    # Number of random partitions drawn for each sample proportion.
+    iterations = 10
     samples_proportion = [0.5, 0.6, 0.7, 0.8, 0.9]
     weights = ['bm25', 'trad']
-    cb_strategies = ['cb','cbt','cbd']
-    #cb_strategies = []
-    profile_size = range(10,100,10)
-    items_repository = ["data/AppAxi","/var/lib/apt-xapian-index/index"]
-    users_repository = ["data/popcon_index_full","data/popcon_index-50000",
-                        "data/popcon_index_10000","data/popcon_index_1000"]
-    users_repository = []
-    neighbors = range(10,1010,100)
+    # The parameter grids below are read as module globals by
+    # run_strategies().
+    # NOTE(review): weights and hybrid are not referenced by the code
+    # visible here -- confirm whether they are still needed.
+    bm25_k1 = [1.0, 1.2, 1.4, 1.6, 1.8, 2.0]
+    content_based = ['cb','cbt','cbd','cbh',
+                     'cb_eset','cbt_eset','cbd_eset','cbh_eset']
+    collaborative = ['knn','knn_plus','knn_eset']
+    hybrid = ['knnco','knnco_eset']
-    cfg = Config()
-    cfg.index_mode = "old"
-    label = {}
+    profile_size = range(10,100,10)
+    popcon_size = [1000,10000,50000,'full']
+    neighbors = range(10,510,100)
-    for w in weights:
-        cfg.weight = w
-        for items_repo in items_repository:
-            cfg.axi = items_repo
-            if "App" in cfg.axi:
-                axi_str = "axiapp"
-            else:
-                axi_str = "axifull"
-            for sample_proportion in samples_proportion:
-                if "content" in sys.argv or len(sys.argv)<2:
-                    for size in profile_size:
-                        cfg.profile_size = size
-                        for strategy in cb_strategies:
-                            cfg.strategy = strategy
-                            for n in range(iteration):
-                                label["description"] = "weight-axi-profile-strategy-sample-n"
-                                label["values"] = ("%s-%s-%d-%s-%.2f-%d" %
-                                                   (cfg.weight,axi_str,cfg.profile_size,
-                                                    cfg.strategy,sample_proportion,n))
-                                run_iteration(label,cfg,sample_proportion,n)
-                if "colaborative" in sys.argv or len(sys.argv)<2:
-                    cfg.strategy = "col"
-                    for users_repo in users_repository:
-                        cfg.popcon_index = users_repo
-                        for k in neighbors:
-                            cfg.k_neighbors = k
-                            for n in range(iteration):
-                                k_str = "k"+str(cfg.k_neighbors)
-                                if "full" in cfg.popcon_index:
-                                    popcon_str = "popfull"
-                                if "50000" in cfg.popcon_index:
-                                    popcon_str = "pop50000"
-                                if "10000" in cfg.popcon_index:
-                                    popcon_str = "pop10000"
-                                if "1000" in cfg.popcon_index:
-                                    popcon_str = "pop1000"
-                                label["description"] = "weight-axi-popcon-profile-strategy-k-sample-n"
-                                label["values"] = ("%s-%s-%s-%d-%s-%s-%.2f-%d" %
-                                                   (cfg.weight,axi_str,popcon_str,cfg.profile_size,
-                                                    cfg.strategy,k_str,sample_proportion,n))
-                                run_iteration(label,cfg,sample_proportion,n)
+    user = LocalSystem()
+    #user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,"desktopapps"))
+    user.maximal_pkg_profile()
+    for sample_proportion in samples_proportion:
+        for n in range(iterations):
+            # Fill user profile
+            # (a fresh copy every round, because the sampling below pops
+            # entries out of it)
+            item_score = {}
+            for pkg in user.pkg_profile:
+                item_score[pkg] = user.item_score[pkg]
+            # Prepare partition sample
+            sample = {}
+            sample_size = int(len(user.pkg_profile)*sample_proportion)
+            # Move sample_size randomly chosen packages from the profile
+            # into the held-out sample.
+            for i in range(sample_size):
+                 key = random.choice(item_score.keys())
+                 sample[key] = item_score.pop(key)
+            # Run every strategy on the reduced profile; the sample is
+            # passed along for the recall log and metrics.
+            run_strategies(User(item_score),sample,n)