Commit 67cefa3d16f518b8a94a086d433e7cf82865bde2

Authored by Tássia Camões Araújo
2 parents 3ca461fa 2bc5af97
Exists in master and in 1 other branch add_vagrant

Merge branch 'master' of github.com:tassia/AppRecommender

src/bin/app_recommender.cfg
@@ -12,7 +12,7 @@ output = apprec.log @@ -12,7 +12,7 @@ output = apprec.log
12 base_dir = ~/.app-recommender/ 12 base_dir = ~/.app-recommender/
13 # filters for valid packages 13 # filters for valid packages
14 filters_dir = filters 14 filters_dir = filters
15 -pkgs_filter = programs 15 +pkgs_filter = desktopapps
16 # package information indexes 16 # package information indexes
17 axi = /var/lib/apt-xapian-index/index 17 axi = /var/lib/apt-xapian-index/index
18 axi_programs = axi_programs 18 axi_programs = axi_programs
@@ -24,7 +24,7 @@ axi_desktopapps = axi_desktopapps @@ -24,7 +24,7 @@ axi_desktopapps = axi_desktopapps
24 popcon = 1 24 popcon = 1
25 popcon_programs = popcon_programs 25 popcon_programs = popcon_programs
26 popcon_desktopapps = popcon_desktopapps 26 popcon_desktopapps = popcon_desktopapps
27 -popcon_index = popcon_programs 27 +popcon_index = popcon_desktopapps
28 popcon_dir = popcon-entries 28 popcon_dir = popcon-entries
29 # number of popcon submission for indexing 29 # number of popcon submission for indexing
30 max_popcon = 100000000 30 max_popcon = 100000000
@@ -46,7 +46,7 @@ class Config(Singleton): @@ -46,7 +46,7 @@ class Config(Singleton):
46 self.base_dir = os.path.expanduser("~/.app-recommender/") 46 self.base_dir = os.path.expanduser("~/.app-recommender/")
47 # filters for valid packages 47 # filters for valid packages
48 self.filters_dir = os.path.join(self.base_dir,"filters") 48 self.filters_dir = os.path.join(self.base_dir,"filters")
49 - self.pkgs_filter = os.path.join(self.filters_dir,"programs") 49 + self.pkgs_filter = os.path.join(self.filters_dir,"desktopapps")
50 # package information packages 50 # package information packages
51 self.axi = "/var/lib/apt-xapian-index/index" 51 self.axi = "/var/lib/apt-xapian-index/index"
52 self.axi_programs = os.path.join(self.base_dir,"axi_programs") 52 self.axi_programs = os.path.join(self.base_dir,"axi_programs")
@@ -57,7 +57,7 @@ class Config(Singleton): @@ -57,7 +57,7 @@ class Config(Singleton):
57 self.popcon = 1 57 self.popcon = 1
58 self.popcon_programs = os.path.join(self.base_dir,"popcon_programs") 58 self.popcon_programs = os.path.join(self.base_dir,"popcon_programs")
59 self.popcon_desktopapps = os.path.join(self.base_dir,"popcon_desktopapps") 59 self.popcon_desktopapps = os.path.join(self.base_dir,"popcon_desktopapps")
60 - self.popcon_index = self.popcon_programs 60 + self.popcon_index = self.popcon_desktopapps
61 self.popcon_dir = os.path.join(self.base_dir,"popcon-entries") 61 self.popcon_dir = os.path.join(self.base_dir,"popcon-entries")
62 self.max_popcon = 1000 62 self.max_popcon = 1000
63 # popcon clustering 63 # popcon clustering
@@ -85,10 +85,15 @@ def tfidf_weighting(index,docs,content_filter,plus=0): @@ -85,10 +85,15 @@ def tfidf_weighting(index,docs,content_filter,plus=0):
85 # Compute sublinear tfidf for each term 85 # Compute sublinear tfidf for each term
86 weights = {} 86 weights = {}
87 for term in terms_doc.termlist(): 87 for term in terms_doc.termlist():
88 - tf = 1+math.log(term.wdf)  
89 - idf = math.log(index.get_doccount()/  
90 - float(index.get_termfreq(term.term)))  
91 - weights[term.term] = tf*idf 88 + try:
  89 + # Even if it shouldn't raise error...
  90 + # math.log: ValueError: math domain error
  91 + tf = 1+math.log(term.wdf)
  92 + idf = math.log(index.get_doccount()/
  93 + float(index.get_termfreq(term.term)))
  94 + weights[term.term] = tf*idf
  95 + except:
  96 + pass
92 sorted_weights = list(reversed(sorted(weights.items(), 97 sorted_weights = list(reversed(sorted(weights.items(),
93 key=operator.itemgetter(1)))) 98 key=operator.itemgetter(1))))
94 #print sorted_weights 99 #print sorted_weights
@@ -410,7 +415,7 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -410,7 +415,7 @@ class PopconXapianIndex(xapian.WritableDatabase):
410 # if the package has tags associated with it 415 # if the package has tags associated with it
411 if not tags == "notags": 416 if not tags == "notags":
412 for tag in tags: 417 for tag in tags:
413 - if tag in self.valid_tags: 418 + if tag.lstrip("XT") in self.valid_tags:
414 doc.add_term(tag,freq) 419 doc.add_term(tag,freq)
415 doc_id = self.add_document(doc) 420 doc_id = self.add_document(doc)
416 doc_count += 1 421 doc_count += 1
src/evaluation.py
@@ -123,16 +123,33 @@ class Recall(Metric): @@ -123,16 +123,33 @@ class Recall(Metric):
123 """ 123 """
124 return float(len(evaluation.true_positive))/len(evaluation.real_relevant) 124 return float(len(evaluation.true_positive))/len(evaluation.real_relevant)
125 125
class FPR(Metric):
    """
    False positive rate (used for plotting ROC curve).
    """
    def __init__(self):
        """
        Set metric description.
        """
        self.desc = " FPR "

    def run(self,evaluation):
        """
        Compute metric.

        Divides the number of false positives by
        evaluation.true_negatives_len, which Evaluation sets to
        repository_size - len(real_relevant), i.e. the count of items that
        are not really relevant. Returns 0 when that count is zero, the
        same way F_score falls back to 0 for its degenerate case.
        """
        negatives = evaluation.true_negatives_len
        if not negatives:
            # No negative items at all: the rate is undefined, report 0
            # instead of raising ZeroDivisionError.
            return 0
        return float(len(evaluation.false_positive))/negatives
  141 +
  142 +class F_score(Metric):
127 """ 143 """
128 Classification accuracy metric which correlates precision and recall into an 144 Classification accuracy metric which correlates precision and recall into an
129 unique measure. 145 unique measure.
130 """ 146 """
131 - def __init__(self): 147 + def __init__(self,k):
132 """ 148 """
133 Set metric description. 149 Set metric description.
134 """ 150 """
135 - self.desc = " F1 " 151 + self.desc = " F_score "
  152 + self.k = k
136 153
137 def run(self,evaluation): 154 def run(self,evaluation):
138 """ 155 """
@@ -140,8 +157,8 @@ class F1(Metric): @@ -140,8 +157,8 @@ class F1(Metric):
140 """ 157 """
141 p = Precision().run(evaluation) 158 p = Precision().run(evaluation)
142 r = Recall().run(evaluation) 159 r = Recall().run(evaluation)
143 - if (p+r)>0:  
144 - return float(2*((p*r)/(p+r))) 160 + if ((self.k*self.k*p)+r)>0:
  161 + return float(((1+(self.k*self.k))*((p*r)/((self.k*self.k*p)+r))))
145 else: 162 else:
146 return 0 163 return 0
147 164
@@ -237,11 +254,12 @@ class Evaluation: @@ -237,11 +254,12 @@ class Evaluation:
237 self.false_negative = [v[0] for v in self.real_relevant if not v[0] in 254 self.false_negative = [v[0] for v in self.real_relevant if not v[0] in
238 [w[0] for w in self.predicted_relevant]] 255 [w[0] for w in self.predicted_relevant]]
239 256
240 - logging.debug("TP: %d" % len(self.true_positive))  
241 - logging.debug("FP: %d" % len(self.false_positive))  
242 - logging.debug("FN: %d" % len(self.false_negative))  
243 - logging.debug("Repo_size: %d" % self.repository_size)  
244 - logging.debug("Relevant: %d" % len(self.real_relevant)) 257 + self.true_negatives_len = self.repository_size - len(self.real_relevant)
  258 + #logging.debug("TP: %d" % len(self.true_positive))
  259 + #logging.debug("FP: %d" % len(self.false_positive))
  260 + #logging.debug("FN: %d" % len(self.false_negative))
  261 + #logging.debug("Repo_size: %d" % self.repository_size)
  262 + #logging.debug("Relevant: %d" % len(self.real_relevant))
245 263
246 def run(self,metric): 264 def run(self,metric):
247 """ 265 """
src/experiments/strategies-suite.py
@@ -30,117 +30,245 @@ import logging @@ -30,117 +30,245 @@ import logging
30 import random 30 import random
31 import Gnuplot 31 import Gnuplot
32 32
33 -def write_recall_log(label,sample,recommendation,log_file): 33 +#iterations = 3
  34 +#sample_proportions = [0.9]
  35 +#weighting = [('bm25',1.2)]
  36 +#collaborative = ['knn']
  37 +#content_based = []
  38 +#hybrid = ['knnco']
  39 +#profile_size = [50,100]
  40 +#popcon_size = ["1000"]
  41 +#neighbors = [50]
  42 +
  43 +iterations = 10
  44 +sample_proportions = [0.5, 0.6, 0.7, 0.8, 0.9]
  45 +weighting = [('bm25',1.2), ('bm25',1.6), ('bm25',2.0), ('trad',0)]
  46 +content_based = ['cb','cbt','cbd','cbh','cb_eset','cbt_eset','cbd_eset','cbh_eset']
  47 +collaborative = ['knn_eset','knn','knn_plus']
  48 +hybrid = ['knnco','knnco_eset']
  49 +
  50 +profile_size = range(20,100,20)
  51 +#popcon_size = [1000,10000,50000,'full']
  52 +neighbors = range(10,510,50)
  53 +
def write_recall_log(label,n,sample,recommendation,profile_size,repo_size,log_file):
    """
    Write the recall log of one iteration to the file "<log_file>-<n>".

    The file starts with two comment lines built from label["description"]
    and label["values"], followed by a line holding repo_size, profile_size
    and the sample size. When the recommendation has a "ranking" attribute,
    the rank (position in the ranking) of every sample package found there
    is written in ascending order, one per line, followed by the list of
    sample packages that are out of the recommendation.
    """
    # The with-block guarantees the file is closed even if a write fails.
    with open("%s-%d" % (log_file,n),'w') as output:
        output.write("# %s-n\n" % label["description"])
        output.write("# %s-%d\n" % (label["values"],n))
        output.write("\n%d %d %d\n" %
                     (repo_size,profile_size,len(sample)))
        if not hasattr(recommendation,"ranking"):
            return
        # Map each package to its first position in the ranking once,
        # instead of scanning the ranking list twice per sample package.
        position = {}
        for rank,pkg in enumerate(recommendation.ranking):
            position.setdefault(pkg,rank)
        ranks = sorted(position[pkg] for pkg in sample if pkg in position)
        notfound = [pkg for pkg in sample if pkg not in position]
        for r in ranks:
            output.write(str(r)+"\n")
        if notfound:
            output.write("Out of recommendation:\n")
            for pkg in notfound:
                output.write(pkg+"\n")
52 76
def plot_summary(precision,recall,f1,f05,accuracy,log_file):
    """
    Plot the metrics summary and save it next to the log file.

    Each metric argument is passed as-is to Gnuplot.Data (run_strategy
    hands in the sorted [size, mean] lists produced by ExperimentResults).
    Four files are written: <log_file>.png and <log_file>.ps with a linear
    x axis, then <log_file>-logscale.png and <log_file>-logscale.ps after
    switching the x axis to logscale.
    """
    g = Gnuplot.Gnuplot()

    def save(basename):
        # One PNG and one colour postscript copy of the current plot.
        g.hardcopy(basename+".png",terminal="png")
        g.hardcopy(basename+".ps",terminal="postscript",enhanced=1,color=1)

    g('set style data lines')
    g.xlabel('Recommendation size')
    # Only the last path component identifies the experiment setup.
    g.title("Setup: %s" % log_file.split("/")[-1])
    curves = [(accuracy,"Accuracy"),
              (precision,"Precision"),
              (recall,"Recall"),
              (f1,"F_1"),
              (f05,"F_0.5")]
    g.plot(*[Gnuplot.Data(points,title=name) for points,name in curves])
    save(log_file)
    g('set logscale x')
    g('replot')
    save(log_file+"-logscale")
  94 +
def get_label(cfg,sample_proportion):
    """
    Build the label that identifies an experiment setup.

    Returns a dict with two keys: "description", naming the fields, and
    "values", the same fields filled in from cfg and sample_proportion.
    The fields depend on whether cfg.strategy is listed in content_based,
    collaborative or hybrid. For any other strategy a message is printed
    and an empty dict is returned.
    """
    label = {}
    strategy = cfg.strategy
    # The descriptions list the fields in the same order as the values;
    # previously "filter" and "profile" were swapped for the content-based
    # and hybrid cases.
    if strategy in content_based:
        label["description"] = "strategy-profile-filter-k1_bm25-sample"
        prefix = "%s-profile%d" % (strategy,cfg.profile_size)
    elif strategy in collaborative:
        label["description"] = "strategy-knn-filter-k1_bm25-sample"
        prefix = "%s-k%d" % (strategy,cfg.k_neighbors)
    elif strategy in hybrid:
        label["description"] = "strategy-knn-profile-filter-k1_bm25-sample"
        prefix = "%s-k%d-profile%d" % (strategy,cfg.k_neighbors,
                                       cfg.profile_size)
    else:
        # Parenthesized single-string print behaves the same on Python 2
        # and Python 3.
        print("Unknown strategy")
        return label
    # Fields shared by every strategy: filter name (last path component),
    # BM25 k1 and sample proportion.
    label["values"] = ("%s-%s-kbm%.1f-sample%.1f" %
                       (prefix,cfg.pkgs_filter.split("/")[-1],
                        cfg.bm25_k1,sample_proportion))
    return label
  118 +
class ExperimentResults:
    """
    Accumulate evaluation metrics of repeated experiment iterations.

    Every metric (accuracy, precision, recall, f1, f05) is a dict that maps
    a recommendation size to the list of values measured at that size, one
    value per call to add_result.
    """
    def __init__(self,repo_size):
        """
        Create empty result lists for each evaluated recommendation size:
        1, then 10 to 190 in steps of 10, then 200 up to (but excluding)
        repo_size in steps of 100.
        """
        self.repository_size = repo_size
        self.accuracy = {}
        self.precision = {}
        self.recall = {}
        self.f1 = {}
        self.f05 = {}
        # list() keeps the concatenation valid where range() is not a list.
        points = ([1]+list(range(10,200,10))+
                  list(range(200,self.repository_size,100)))
        for size in points:
            self.accuracy[size] = []
            self.precision[size] = []
            self.recall[size] = []
            self.f1[size] = []
            self.f05[size] = []

    def add_result(self,ranking,sample):
        """
        Evaluate the first 'size' items of ranking against sample, for each
        recommendation size, and store the resulting metric values.
        """
        for size in self.accuracy.keys():
            predicted = RecommendationResult(dict.fromkeys(ranking[:size],1))
            real = RecommendationResult(sample)
            evaluation = Evaluation(predicted,real,self.repository_size)
            self.accuracy[size].append(evaluation.run(Accuracy()))
            self.precision[size].append(evaluation.run(Precision()))
            self.recall[size].append(evaluation.run(Recall()))
            self.f1[size].append(evaluation.run(F_score(1)))
            self.f05[size].append(evaluation.run(F_score(0.5)))

    def _summary(self,metric):
        """
        Return the sorted list of [size, mean value] pairs of a metric.
        Sizes without any stored value are left out instead of raising
        ZeroDivisionError.
        """
        # float() avoids truncating integer division when all values are
        # ints.
        return sorted([size,float(sum(values))/len(values)]
                      for size,values in metric.items() if values)

    def _best(self,metric):
        """
        Return (size, value) for the highest single value of a metric.
        Raises ValueError when no value has been stored yet.
        """
        size = max((s for s in metric if metric[s]),
                   key = lambda s: max(metric[s]))
        return (size,max(metric[size]))

    def get_precision_summary(self):
        """Mean precision per recommendation size."""
        return self._summary(self.precision)

    def get_recall_summary(self):
        """Mean recall per recommendation size."""
        return self._summary(self.recall)

    def get_f1_summary(self):
        """Mean F_1 per recommendation size."""
        return self._summary(self.f1)

    def get_f05_summary(self):
        """Mean F_0.5 per recommendation size."""
        return self._summary(self.f05)

    def get_accuracy_summary(self):
        """Mean accuracy per recommendation size."""
        return self._summary(self.accuracy)

    def best_precision(self):
        """(size, value) of the best precision measured."""
        return self._best(self.precision)

    def best_f1(self):
        """(size, value) of the best F_1 measured."""
        return self._best(self.f1)

    def best_f05(self):
        """(size, value) of the best F_0.5 measured."""
        return self._best(self.f05)
  177 +
def run_strategy(cfg,user):
    """
    Run the experiment for the strategy currently set in cfg.

    For every weighting scheme and every sample proportion, 'iterations'
    rounds are executed. In each round a random sample of the user profile
    is held out, a recommendation is computed from the remaining items,
    and the recall log is written. Afterwards a summary file named after
    the label values is written under results/strategies/ and the averaged
    metrics are plotted. A configuration for which no recommendation
    carried a ranking is skipped with a warning instead of failing on an
    empty average.
    """
    for weight_name,k1 in weighting:
        cfg.weight = weight_name
        cfg.bm25_k1 = k1
        rec = Recommender(cfg)
        repo_size = rec.items_repository.get_doccount()
        for proportion in sample_proportions:
            results = ExperimentResults(repo_size)
            label = get_label(cfg,proportion)
            log_file = "results/strategies/"+label["values"]
            # Size of the last recommendation that carried a ranking.
            coverage = None
            for n in range(iterations):
                # Fill sample profile
                full_profile_size = len(user.pkg_profile)
                item_score = dict((pkg,user.item_score[pkg])
                                  for pkg in user.pkg_profile)
                sample = {}
                sample_size = int(full_profile_size*proportion)
                for _ in range(sample_size):
                    # list() so the choice also works when keys are a view.
                    key = random.choice(list(item_score))
                    sample[key] = item_score.pop(key)
                iteration_user = User(item_score)
                recommendation = rec.get_recommendation(iteration_user,repo_size)
                write_recall_log(label,n,sample,recommendation,
                                 full_profile_size,repo_size,log_file)
                if hasattr(recommendation,"ranking"):
                    results.add_result(recommendation.ranking,sample)
                    coverage = recommendation.size
            if coverage is None:
                # Nothing was evaluated: averaging would divide by zero.
                logging.warning("No ranking produced for '%s', skipping summary"
                                % label["values"])
                continue
            precision_10 = float(sum(results.precision[10]))/len(results.precision[10])
            f1_10 = float(sum(results.f1[10]))/len(results.f1[10])
            f05_10 = float(sum(results.f05[10]))/len(results.f05[10])
            best_precision = results.best_precision()
            best_f1 = results.best_f1()
            best_f05 = results.best_f05()
            with open(log_file,'w') as f:
                f.write("# %s\n# %s\n\ncoverage %d\n\n" %
                        (label["description"],label["values"],coverage))
                f.write("# best results (recommendation size; metric)\n")
                f.write("precision (%d; %.2f)\nf1 (%d; %.2f)\nf05 (%d; %.2f)\n\n" %
                        (best_precision[0],best_precision[1],
                         best_f1[0],best_f1[1],
                         best_f05[0],best_f05[1]))
                f.write("# recommendation size 10\nprecision (10; %.2f)\nf1 (10; %.2f)\nf05 (10; %.2f)" %
                        (precision_10,f1_10,f05_10))
            precision = results.get_precision_summary()
            recall = results.get_recall_summary()
            f1 = results.get_f1_summary()
            f05 = results.get_f05_summary()
            accuracy = results.get_accuracy_summary()
            plot_summary(precision,recall,f1,f05,accuracy,log_file)
  223 +
def run_content(user,cfg):
    """
    Run the experiment for every content-based strategy, once per
    configured profile size.
    """
    for name in content_based:
        cfg.strategy = name
        for n_terms in profile_size:
            cfg.profile_size = n_terms
            run_strategy(cfg,user)
  230 +
def run_collaborative(user,cfg):
    """
    Run the experiment for every collaborative strategy, once per
    configured neighborhood size.
    """
    # NOTE(review): varying the popcon index size is not done here; the
    # module-level popcon_size list is commented out as well.
    for strategy in collaborative:
        cfg.strategy = strategy
        for k in neighbors:
            cfg.k_neighbors = k
            run_strategy(cfg,user)
  243 +
def run_hybrid(user,cfg):
    """
    Run the experiment for every hybrid strategy, once per combination of
    configured neighborhood size and profile size.
    """
    # NOTE(review): varying the popcon index size is not done here; the
    # module-level popcon_size list is commented out as well.
    for strategy in hybrid:
        cfg.strategy = strategy
        for k in neighbors:
            cfg.k_neighbors = k
            for size in profile_size:
                cfg.profile_size = size
                run_strategy(cfg,user)
116 258
117 if __name__ == '__main__': 259 if __name__ == '__main__':
118 - iterations = 10  
119 - samples_proportion = [0.5, 0.6, 0.7, 0.8, 0.9]  
120 - weights = ['bm25', 'trad']  
121 - bm25_k1 = [1.0, 1.2, 1.4, 1.6, 1.8, 2.0]  
122 - content_based = ['cb','cbt','cbd','cbh',  
123 - 'cb_eset','cbt_eset','cbd_eset','cbh_eset']  
124 - collaborative = ['knn','knn_plus','knn_eset']  
125 - hybrid = ['knnco','knnco_eset']  
126 -  
127 - profile_size = range(10,100,10)  
128 - popcon_size = [1000,10000,50000,'full']  
129 - neighbors = range(10,510,100)  
130 -  
131 - user = LocalSystem() 260 + #user = LocalSystem()
132 #user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,"desktopapps")) 261 #user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,"desktopapps"))
  262 +
  263 + cfg = Config()
  264 + user = PopconSystem("/root/.app-recommender/popcon-entries/8b/8b44fcdbcf676e711a153d5db09979d7")
  265 + #user = PopconSystem("/root/.app-recommender/popcon-entries/4a/4a67a295ec14826db2aa1d90be2f1623")
  266 + user.filter_pkg_profile(cfg.pkgs_filter)
133 user.maximal_pkg_profile() 267 user.maximal_pkg_profile()
134 - for sample_proportion in samples_proportion:  
135 - for n in range(iterations):  
136 - # Fill user profile  
137 - item_score = {}  
138 - for pkg in user.pkg_profile:  
139 - item_score[pkg] = user.item_score[pkg]  
140 - # Prepare partition sample  
141 - sample = {}  
142 - sample_size = int(len(user.pkg_profile)*sample_proportion)  
143 - for i in range(sample_size):  
144 - key = random.choice(item_score.keys())  
145 - sample[key] = item_score.pop(key)  
146 - run_strategies(User(item_score),sample,n) 268 +
  269 + if "content" in sys.argv or len(sys.argv)<2:
  270 + run_content(user,cfg)
  271 + if "collaborative" in sys.argv or len(sys.argv)<2:
  272 + run_collaborative(user,cfg)
  273 + if "hybrid" in sys.argv or len(sys.argv)<2:
  274 + run_hybrid(user,cfg)
src/recommender.py
@@ -109,8 +109,12 @@ class Recommender: @@ -109,8 +109,12 @@ class Recommender:
109 Set the recommendation strategy. 109 Set the recommendation strategy.
110 """ 110 """
111 logging.info("Setting recommender strategy to \'%s\'" % strategy_str) 111 logging.info("Setting recommender strategy to \'%s\'" % strategy_str)
112 - self.items_repository = self.axi_programs  
113 - self.valid_pkgs = self.valid_programs 112 + if self.cfg.pkgs_filter.split("/")[-1] == "desktopapps":
  113 + self.items_repository = self.axi_desktopapps
  114 + self.valid_pkgs = self.valid_desktopapps
  115 + else:
  116 + self.items_repository = self.axi_programs
  117 + self.valid_pkgs = self.valid_programs
114 # Check if collaborative strategies can be instantiated 118 # Check if collaborative strategies can be instantiated
115 if ("col" in strategy_str) or ("knn" in strategy_str): 119 if ("col" in strategy_str) or ("knn" in strategy_str):
116 if not self.cfg.popcon: 120 if not self.cfg.popcon:
src/strategy.py
@@ -100,6 +100,7 @@ class ContentBased(RecommendationStrategy): @@ -100,6 +100,7 @@ class ContentBased(RecommendationStrategy):
100 100
101 def get_sugestion_from_profile(self,rec,user,profile,recommendation_size): 101 def get_sugestion_from_profile(self,rec,user,profile,recommendation_size):
102 query = xapian.Query(xapian.Query.OP_OR,profile) 102 query = xapian.Query(xapian.Query.OP_OR,profile)
  103 + print query
103 enquire = xapian.Enquire(rec.items_repository) 104 enquire = xapian.Enquire(rec.items_repository)
104 enquire.set_weighting_scheme(rec.weight) 105 enquire.set_weighting_scheme(rec.weight)
105 enquire.set_query(query) 106 enquire.set_query(query)
@@ -295,7 +296,7 @@ class KnnContent(Collaborative): @@ -295,7 +296,7 @@ class KnnContent(Collaborative):
295 weights = data.tfidf_weighting(rec.users_repository,neighborhood, 296 weights = data.tfidf_weighting(rec.users_repository,neighborhood,
296 PkgExpandDecider(user.items())) 297 PkgExpandDecider(user.items()))
297 profile = [w[0] for w in weights][:rec.cfg.profile_size] 298 profile = [w[0] for w in weights][:rec.cfg.profile_size]
298 - result = ContentBased().get_sugestion_from_profile(rec,user,profile,recommendation_size) 299 + result = ContentBased("tag",rec.cfg.profile_size).get_sugestion_from_profile(rec,user,profile,recommendation_size)
299 return result 300 return result
300 301
301 class KnnContentEset(Collaborative): 302 class KnnContentEset(Collaborative):
@@ -313,10 +314,10 @@ class KnnContentEset(Collaborative): @@ -313,10 +314,10 @@ class KnnContentEset(Collaborative):
313 neighbors_rset = self.get_neighborhood_rset(user,rec) 314 neighbors_rset = self.get_neighborhood_rset(user,rec)
314 enquire = self.get_enquire(rec) 315 enquire = self.get_enquire(rec)
315 # Retrieve relevant tags based on neighborhood profile expansion 316 # Retrieve relevant tags based on neighborhood profile expansion
316 - eset = enquire.get_eset(rec.cfg.profile_size,rset, 317 + eset = enquire.get_eset(rec.cfg.profile_size,neighbors_rset,
317 TagExpandDecider()) 318 TagExpandDecider())
318 profile = [e.term for e in eset] 319 profile = [e.term for e in eset]
319 - result = ContentBased().get_sugestion_from_profile(rec,user,profile,recommendation_size) 320 + result = ContentBased("tag",rec.cfg.profile_size).get_sugestion_from_profile(rec,user,profile,recommendation_size)
320 return result 321 return result
321 322
322 class Demographic(RecommendationStrategy): 323 class Demographic(RecommendationStrategy):