Commit 2fce1682e62e824dcbf897ea5aa2e2a30464a63a
Exists in
master
and in
1 other branch
Merge branch 'master' of github.com:tassia/AppRecommender
Showing
15 changed files
with
516 additions
and
55 deletions
Show diff stats
src/data.py
... | ... | @@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list): |
41 | 41 | return matches |
42 | 42 | |
43 | 43 | def axi_search_pkg_tags(axi,pkg): |
44 | - query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg) | |
45 | 44 | enquire = xapian.Enquire(axi) |
46 | - enquire.set_query(query) | |
45 | + enquire.set_query(xapian.Query("XP"+pkg)) | |
47 | 46 | matches = enquire.get_mset(0,1) |
47 | + if not matches: | |
48 | + logging.debug("Package %s not found in items repository" % pkg) | |
49 | + return [] | |
48 | 50 | for m in matches: |
49 | 51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if |
50 | 52 | term.term.startswith("XT")] |
51 | - return tags | |
53 | + return tags | |
52 | 54 | |
53 | 55 | def print_index(index): |
54 | 56 | output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n" |
... | ... | @@ -59,6 +61,32 @@ def print_index(index): |
59 | 61 | output += "\n---" |
60 | 62 | return output |
61 | 63 | |
64 | +class AppAptXapianIndex(xapian.WritableDatabase): | |
65 | + """ | |
66 | + Data source restricted to application packages: a copy of the apt-xapian-index keeping only documents that carry the "XTrole::program" term. | |
67 | + """ | |
68 | + def __init__(self,axi_path,path): | |
69 | + xapian.WritableDatabase.__init__(self,path, | |
70 | + xapian.DB_CREATE_OR_OVERWRITE) | |
71 | + axi = xapian.Database(axi_path) | |
72 | + logging.info("AptXapianIndex size: %d" % axi.get_doccount()) | |
73 | + for docid in range(1,axi.get_lastdocid()+1): | |
74 | + try: | |
75 | + doc = axi.get_document(docid) | |
76 | + allterms = [term.term for term in doc.termlist()] | |
77 | + if "XTrole::program" in allterms: | |
78 | + self.add_document(doc) | |
79 | + logging.info("Added doc %d." % docid) | |
80 | + else: | |
81 | + logging.info("Discarded doc %d." % docid) | |
82 | + except: | |
83 | + logging.info("Doc %d not found in axi." % docid) | |
84 | + logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." % | |
85 | + self.get_doccount(), self.get_lastdocid()) | |
86 | + | |
87 | + def __str__(self): | |
88 | + return print_index(self) | |
89 | + | |
62 | 90 | class SampleAptXapianIndex(xapian.WritableDatabase): |
63 | 91 | """ |
64 | 92 | Sample data source for packages information, mainly useful for tests. |
... | ... | @@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase): |
129 | 157 | """ |
130 | 158 | self.axi = xapian.Database(cfg.axi) |
131 | 159 | self.path = os.path.expanduser(cfg.popcon_index) |
160 | + self.source_dir = os.path.expanduser(cfg.popcon_dir) | |
132 | 161 | if not cfg.index_mode == "old" or not self.load_index(): |
133 | 162 | if not os.path.exists(cfg.popcon_dir): |
134 | 163 | os.makedirs(cfg.popcon_dir) |
... | ... | @@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase): |
205 | 234 | submission.user_id) |
206 | 235 | for pkg, freq in submission.packages.items(): |
207 | 236 | doc.add_term("XP"+pkg,freq) |
208 | - for tag in axi_search_pkg_tags(self.axi,pkg): | |
209 | - doc.add_term(tag,freq) | |
237 | + if axi_search_pkg_tags(self.axi,pkg): | |
238 | + for tag in axi_search_pkg_tags(self.axi,pkg): | |
239 | + doc.add_term(tag,freq) | |
210 | 240 | doc_id = self.add_document(doc) |
211 | 241 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
212 | 242 | # python garbage collector | ... | ... |
src/evaluation.py
... | ... | @@ -140,7 +140,7 @@ class F1(Metric): |
140 | 140 | p = Precision().run(evaluation) |
141 | 141 | r = Recall().run(evaluation) |
142 | 142 | if (p+r)>0: |
143 | - return float((2*p*r))/(p+r) | |
143 | + return float(2*((p*r)/(p+r))) | |
144 | 144 | else: |
145 | 145 | return 0 |
146 | 146 | |
... | ... | @@ -289,7 +289,7 @@ class CrossValidation: |
289 | 289 | result_size = int(self.recommender.items_repository.get_doccount()* |
290 | 290 | self.result_proportion) |
291 | 291 | predicted_result = self.recommender.get_recommendation(round_user,result_size) |
292 | - print len(round_partition) | |
292 | + #print len(round_partition) | |
293 | 293 | real_result = RecommendationResult(round_partition) |
294 | 294 | #logging.debug("Predicted result: %s",predicted_result) |
295 | 295 | evaluation = Evaluation(predicted_result,real_result, | ... | ... |
src/examples/cross_validation.py
... | ... | @@ -40,16 +40,20 @@ if __name__ == '__main__': |
40 | 40 | try: |
41 | 41 | cfg = Config() |
42 | 42 | rec = Recommender(cfg) |
43 | + print "\nRecommender strategy: ",rec.strategy.description | |
43 | 44 | user = LocalSystem() |
44 | - user.maximal_pkg_profile() | |
45 | - | |
45 | + #user.app_pkg_profile(rec.items_repository) | |
46 | + user.no_auto_pkg_profile() | |
46 | 47 | begin_time = datetime.datetime.now() |
47 | 48 | logging.debug("Cross-validation started at %s" % begin_time) |
48 | 49 | |
49 | 50 | metrics = [] |
50 | 51 | metrics.append(Precision()) |
51 | 52 | metrics.append(Recall()) |
52 | - validation = CrossValidation(0.3,10,rec,metrics) | |
53 | + metrics.append(F1()) | |
54 | + metrics.append(Accuracy()) | |
55 | + metrics.append(SimpleAccuracy()) | |
56 | + validation = CrossValidation(0.3,10,rec,metrics,0.005) | |
53 | 57 | validation.run(user) |
54 | 58 | print validation |
55 | 59 | ... | ... |
... | ... | @@ -0,0 +1,26 @@ |
1 | +[DEFAULT] | |
2 | +repetitions = 1 | |
3 | +iterations = 10 | |
4 | +path = 'results' | |
5 | +experiment = 'grid' | |
6 | +weight = ['bm25', 'trad'] | |
7 | +;profile_size = range(10,100,10) | |
8 | +sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] | |
9 | + | |
10 | +[content] | |
11 | +strategy = ['cb','cbt','cbd'] | |
12 | + | |
13 | +[clustering] | |
14 | +experiment = 'single' | |
15 | +;iterations = 4 | |
16 | +;medoids = range(2,6) | |
17 | +iterations = 6 | |
18 | +medoids = [100,500,1000,5000,10000,50000] | |
19 | +;disabled for this experiment | |
20 | +weight = 0 | |
21 | +profile_size = 0 | |
22 | +sample = 0 | |
23 | + | |
24 | +[colaborative] | |
25 | +users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"] | |
26 | +neighbors = range(10,1010,50) | ... | ... |
... | ... | @@ -0,0 +1,173 @@ |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + recommender suite - recommender experiments suite | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
12 | + | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
21 | + | |
22 | +import expsuite | |
23 | +import sys | |
24 | +sys.path.insert(0,'../') | |
25 | +from config import Config | |
26 | +from data import PopconXapianIndex, PopconSubmission | |
27 | +from recommender import Recommender | |
28 | +from user import LocalSystem, User | |
29 | +from evaluation import * | |
30 | +import logging | |
31 | +import random | |
32 | +import Gnuplot | |
33 | + | |
34 | +class ClusteringSuite(expsuite.PyExperimentSuite): | |
35 | + def reset(self, params, rep): | |
36 | + self.cfg = Config() | |
37 | + self.cfg.popcon_index = "../tests/test_data/.sample_pxi" | |
38 | + self.cfg.popcon_dir = "../tests/test_data/popcon_dir" | |
39 | + self.cfg.clusters_dir = "../tests/test_data/clusters_dir" | |
40 | + | |
41 | + if params['name'] == "clustering": | |
42 | + logging.info("Starting 'clustering' experiments suite...") | |
43 | + self.cfg.index_mode = "recluster" | |
44 | + | |
45 | + def iterate(self, params, rep, n): | |
46 | + if params['name'] == "clustering": | |
47 | + logging.info("Running iteration %d" % params['medoids'][n]) | |
48 | + self.cfg.k_medoids = params['medoids'][n] | |
49 | + pxi = PopconXapianIndex(self.cfg) | |
50 | + result = {'k_medoids': params['medoids'][n], | |
51 | + 'dispersion': pxi.cluster_dispersion} | |
52 | + else: | |
53 | + result = {} | |
54 | + return result | |
55 | + | |
56 | +class ContentBasedSuite(expsuite.PyExperimentSuite): | |
57 | + def reset(self, params, rep): | |
58 | + if params['name'].startswith("content"): | |
59 | + cfg = Config() | |
60 | + #if the index was not built yet | |
61 | + #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi") | |
62 | + cfg.axi = "data/AppAxi" | |
63 | + cfg.index_mode = "old" | |
64 | + cfg.weight = params['weight'] | |
65 | + self.rec = Recommender(cfg) | |
66 | + self.rec.set_strategy(params['strategy']) | |
67 | + self.repo_size = self.rec.items_repository.get_doccount() | |
68 | + self.user = LocalSystem() | |
69 | + self.user.app_pkg_profile(self.rec.items_repository) | |
70 | + self.user.no_auto_pkg_profile() | |
71 | + self.sample_size = int(len(self.user.pkg_profile)*params['sample']) | |
72 | + # iteration should be set to 10 in config file | |
73 | + #self.profile_size = range(10,101,10) | |
74 | + | |
75 | + def iterate(self, params, rep, n): | |
76 | + if params['name'].startswith("content"): | |
77 | + # Get full recommendation | |
78 | + item_score = dict.fromkeys(self.user.pkg_profile,1) | |
79 | + sample = {} | |
80 | + for i in range(self.sample_size): | |
81 | + item, score = item_score.popitem() | |
82 | + sample[item] = score | |
83 | + user = User(item_score) | |
84 | + recommendation = self.rec.get_recommendation(user,self.repo_size) | |
85 | + # Write recall log | |
86 | + recall_file = "results/content/recall/%s-%s-%.2f-%d" % \ | |
87 | + (params['strategy'],params['weight'],params['sample'],n) | |
88 | + output = open(recall_file,'w') | |
89 | + output.write("# weight=%s\n" % params['weight']) | |
90 | + output.write("# strategy=%s\n" % params['strategy']) | |
91 | + output.write("# sample=%f\n" % params['sample']) | |
92 | + output.write("\n%d %d %d\n" % \ | |
93 | + (self.repo_size,len(item_score),self.sample_size)) | |
94 | + notfound = [] | |
95 | + ranks = [] | |
96 | + for pkg in sample.keys(): | |
97 | + if pkg in recommendation.ranking: | |
98 | + ranks.append(recommendation.ranking.index(pkg)) | |
99 | + else: | |
100 | + notfound.append(pkg) | |
101 | + for r in sorted(ranks): | |
102 | + output.write(str(r)+"\n") | |
103 | + if notfound: | |
104 | + output.write("Out of recommendation:\n") | |
105 | + for pkg in notfound: | |
106 | + output.write(pkg+"\n") | |
107 | + output.close() | |
108 | + # Plot metrics summary | |
109 | + g = Gnuplot.Gnuplot() | |
110 | + g('set style data lines') | |
111 | + g.xlabel('Recommendation size') | |
112 | + accuracy = [] | |
113 | + precision = [] | |
114 | + recall = [] | |
115 | + f1 = [] | |
116 | + for size in range(1,len(recommendation.ranking)+1,100): | |
117 | + predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1)) | |
118 | + real = RecommendationResult(sample) | |
119 | + evaluation = Evaluation(predicted,real,self.repo_size) | |
120 | + accuracy.append([size,evaluation.run(Accuracy())]) | |
121 | + precision.append([size,evaluation.run(Precision())]) | |
122 | + recall.append([size,evaluation.run(Recall())]) | |
123 | + f1.append([size,evaluation.run(F1())]) | |
124 | + #print "accuracy", len(accuracy) | |
125 | + #print "precision", len(precision) | |
126 | + #print "recall", len(recall) | |
127 | + #print "f1", len(f1) | |
128 | + g.plot(Gnuplot.Data(accuracy,title="Accuracy"), | |
129 | + Gnuplot.Data(precision,title="Precision"), | |
130 | + Gnuplot.Data(recall,title="Recall"), | |
131 | + Gnuplot.Data(f1,title="F1")) | |
132 | + g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1) | |
133 | + result = {} | |
134 | + result = {'weight': params['weight'], | |
135 | + 'strategy': params['strategy'], | |
136 | + 'accuracy': accuracy[20], | |
137 | + 'precision': precision[20], | |
138 | + 'recall:': recall[20], | |
139 | + 'f1': f1[20]} | |
140 | + return result | |
141 | + | |
142 | +#class CollaborativeSuite(expsuite.PyExperimentSuite): | |
143 | +# def reset(self, params, rep): | |
144 | +# if params['name'].startswith("collaborative"): | |
145 | +# | |
146 | +# def iterate(self, params, rep, n): | |
147 | +# if params['name'].startswith("collaborative"): | |
148 | +# for root, dirs, files in os.walk(self.source_dir): | |
149 | +# for popcon_file in files: | |
150 | +# submission = PopconSubmission(os.path.join(root,popcon_file)) | |
151 | +# user = User(submission.packages) | |
152 | +# user.maximal_pkg_profile() | |
153 | +# rec.get_recommendation(user) | |
154 | +# precision = 0 | |
155 | +# result = {'weight': params['weight'], | |
156 | +# 'strategy': params['strategy'], | |
157 | +# 'profile_size': self.profile_size[n], | |
158 | +# 'accuracy': accuracy, | |
159 | +# 'precision': precision, | |
160 | +# 'recall:': recall, | |
161 | +# 'f1': } | |
162 | +# else: | |
163 | +# result = {} | |
164 | +# return result | |
165 | + | |
166 | +if __name__ == '__main__': | |
167 | + | |
168 | + if "clustering" in sys.argv or len(sys.argv)<3: | |
169 | + ClusteringSuite().start() | |
170 | + if "content" in sys.argv or len(sys.argv)<3: | |
171 | + ContentBasedSuite().start() | |
172 | + #if "collaborative" in sys.argv or len(sys.argv)<3: | |
173 | + #CollaborativeSuite().start() | ... | ... |
src/recommender.py
... | ... | @@ -28,12 +28,14 @@ class RecommendationResult: |
28 | 28 | """ |
29 | 29 | Class designed to describe a recommendation result: items and scores. |
30 | 30 | """ |
31 | - def __init__(self,item_score): | |
31 | + def __init__(self,item_score,ranking=0): | |
32 | 32 | """ |
33 | 33 | Set initial parameters. |
34 | 34 | """ |
35 | 35 | self.item_score = item_score |
36 | 36 | self.size = len(item_score) |
37 | + if ranking: | |
38 | + self.ranking = ranking | |
37 | 39 | |
38 | 40 | def __str__(self): |
39 | 41 | """ |
... | ... | @@ -64,13 +66,13 @@ class Recommender: |
64 | 66 | """ |
65 | 67 | Set initial parameters. |
66 | 68 | """ |
69 | + self.cfg = cfg | |
67 | 70 | self.items_repository = xapian.Database(cfg.axi) |
68 | 71 | self.set_strategy(cfg.strategy) |
69 | 72 | if cfg.weight == "bm25": |
70 | 73 | self.weight = xapian.BM25Weight() |
71 | 74 | else: |
72 | 75 | self.weight = xapian.TradWeight() |
73 | - self.cfg = cfg | |
74 | 76 | |
75 | 77 | def set_strategy(self,strategy_str): |
76 | 78 | """ |
... | ... | @@ -83,10 +85,10 @@ class Recommender: |
83 | 85 | if strategy_str == "cbd": |
84 | 86 | self.strategy = strategy.ContentBasedStrategy("desc") |
85 | 87 | if strategy_str == "col": |
86 | - self.strategy = strategy.CollaborativeStrategy(20) | |
87 | 88 | self.users_repository = data.PopconXapianIndex(self.cfg) |
89 | + self.strategy = strategy.CollaborativeStrategy(20) | |
88 | 90 | |
89 | - def get_recommendation(self,user,result_size=20): | |
91 | + def get_recommendation(self,user,result_size=100): | |
90 | 92 | """ |
91 | 93 | Produces recommendation using previously loaded strategy. |
92 | 94 | """ | ... | ... |
src/strategy.py
... | ... | @@ -42,6 +42,26 @@ class PkgMatchDecider(xapian.MatchDecider): |
42 | 42 | """ |
43 | 43 | return doc.get_data() not in self.pkgs_list |
44 | 44 | |
45 | +class AppMatchDecider(xapian.MatchDecider): | |
46 | + """ | |
47 | + Extend xapian.MatchDecider to consider only application packages. | |
48 | + """ | |
49 | + def __init__(self, pkgs_list, axi): | |
50 | + """ | |
51 | + Set initial parameters. | |
52 | + """ | |
53 | + xapian.MatchDecider.__init__(self) | |
54 | + self.pkgs_list = pkgs_list | |
55 | + self.axi = axi | |
56 | + | |
57 | + def __call__(self, doc): | |
58 | + """ | |
59 | + True if the package is an application ("XTrole::program") and is not already installed. | |
60 | + """ | |
61 | + tags = axi_search_pkg_tags(self.axi,doc.get_data()) | |
62 | + return (("XTrole::program" in tags) and | |
63 | + (doc.get_data() not in self.pkgs_list)) | |
64 | + | |
45 | 65 | class UserMatchDecider(xapian.MatchDecider): |
46 | 66 | """ |
47 | 67 | Extend xapian.MatchDecider to match similar profiles. |
... | ... | @@ -73,7 +93,32 @@ class PkgExpandDecider(xapian.ExpandDecider): |
73 | 93 | True if the term is a package. |
74 | 94 | """ |
75 | 95 | # [FIXME] return term.startswith("XP") |
76 | - return not term.startswith("XT") | |
96 | + #return not term.startswith("XT") | |
97 | + return term.startswith("XP") | |
98 | + | |
99 | +class AppExpandDecider(xapian.ExpandDecider): | |
100 | + """ | |
101 | + Extend xapian.ExpandDecider to consider applications only. | |
102 | + """ | |
103 | + def __init__(self,axi): | |
104 | + xapian.ExpandDecider.__init__(self) | |
105 | + self.axi = axi | |
106 | + | |
107 | + def __call__(self, term): | |
108 | + """ | |
109 | + True if the term refers to a package tagged as an application ("XTrole::program"). | |
110 | + """ | |
111 | + if not term.startswith("XT"): | |
112 | + package = term.lstrip("XP") | |
113 | + print package | |
114 | + tags = axi_search_pkg_tags(self.axi,package) | |
115 | + if "XTrole::program" in tags: | |
116 | + print tags | |
117 | + return True | |
118 | + else: | |
119 | + return False | |
120 | + else: | |
121 | + return False | |
77 | 122 | |
78 | 123 | class TagExpandDecider(xapian.ExpandDecider): |
79 | 124 | """ |
... | ... | @@ -100,7 +145,7 @@ class ContentBasedStrategy(RecommendationStrategy): |
100 | 145 | self.content = content |
101 | 146 | self.profile_size = profile_size |
102 | 147 | |
103 | - def run(self,rec,user,limit): | |
148 | + def run(self,rec,user,recommendation_size): | |
104 | 149 | """ |
105 | 150 | Perform recommendation strategy. |
106 | 151 | """ |
... | ... | @@ -113,35 +158,40 @@ class ContentBasedStrategy(RecommendationStrategy): |
113 | 158 | enquire.set_query(query) |
114 | 159 | try: |
115 | 160 | # retrieve matching packages |
116 | - mset = enquire.get_mset(0, limit, None, PkgMatchDecider(user.items())) | |
161 | + mset = enquire.get_mset(0, recommendation_size, None, | |
162 | + PkgMatchDecider(user.items())) | |
163 | + #AppMatchDecider(user.items(), | |
164 | + # rec.items_repository)) | |
117 | 165 | except xapian.DatabaseError as error: |
118 | 166 | logging.critical("Content-based strategy: "+error.get_msg()) |
119 | 167 | # compose result dictionary |
120 | 168 | item_score = {} |
169 | + ranking = [] | |
121 | 170 | for m in mset: |
171 | + #[FIXME] set this constraint somehow | |
172 | + #tags = axi_search_pkg_tags(rec.items_repository,m.document.get_data()) | |
173 | + #if "XTrole::program" in tags: | |
122 | 174 | item_score[m.document.get_data()] = m.weight |
123 | - return recommender.RecommendationResult(item_score) | |
175 | + ranking.append(m.document.get_data()) | |
176 | + | |
177 | + return recommender.RecommendationResult(item_score,ranking) | |
124 | 178 | |
125 | 179 | class CollaborativeStrategy(RecommendationStrategy): |
126 | 180 | """ |
127 | 181 | Collaborative recommendation strategy. |
128 | 182 | """ |
129 | - def __init__(self,k,clustering=1): | |
183 | + def __init__(self,k): | |
130 | 184 | self.description = "Collaborative" |
131 | - self.clustering = clustering | |
132 | 185 | self.neighbours = k |
133 | 186 | |
134 | - def run(self,rec,user,result_size): | |
187 | + def run(self,rec,user,recommendation_size): | |
135 | 188 | """ |
136 | 189 | Perform recommendation strategy. |
137 | 190 | """ |
138 | - profile = user.pkg_profile | |
191 | + profile = ["XP"+package for package in user.pkg_profile] | |
139 | 192 | # prepare index for querying user profile |
140 | 193 | query = xapian.Query(xapian.Query.OP_OR,profile) |
141 | - if self.clustering: | |
142 | - enquire = xapian.Enquire(rec.clustered_users_repository) | |
143 | - else: | |
144 | - enquire = xapian.Enquire(rec.users_repository) | |
194 | + enquire = xapian.Enquire(rec.users_repository) | |
145 | 195 | enquire.set_weighting_scheme(rec.weight) |
146 | 196 | enquire.set_query(query) |
147 | 197 | try: |
... | ... | @@ -155,27 +205,39 @@ class CollaborativeStrategy(RecommendationStrategy): |
155 | 205 | rset.add_document(m.document.get_docid()) |
156 | 206 | logging.debug(m.document.get_data()) |
157 | 207 | # retrieve most relevant packages |
158 | - eset = enquire.get_eset(result_size,rset,PkgExpandDecider()) | |
208 | + #eset = enquire.get_eset(recommendation_size,rset, | |
209 | + # AppExpandDecider(rec.items_repository)) | |
210 | + eset = enquire.get_eset(recommendation_size,rset,PkgExpandDecider()) | |
159 | 211 | # compose result dictionary |
160 | 212 | item_score = {} |
161 | - for package in eset: | |
162 | - item_score[package.term.lstrip("XP")] = package.weight | |
213 | + for e in eset: | |
214 | + package = e.term.lstrip("XP") | |
215 | + tags = axi_search_pkg_tags(rec.items_repository,package) | |
216 | + #[FIXME] set this constraint somehow | |
217 | + #if "XTrole::program" in tags: | |
218 | + item_score[package] = e.weight | |
163 | 219 | return recommender.RecommendationResult(item_score) |
164 | 220 | |
165 | 221 | class DemographicStrategy(RecommendationStrategy): |
166 | 222 | """ |
167 | 223 | Recommendation strategy based on demographic data. |
168 | 224 | """ |
225 | + #def __init__(self, result): | |
226 | + #self.result = result | |
169 | 227 | def __init__(self): |
170 | 228 | self.description = "Demographic" |
171 | 229 | logging.debug("Demographic recommendation not yet implemented.") |
172 | 230 | raise Error |
173 | 231 | |
174 | - def run(self,user,items_repository): | |
232 | + def run(self,rec,user,recommendation_size): | |
175 | 233 | """ |
176 | 234 | Perform recommendation strategy. |
177 | 235 | """ |
178 | - pass | |
236 | + ordered_result = self.result.get_prediction() | |
237 | + | |
238 | + for item,weight in ordered_result: | |
239 | + pass | |
240 | + | |
179 | 241 | |
180 | 242 | class KnowledgeBasedStrategy(RecommendationStrategy): |
181 | 243 | """ | ... | ... |
src/tests/data_tests.py
... | ... | @@ -22,14 +22,29 @@ __license__ = """ |
22 | 22 | import unittest2 |
23 | 23 | import shutil |
24 | 24 | import os |
25 | +import xapian | |
25 | 26 | import sys |
26 | 27 | sys.path.insert(0,'../') |
27 | -from data import PopconSubmission, PopconXapianIndex | |
28 | +from data import PopconSubmission, PopconXapianIndex, axi_search_pkg_tags | |
28 | 29 | from config import Config |
29 | 30 | |
30 | 31 | def suite(): |
31 | 32 | return unittest2.TestLoader().loadTestsFromTestCase(PopconSubmissionTests) |
32 | 33 | |
34 | +class AxiSearchTests(unittest2.TestCase): | |
35 | + @classmethod | |
36 | + def setUpClass(self): | |
37 | + cfg = Config() | |
38 | + self.axi = xapian.Database(cfg.axi) | |
39 | + | |
40 | + def test_search_pkg_tags(self): | |
41 | + tags = axi_search_pkg_tags(self.axi,'apticron') | |
42 | + self.assertEqual(set(tags),set(['XTadmin::package-management', | |
43 | + 'XTinterface::daemon', | |
44 | + 'XTnetwork::server', 'XTrole::program', | |
45 | + 'XTsuite::debian', 'XTuse::monitor', | |
46 | + 'XTworks-with::mail'])) | |
47 | + | |
33 | 48 | class PopconSubmissionTests(unittest2.TestCase): |
34 | 49 | @classmethod |
35 | 50 | def setUpClass(self): | ... | ... |
... | ... | @@ -0,0 +1,90 @@ |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + metricsTests - evaluation metrics test case | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
12 | + | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
21 | + | |
22 | +import xapian | |
23 | +import unittest2 | |
24 | +import sys | |
25 | +sys.path.insert(0,'../') | |
26 | +from evaluation import (Accuracy, Precision, Recall, F1, Coverage, | |
27 | + Evaluation, CrossValidation) | |
28 | +from recommender import RecommendationResult | |
29 | +from config import Config | |
30 | +from recommender import Recommender | |
31 | +from user import User | |
32 | +from data import SampleAptXapianIndex | |
33 | + | |
34 | +class MetricsTests(unittest2.TestCase): | |
35 | + @classmethod | |
36 | + def setUpClass(self): | |
37 | + repository = ['apple','grape','pineaple','melon','watermelon','orange'] | |
38 | + real = RecommendationResult(dict.fromkeys(['apple','grape','pineaple','melon'],1)) | |
39 | + predicted = RecommendationResult(dict.fromkeys(['apple','grape','orange'],1)) | |
40 | + self.evaluation = Evaluation(predicted,real,len(repository)) | |
41 | + | |
42 | + def test_class_accuracy(self): | |
43 | + accuracy = Accuracy().run(self.evaluation) | |
44 | + self.assertEqual(accuracy,0.5) | |
45 | + | |
46 | + def test_precision(self): | |
47 | + precision = Precision().run(self.evaluation) | |
48 | + self.assertEqual("%.2f" % precision,"0.67") | |
49 | + | |
50 | + def test_recall(self): | |
51 | + recall = Recall().run(self.evaluation) | |
52 | + self.assertEqual(recall,0.5) | |
53 | + | |
54 | + def test_f1(self): | |
55 | + f1 = F1().run(self.evaluation) | |
56 | + self.assertEqual("%.2f" % f1,"0.57") | |
57 | + | |
58 | + def test_coverage(self): | |
59 | + evaluations_set = set() | |
60 | + evaluations_set.add(self.evaluation) | |
61 | + coverage = Coverage().run(evaluations_set) | |
62 | + self.assertEqual(coverage,0.5) | |
63 | + | |
64 | + def test_evaluation(self): | |
65 | + self.assertEqual(self.evaluation.true_positive, ['apple','grape']) | |
66 | + self.assertEqual(self.evaluation.false_positive, ['orange']) | |
67 | + self.assertEqual(self.evaluation.false_negative, ['pineaple','melon']) | |
68 | + | |
69 | + def test_cross_validation(self): | |
70 | + cfg = Config() | |
71 | + axi = xapian.Database(cfg.axi) | |
72 | + packages = ["gimp","aaphoto","eog","emacs","dia","ferret", | |
73 | + "festival","file","inkscape","xpdf"] | |
74 | + path = "test_data/.sample_axi" | |
75 | + sample_axi = SampleAptXapianIndex(packages,axi,path) | |
76 | + rec = Recommender(cfg) | |
77 | + rec.items_repository = sample_axi | |
78 | + user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1}) | |
79 | + | |
80 | + metrics = [] | |
81 | + metrics.append(Precision()) | |
82 | + metrics.append(Recall()) | |
83 | + metrics.append(F1()) | |
84 | + | |
85 | + validation = CrossValidation(0.3,5,rec,metrics,0.5) | |
86 | + validation.run(user) | |
87 | + print validation | |
88 | + | |
89 | +if __name__ == '__main__': | |
90 | + unittest2.main() | ... | ... |
src/web/templates/about.html
... | ... | @@ -3,23 +3,23 @@ $var mod = 'about'; |
3 | 3 | $var cssfiles: |
4 | 4 | $var jsfiles: |
5 | 5 | |
6 | - | |
7 | 6 | <div id="maincontent"> |
8 | 7 | <div class="innertube"> |
9 | 8 | |
10 | 9 | <img style="float: right;" alt="AppRecommender logo" src="/static/images/logo.png" width="150px" /> |
11 | 10 | |
12 | -<h1>What is this?</h1> | |
13 | - | |
14 | -<p> | |
15 | -AppRecommender is a project in development that aims to provide solutions | |
16 | -for application recommendation at the GNU/Linux world. It was initially thought | |
17 | -as a Debian package recommender, but considering the multi-distro effort in | |
18 | -providing platform independent solutions, it should also follow this | |
19 | -principle. | |
20 | -</p> | |
11 | +<h1>About</h1> | |
21 | 12 | |
13 | +<p>This experiment aims to compare and validate automated application | |
14 | +recommendations produced by various strategies and algorithm tunings. Asking | |
15 | +real users about the relevance of the recommendation is the closest we can get | |
16 | +to the real accuracy of the recommender system.</p> | |
22 | 17 | |
18 | +<p>The engine that is being tested is free software called <a | |
19 | +href="http://github.com/tassia/AppRecommender">AppRecommender</a>. It was | |
20 | +initially developed using the Debian Project infrastructure, but the solution | |
21 | +is essentially distro-independent and could even be adapted to non-GNU/Linux | |
22 | +systems, provided that data is available for them.</p> | |
23 | 23 | |
24 | 24 | </div><!-- id="innertube" --> |
25 | 25 | </div><!-- id="maincontent" --> | ... | ... |
src/web/templates/apprec.html
src/web/templates/index.html
... | ... | @@ -670,18 +670,17 @@ function showtags(tagid) { |
670 | 670 | <h1>You might also like...</h1> |
671 | 671 | |
672 | 672 | <p>Provide a list of packages or upload a popcon submission file and you'll get |
673 | -a list of suggested packages automatically computed by AppRecommender. You can | |
674 | -customize the recommender setup or let it randomly choose one.</p> | |
673 | +a list of suggested packages automatically computed by AppRecommender.<!-- You can | |
674 | +customize the recommender setup or let it randomly choose one.--></p> | |
675 | 675 | |
676 | -<p>Please fill the form that follows the recommendation results. Your | |
676 | +<p>Given the recommendation result, please evaluate each application and | |
677 | +choose if you want to continue with another round of suggestions. | |
678 | +<!--fill the form that follows the recommendation results.--> </p><p>Your | |
677 | 679 | feedback is very much appreciated!</p> |
678 | 680 | |
679 | - | |
680 | -<p>Enjoy it :)</p> | |
681 | 681 | </div> |
682 | 682 | |
683 | 683 | </div><!-- class="innertube" --> |
684 | 684 | </div><!-- id="maincontent" --> |
685 | 685 | |
686 | 686 | |
687 | - | ... | ... |
src/web/templates/layout.html
... | ... | @@ -59,7 +59,7 @@ if (x==null || x=="Write your list App here or send a file list this icon:") |
59 | 59 | <fieldset> |
60 | 60 | <div id="submit-box"> |
61 | 61 | <input type="submit" value="RECOMMENDER" id="submit-button"><br /> |
62 | - <a id="advanced-button">advanced query?</a> | |
62 | + <!--<a id="advanced-button">advanced query?</a>--> | |
63 | 63 | </div> |
64 | 64 | <div id="input-box"> |
65 | 65 | <a href="#attachfile" rel="facebox" id="upfile"><span style="display: none;">Upload a file.</span></a> |
... | ... | @@ -165,16 +165,15 @@ $:content |
165 | 165 | <div id="footer"> |
166 | 166 | |
167 | 167 | <div id="navbar"> |
168 | - <ul> | |
168 | + <ul> | |
169 | 169 | <li><a href="$url_base">Home</a></li> |
170 | 170 | <li><a href="$url_base/about">About</a></li> |
171 | - <li><a href="$url_base/support">Support</a></li> | |
172 | 171 | <li><a href="http://github.com/tassia/AppRecommender">Development</a></li> |
173 | 172 | </ul> |
174 | 173 | </div><!-- id="navbar" --> |
175 | - <p id="copyright"> | |
176 | - Copyright © 2011 AppRecommender. Debian is a registered trademark of Software in the Public Interest, Inc. | |
177 | - </p> | |
174 | +<!-- <p id="copyright"> | |
175 | + Copyright © 2011 AppRecommender team. | |
176 | + </p> --> | |
178 | 177 | </div><!-- id="footer" --> |
179 | 178 | |
180 | 179 | ... | ... |
... | ... | @@ -0,0 +1,60 @@ |
1 | +$var title: Survey | |
2 | +$var mod = 'index'; | |
3 | +$var cssfiles: static/css/tabs.css static/css/debtags.css static/css/facebox.css | |
4 | +$var jsfiles: static/js/facebox.js | |
5 | + | |
6 | + | |
7 | +<!-- Dynamic form --> | |
8 | +<script type="application/x-javascript"> | |
9 | +window.onload = function() { | |
10 | + setupDependencies('weboptions'); //name of form(s). Separate each with a comma (e.g. 'weboptions', 'myotherform') | |
11 | + }; | |
12 | +</script> | |
13 | + | |
14 | +<script type="application/x-javascript"> | |
15 | +$$(document).ready(function() { | |
16 | + $$('a[rel*=facebox]').facebox({ | |
17 | + loadingImage : '/static/images/loading.gif', | |
18 | + closeImage : '/static/images/closelabel.png' | |
19 | + }); | |
20 | + $$("#tags-box").click(function () { | |
21 | + $$("#tags-box").hide(1000); | |
22 | + }); | |
23 | + | |
24 | +}); | |
25 | +</script> | |
26 | + | |
27 | + | |
28 | +<div id="sidebar"> | |
29 | +<div class="innertube"> | |
30 | + | |
31 | + | |
32 | +<br style="clear: both" /> | |
33 | +</div><!-- class="innertube" --> | |
34 | +</div><!-- id="sidebar" --> | |
35 | + | |
36 | +<div id="maincontent"> | |
37 | +<div class="innertube"> | |
38 | + | |
39 | +<div class="textbox"> | |
40 | +<h1>Help us learn your needs!</h1> | |
41 | + | |
42 | +<p>Participate in this survey and contribute to the development of | |
43 | +AppRecommender, a recommender system for GNU/Linux applications.</p> | |
44 | +<br /> | |
45 | +<p>Please provide the list of packages installed in a real running system, | |
46 | +by uploading a popcon submission or the file generated with the command: | |
47 | +"dpkg -l > packages_list".</p> | |
48 | + | |
49 | +<p>Evaluate at least 10 suggested applications and identify yourself if you | |
50 | +wish to. Upon the completion of this survey there will be a thank you page | |
51 | +listing all identified participants.</p> | |
52 | + | |
53 | +<p>Your help is very much appreciated!</p> | |
54 | + | |
55 | +</div> | |
56 | + | |
57 | +</div><!-- class="innertube" --> | |
58 | +</div><!-- id="maincontent" --> | |
59 | + | |
60 | + | ... | ... |