Commit 2fce1682e62e824dcbf897ea5aa2e2a30464a63a
Exists in
master
and in
1 other branch
Merge branch 'master' of github.com:tassia/AppRecommender
Showing
15 changed files
with
516 additions
and
55 deletions
Show diff stats
src/data.py
@@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list): | @@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list): | ||
41 | return matches | 41 | return matches |
42 | 42 | ||
43 | def axi_search_pkg_tags(axi,pkg): | 43 | def axi_search_pkg_tags(axi,pkg): |
44 | - query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg) | ||
45 | enquire = xapian.Enquire(axi) | 44 | enquire = xapian.Enquire(axi) |
46 | - enquire.set_query(query) | 45 | + enquire.set_query(xapian.Query("XP"+pkg)) |
47 | matches = enquire.get_mset(0,1) | 46 | matches = enquire.get_mset(0,1) |
47 | + if not matches: | ||
48 | + logging.debug("Package %s not found in items repository" % pkg) | ||
49 | + return [] | ||
48 | for m in matches: | 50 | for m in matches: |
49 | tags = [term.term for term in axi.get_document(m.docid).termlist() if | 51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if |
50 | term.term.startswith("XT")] | 52 | term.term.startswith("XT")] |
51 | - return tags | 53 | + return tags |
52 | 54 | ||
53 | def print_index(index): | 55 | def print_index(index): |
54 | output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n" | 56 | output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n" |
@@ -59,6 +61,32 @@ def print_index(index): | @@ -59,6 +61,32 @@ def print_index(index): | ||
59 | output += "\n---" | 61 | output += "\n---" |
60 | return output | 62 | return output |
61 | 63 | ||
64 | +class AppAptXapianIndex(xapian.WritableDatabase): | ||
65 | + """ | ||
66 | + Data source restricted to application packages: only documents tagged role::program are copied from the given index. | ||
67 | + """ | ||
68 | + def __init__(self,axi_path,path): | ||
69 | + xapian.WritableDatabase.__init__(self,path, | ||
70 | + xapian.DB_CREATE_OR_OVERWRITE) | ||
71 | + axi = xapian.Database(axi_path) | ||
72 | + logging.info("AptXapianIndex size: %d" % axi.get_doccount()) | ||
73 | + for docid in range(1,axi.get_lastdocid()+1): | ||
74 | + try: | ||
75 | + doc = axi.get_document(docid) | ||
76 | + allterms = [term.term for term in doc.termlist()] | ||
77 | + if "XTrole::program" in allterms: | ||
78 | + self.add_document(doc) | ||
79 | + logging.info("Added doc %d." % docid) | ||
80 | + else: | ||
81 | + logging.info("Discarded doc %d." % docid) | ||
82 | + except: | ||
83 | + logging.info("Doc %d not found in axi." % docid) | ||
84 | + logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." % | ||
85 | + self.get_doccount(), self.get_lastdocid()) | ||
86 | + | ||
87 | + def __str__(self): | ||
88 | + return print_index(self) | ||
89 | + | ||
62 | class SampleAptXapianIndex(xapian.WritableDatabase): | 90 | class SampleAptXapianIndex(xapian.WritableDatabase): |
63 | """ | 91 | """ |
64 | Sample data source for packages information, mainly useful for tests. | 92 | Sample data source for packages information, mainly useful for tests. |
@@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
129 | """ | 157 | """ |
130 | self.axi = xapian.Database(cfg.axi) | 158 | self.axi = xapian.Database(cfg.axi) |
131 | self.path = os.path.expanduser(cfg.popcon_index) | 159 | self.path = os.path.expanduser(cfg.popcon_index) |
160 | + self.source_dir = os.path.expanduser(cfg.popcon_dir) | ||
132 | if not cfg.index_mode == "old" or not self.load_index(): | 161 | if not cfg.index_mode == "old" or not self.load_index(): |
133 | if not os.path.exists(cfg.popcon_dir): | 162 | if not os.path.exists(cfg.popcon_dir): |
134 | os.makedirs(cfg.popcon_dir) | 163 | os.makedirs(cfg.popcon_dir) |
@@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
205 | submission.user_id) | 234 | submission.user_id) |
206 | for pkg, freq in submission.packages.items(): | 235 | for pkg, freq in submission.packages.items(): |
207 | doc.add_term("XP"+pkg,freq) | 236 | doc.add_term("XP"+pkg,freq) |
208 | - for tag in axi_search_pkg_tags(self.axi,pkg): | ||
209 | - doc.add_term(tag,freq) | 237 | + if axi_search_pkg_tags(self.axi,pkg): |
238 | + for tag in axi_search_pkg_tags(self.axi,pkg): | ||
239 | + doc.add_term(tag,freq) | ||
210 | doc_id = self.add_document(doc) | 240 | doc_id = self.add_document(doc) |
211 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) | 241 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
212 | # python garbage collector | 242 | # python garbage collector |
src/evaluation.py
@@ -140,7 +140,7 @@ class F1(Metric): | @@ -140,7 +140,7 @@ class F1(Metric): | ||
140 | p = Precision().run(evaluation) | 140 | p = Precision().run(evaluation) |
141 | r = Recall().run(evaluation) | 141 | r = Recall().run(evaluation) |
142 | if (p+r)>0: | 142 | if (p+r)>0: |
143 | - return float((2*p*r))/(p+r) | 143 | + return float(2*((p*r)/(p+r))) |
144 | else: | 144 | else: |
145 | return 0 | 145 | return 0 |
146 | 146 | ||
@@ -289,7 +289,7 @@ class CrossValidation: | @@ -289,7 +289,7 @@ class CrossValidation: | ||
289 | result_size = int(self.recommender.items_repository.get_doccount()* | 289 | result_size = int(self.recommender.items_repository.get_doccount()* |
290 | self.result_proportion) | 290 | self.result_proportion) |
291 | predicted_result = self.recommender.get_recommendation(round_user,result_size) | 291 | predicted_result = self.recommender.get_recommendation(round_user,result_size) |
292 | - print len(round_partition) | 292 | + #print len(round_partition) |
293 | real_result = RecommendationResult(round_partition) | 293 | real_result = RecommendationResult(round_partition) |
294 | #logging.debug("Predicted result: %s",predicted_result) | 294 | #logging.debug("Predicted result: %s",predicted_result) |
295 | evaluation = Evaluation(predicted_result,real_result, | 295 | evaluation = Evaluation(predicted_result,real_result, |
src/examples/cross_validation.py
@@ -40,16 +40,20 @@ if __name__ == '__main__': | @@ -40,16 +40,20 @@ if __name__ == '__main__': | ||
40 | try: | 40 | try: |
41 | cfg = Config() | 41 | cfg = Config() |
42 | rec = Recommender(cfg) | 42 | rec = Recommender(cfg) |
43 | + print "\nRecommender strategy: ",rec.strategy.description | ||
43 | user = LocalSystem() | 44 | user = LocalSystem() |
44 | - user.maximal_pkg_profile() | ||
45 | - | 45 | + #user.app_pkg_profile(rec.items_repository) |
46 | + user.no_auto_pkg_profile() | ||
46 | begin_time = datetime.datetime.now() | 47 | begin_time = datetime.datetime.now() |
47 | logging.debug("Cross-validation started at %s" % begin_time) | 48 | logging.debug("Cross-validation started at %s" % begin_time) |
48 | 49 | ||
49 | metrics = [] | 50 | metrics = [] |
50 | metrics.append(Precision()) | 51 | metrics.append(Precision()) |
51 | metrics.append(Recall()) | 52 | metrics.append(Recall()) |
52 | - validation = CrossValidation(0.3,10,rec,metrics) | 53 | + metrics.append(F1()) |
54 | + metrics.append(Accuracy()) | ||
55 | + metrics.append(SimpleAccuracy()) | ||
56 | + validation = CrossValidation(0.3,10,rec,metrics,0.005) | ||
53 | validation.run(user) | 57 | validation.run(user) |
54 | print validation | 58 | print validation |
55 | 59 |
@@ -0,0 +1,26 @@ | @@ -0,0 +1,26 @@ | ||
1 | +[DEFAULT] | ||
2 | +repetitions = 1 | ||
3 | +iterations = 10 | ||
4 | +path = 'results' | ||
5 | +experiment = 'grid' | ||
6 | +weight = ['bm25', 'trad'] | ||
7 | +;profile_size = range(10,100,10) | ||
8 | +sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] | ||
9 | + | ||
10 | +[content] | ||
11 | +strategy = ['cb','cbt','cbd'] | ||
12 | + | ||
13 | +[clustering] | ||
14 | +experiment = 'single' | ||
15 | +;iterations = 4 | ||
16 | +;medoids = range(2,6) | ||
17 | +iterations = 6 | ||
18 | +medoids = [100,500,1000,5000,10000,50000] | ||
19 | +;disabled for this experiment | ||
20 | +weight = 0 | ||
21 | +profile_size = 0 | ||
22 | +sample = 0 | ||
23 | + | ||
24 | +[colaborative] | ||
25 | +users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"] | ||
26 | +neighbors = range(10,1010,50) |
@@ -0,0 +1,173 @@ | @@ -0,0 +1,173 @@ | ||
1 | +#!/usr/bin/env python | ||
2 | +""" | ||
3 | + recommender suite - recommender experiments suite | ||
4 | +""" | ||
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | ||
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | ||
7 | +__license__ = """ | ||
8 | + This program is free software: you can redistribute it and/or modify | ||
9 | + it under the terms of the GNU General Public License as published by | ||
10 | + the Free Software Foundation, either version 3 of the License, or | ||
11 | + (at your option) any later version. | ||
12 | + | ||
13 | + This program is distributed in the hope that it will be useful, | ||
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | + GNU General Public License for more details. | ||
17 | + | ||
18 | + You should have received a copy of the GNU General Public License | ||
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
20 | +""" | ||
21 | + | ||
22 | +import expsuite | ||
23 | +import sys | ||
24 | +sys.path.insert(0,'../') | ||
25 | +from config import Config | ||
26 | +from data import PopconXapianIndex, PopconSubmission | ||
27 | +from recommender import Recommender | ||
28 | +from user import LocalSystem, User | ||
29 | +from evaluation import * | ||
30 | +import logging | ||
31 | +import random | ||
32 | +import Gnuplot | ||
33 | + | ||
34 | +class ClusteringSuite(expsuite.PyExperimentSuite): | ||
35 | + def reset(self, params, rep): | ||
36 | + self.cfg = Config() | ||
37 | + self.cfg.popcon_index = "../tests/test_data/.sample_pxi" | ||
38 | + self.cfg.popcon_dir = "../tests/test_data/popcon_dir" | ||
39 | + self.cfg.clusters_dir = "../tests/test_data/clusters_dir" | ||
40 | + | ||
41 | + if params['name'] == "clustering": | ||
42 | + logging.info("Starting 'clustering' experiments suite...") | ||
43 | + self.cfg.index_mode = "recluster" | ||
44 | + | ||
45 | + def iterate(self, params, rep, n): | ||
46 | + if params['name'] == "clustering": | ||
47 | + logging.info("Running iteration %d" % params['medoids'][n]) | ||
48 | + self.cfg.k_medoids = params['medoids'][n] | ||
49 | + pxi = PopconXapianIndex(self.cfg) | ||
50 | + result = {'k_medoids': params['medoids'][n], | ||
51 | + 'dispersion': pxi.cluster_dispersion} | ||
52 | + else: | ||
53 | + result = {} | ||
54 | + return result | ||
55 | + | ||
56 | +class ContentBasedSuite(expsuite.PyExperimentSuite): | ||
57 | + def reset(self, params, rep): | ||
58 | + if params['name'].startswith("content"): | ||
59 | + cfg = Config() | ||
60 | + #if the index was not built yet | ||
61 | + #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi") | ||
62 | + cfg.axi = "data/AppAxi" | ||
63 | + cfg.index_mode = "old" | ||
64 | + cfg.weight = params['weight'] | ||
65 | + self.rec = Recommender(cfg) | ||
66 | + self.rec.set_strategy(params['strategy']) | ||
67 | + self.repo_size = self.rec.items_repository.get_doccount() | ||
68 | + self.user = LocalSystem() | ||
69 | + self.user.app_pkg_profile(self.rec.items_repository) | ||
70 | + self.user.no_auto_pkg_profile() | ||
71 | + self.sample_size = int(len(self.user.pkg_profile)*params['sample']) | ||
72 | + # iteration should be set to 10 in config file | ||
73 | + #self.profile_size = range(10,101,10) | ||
74 | + | ||
75 | + def iterate(self, params, rep, n): | ||
76 | + if params['name'].startswith("content"): | ||
77 | + # Get full recommendation | ||
78 | + item_score = dict.fromkeys(self.user.pkg_profile,1) | ||
79 | + sample = {} | ||
80 | + for i in range(self.sample_size): | ||
81 | + item, score = item_score.popitem() | ||
82 | + sample[item] = score | ||
83 | + user = User(item_score) | ||
84 | + recommendation = self.rec.get_recommendation(user,self.repo_size) | ||
85 | + # Write recall log | ||
86 | + recall_file = "results/content/recall/%s-%s-%.2f-%d" % \ | ||
87 | + (params['strategy'],params['weight'],params['sample'],n) | ||
88 | + output = open(recall_file,'w') | ||
89 | + output.write("# weight=%s\n" % params['weight']) | ||
90 | + output.write("# strategy=%s\n" % params['strategy']) | ||
91 | + output.write("# sample=%f\n" % params['sample']) | ||
92 | + output.write("\n%d %d %d\n" % \ | ||
93 | + (self.repo_size,len(item_score),self.sample_size)) | ||
94 | + notfound = [] | ||
95 | + ranks = [] | ||
96 | + for pkg in sample.keys(): | ||
97 | + if pkg in recommendation.ranking: | ||
98 | + ranks.append(recommendation.ranking.index(pkg)) | ||
99 | + else: | ||
100 | + notfound.append(pkg) | ||
101 | + for r in sorted(ranks): | ||
102 | + output.write(str(r)+"\n") | ||
103 | + if notfound: | ||
104 | + output.write("Out of recommendation:\n") | ||
105 | + for pkg in notfound: | ||
106 | + output.write(pkg+"\n") | ||
107 | + output.close() | ||
108 | + # Plot metrics summary | ||
109 | + g = Gnuplot.Gnuplot() | ||
110 | + g('set style data lines') | ||
111 | + g.xlabel('Recommendation size') | ||
112 | + accuracy = [] | ||
113 | + precision = [] | ||
114 | + recall = [] | ||
115 | + f1 = [] | ||
116 | + for size in range(1,len(recommendation.ranking)+1,100): | ||
117 | + predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1)) | ||
118 | + real = RecommendationResult(sample) | ||
119 | + evaluation = Evaluation(predicted,real,self.repo_size) | ||
120 | + accuracy.append([size,evaluation.run(Accuracy())]) | ||
121 | + precision.append([size,evaluation.run(Precision())]) | ||
122 | + recall.append([size,evaluation.run(Recall())]) | ||
123 | + f1.append([size,evaluation.run(F1())]) | ||
124 | + #print "accuracy", len(accuracy) | ||
125 | + #print "precision", len(precision) | ||
126 | + #print "recall", len(recall) | ||
127 | + #print "f1", len(f1) | ||
128 | + g.plot(Gnuplot.Data(accuracy,title="Accuracy"), | ||
129 | + Gnuplot.Data(precision,title="Precision"), | ||
130 | + Gnuplot.Data(recall,title="Recall"), | ||
131 | + Gnuplot.Data(f1,title="F1")) | ||
132 | + g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1) | ||
133 | + result = {} | ||
134 | + result = {'weight': params['weight'], | ||
135 | + 'strategy': params['strategy'], | ||
136 | + 'accuracy': accuracy[20], | ||
137 | + 'precision': precision[20], | ||
138 | + 'recall:': recall[20], | ||
139 | + 'f1': f1[20]} | ||
140 | + return result | ||
141 | + | ||
142 | +#class CollaborativeSuite(expsuite.PyExperimentSuite): | ||
143 | +# def reset(self, params, rep): | ||
144 | +# if params['name'].startswith("collaborative"): | ||
145 | +# | ||
146 | +# def iterate(self, params, rep, n): | ||
147 | +# if params['name'].startswith("collaborative"): | ||
148 | +# for root, dirs, files in os.walk(self.source_dir): | ||
149 | +# for popcon_file in files: | ||
150 | +# submission = PopconSubmission(os.path.join(root,popcon_file)) | ||
151 | +# user = User(submission.packages) | ||
152 | +# user.maximal_pkg_profile() | ||
153 | +# rec.get_recommendation(user) | ||
154 | +# precision = 0 | ||
155 | +# result = {'weight': params['weight'], | ||
156 | +# 'strategy': params['strategy'], | ||
157 | +# 'profile_size': self.profile_size[n], | ||
158 | +# 'accuracy': accuracy, | ||
159 | +# 'precision': precision, | ||
160 | +# 'recall:': recall, | ||
161 | +# 'f1': } | ||
162 | +# else: | ||
163 | +# result = {} | ||
164 | +# return result | ||
165 | + | ||
166 | +if __name__ == '__main__': | ||
167 | + | ||
168 | + if "clustering" in sys.argv or len(sys.argv)<3: | ||
169 | + ClusteringSuite().start() | ||
170 | + if "content" in sys.argv or len(sys.argv)<3: | ||
171 | + ContentBasedSuite().start() | ||
172 | + #if "collaborative" in sys.argv or len(sys.argv)<3: | ||
173 | + #CollaborativeSuite().start() |
src/recommender.py
@@ -28,12 +28,14 @@ class RecommendationResult: | @@ -28,12 +28,14 @@ class RecommendationResult: | ||
28 | """ | 28 | """ |
29 | Class designed to describe a recommendation result: items and scores. | 29 | Class designed to describe a recommendation result: items and scores. |
30 | """ | 30 | """ |
31 | - def __init__(self,item_score): | 31 | + def __init__(self,item_score,ranking=0): |
32 | """ | 32 | """ |
33 | Set initial parameters. | 33 | Set initial parameters. |
34 | """ | 34 | """ |
35 | self.item_score = item_score | 35 | self.item_score = item_score |
36 | self.size = len(item_score) | 36 | self.size = len(item_score) |
37 | + if ranking: | ||
38 | + self.ranking = ranking | ||
37 | 39 | ||
38 | def __str__(self): | 40 | def __str__(self): |
39 | """ | 41 | """ |
@@ -64,13 +66,13 @@ class Recommender: | @@ -64,13 +66,13 @@ class Recommender: | ||
64 | """ | 66 | """ |
65 | Set initial parameters. | 67 | Set initial parameters. |
66 | """ | 68 | """ |
69 | + self.cfg = cfg | ||
67 | self.items_repository = xapian.Database(cfg.axi) | 70 | self.items_repository = xapian.Database(cfg.axi) |
68 | self.set_strategy(cfg.strategy) | 71 | self.set_strategy(cfg.strategy) |
69 | if cfg.weight == "bm25": | 72 | if cfg.weight == "bm25": |
70 | self.weight = xapian.BM25Weight() | 73 | self.weight = xapian.BM25Weight() |
71 | else: | 74 | else: |
72 | self.weight = xapian.TradWeight() | 75 | self.weight = xapian.TradWeight() |
73 | - self.cfg = cfg | ||
74 | 76 | ||
75 | def set_strategy(self,strategy_str): | 77 | def set_strategy(self,strategy_str): |
76 | """ | 78 | """ |
@@ -83,10 +85,10 @@ class Recommender: | @@ -83,10 +85,10 @@ class Recommender: | ||
83 | if strategy_str == "cbd": | 85 | if strategy_str == "cbd": |
84 | self.strategy = strategy.ContentBasedStrategy("desc") | 86 | self.strategy = strategy.ContentBasedStrategy("desc") |
85 | if strategy_str == "col": | 87 | if strategy_str == "col": |
86 | - self.strategy = strategy.CollaborativeStrategy(20) | ||
87 | self.users_repository = data.PopconXapianIndex(self.cfg) | 88 | self.users_repository = data.PopconXapianIndex(self.cfg) |
89 | + self.strategy = strategy.CollaborativeStrategy(20) | ||
88 | 90 | ||
89 | - def get_recommendation(self,user,result_size=20): | 91 | + def get_recommendation(self,user,result_size=100): |
90 | """ | 92 | """ |
91 | Produces recommendation using previously loaded strategy. | 93 | Produces recommendation using previously loaded strategy. |
92 | """ | 94 | """ |
src/strategy.py
@@ -42,6 +42,26 @@ class PkgMatchDecider(xapian.MatchDecider): | @@ -42,6 +42,26 @@ class PkgMatchDecider(xapian.MatchDecider): | ||
42 | """ | 42 | """ |
43 | return doc.get_data() not in self.pkgs_list | 43 | return doc.get_data() not in self.pkgs_list |
44 | 44 | ||
45 | +class AppMatchDecider(xapian.MatchDecider): | ||
46 | + """ | ||
47 | + Extend xapian.MatchDecider to consider only application packages. | ||
48 | + """ | ||
49 | + def __init__(self, pkgs_list, axi): | ||
50 | + """ | ||
51 | + Set initial parameters. | ||
52 | + """ | ||
53 | + xapian.MatchDecider.__init__(self) | ||
54 | + self.pkgs_list = pkgs_list | ||
55 | + self.axi = axi | ||
56 | + | ||
57 | + def __call__(self, doc): | ||
58 | + """ | ||
59 | + True if the package is tagged role::program and is not in the given packages list. | ||
60 | + """ | ||
61 | + tags = axi_search_pkg_tags(self.axi,doc.get_data()) | ||
62 | + return (("XTrole::program" in tags) and | ||
63 | + (doc.get_data() not in self.pkgs_list)) | ||
64 | + | ||
45 | class UserMatchDecider(xapian.MatchDecider): | 65 | class UserMatchDecider(xapian.MatchDecider): |
46 | """ | 66 | """ |
47 | Extend xapian.MatchDecider to match similar profiles. | 67 | Extend xapian.MatchDecider to match similar profiles. |
@@ -73,7 +93,32 @@ class PkgExpandDecider(xapian.ExpandDecider): | @@ -73,7 +93,32 @@ class PkgExpandDecider(xapian.ExpandDecider): | ||
73 | True if the term is a package. | 93 | True if the term is a package. |
74 | """ | 94 | """ |
75 | # [FIXME] return term.startswith("XP") | 95 | # [FIXME] return term.startswith("XP") |
76 | - return not term.startswith("XT") | 96 | + #return not term.startswith("XT") |
97 | + return term.startswith("XP") | ||
98 | + | ||
99 | +class AppExpandDecider(xapian.ExpandDecider): | ||
100 | + """ | ||
101 | + Extend xapian.ExpandDecider to consider applications only. | ||
102 | + """ | ||
103 | + def __init__(self,axi): | ||
104 | + xapian.ExpandDecider.__init__(self) | ||
105 | + self.axi = axi | ||
106 | + | ||
107 | + def __call__(self, term): | ||
108 | + """ | ||
109 | + True if the term is a package tagged as an application (role::program). | ||
110 | + """ | ||
111 | + if not term.startswith("XT"): | ||
112 | + package = term.lstrip("XP") | ||
113 | + print package | ||
114 | + tags = axi_search_pkg_tags(self.axi,package) | ||
115 | + if "XTrole::program" in tags: | ||
116 | + print tags | ||
117 | + return True | ||
118 | + else: | ||
119 | + return False | ||
120 | + else: | ||
121 | + return False | ||
77 | 122 | ||
78 | class TagExpandDecider(xapian.ExpandDecider): | 123 | class TagExpandDecider(xapian.ExpandDecider): |
79 | """ | 124 | """ |
@@ -100,7 +145,7 @@ class ContentBasedStrategy(RecommendationStrategy): | @@ -100,7 +145,7 @@ class ContentBasedStrategy(RecommendationStrategy): | ||
100 | self.content = content | 145 | self.content = content |
101 | self.profile_size = profile_size | 146 | self.profile_size = profile_size |
102 | 147 | ||
103 | - def run(self,rec,user,limit): | 148 | + def run(self,rec,user,recommendation_size): |
104 | """ | 149 | """ |
105 | Perform recommendation strategy. | 150 | Perform recommendation strategy. |
106 | """ | 151 | """ |
@@ -113,35 +158,40 @@ class ContentBasedStrategy(RecommendationStrategy): | @@ -113,35 +158,40 @@ class ContentBasedStrategy(RecommendationStrategy): | ||
113 | enquire.set_query(query) | 158 | enquire.set_query(query) |
114 | try: | 159 | try: |
115 | # retrieve matching packages | 160 | # retrieve matching packages |
116 | - mset = enquire.get_mset(0, limit, None, PkgMatchDecider(user.items())) | 161 | + mset = enquire.get_mset(0, recommendation_size, None, |
162 | + PkgMatchDecider(user.items())) | ||
163 | + #AppMatchDecider(user.items(), | ||
164 | + # rec.items_repository)) | ||
117 | except xapian.DatabaseError as error: | 165 | except xapian.DatabaseError as error: |
118 | logging.critical("Content-based strategy: "+error.get_msg()) | 166 | logging.critical("Content-based strategy: "+error.get_msg()) |
119 | # compose result dictionary | 167 | # compose result dictionary |
120 | item_score = {} | 168 | item_score = {} |
169 | + ranking = [] | ||
121 | for m in mset: | 170 | for m in mset: |
171 | + #[FIXME] set this constraint somehow | ||
172 | + #tags = axi_search_pkg_tags(rec.items_repository,m.document.get_data()) | ||
173 | + #if "XTrole::program" in tags: | ||
122 | item_score[m.document.get_data()] = m.weight | 174 | item_score[m.document.get_data()] = m.weight |
123 | - return recommender.RecommendationResult(item_score) | 175 | + ranking.append(m.document.get_data()) |
176 | + | ||
177 | + return recommender.RecommendationResult(item_score,ranking) | ||
124 | 178 | ||
125 | class CollaborativeStrategy(RecommendationStrategy): | 179 | class CollaborativeStrategy(RecommendationStrategy): |
126 | """ | 180 | """ |
127 | Collaborative recommendation strategy. | 181 | Collaborative recommendation strategy. |
128 | """ | 182 | """ |
129 | - def __init__(self,k,clustering=1): | 183 | + def __init__(self,k): |
130 | self.description = "Collaborative" | 184 | self.description = "Collaborative" |
131 | - self.clustering = clustering | ||
132 | self.neighbours = k | 185 | self.neighbours = k |
133 | 186 | ||
134 | - def run(self,rec,user,result_size): | 187 | + def run(self,rec,user,recommendation_size): |
135 | """ | 188 | """ |
136 | Perform recommendation strategy. | 189 | Perform recommendation strategy. |
137 | """ | 190 | """ |
138 | - profile = user.pkg_profile | 191 | + profile = ["XP"+package for package in user.pkg_profile] |
139 | # prepare index for querying user profile | 192 | # prepare index for querying user profile |
140 | query = xapian.Query(xapian.Query.OP_OR,profile) | 193 | query = xapian.Query(xapian.Query.OP_OR,profile) |
141 | - if self.clustering: | ||
142 | - enquire = xapian.Enquire(rec.clustered_users_repository) | ||
143 | - else: | ||
144 | - enquire = xapian.Enquire(rec.users_repository) | 194 | + enquire = xapian.Enquire(rec.users_repository) |
145 | enquire.set_weighting_scheme(rec.weight) | 195 | enquire.set_weighting_scheme(rec.weight) |
146 | enquire.set_query(query) | 196 | enquire.set_query(query) |
147 | try: | 197 | try: |
@@ -155,27 +205,39 @@ class CollaborativeStrategy(RecommendationStrategy): | @@ -155,27 +205,39 @@ class CollaborativeStrategy(RecommendationStrategy): | ||
155 | rset.add_document(m.document.get_docid()) | 205 | rset.add_document(m.document.get_docid()) |
156 | logging.debug(m.document.get_data()) | 206 | logging.debug(m.document.get_data()) |
157 | # retrieve most relevant packages | 207 | # retrieve most relevant packages |
158 | - eset = enquire.get_eset(result_size,rset,PkgExpandDecider()) | 208 | + #eset = enquire.get_eset(recommendation_size,rset, |
209 | + # AppExpandDecider(rec.items_repository)) | ||
210 | + eset = enquire.get_eset(recommendation_size,rset,PkgExpandDecider()) | ||
159 | # compose result dictionary | 211 | # compose result dictionary |
160 | item_score = {} | 212 | item_score = {} |
161 | - for package in eset: | ||
162 | - item_score[package.term.lstrip("XP")] = package.weight | 213 | + for e in eset: |
214 | + package = e.term.lstrip("XP") | ||
215 | + tags = axi_search_pkg_tags(rec.items_repository,package) | ||
216 | + #[FIXME] set this constraint somehow | ||
217 | + #if "XTrole::program" in tags: | ||
218 | + item_score[package] = e.weight | ||
163 | return recommender.RecommendationResult(item_score) | 219 | return recommender.RecommendationResult(item_score) |
164 | 220 | ||
165 | class DemographicStrategy(RecommendationStrategy): | 221 | class DemographicStrategy(RecommendationStrategy): |
166 | """ | 222 | """ |
167 | Recommendation strategy based on demographic data. | 223 | Recommendation strategy based on demographic data. |
168 | """ | 224 | """ |
225 | + #def __init__(self, result): | ||
226 | + #self.result = result | ||
169 | def __init__(self): | 227 | def __init__(self): |
170 | self.description = "Demographic" | 228 | self.description = "Demographic" |
171 | logging.debug("Demographic recommendation not yet implemented.") | 229 | logging.debug("Demographic recommendation not yet implemented.") |
172 | raise Error | 230 | raise Error |
173 | 231 | ||
174 | - def run(self,user,items_repository): | 232 | + def run(self,rec,user,recommendation_size): |
175 | """ | 233 | """ |
176 | Perform recommendation strategy. | 234 | Perform recommendation strategy. |
177 | """ | 235 | """ |
178 | - pass | 236 | + ordered_result = self.result.get_prediction() |
237 | + | ||
238 | + for item,weight in ordered_result: | ||
239 | + pass | ||
240 | + | ||
179 | 241 | ||
180 | class KnowledgeBasedStrategy(RecommendationStrategy): | 242 | class KnowledgeBasedStrategy(RecommendationStrategy): |
181 | """ | 243 | """ |
src/tests/data_tests.py
@@ -22,14 +22,29 @@ __license__ = """ | @@ -22,14 +22,29 @@ __license__ = """ | ||
22 | import unittest2 | 22 | import unittest2 |
23 | import shutil | 23 | import shutil |
24 | import os | 24 | import os |
25 | +import xapian | ||
25 | import sys | 26 | import sys |
26 | sys.path.insert(0,'../') | 27 | sys.path.insert(0,'../') |
27 | -from data import PopconSubmission, PopconXapianIndex | 28 | +from data import PopconSubmission, PopconXapianIndex, axi_search_pkg_tags |
28 | from config import Config | 29 | from config import Config |
29 | 30 | ||
30 | def suite(): | 31 | def suite(): |
31 | return unittest2.TestLoader().loadTestsFromTestCase(PopconSubmissionTests) | 32 | return unittest2.TestLoader().loadTestsFromTestCase(PopconSubmissionTests) |
32 | 33 | ||
34 | +class AxiSearchTests(unittest2.TestCase): | ||
35 | + @classmethod | ||
36 | + def setUpClass(self): | ||
37 | + cfg = Config() | ||
38 | + self.axi = xapian.Database(cfg.axi) | ||
39 | + | ||
40 | + def test_search_pkg_tags(self): | ||
41 | + tags = axi_search_pkg_tags(self.axi,'apticron') | ||
42 | + self.assertEqual(set(tags),set(['XTadmin::package-management', | ||
43 | + 'XTinterface::daemon', | ||
44 | + 'XTnetwork::server', 'XTrole::program', | ||
45 | + 'XTsuite::debian', 'XTuse::monitor', | ||
46 | + 'XTworks-with::mail'])) | ||
47 | + | ||
33 | class PopconSubmissionTests(unittest2.TestCase): | 48 | class PopconSubmissionTests(unittest2.TestCase): |
34 | @classmethod | 49 | @classmethod |
35 | def setUpClass(self): | 50 | def setUpClass(self): |
@@ -0,0 +1,90 @@ | @@ -0,0 +1,90 @@ | ||
1 | +#!/usr/bin/env python | ||
2 | +""" | ||
3 | + singletonTests - Singleton class test case | ||
4 | +""" | ||
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | ||
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | ||
7 | +__license__ = """ | ||
8 | + This program is free software: you can redistribute it and/or modify | ||
9 | + it under the terms of the GNU General Public License as published by | ||
10 | + the Free Software Foundation, either version 3 of the License, or | ||
11 | + (at your option) any later version. | ||
12 | + | ||
13 | + This program is distributed in the hope that it will be useful, | ||
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | + GNU General Public License for more details. | ||
17 | + | ||
18 | + You should have received a copy of the GNU General Public License | ||
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
20 | +""" | ||
21 | + | ||
22 | +import xapian | ||
23 | +import unittest2 | ||
24 | +import sys | ||
25 | +sys.path.insert(0,'../') | ||
26 | +from evaluation import (Accuracy, Precision, Recall, F1, Coverage, | ||
27 | + Evaluation, CrossValidation) | ||
28 | +from recommender import RecommendationResult | ||
29 | +from config import Config | ||
30 | +from recommender import Recommender | ||
31 | +from user import User | ||
32 | +from data import SampleAptXapianIndex | ||
33 | + | ||
34 | +class MetricsTests(unittest2.TestCase): | ||
35 | + @classmethod | ||
36 | + def setUpClass(self): | ||
37 | + repository = ['apple','grape','pineaple','melon','watermelon','orange'] | ||
38 | + real = RecommendationResult(dict.fromkeys(['apple','grape','pineaple','melon'],1)) | ||
39 | + predicted = RecommendationResult(dict.fromkeys(['apple','grape','orange'],1)) | ||
40 | + self.evaluation = Evaluation(predicted,real,len(repository)) | ||
41 | + | ||
42 | + def test_class_accuracy(self): | ||
43 | + accuracy = Accuracy().run(self.evaluation) | ||
44 | + self.assertEqual(accuracy,0.5) | ||
45 | + | ||
46 | + def test_precision(self): | ||
47 | + precision = Precision().run(self.evaluation) | ||
48 | + self.assertEqual("%.2f" % precision,"0.67") | ||
49 | + | ||
50 | + def test_recall(self): | ||
51 | + recall = Recall().run(self.evaluation) | ||
52 | + self.assertEqual(recall,0.5) | ||
53 | + | ||
54 | + def test_f1(self): | ||
55 | + f1 = F1().run(self.evaluation) | ||
56 | + self.assertEqual("%.2f" % f1,"0.57") | ||
57 | + | ||
58 | + def test_coverage(self): | ||
59 | + evaluations_set = set() | ||
60 | + evaluations_set.add(self.evaluation) | ||
61 | + coverage = Coverage().run(evaluations_set) | ||
62 | + self.assertEqual(coverage,0.5) | ||
63 | + | ||
64 | + def test_evaluation(self): | ||
65 | + self.assertEqual(self.evaluation.true_positive, ['apple','grape']) | ||
66 | + self.assertEqual(self.evaluation.false_positive, ['orange']) | ||
67 | + self.assertEqual(self.evaluation.false_negative, ['pineaple','melon']) | ||
68 | + | ||
69 | + def test_cross_validation(self): | ||
70 | + cfg = Config() | ||
71 | + axi = xapian.Database(cfg.axi) | ||
72 | + packages = ["gimp","aaphoto","eog","emacs","dia","ferret", | ||
73 | + "festival","file","inkscape","xpdf"] | ||
74 | + path = "test_data/.sample_axi" | ||
75 | + sample_axi = SampleAptXapianIndex(packages,axi,path) | ||
76 | + rec = Recommender(cfg) | ||
77 | + rec.items_repository = sample_axi | ||
78 | + user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1}) | ||
79 | + | ||
80 | + metrics = [] | ||
81 | + metrics.append(Precision()) | ||
82 | + metrics.append(Recall()) | ||
83 | + metrics.append(F1()) | ||
84 | + | ||
85 | + validation = CrossValidation(0.3,5,rec,metrics,0.5) | ||
86 | + validation.run(user) | ||
87 | + print validation | ||
88 | + | ||
89 | +if __name__ == '__main__': | ||
90 | + unittest2.main() |
src/web/templates/about.html
@@ -3,23 +3,23 @@ $var mod = 'about'; | @@ -3,23 +3,23 @@ $var mod = 'about'; | ||
3 | $var cssfiles: | 3 | $var cssfiles: |
4 | $var jsfiles: | 4 | $var jsfiles: |
5 | 5 | ||
6 | - | ||
7 | <div id="maincontent"> | 6 | <div id="maincontent"> |
8 | <div class="innertube"> | 7 | <div class="innertube"> |
9 | 8 | ||
10 | <img style="float: right;" alt="AppRecommender logo" src="/static/images/logo.png" width="150px" /> | 9 | <img style="float: right;" alt="AppRecommender logo" src="/static/images/logo.png" width="150px" /> |
11 | 10 | ||
12 | -<h1>What is this?</h1> | ||
13 | - | ||
14 | -<p> | ||
15 | -AppRecommender is a project in development that aims to provide solutions | ||
16 | -for application recommendation at the GNU/Linux world. It was initially thought | ||
17 | -as a Debian package recommender, but considering the multi-distro effort in | ||
18 | -providing platform independent solutions, it should also follow this | ||
19 | -principle. | ||
20 | -</p> | 11 | +<h1>About</h1> |
21 | 12 | ||
13 | +<p>This experiment aims to compare and validate automated application | ||
14 | +recommendations produced by various strategies and algorithm tunings. Asking | ||
15 | +real users about the relevance of the recommendation is the closest we can get | ||
16 | +to the real accuracy of the recommender system.</p> | ||
22 | 17 | ||
18 | +<p>The engine that is being tested is a free software called <a | ||
19 | +href="http://github.com/tassia/AppRecommender">AppRecommender</a>. It was | ||
20 | +initially developed using the Debian Project infrastructure, but the solution | ||
21 | +is essentially distro-independent and could even be adapted to non-GNU/Linux | ||
22 | +systems, provided that data is available for them.</p> | ||
23 | 23 | ||
24 | </div><!-- id="innertube" --> | 24 | </div><!-- id="innertube" --> |
25 | </div><!-- id="maincontent" --> | 25 | </div><!-- id="maincontent" --> |
src/web/templates/apprec.html
src/web/templates/index.html
@@ -670,18 +670,17 @@ function showtags(tagid) { | @@ -670,18 +670,17 @@ function showtags(tagid) { | ||
670 | <h1>You might also like...</h1> | 670 | <h1>You might also like...</h1> |
671 | 671 | ||
672 | <p>Provide a list of packages or upload a popcon submission file and you'll get | 672 | <p>Provide a list of packages or upload a popcon submission file and you'll get |
673 | -a list of suggested packages automatically computed by AppRecommender. You can | ||
674 | -customize the recommender setup or let it randomly choose one.</p> | 673 | +a list of suggested packages automatically computed by AppRecommender.<!-- You can |
674 | +customize the recommender setup or let it randomly choose one.--></p> | ||
675 | 675 | ||
676 | -<p>Please fill the form that follows the recommendation results. Your | 676 | +<p>Given the recommendation result, please evaluate each application and |
677 | +choose if you want to continue with another round of suggestions. | ||
678 | +<!--fill the form that follows the recommendation results.--> </p><p>Your | ||
677 | feedback is very much appreciated!</p> | 679 | feedback is very much appreciated!</p> |
678 | 680 | ||
679 | - | ||
680 | -<p>Enjoy it :)</p> | ||
681 | </div> | 681 | </div> |
682 | 682 | ||
683 | </div><!-- class="innertube" --> | 683 | </div><!-- class="innertube" --> |
684 | </div><!-- id="maincontent" --> | 684 | </div><!-- id="maincontent" --> |
685 | 685 | ||
686 | 686 | ||
687 | - |
src/web/templates/layout.html
@@ -59,7 +59,7 @@ if (x==null || x=="Write your list App here or send a file list this icon:") | @@ -59,7 +59,7 @@ if (x==null || x=="Write your list App here or send a file list this icon:") | ||
59 | <fieldset> | 59 | <fieldset> |
60 | <div id="submit-box"> | 60 | <div id="submit-box"> |
61 | <input type="submit" value="RECOMMENDER" id="submit-button"><br /> | 61 | <input type="submit" value="RECOMMENDER" id="submit-button"><br /> |
62 | - <a id="advanced-button">advanced query?</a> | 62 | + <!--<a id="advanced-button">advanced query?</a>--> |
63 | </div> | 63 | </div> |
64 | <div id="input-box"> | 64 | <div id="input-box"> |
65 | <a href="#attachfile" rel="facebox" id="upfile"><span style="display: none;">Upload a file.</span></a> | 65 | <a href="#attachfile" rel="facebox" id="upfile"><span style="display: none;">Upload a file.</span></a> |
@@ -165,16 +165,15 @@ $:content | @@ -165,16 +165,15 @@ $:content | ||
165 | <div id="footer"> | 165 | <div id="footer"> |
166 | 166 | ||
167 | <div id="navbar"> | 167 | <div id="navbar"> |
168 | - <ul> | 168 | + <ul> |
169 | <li><a href="$url_base">Home</a></li> | 169 | <li><a href="$url_base">Home</a></li> |
170 | <li><a href="$url_base/about">About</a></li> | 170 | <li><a href="$url_base/about">About</a></li> |
171 | - <li><a href="$url_base/support">Support</a></li> | ||
172 | <li><a href="http://github.com/tassia/AppRecommender">Development</a></li> | 171 | <li><a href="http://github.com/tassia/AppRecommender">Development</a></li> |
173 | </ul> | 172 | </ul> |
174 | </div><!-- id="navbar" --> | 173 | </div><!-- id="navbar" --> |
175 | - <p id="copyright"> | ||
176 | - Copyright © 2011 AppRecommender. Debian is a registered trademark of Software in the Public Interest, Inc. | ||
177 | - </p> | 174 | +<!-- <p id="copyright"> |
175 | + Copyright © 2011 AppRecommender team. | ||
176 | + </p> --> | ||
178 | </div><!-- id="footer" --> | 177 | </div><!-- id="footer" --> |
179 | 178 | ||
180 | 179 |
@@ -0,0 +1,60 @@ | @@ -0,0 +1,60 @@ | ||
1 | +$var title: Survey | ||
2 | +$var mod = 'index'; | ||
3 | +$var cssfiles: static/css/tabs.css static/css/debtags.css static/css/facebox.css | ||
4 | +$var jsfiles: static/js/facebox.js | ||
5 | + | ||
6 | + | ||
7 | +<!-- Dynamic form --> | ||
8 | +<script type="application/x-javascript"> | ||
9 | +window.onload = function() { | ||
10 | + setupDependencies('weboptions'); //name of form(s). Separate each with a comma (ie: 'weboptions', 'myotherform' ) | ||
11 | + }; | ||
12 | +</script> | ||
13 | + | ||
14 | +<script type="application/x-javascript"> | ||
15 | +$$(document).ready(function() { | ||
16 | + $$('a[rel*=facebox]').facebox({ | ||
17 | + loadingImage : '/static/images/loading.gif', | ||
18 | + closeImage : '/static/images/closelabel.png' | ||
19 | + }); | ||
20 | + $$("#tags-box").click(function () { | ||
21 | + $$("#tags-box").hide(1000); | ||
22 | + }); | ||
23 | + | ||
24 | +}); | ||
25 | +</script> | ||
26 | + | ||
27 | + | ||
28 | +<div id="sidebar"> | ||
29 | +<div class="innertube"> | ||
30 | + | ||
31 | + | ||
32 | +<br style="clear: both" /> | ||
33 | +</div><!-- class="innertube" --> | ||
34 | +</div><!-- id="sidebar" --> | ||
35 | + | ||
36 | +<div id="maincontent"> | ||
37 | +<div class="innertube"> | ||
38 | + | ||
39 | +<div class="textbox"> | ||
40 | +<h1>Help us learn your needs!</h1> | ||
41 | + | ||
42 | +<p>Participate in this survey and contribute to the development of | ||
43 | +AppRecommender, a recommender system for GNU/Linux applications.</p> | ||
44 | +<br /> | ||
45 | +<p>Please provide the list of packages installed in a real running system, | ||
46 | +by uploading a popcon submission or the file generated with the command: | ||
47 | +"dpkg -l > packages_list".</p> | ||
48 | + | ||
49 | +<p>Evaluate at least 10 suggested applications and identify yourself if you | ||
50 | +wish to. Upon the completion of this survey there will be a thank you page | ||
51 | +listing all identified participants.</p> | ||
52 | + | ||
53 | +<p>Your help is very much appreciated!</p> | ||
54 | + | ||
55 | +</div> | ||
56 | + | ||
57 | +</div><!-- class="innertube" --> | ||
58 | +</div><!-- id="maincontent" --> | ||
59 | + | ||
60 | + |