Commit 0538dedf34ef74084986dd6849eeca6b5bda5b24
1 parent
efb90222
Exists in
master
and in
1 other branch
Refactored recommendation strategies and implemented some tests.
Showing
2 changed files
with
209 additions
and
130 deletions
Show diff stats
src/strategy.py
... | ... | @@ -20,54 +20,27 @@ __license__ = """ |
20 | 20 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
21 | 21 | """ |
22 | 22 | |
23 | -import string | |
24 | -import os, re | |
25 | 23 | import xapian |
26 | -from data import * | |
27 | 24 | from singleton import Singleton |
28 | 25 | import recommender |
29 | - | |
30 | -class ReputationHeuristic(Singleton): | |
31 | - """ | |
32 | - Abstraction for diferent reputation heuristics. | |
33 | - """ | |
34 | - pass | |
35 | - | |
36 | -class BugsHeuristic(ReputationHeuristic): | |
37 | - """ | |
38 | - Reputation heuristic based on quantity of open bugs. | |
39 | - """ | |
40 | - pass | |
41 | - | |
42 | -class RCBugsHeuristic(ReputationHeuristic): | |
43 | - """ | |
44 | - Reputation heuristic based on quantity of RC bugs. | |
45 | - """ | |
46 | - pass | |
47 | - | |
48 | -class PopularityHeuristic(ReputationHeuristic): | |
49 | - """ | |
50 | - Reputation heuristic based on popularity of packages. | |
51 | - """ | |
52 | - pass | |
26 | +from data import * | |
53 | 27 | |
54 | 28 | class PkgMatchDecider(xapian.MatchDecider): |
55 | 29 | """ |
56 | 30 | Extend xapian.MatchDecider to not consider installed packages. |
57 | 31 | """ |
58 | - | |
59 | - def __init__(self, installed_pkgs): | |
32 | + def __init__(self, pkgs_list): | |
60 | 33 | """ |
61 | 34 | Set initial parameters. |
62 | 35 | """ |
63 | 36 | xapian.MatchDecider.__init__(self) |
64 | - self.installed_pkgs = installed_pkgs | |
37 | + self.pkgs_list = pkgs_list | |
65 | 38 | |
66 | 39 | def __call__(self, doc): |
67 | 40 | """ |
68 | 41 | True if the package is not already installed. |
69 | 42 | """ |
70 | - return doc.get_data() not in self.installed_pkgs | |
43 | + return doc.get_data() not in self.pkgs_list | |
71 | 44 | |
72 | 45 | class UserMatchDecider(xapian.MatchDecider): |
73 | 46 | """ |
... | ... | @@ -80,51 +53,35 @@ class UserMatchDecider(xapian.MatchDecider): |
80 | 53 | """ |
81 | 54 | xapian.MatchDecider.__init__(self) |
82 | 55 | self.profile = profile |
83 | - print "mdecider:",profile | |
84 | 56 | |
85 | 57 | def __call__(self, doc): |
86 | 58 | """ |
87 | 59 | True if the user has at least half of the packages from profile. | |
88 | 60 | """ |
89 | - profile_size = len(self.profile) | |
90 | - pkg_match=0 | |
61 | + match=0 | |
91 | 62 | for term in doc: |
92 | 63 | if term.term in self.profile: |
93 | - pkg_match = pkg_match+1 | |
94 | - print "id",doc.get_docid(),"match",pkg_match | |
95 | - return pkg_match >= profile_size/2 | |
64 | + match = match+1 | |
65 | + return (match >= len(self.profile)/2) | |
96 | 66 | |
97 | 67 | class PkgExpandDecider(xapian.ExpandDecider): |
98 | 68 | """ |
99 | 69 | Extend xapian.ExpandDecider to consider packages only. |
100 | 70 | """ |
101 | - | |
102 | - def __init__(self): | |
103 | - """ | |
104 | - Call base class init. | |
105 | - """ | |
106 | - xapian.ExpandDecider.__init__(self) | |
107 | - | |
108 | 71 | def __call__(self, term): |
109 | 72 | """ |
110 | 73 | True if the term is a package. |
111 | 74 | """ |
75 | + # [FIXME] return term.startswith("XP") | |
112 | 76 | return not term.startswith("XT") |
113 | 77 | |
114 | 78 | class TagExpandDecider(xapian.ExpandDecider): |
115 | 79 | """ |
116 | 80 | Extend xapian.ExpandDecider to consider tags only. |
117 | 81 | """ |
118 | - | |
119 | - def __init__(self, profile): | |
120 | - """ | |
121 | - Call base class init. | |
122 | - """ | |
123 | - xapian.ExpandDecider.__init__(self) | |
124 | - | |
125 | - def __call__(self, doc): | |
82 | + def __call__(self, term): | |
126 | 83 | """ |
127 | - True if the user has more the half of packages from profile. | |
84 | + True if the term is a tag. | |
128 | 85 | """ |
129 | 86 | return term.startswith("XT") |
130 | 87 | |
... | ... | @@ -134,65 +91,30 @@ class RecommendationStrategy: |
134 | 91 | """ |
135 | 92 | pass |
136 | 93 | |
137 | -class ItemReputationStrategy(RecommendationStrategy): | |
138 | - """ | |
139 | - Recommendation strategy based on items reputation. | |
140 | - """ | |
141 | - def run(self,items_list,heuristic): | |
142 | - """ | |
143 | - Perform recommendation strategy. | |
144 | - """ | |
145 | - logging.critical("Item reputation recommendation strategy is not yet implemented.") | |
146 | - raise Error | |
147 | - | |
148 | -#class ContentBasedStrategy(RecommendationStrategy): | |
149 | -# """ | |
150 | -# Content-based recommendation strategy. | |
151 | -# """ | |
152 | -# def run(self,rec,user): | |
153 | -# """ | |
154 | -# Perform recommendation strategy. | |
155 | -# """ | |
156 | -# profile = user.txi_tag_profile(rec.items_repository,50) | |
157 | -# qp = xapian.QueryParser() | |
158 | -# query = qp.parse_query(profile) | |
159 | -# enquire = xapian.Enquire(rec.items_repository) | |
160 | -# enquire.set_query(query) | |
161 | -# | |
162 | -# try: | |
163 | -# mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
164 | -# except xapian.DatabaseError as error: | |
165 | -# logging.critical(error.get_msg()) | |
166 | -# raise Error | |
167 | -# | |
168 | -# item_score = {} | |
169 | -# for m in mset: | |
170 | -# item_score[m.document.get_data()] = m.rank | |
171 | -# return recommender.RecommendationResult(item_score,20) | |
172 | - | |
173 | -class AxiContentBasedStrategy(RecommendationStrategy): | |
94 | +class ContentBasedStrategy(RecommendationStrategy): | |
174 | 95 | """ |
175 | 96 | Content-based recommendation strategy based on Apt-xapian-index. |
176 | 97 | """ |
177 | - def __init__(self): | |
98 | + def __init__(self,content): | |
178 | 99 | self.description = "Content-based" |
100 | + self.content = content | |
179 | 101 | |
180 | - def run(self,rec,user): | |
102 | + def run(self,rec,user,limit): | |
181 | 103 | """ |
182 | 104 | Perform recommendation strategy. |
183 | 105 | """ |
184 | - profile = user.axi_tag_profile(rec.items_repository,50) | |
185 | - #profile_str = string.join(list(profile),' ') | |
186 | - query = xapian.Query(xapian.Query.OP_OR,list(profile)) | |
106 | + profile = user.profile(rec.items_repository,self.content,50) | |
107 | + # prepare index for querying user profile | |
108 | + query = xapian.Query(xapian.Query.OP_OR,profile) | |
187 | 109 | enquire = xapian.Enquire(rec.items_repository) |
110 | + enquire.set_weighting_scheme(rec.weight) | |
188 | 111 | enquire.set_query(query) |
189 | - | |
190 | 112 | try: |
191 | - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
113 | + # retrieve matching packages | |
114 | + mset = enquire.get_mset(0, limit, None, PkgMatchDecider(user.items())) | |
192 | 115 | except xapian.DatabaseError as error: |
193 | - logging.critical(error.get_msg()) | |
194 | - raise Error | |
195 | - | |
116 | + logging.critical("Content-based strategy: "+error.get_msg()) | |
117 | + # compose result dictionary | |
196 | 118 | item_score = {} |
197 | 119 | for m in mset: |
198 | 120 | item_score[m.document.get_data()] = m.weight |
... | ... | @@ -202,66 +124,107 @@ class CollaborativeStrategy(RecommendationStrategy): |
202 | 124 | """ |
203 | 125 | Collaborative recommendation strategy. | |
204 | 126 | """ |
205 | - def __init__(self): | |
127 | + def __init__(self,k,clustering=1): | |
206 | 128 | self.description = "Collaborative" |
129 | + self.clustering = clustering | |
130 | + self.neighbours = k | |
207 | 131 | |
208 | - #def run(self,rec,user,similarity_measure): | |
209 | - def run(self,rec,user): | |
132 | + def run(self,rec,user,limit): | |
210 | 133 | """ |
211 | 134 | Perform recommendation strategy. |
212 | 135 | """ |
213 | - profile = user.maximal_pkg_profile() | |
214 | - #profile_str = string.join(list(profile),' ') | |
215 | - query = xapian.Query(xapian.Query.OP_OR,list(profile)) | |
216 | - enquire = xapian.Enquire(rec.users_repository) | |
136 | + profile = user.pkg_profile | |
137 | + # prepare index for querying user profile | |
138 | + query = xapian.Query(xapian.Query.OP_OR,profile) | |
139 | + if self.clustering: | |
140 | + enquire = xapian.Enquire(rec.clustered_users_repository) | |
141 | + else: | |
142 | + enquire = xapian.Enquire(rec.users_repository) | |
143 | + enquire.set_weighting_scheme(rec.weight) | |
217 | 144 | enquire.set_query(query) |
218 | - | |
219 | 145 | try: |
220 | - #mset = enquire.get_mset(0, 182, None, UserMatchDecider(profile)) | |
221 | - mset = enquire.get_mset(0, 20) | |
146 | + # retrieve matching users | |
147 | + mset = enquire.get_mset(0, self.neighbours) | |
222 | 148 | except xapian.DatabaseError as error: |
223 | - logging.critical(error.get_msg()) | |
224 | - raise Error | |
225 | - | |
149 | + logging.critical("Collaborative strategy: "+error.get_msg()) | |
226 | 150 | rset = xapian.RSet() |
151 | + logging.debug("Neighborhood composed by the following users (by hash)") | |
227 | 152 | for m in mset: |
228 | 153 | rset.add_document(m.document.get_docid()) |
229 | - logging.debug("Counting as relevant submission %s" % | |
230 | - m.document.get_data()) | |
231 | - | |
232 | - eset = enquire.get_eset(20,rset,PkgExpandDecider()) | |
233 | - rank = 0 | |
154 | + logging.debug(m.document.get_data()) | |
155 | + # retrieve most relevant packages | |
156 | + eset = enquire.get_eset(limit,rset,PkgExpandDecider()) | |
157 | + # compose result dictionary | |
234 | 158 | item_score = {} |
235 | - for term in eset: | |
236 | - item_score[term.term] = rank | |
237 | - rank = rank+1 | |
238 | - | |
159 | + for package in eset: | |
160 | + item_score[package.term.lstrip("XP")] = package.weight | |
239 | 161 | return recommender.RecommendationResult(item_score) |
240 | 162 | |
163 | +class DemographicStrategy(RecommendationStrategy): | |
164 | + """ | |
165 | + Recommendation strategy based on demographic data. | |
166 | + """ | |
167 | + def __init__(self): | |
168 | + self.description = "Demographic" | |
169 | + logging.debug("Demographic recommendation not yet implemented.") | |
170 | + raise Error | |
171 | + | |
172 | + def run(self,user,items_repository): | |
173 | + """ | |
174 | + Perform recommendation strategy. | |
175 | + """ | |
176 | + pass | |
177 | + | |
241 | 178 | class KnowledgeBasedStrategy(RecommendationStrategy): |
242 | 179 | """ |
243 | 180 | Knowledge-based recommendation strategy. |
244 | 181 | """ |
245 | 182 | def __init__(self): |
246 | 183 | self.description = "Knowledge-based" |
184 | + logging.debug("Knowledge-based recommendation not yet implemented.") | |
185 | + raise Error | |
247 | 186 | |
248 | 187 | def run(self,user,knowledge_repository): |
249 | 188 | """ |
250 | 189 | Perform recommendation strategy. |
251 | 190 | """ |
252 | - logging.critical("Knowledge-based recommendation strategy is not yet implemented.") | |
253 | - raise Error | |
191 | + pass | |
254 | 192 | |
255 | -class DemographicStrategy(RecommendationStrategy): | |
193 | +class ReputationHeuristic(Singleton): | |
256 | 194 | """ |
257 | - Recommendation strategy based on demographic data. | |
195 | + Abstraction for different reputation heuristics. | |
196 | + """ | |
197 | + pass | |
198 | + | |
199 | +class BugsHeuristic(ReputationHeuristic): | |
200 | + """ | |
201 | + Reputation heuristic based on quantity of open bugs. | |
202 | + """ | |
203 | + pass | |
204 | + | |
205 | +class RCBugsHeuristic(ReputationHeuristic): | |
206 | + """ | |
207 | + Reputation heuristic based on quantity of RC bugs. | |
208 | + """ | |
209 | + pass | |
210 | + | |
211 | +class PopularityHeuristic(ReputationHeuristic): | |
212 | + """ | |
213 | + Reputation heuristic based on popularity of packages. | |
214 | + """ | |
215 | + pass | |
216 | + | |
217 | +class ItemReputationStrategy(RecommendationStrategy): | |
218 | + """ | |
219 | + Recommendation strategy based on items reputation. | |
258 | 220 | """ |
259 | 221 | def __init__(self): |
260 | - self.description = "Demographic" | |
222 | + self.description = "Item reputation" | |
223 | + logging.debug("Item reputation recommendation not yet implemented.") | |
224 | + raise Error | |
261 | 225 | |
262 | - def run(self,user,items_repository): | |
226 | + def run(self,items_list,heuristic): | |
263 | 227 | """ |
264 | 228 | Perform recommendation strategy. |
265 | 229 | """ |
266 | - logging.critical("Demographic recommendation strategy is not yet implemented.") | |
267 | - raise Error | |
230 | + pass | ... | ... |
... | ... | @@ -0,0 +1,116 @@ |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + strategyTests - Recommendation strategies classes test case | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
12 | + | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
21 | + | |
22 | +import unittest2 | |
23 | +import xapian | |
24 | +import sys | |
25 | +sys.path.insert(0,'../') | |
26 | +from error import Error | |
27 | +from user import User | |
28 | +from recommender import RecommendationResult | |
29 | +from config import * | |
30 | +#from data import * | |
31 | +from strategy import (PkgMatchDecider, UserMatchDecider, PkgExpandDecider, | |
32 | + TagExpandDecider, ContentBasedStrategy, | |
33 | + CollaborativeStrategy, DemographicStrategy, | |
34 | + KnowledgeBasedStrategy, ItemReputationStrategy) | |
35 | + | |
36 | +class PkgMatchDeciderTests(unittest2.TestCase): | |
37 | + @classmethod | |
38 | + def setUpClass(self): | |
39 | + pkgs_list = ["gimp","eog","inkscape"] | |
40 | + self.decider = PkgMatchDecider(pkgs_list) | |
41 | + self.doc = xapian.Document() | |
42 | + | |
43 | + def test_match(self): | |
44 | + self.doc.set_data("emacs") | |
45 | + self.assertTrue(self.decider(self.doc)) | |
46 | + | |
47 | + def test_no_match(self): | |
48 | + self.doc.set_data("gimp") | |
49 | + self.assertFalse(self.decider(self.doc)) | |
50 | + | |
51 | +class UserMatchDeciderTests(unittest2.TestCase): | |
52 | + @classmethod | |
53 | + def setUpClass(self): | |
54 | + user_profile = ["gimp","eog","inkscape", "emacs"] | |
55 | + self.decider = UserMatchDecider(user_profile) | |
56 | + | |
57 | + def setUp(self): | |
58 | + self.doc = xapian.Document() | |
59 | + | |
60 | + def test_match(self): | |
61 | + self.doc.add_term("emacs") | |
62 | + self.doc.add_term("gimp") | |
63 | + self.doc.add_term("eog") | |
64 | + self.assertTrue(self.decider(self.doc)) | |
65 | + | |
66 | + def test_no_match(self): | |
67 | + self.doc.add_term("gimp") | |
68 | + self.assertFalse(self.decider(self.doc)) | |
69 | + | |
70 | +class PkgExpandDeciderTests(unittest2.TestCase): | |
71 | + @classmethod | |
72 | + def setUpClass(self): | |
73 | + self.decider = PkgExpandDecider() | |
74 | + | |
75 | + def test_match(self): | |
76 | + self.assertTrue(self.decider("XPgimp")) | |
77 | + | |
78 | + def test_no_match(self): | |
79 | + self.assertFalse(self.decider("XTgimp")) | |
80 | + | |
81 | +class TagExpandDeciderTests(unittest2.TestCase): | |
82 | + @classmethod | |
83 | + def setUpClass(self): | |
84 | + self.decider = TagExpandDecider() | |
85 | + | |
86 | + def test_match(self): | |
87 | + self.assertTrue(self.decider("XTgimp")) | |
88 | + | |
89 | + def test_no_match(self): | |
90 | + self.assertFalse(self.decider("gimp")) | |
91 | + | |
92 | +class ContentBasedStrategyTests(unittest2.TestCase): | |
93 | + @classmethod | |
94 | + def setUpClass(self): | |
95 | + | |
96 | + pass | |
97 | + | |
98 | +class CollaborativeStrategyTests(unittest2.TestCase): | |
99 | + @classmethod | |
100 | + def setUpClass(self): | |
101 | + pass | |
102 | + | |
103 | +class DemographicStrategyTests(unittest2.TestCase): | |
104 | + def test_call(self): | |
105 | + self.assertRaises(Error,lambda: DemographicStrategy()) | |
106 | + | |
107 | +class KnowledgeBasedStrategyTests(unittest2.TestCase): | |
108 | + def test_call(self): | |
109 | + self.assertRaises(Error,lambda: KnowledgeBasedStrategy()) | |
110 | + | |
111 | +class ItemReputationStrategyTests(unittest2.TestCase): | |
112 | + def test_call(self): | |
113 | + self.assertRaises(Error,lambda: ItemReputationStrategy()) | |
114 | + | |
115 | +if __name__ == '__main__': | |
116 | + unittest2.main() | ... | ... |