Commit 0538dedf34ef74084986dd6849eeca6b5bda5b24

Authored by Tássia Camões Araújo
1 parent efb90222
Exists in master and in 1 other branch add_vagrant

Refactored recommendation strategies and implemented some tests.

Showing 2 changed files with 209 additions and 130 deletions   Show diff stats
src/strategy.py
... ... @@ -20,54 +20,27 @@ __license__ = """
20 20 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 21 """
22 22  
23   -import string
24   -import os, re
25 23 import xapian
26   -from data import *
27 24 from singleton import Singleton
28 25 import recommender
29   -
30   -class ReputationHeuristic(Singleton):
31   - """
32   - Abstraction for diferent reputation heuristics.
33   - """
34   - pass
35   -
36   -class BugsHeuristic(ReputationHeuristic):
37   - """
38   - Reputation heuristic based on quantity of open bugs.
39   - """
40   - pass
41   -
42   -class RCBugsHeuristic(ReputationHeuristic):
43   - """
44   - Reputation heuristic based on quantity of RC bugs.
45   - """
46   - pass
47   -
48   -class PopularityHeuristic(ReputationHeuristic):
49   - """
50   - Reputation heuristic based on popularity of packages.
51   - """
52   - pass
  26 +from data import *
53 27  
54 28 class PkgMatchDecider(xapian.MatchDecider):
55 29 """
56 30 Extend xapian.MatchDecider to not consider installed packages.
57 31 """
58   -
59   - def __init__(self, installed_pkgs):
  32 + def __init__(self, pkgs_list):
60 33 """
61 34 Set initial parameters.
62 35 """
63 36 xapian.MatchDecider.__init__(self)
64   - self.installed_pkgs = installed_pkgs
  37 + self.pkgs_list = pkgs_list
65 38  
66 39 def __call__(self, doc):
67 40 """
68 41 True if the package is not already installed.
69 42 """
70   - return doc.get_data() not in self.installed_pkgs
  43 + return doc.get_data() not in self.pkgs_list
71 44  
72 45 class UserMatchDecider(xapian.MatchDecider):
73 46 """
... ... @@ -80,51 +53,35 @@ class UserMatchDecider(xapian.MatchDecider):
80 53 """
81 54 xapian.MatchDecider.__init__(self)
82 55 self.profile = profile
83   - print "mdecider:",profile
84 56  
85 57 def __call__(self, doc):
86 58 """
87 59 True if the user has at least half of the packages from the profile.
88 60 """
89   - profile_size = len(self.profile)
90   - pkg_match=0
  61 + match=0
91 62 for term in doc:
92 63 if term.term in self.profile:
93   - pkg_match = pkg_match+1
94   - print "id",doc.get_docid(),"match",pkg_match
95   - return pkg_match >= profile_size/2
  64 + match = match+1
  65 + return (match >= len(self.profile)/2)
96 66  
97 67 class PkgExpandDecider(xapian.ExpandDecider):
98 68 """
99 69 Extend xapian.ExpandDecider to consider packages only.
100 70 """
101   -
102   - def __init__(self):
103   - """
104   - Call base class init.
105   - """
106   - xapian.ExpandDecider.__init__(self)
107   -
108 71 def __call__(self, term):
109 72 """
110 73 True if the term is a package.
111 74 """
  75 + # [FIXME] return term.startswith("XP")
112 76 return not term.startswith("XT")
113 77  
114 78 class TagExpandDecider(xapian.ExpandDecider):
115 79 """
116 80 Extend xapian.ExpandDecider to consider tags only.
117 81 """
118   -
119   - def __init__(self, profile):
120   - """
121   - Call base class init.
122   - """
123   - xapian.ExpandDecider.__init__(self)
124   -
125   - def __call__(self, doc):
  82 + def __call__(self, term):
126 83 """
127   - True if the user has more the half of packages from profile.
  84 + True if the term is a tag.
128 85 """
129 86 return term.startswith("XT")
130 87  
... ... @@ -134,65 +91,30 @@ class RecommendationStrategy:
134 91 """
135 92 pass
136 93  
137   -class ItemReputationStrategy(RecommendationStrategy):
138   - """
139   - Recommendation strategy based on items reputation.
140   - """
141   - def run(self,items_list,heuristic):
142   - """
143   - Perform recommendation strategy.
144   - """
145   - logging.critical("Item reputation recommendation strategy is not yet implemented.")
146   - raise Error
147   -
148   -#class ContentBasedStrategy(RecommendationStrategy):
149   -# """
150   -# Content-based recommendation strategy.
151   -# """
152   -# def run(self,rec,user):
153   -# """
154   -# Perform recommendation strategy.
155   -# """
156   -# profile = user.txi_tag_profile(rec.items_repository,50)
157   -# qp = xapian.QueryParser()
158   -# query = qp.parse_query(profile)
159   -# enquire = xapian.Enquire(rec.items_repository)
160   -# enquire.set_query(query)
161   -#
162   -# try:
163   -# mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
164   -# except xapian.DatabaseError as error:
165   -# logging.critical(error.get_msg())
166   -# raise Error
167   -#
168   -# item_score = {}
169   -# for m in mset:
170   -# item_score[m.document.get_data()] = m.rank
171   -# return recommender.RecommendationResult(item_score,20)
172   -
173   -class AxiContentBasedStrategy(RecommendationStrategy):
  94 +class ContentBasedStrategy(RecommendationStrategy):
174 95 """
175 96 Content-based recommendation strategy based on Apt-xapian-index.
176 97 """
177   - def __init__(self):
  98 + def __init__(self,content):
178 99 self.description = "Content-based"
  100 + self.content = content
179 101  
180   - def run(self,rec,user):
  102 + def run(self,rec,user,limit):
181 103 """
182 104 Perform recommendation strategy.
183 105 """
184   - profile = user.axi_tag_profile(rec.items_repository,50)
185   - #profile_str = string.join(list(profile),' ')
186   - query = xapian.Query(xapian.Query.OP_OR,list(profile))
  106 + profile = user.profile(rec.items_repository,self.content,50)
  107 + # prepare index for querying user profile
  108 + query = xapian.Query(xapian.Query.OP_OR,profile)
187 109 enquire = xapian.Enquire(rec.items_repository)
  110 + enquire.set_weighting_scheme(rec.weight)
188 111 enquire.set_query(query)
189   -
190 112 try:
191   - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
  113 + # retrieve matching packages
  114 + mset = enquire.get_mset(0, limit, None, PkgMatchDecider(user.items()))
192 115 except xapian.DatabaseError as error:
193   - logging.critical(error.get_msg())
194   - raise Error
195   -
  116 + logging.critical("Content-based strategy: "+error.get_msg())
  117 + # compose result dictionary
196 118 item_score = {}
197 119 for m in mset:
198 120 item_score[m.document.get_data()] = m.weight
... ... @@ -202,66 +124,107 @@ class CollaborativeStrategy(RecommendationStrategy):
202 124 """
203 125 Collaborative recommendation strategy.
204 126 """
205   - def __init__(self):
  127 + def __init__(self,k,clustering=1):
206 128 self.description = "Collaborative"
  129 + self.clustering = clustering
  130 + self.neighbours = k
207 131  
208   - #def run(self,rec,user,similarity_measure):
209   - def run(self,rec,user):
  132 + def run(self,rec,user,limit):
210 133 """
211 134 Perform recommendation strategy.
212 135 """
213   - profile = user.maximal_pkg_profile()
214   - #profile_str = string.join(list(profile),' ')
215   - query = xapian.Query(xapian.Query.OP_OR,list(profile))
216   - enquire = xapian.Enquire(rec.users_repository)
  136 + profile = user.pkg_profile
  137 + # prepare index for querying user profile
  138 + query = xapian.Query(xapian.Query.OP_OR,profile)
  139 + if self.clustering:
  140 + enquire = xapian.Enquire(rec.clustered_users_repository)
  141 + else:
  142 + enquire = xapian.Enquire(rec.users_repository)
  143 + enquire.set_weighting_scheme(rec.weight)
217 144 enquire.set_query(query)
218   -
219 145 try:
220   - #mset = enquire.get_mset(0, 182, None, UserMatchDecider(profile))
221   - mset = enquire.get_mset(0, 20)
  146 + # retrieve matching users
  147 + mset = enquire.get_mset(0, self.neighbours)
222 148 except xapian.DatabaseError as error:
223   - logging.critical(error.get_msg())
224   - raise Error
225   -
  149 + logging.critical("Collaborative strategy: "+error.get_msg())
226 150 rset = xapian.RSet()
  151 + logging.debug("Neighborhood composed by the following users (by hash)")
227 152 for m in mset:
228 153 rset.add_document(m.document.get_docid())
229   - logging.debug("Counting as relevant submission %s" %
230   - m.document.get_data())
231   -
232   - eset = enquire.get_eset(20,rset,PkgExpandDecider())
233   - rank = 0
  154 + logging.debug(m.document.get_data())
  155 + # retrieve most relevant packages
  156 + eset = enquire.get_eset(limit,rset,PkgExpandDecider())
  157 + # compose result dictionary
234 158 item_score = {}
235   - for term in eset:
236   - item_score[term.term] = rank
237   - rank = rank+1
238   -
  159 + for package in eset:
  160 + item_score[package.term.lstrip("XP")] = package.weight
239 161 return recommender.RecommendationResult(item_score)
240 162  
  163 +class DemographicStrategy(RecommendationStrategy):
  164 + """
  165 + Recommendation strategy based on demographic data.
  166 + """
  167 + def __init__(self):
  168 + self.description = "Demographic"
  169 + logging.debug("Demographic recommendation not yet implemented.")
  170 + raise Error
  171 +
  172 + def run(self,user,items_repository):
  173 + """
  174 + Perform recommendation strategy.
  175 + """
  176 + pass
  177 +
241 178 class KnowledgeBasedStrategy(RecommendationStrategy):
242 179 """
243 180 Knowledge-based recommendation strategy.
244 181 """
245 182 def __init__(self):
246 183 self.description = "Knowledge-based"
  184 + logging.debug("Knowledge-based recommendation not yet implemented.")
  185 + raise Error
247 186  
248 187 def run(self,user,knowledge_repository):
249 188 """
250 189 Perform recommendation strategy.
251 190 """
252   - logging.critical("Knowledge-based recommendation strategy is not yet implemented.")
253   - raise Error
  191 + pass
254 192  
255   -class DemographicStrategy(RecommendationStrategy):
  193 +class ReputationHeuristic(Singleton):
256 194 """
257   - Recommendation strategy based on demographic data.
  195 + Abstraction for different reputation heuristics.
  196 + """
  197 + pass
  198 +
  199 +class BugsHeuristic(ReputationHeuristic):
  200 + """
  201 + Reputation heuristic based on quantity of open bugs.
  202 + """
  203 + pass
  204 +
  205 +class RCBugsHeuristic(ReputationHeuristic):
  206 + """
  207 + Reputation heuristic based on quantity of RC bugs.
  208 + """
  209 + pass
  210 +
  211 +class PopularityHeuristic(ReputationHeuristic):
  212 + """
  213 + Reputation heuristic based on popularity of packages.
  214 + """
  215 + pass
  216 +
  217 +class ItemReputationStrategy(RecommendationStrategy):
  218 + """
  219 + Recommendation strategy based on items reputation.
258 220 """
259 221 def __init__(self):
260   - self.description = "Demographic"
  222 + self.description = "Item reputation"
  223 + logging.debug("Item reputation recommendation not yet implemented.")
  224 + raise Error
261 225  
262   - def run(self,user,items_repository):
  226 + def run(self,items_list,heuristic):
263 227 """
264 228 Perform recommendation strategy.
265 229 """
266   - logging.critical("Demographic recommendation strategy is not yet implemented.")
267   - raise Error
  230 + pass
... ...
src/tests/strategy_tests.py 0 → 100755
... ... @@ -0,0 +1,116 @@
  1 +#!/usr/bin/env python
  2 +"""
  3 + strategyTests - Recommendation strategies classes test case
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import unittest2
  23 +import xapian
  24 +import sys
  25 +sys.path.insert(0,'../')
  26 +from error import Error
  27 +from user import User
  28 +from recommender import RecommendationResult
  29 +from config import *
  30 +#from data import *
  31 +from strategy import (PkgMatchDecider, UserMatchDecider, PkgExpandDecider,
  32 + TagExpandDecider, ContentBasedStrategy,
  33 + CollaborativeStrategy, DemographicStrategy,
  34 + KnowledgeBasedStrategy, ItemReputationStrategy)
  35 +
  36 +class PkgMatchDeciderTests(unittest2.TestCase):
  37 + @classmethod
  38 + def setUpClass(self):
  39 + pkgs_list = ["gimp","eog","inkscape"]
  40 + self.decider = PkgMatchDecider(pkgs_list)
  41 + self.doc = xapian.Document()
  42 +
  43 + def test_match(self):
  44 + self.doc.set_data("emacs")
  45 + self.assertTrue(self.decider(self.doc))
  46 +
  47 + def test_no_match(self):
  48 + self.doc.set_data("gimp")
  49 + self.assertFalse(self.decider(self.doc))
  50 +
  51 +class UserMatchDeciderTests(unittest2.TestCase):
  52 + @classmethod
  53 + def setUpClass(self):
  54 + user_profile = ["gimp","eog","inkscape", "emacs"]
  55 + self.decider = UserMatchDecider(user_profile)
  56 +
  57 + def setUp(self):
  58 + self.doc = xapian.Document()
  59 +
  60 + def test_match(self):
  61 + self.doc.add_term("emacs")
  62 + self.doc.add_term("gimp")
  63 + self.doc.add_term("eog")
  64 + self.assertTrue(self.decider(self.doc))
  65 +
  66 + def test_no_match(self):
  67 + self.doc.add_term("gimp")
  68 + self.assertFalse(self.decider(self.doc))
  69 +
  70 +class PkgExpandDeciderTests(unittest2.TestCase):
  71 + @classmethod
  72 + def setUpClass(self):
  73 + self.decider = PkgExpandDecider()
  74 +
  75 + def test_match(self):
  76 + self.assertTrue(self.decider("XPgimp"))
  77 +
  78 + def test_no_match(self):
  79 + self.assertFalse(self.decider("XTgimp"))
  80 +
  81 +class TagExpandDeciderTests(unittest2.TestCase):
  82 + @classmethod
  83 + def setUpClass(self):
  84 + self.decider = TagExpandDecider()
  85 +
  86 + def test_match(self):
  87 + self.assertTrue(self.decider("XTgimp"))
  88 +
  89 + def test_no_match(self):
  90 + self.assertFalse(self.decider("gimp"))
  91 +
  92 +class ContentBasedStrategyTests(unittest2.TestCase):
  93 + @classmethod
  94 + def setUpClass(self):
  95 +
  96 + pass
  97 +
  98 +class CollaborativeStrategyTests(unittest2.TestCase):
  99 + @classmethod
  100 + def setUpClass(self):
  101 + pass
  102 +
  103 +class DemographicStrategyTests(unittest2.TestCase):
  104 + def test_call(self):
  105 + self.assertRaises(Error,lambda: DemographicStrategy())
  106 +
  107 +class KnowledgeBasedStrategyTests(unittest2.TestCase):
  108 + def test_call(self):
  109 + self.assertRaises(Error,lambda: KnowledgeBasedStrategy())
  110 +
  111 +class ItemReputationStrategyTests(unittest2.TestCase):
  112 + def test_call(self):
  113 + self.assertRaises(Error,lambda: ItemReputationStrategy())
  114 +
  115 +if __name__ == '__main__':
  116 + unittest2.main()
... ...