Commit 24c02613b28d52433ebb41bb6d69700faaa15e1c

Authored by Tássia Camões Araújo
1 parent aad7a97e
Exists in master and in 1 other branch add_vagrant

Some more user-related tests and fixes. The new default strategy is 'cta',

since 'ct' is not enabled anymore; recommendation size is now defined by
get_prediction(size); the result is filled with item weights instead of ranks.
src/config.py
... ... @@ -46,7 +46,7 @@ class Config():
46 46 self.popcon_index = "~/.app-recommender/popcon_index"
47 47 self.popcon_dir = "~/.app-recommender/popcon_dir"
48 48 self.clusters_dir = "~/.app-recommender/clusters_dir"
49   - self.strategy = "ct" # defaults to the cheapest one
  49 + self.strategy = "cta" # defaults to the cheapest one
50 50 self.reindex = 0
51 51 self.load_options()
52 52 self.set_logger()
... ...
src/recommender.py
... ... @@ -28,12 +28,11 @@ class RecommendationResult:
28 28 """
29 29 Class designed to describe a recommendation result: items and scores.
30 30 """
31   - def __init__(self,item_score,size):
  31 + def __init__(self,item_score):
32 32 """
33 33 Set initial parameters.
34 34 """
35 35 self.item_score = item_score
36   - self.size = size
37 36  
38 37 def __str__(self):
39 38 """
... ... @@ -45,12 +44,12 @@ class RecommendationResult:
45 44 str += "%2d: %s\n" % (i,result[i][0])
46 45 return str
47 46  
48   - def get_prediction(self):
  47 + def get_prediction(self,size=20):
49 48 """
50 49 Return prediction based on recommendation size (number of items).
51 50 """
52 51 sorted_result = sorted(self.item_score.items(), key=itemgetter(1))
53   - return sorted_result[:self.size]
  52 + return reversed(sorted_result[:size])
54 53  
55 54 class Recommender:
56 55 """
... ...
src/strategy.py
... ... @@ -173,6 +173,9 @@ class AxiContentBasedStrategy(RecommendationStrategy):
173 173 """
174 174 Content-based recommendation strategy based on Apt-xapian-index.
175 175 """
  176 + def __init__(self):
  177 + self.description = "Content-based"
  178 +
176 179 def run(self,rec,user):
177 180 """
178 181 Perform recommendation strategy.
... ... @@ -190,8 +193,8 @@ class AxiContentBasedStrategy(RecommendationStrategy):
190 193  
191 194 item_score = {}
192 195 for m in mset:
193   - item_score[m.document.get_data()] = m.rank
194   - return recommender.RecommendationResult(item_score,20)
  196 + item_score[m.document.get_data()] = m.weight
  197 + return recommender.RecommendationResult(item_score)
195 198  
196 199 class CollaborativeStrategy(RecommendationStrategy):
197 200 """
... ... @@ -227,7 +230,7 @@ class CollaborativeStrategy(RecommendationStrategy):
227 230 item_score[term.term] = rank
228 231 rank = rank+1
229 232  
230   - return recommender.RecommendationResult(item_score,20)
  233 + return recommender.RecommendationResult(item_score)
231 234  
232 235 class KnowledgeBasedStrategy(RecommendationStrategy):
233 236 """
... ...
src/tests/user_tests.py
... ... @@ -37,7 +37,7 @@ class UserTests(unittest2.TestCase):
37 37 @classmethod
38 38 def setUpClass(self):
39 39 cfg = Config()
40   - self.axi = xapian.Database(cfg.axi)
  40 + #self.axi = xapian.Database(cfg.axi)
41 41 self.user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1})
42 42 self.pxi = PkgXapianIndex()
43 43  
... ... @@ -104,35 +104,42 @@ class UserTests(unittest2.TestCase):
104 104 self.assertEqual(self.user.items(),set(["gimp","aaphoto","eog","emacs"]))
105 105  
106 106 def test_axi_tag_profile(self):
107   - enquire = xapian.Enquire(self.pxi)
108   - relevant_dict = {}
109   - non_relevant_dict = {}
110 107 package_terms = ["XP"+package for package in self.user.items()]
  108 + enquire = xapian.Enquire(self.pxi)
111 109 enquire.set_query(xapian.Query(xapian.Query.OP_OR,package_terms))
112   - mset = enquire.get_mset(0, self.pxi.get_doccount(), None, None)
113   - tag_terms = set()
114   - for m in mset:
115   - tag_terms = [x.term for x in m.document.termlist()
116   - if x.term.startswith("XT")]
117   - for tag in tag_terms:
118   - if tag in relevant_dict:
119   - relevant_dict[tag] = relevant_dict[tag]+1
120   - else:
121   - relevant_dict[tag] = 1
122   - rank = {}
123   - for tag,count in relevant_dict.items():
124   - non_relevant_dict[tag] = self.pxi.get_termfreq(tag)-count
125   - if non_relevant_dict[tag]>0:
126   - rank[tag] = relevant_dict[tag]/float(non_relevant_dict[tag])
127   - #print "relevant",relevant_dict
128   - #print "non_relevant",non_relevant_dict
  110 + user_packages = enquire.get_mset(0, self.pxi.get_doccount(), None, None)
  111 + tag_terms = []
  112 + for p in user_packages:
  113 + tag_terms = tag_terms + [x.term for x in p.document.termlist() \
  114 + if x.term.startswith("XT")]
  115 + relevant_count = dict([(tag,tag_terms.count(tag)) \
  116 + for tag in set(tag_terms)])
  117 + #rank = {}
  118 + #non_relevant_count = dict()
  119 + #for tag,count in relevant_count.items():
  120 + # non_relevant_count[tag] = self.pxi.get_termfreq(tag)-count
  121 + # if non_relevant_count[tag]>0:
  122 + # rank[tag] = relevant_count[tag]/float(non_relevant_count[tag])
  123 + #print "relevant",relevant_count
  124 + #print "non_relevant",non_relevant_count
129 125 #print sorted(rank.items(), key=operator.itemgetter(1))
130 126 #[FIXME] get this value based on real ranking
  127 + #print set(self.user.axi_tag_profile(self.pxi,4))
131 128 self.assertEqual(set(self.user.axi_tag_profile(self.pxi,4)),
132 129 set(["XTuse::editing", "XTworks-with::image",
133 130 "XTworks-with-format::png",
134 131 "XTworks-with-format::jpg"]))
135 132  
  133 + def test_maximal_pkg_profile(self):
  134 + old_pkg_profile = self.user.items()
  135 + aaphoto_deps = ["libc6", "libgomp1", "libjasper1", "libjpeg62",
  136 + "libpng12-0"]
  137 + libc6_deps = ["libc-bin", "libgcc1"]
  138 +
  139 + for pkg in aaphoto_deps+libc6_deps:
  140 + self.user.item_score[pkg] = 1
  141 +
  142 + self.assertEqual(old_pkg_profile,self.user.maximal_pkg_profile())
136 143  
137 144 if __name__ == '__main__':
138 145 unittest2.main()
... ...
src/user.py
... ... @@ -132,7 +132,7 @@ class User:
132 132 profile_size = len(self.pkg_profile)
133 133 logging.info("Reduced packages profile size from %d to %d." %
134 134 (old_profile_size, profile_size))
135   - return self.pkg_profile
  135 + return set(self.pkg_profile)
136 136  
137 137 class LocalSystem(User):
138 138 """
... ...