Commit 24c02613b28d52433ebb41bb6d69700faaa15e1c
1 parent
aad7a97e
Exists in
master
and in
1 other branch
Some more user-related tests and fixes. The new default strategy is 'cta',
since 'ct' is no longer enabled; the recommendation size is now defined by get_prediction(size); the result is filled with item weights instead of ranks.
Showing
5 changed files
with
39 additions
and
30 deletions
Show diff stats
src/config.py
... | ... | @@ -46,7 +46,7 @@ class Config(): |
46 | 46 | self.popcon_index = "~/.app-recommender/popcon_index" |
47 | 47 | self.popcon_dir = "~/.app-recommender/popcon_dir" |
48 | 48 | self.clusters_dir = "~/.app-recommender/clusters_dir" |
49 | - self.strategy = "ct" # defaults to the cheapest one | |
49 | + self.strategy = "cta" # defaults to the cheapest one | |
50 | 50 | self.reindex = 0 |
51 | 51 | self.load_options() |
52 | 52 | self.set_logger() | ... | ... |
src/recommender.py
... | ... | @@ -28,12 +28,11 @@ class RecommendationResult: |
28 | 28 | """ |
29 | 29 | Class designed to describe a recommendation result: items and scores. |
30 | 30 | """ |
31 | - def __init__(self,item_score,size): | |
31 | + def __init__(self,item_score): | |
32 | 32 | """ |
33 | 33 | Set initial parameters. |
34 | 34 | """ |
35 | 35 | self.item_score = item_score |
36 | - self.size = size | |
37 | 36 | |
38 | 37 | def __str__(self): |
39 | 38 | """ |
... | ... | @@ -45,12 +44,12 @@ class RecommendationResult: |
45 | 44 | str += "%2d: %s\n" % (i,result[i][0]) |
46 | 45 | return str |
47 | 46 | |
48 | - def get_prediction(self): | |
47 | + def get_prediction(self,size=20): | |
49 | 48 | """ |
50 | 49 | Return prediction based on recommendation size (number of items). |
51 | 50 | """ |
52 | 51 | sorted_result = sorted(self.item_score.items(), key=itemgetter(1)) |
53 | - return sorted_result[:self.size] | |
52 | + return reversed(sorted_result[:size]) | |
54 | 53 | |
55 | 54 | class Recommender: |
56 | 55 | """ | ... | ... |
src/strategy.py
... | ... | @@ -173,6 +173,9 @@ class AxiContentBasedStrategy(RecommendationStrategy): |
173 | 173 | """ |
174 | 174 | Content-based recommendation strategy based on Apt-xapian-index. |
175 | 175 | """ |
176 | + def __init__(self): | |
177 | + self.description = "Content-based" | |
178 | + | |
176 | 179 | def run(self,rec,user): |
177 | 180 | """ |
178 | 181 | Perform recommendation strategy. |
... | ... | @@ -190,8 +193,8 @@ class AxiContentBasedStrategy(RecommendationStrategy): |
190 | 193 | |
191 | 194 | item_score = {} |
192 | 195 | for m in mset: |
193 | - item_score[m.document.get_data()] = m.rank | |
194 | - return recommender.RecommendationResult(item_score,20) | |
196 | + item_score[m.document.get_data()] = m.weight | |
197 | + return recommender.RecommendationResult(item_score) | |
195 | 198 | |
196 | 199 | class CollaborativeStrategy(RecommendationStrategy): |
197 | 200 | """ |
... | ... | @@ -227,7 +230,7 @@ class CollaborativeStrategy(RecommendationStrategy): |
227 | 230 | item_score[term.term] = rank |
228 | 231 | rank = rank+1 |
229 | 232 | |
230 | - return recommender.RecommendationResult(item_score,20) | |
233 | + return recommender.RecommendationResult(item_score) | |
231 | 234 | |
232 | 235 | class KnowledgeBasedStrategy(RecommendationStrategy): |
233 | 236 | """ | ... | ... |
src/tests/user_tests.py
... | ... | @@ -37,7 +37,7 @@ class UserTests(unittest2.TestCase): |
37 | 37 | @classmethod |
38 | 38 | def setUpClass(self): |
39 | 39 | cfg = Config() |
40 | - self.axi = xapian.Database(cfg.axi) | |
40 | + #self.axi = xapian.Database(cfg.axi) | |
41 | 41 | self.user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1}) |
42 | 42 | self.pxi = PkgXapianIndex() |
43 | 43 | |
... | ... | @@ -104,35 +104,42 @@ class UserTests(unittest2.TestCase): |
104 | 104 | self.assertEqual(self.user.items(),set(["gimp","aaphoto","eog","emacs"])) |
105 | 105 | |
106 | 106 | def test_axi_tag_profile(self): |
107 | - enquire = xapian.Enquire(self.pxi) | |
108 | - relevant_dict = {} | |
109 | - non_relevant_dict = {} | |
110 | 107 | package_terms = ["XP"+package for package in self.user.items()] |
108 | + enquire = xapian.Enquire(self.pxi) | |
111 | 109 | enquire.set_query(xapian.Query(xapian.Query.OP_OR,package_terms)) |
112 | - mset = enquire.get_mset(0, self.pxi.get_doccount(), None, None) | |
113 | - tag_terms = set() | |
114 | - for m in mset: | |
115 | - tag_terms = [x.term for x in m.document.termlist() | |
116 | - if x.term.startswith("XT")] | |
117 | - for tag in tag_terms: | |
118 | - if tag in relevant_dict: | |
119 | - relevant_dict[tag] = relevant_dict[tag]+1 | |
120 | - else: | |
121 | - relevant_dict[tag] = 1 | |
122 | - rank = {} | |
123 | - for tag,count in relevant_dict.items(): | |
124 | - non_relevant_dict[tag] = self.pxi.get_termfreq(tag)-count | |
125 | - if non_relevant_dict[tag]>0: | |
126 | - rank[tag] = relevant_dict[tag]/float(non_relevant_dict[tag]) | |
127 | - #print "relevant",relevant_dict | |
128 | - #print "non_relevant",non_relevant_dict | |
110 | + user_packages = enquire.get_mset(0, self.pxi.get_doccount(), None, None) | |
111 | + tag_terms = [] | |
112 | + for p in user_packages: | |
113 | + tag_terms = tag_terms + [x.term for x in p.document.termlist() \ | |
114 | + if x.term.startswith("XT")] | |
115 | + relevant_count = dict([(tag,tag_terms.count(tag)) \ | |
116 | + for tag in set(tag_terms)]) | |
117 | + #rank = {} | |
118 | + #non_relevant_count = dict() | |
119 | + #for tag,count in relevant_count.items(): | |
120 | + # non_relevant_count[tag] = self.pxi.get_termfreq(tag)-count | |
121 | + # if non_relevant_count[tag]>0: | |
122 | + # rank[tag] = relevant_count[tag]/float(non_relevant_count[tag]) | |
123 | + #print "relevant",relevant_count | |
124 | + #print "non_relevant",non_relevant_count | |
129 | 125 | #print sorted(rank.items(), key=operator.itemgetter(1)) |
130 | 126 | #[FIXME] get this value based on real ranking |
127 | + #print set(self.user.axi_tag_profile(self.pxi,4)) | |
131 | 128 | self.assertEqual(set(self.user.axi_tag_profile(self.pxi,4)), |
132 | 129 | set(["XTuse::editing", "XTworks-with::image", |
133 | 130 | "XTworks-with-format::png", |
134 | 131 | "XTworks-with-format::jpg"])) |
135 | 132 | |
133 | + def test_maximal_pkg_profile(self): | |
134 | + old_pkg_profile = self.user.items() | |
135 | + aaphoto_deps = ["libc6", "libgomp1", "libjasper1", "libjpeg62", | |
136 | + "libpng12-0"] | |
137 | + libc6_deps = ["libc-bin", "libgcc1"] | |
138 | + | |
139 | + for pkg in aaphoto_deps+libc6_deps: | |
140 | + self.user.item_score[pkg] = 1 | |
141 | + | |
142 | + self.assertEqual(old_pkg_profile,self.user.maximal_pkg_profile()) | |
136 | 143 | |
137 | 144 | if __name__ == '__main__': |
138 | 145 | unittest2.main() | ... | ... |
src/user.py
... | ... | @@ -132,7 +132,7 @@ class User: |
132 | 132 | profile_size = len(self.pkg_profile) |
133 | 133 | logging.info("Reduced packages profile size from %d to %d." % |
134 | 134 | (old_profile_size, profile_size)) |
135 | - return self.pkg_profile | |
135 | + return set(self.pkg_profile) | |
136 | 136 | |
137 | 137 | class LocalSystem(User): |
138 | 138 | """ | ... | ... |