From 24c02613b28d52433ebb41bb6d69700faaa15e1c Mon Sep 17 00:00:00 2001 From: Tássia Camões Araújo Date: Fri, 24 Jun 2011 14:15:35 -0300 Subject: [PATCH] Some more user-related tests and fixes. The new default strategy is 'cta', since 'ct' is not enabled anymore; Recommendation size is now defined by get_prediction(size); Result is filled with item weights instead of ranks. --- src/config.py | 2 +- src/recommender.py | 7 +++---- src/strategy.py | 9 ++++++--- src/tests/user_tests.py | 49 ++++++++++++++++++++++++++++--------------------- src/user.py | 2 +- 5 files changed, 39 insertions(+), 30 deletions(-) diff --git a/src/config.py b/src/config.py index dd8b714..53b69f8 100644 --- a/src/config.py +++ b/src/config.py @@ -46,7 +46,7 @@ class Config(): self.popcon_index = "~/.app-recommender/popcon_index" self.popcon_dir = "~/.app-recommender/popcon_dir" self.clusters_dir = "~/.app-recommender/clusters_dir" - self.strategy = "ct" # defaults to the cheapest one + self.strategy = "cta" # defaults to the cheapest one self.reindex = 0 self.load_options() self.set_logger() diff --git a/src/recommender.py b/src/recommender.py index e2055a6..8e11bdd 100644 --- a/src/recommender.py +++ b/src/recommender.py @@ -28,12 +28,11 @@ class RecommendationResult: """ Class designed to describe a recommendation result: items and scores. """ - def __init__(self,item_score,size): + def __init__(self,item_score): """ Set initial parameters. """ self.item_score = item_score - self.size = size def __str__(self): """ @@ -45,12 +44,12 @@ class RecommendationResult: str += "%2d: %s\n" % (i,result[i][0]) return str - def get_prediction(self): + def get_prediction(self,size=20): """ Return prediction based on recommendation size (number of items). 
""" sorted_result = sorted(self.item_score.items(), key=itemgetter(1)) - return sorted_result[:self.size] + return reversed(sorted_result[:size]) class Recommender: """ diff --git a/src/strategy.py b/src/strategy.py index a88ca25..2171e75 100644 --- a/src/strategy.py +++ b/src/strategy.py @@ -173,6 +173,9 @@ class AxiContentBasedStrategy(RecommendationStrategy): """ Content-based recommendation strategy based on Apt-xapian-index. """ + def __init__(self): + self.description = "Content-based" + def run(self,rec,user): """ Perform recommendation strategy. @@ -190,8 +193,8 @@ class AxiContentBasedStrategy(RecommendationStrategy): item_score = {} for m in mset: - item_score[m.document.get_data()] = m.rank - return recommender.RecommendationResult(item_score,20) + item_score[m.document.get_data()] = m.weight + return recommender.RecommendationResult(item_score) class CollaborativeStrategy(RecommendationStrategy): """ @@ -227,7 +230,7 @@ class CollaborativeStrategy(RecommendationStrategy): item_score[term.term] = rank rank = rank+1 - return recommender.RecommendationResult(item_score,20) + return recommender.RecommendationResult(item_score) class KnowledgeBasedStrategy(RecommendationStrategy): """ diff --git a/src/tests/user_tests.py b/src/tests/user_tests.py index 30135a6..70611c5 100755 --- a/src/tests/user_tests.py +++ b/src/tests/user_tests.py @@ -37,7 +37,7 @@ class UserTests(unittest2.TestCase): @classmethod def setUpClass(self): cfg = Config() - self.axi = xapian.Database(cfg.axi) + #self.axi = xapian.Database(cfg.axi) self.user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1}) self.pxi = PkgXapianIndex() @@ -104,35 +104,42 @@ class UserTests(unittest2.TestCase): self.assertEqual(self.user.items(),set(["gimp","aaphoto","eog","emacs"])) def test_axi_tag_profile(self): - enquire = xapian.Enquire(self.pxi) - relevant_dict = {} - non_relevant_dict = {} package_terms = ["XP"+package for package in self.user.items()] + enquire = xapian.Enquire(self.pxi) 
enquire.set_query(xapian.Query(xapian.Query.OP_OR,package_terms)) - mset = enquire.get_mset(0, self.pxi.get_doccount(), None, None) - tag_terms = set() - for m in mset: - tag_terms = [x.term for x in m.document.termlist() - if x.term.startswith("XT")] - for tag in tag_terms: - if tag in relevant_dict: - relevant_dict[tag] = relevant_dict[tag]+1 - else: - relevant_dict[tag] = 1 - rank = {} - for tag,count in relevant_dict.items(): - non_relevant_dict[tag] = self.pxi.get_termfreq(tag)-count - if non_relevant_dict[tag]>0: - rank[tag] = relevant_dict[tag]/float(non_relevant_dict[tag]) - #print "relevant",relevant_dict - #print "non_relevant",non_relevant_dict + user_packages = enquire.get_mset(0, self.pxi.get_doccount(), None, None) + tag_terms = [] + for p in user_packages: + tag_terms = tag_terms + [x.term for x in p.document.termlist() \ + if x.term.startswith("XT")] + relevant_count = dict([(tag,tag_terms.count(tag)) \ + for tag in set(tag_terms)]) + #rank = {} + #non_relevant_count = dict() + #for tag,count in relevant_count.items(): + # non_relevant_count[tag] = self.pxi.get_termfreq(tag)-count + # if non_relevant_count[tag]>0: + # rank[tag] = relevant_count[tag]/float(non_relevant_count[tag]) + #print "relevant",relevant_count + #print "non_relevant",non_relevant_count #print sorted(rank.items(), key=operator.itemgetter(1)) #[FIXME] get ths value based on real ranking + #print set(self.user.axi_tag_profile(self.pxi,4)) self.assertEqual(set(self.user.axi_tag_profile(self.pxi,4)), set(["XTuse::editing", "XTworks-with::image", "XTworks-with-format::png", "XTworks-with-format::jpg"])) + def test_maximal_pkg_profile(self): + old_pkg_profile = self.user.items() + aaphoto_deps = ["libc6", "libgomp1", "libjasper1", "libjpeg62", + "libpng12-0"] + libc6_deps = ["libc-bin", "libgcc1"] + + for pkg in aaphoto_deps+libc6_deps: + self.user.item_score[pkg] = 1 + + self.assertEqual(old_pkg_profile,self.user.maximal_pkg_profile()) if __name__ == '__main__': unittest2.main() diff 
--git a/src/user.py b/src/user.py index 20061f7..a25bbb2 100644 --- a/src/user.py +++ b/src/user.py @@ -132,7 +132,7 @@ class User: profile_size = len(self.pkg_profile) logging.info("Reduced packages profile size from %d to %d." % (old_profile_size, profile_size)) - return self.pkg_profile + return set(self.pkg_profile) class LocalSystem(User): """ -- libgit2 0.21.2