Commit 2e9ab843a42a3a87c77db71ac63340d6baa20c3e

Authored by Tiago Bortoletto Vaz
2 parents 22d71862 c1675b12
Exists in master and in 1 other branch add_vagrant

Merge branch 'master' of github.com:tassia/AppRecommender

@@ -46,8 +46,8 @@ class Config(): @@ -46,8 +46,8 @@ class Config():
46 self.popcon_index = "~/.app-recommender/popcon_index" 46 self.popcon_index = "~/.app-recommender/popcon_index"
47 self.popcon_dir = "~/.app-recommender/popcon_dir" 47 self.popcon_dir = "~/.app-recommender/popcon_dir"
48 self.clusters_dir = "~/.app-recommender/clusters_dir" 48 self.clusters_dir = "~/.app-recommender/clusters_dir"
49 - self.strategy = "cta" # defaults to the cheapest one  
50 - self.reindex = 0 49 + self.strategy = "cb" # defaults to the cheapest one
  50 + self.weight = "bm25"
51 self.load_options() 51 self.load_options()
52 self.set_logger() 52 self.set_logger()
53 53
@@ -63,22 +63,24 @@ class Config(): @@ -63,22 +63,24 @@ class Config():
63 print " -c, --config=PATH Path to configuration file." 63 print " -c, --config=PATH Path to configuration file."
64 print "" 64 print ""
65 print " [ recommender ]" 65 print " [ recommender ]"
66 - print " -t, --tagsdb=PATH Path to debtags database."  
67 - print " -i, --tagsindex=PATH Path to debtags dedicated index."  
68 - print " -r, --force-reindex Force reindexing debtags database."  
69 print " -a, --axi=PATH Path to Apt-xapian-index." 66 print " -a, --axi=PATH Path to Apt-xapian-index."
70 print " -p, --popconindex=PATH Path to popcon dedicated index." 67 print " -p, --popconindex=PATH Path to popcon dedicated index."
71 print " -m, --popcondir=PATH Path to popcon submissions dir." 68 print " -m, --popcondir=PATH Path to popcon submissions dir."
72 print " -l, --clustersdir=PATH Path to popcon clusters dir." 69 print " -l, --clustersdir=PATH Path to popcon clusters dir."
  70 + print " -w, --weight=OPTION Search weighting scheme."
73 print " -s, --strategy=OPTION Recommendation strategy." 71 print " -s, --strategy=OPTION Recommendation strategy."
74 print "" 72 print ""
  73 + print " [ weight options ] "
  74 + print " trad = traditional probabilistic weighting "
  75 + print " bm25 = bm25 weighting scheme "
  76 + print ""
75 print " [ strategy options ] " 77 print " [ strategy options ] "
76 - print " ct = content-based using tags "  
77 - print " cta = content-based using tags via apt-xapian-index"  
78 - print " cp = content-based using package descriptions " 78 + print " cb = content-based "
  79 + print " cbt = content-based using only tags as content "
  80 + print " cbd = content-based using only package descriptions as content "
79 print " col = collaborative " 81 print " col = collaborative "
80 - print " colct = collaborative through tags content "  
81 - print " colcp = collaborative through package descriptions content " 82 + #print " colct = collaborative through tags content "
  83 + #print " colcp = collaborative through package descriptions content "
82 84
83 def read_option(self, section, option): 85 def read_option(self, section, option):
84 """ 86 """
@@ -108,19 +110,17 @@ class Config(): @@ -108,19 +110,17 @@ class Config():
108 self.output_filename = self.read_option('general', 'output') 110 self.output_filename = self.read_option('general', 'output')
109 self.config = self.read_option('general', 'config') 111 self.config = self.read_option('general', 'config')
110 112
111 - self.tags_db = self.read_option('recommender', 'tags_db')  
112 - self.tags_index = self.read_option('recommender', 'tags_index')  
113 - self.reindex = self.read_option('recommender', 'reindex')  
114 self.axi = self.read_option('recommender', 'axi') 113 self.axi = self.read_option('recommender', 'axi')
115 self.popcon_index = self.read_option('recommender', 'popcon_index') 114 self.popcon_index = self.read_option('recommender', 'popcon_index')
116 self.popcon_dir = self.read_option('recommender', 'popcon_dir') 115 self.popcon_dir = self.read_option('recommender', 'popcon_dir')
117 self.clusters_dir = self.read_option('recommender', 'clusters_dir') 116 self.clusters_dir = self.read_option('recommender', 'clusters_dir')
  117 + self.weight = self.read_option('recommender', 'weight')
  118 + self.strategy = self.read_option('recommender', 'strategy')
118 119
119 - short_options = "hdvo:c:t:i:ra:p:m:s:" 120 + short_options = "hdvo:c:a:p:m:l:w:s:"
120 long_options = ["help", "debug", "verbose", "output=", "config=", 121 long_options = ["help", "debug", "verbose", "output=", "config=",
121 - "tagsdb=", "tagsindex=", "reindex", "axi=",  
122 - "popconindex=", "popcondir=", "clustersdir=",  
123 - "strategy="] 122 + "axi=", "popconindex=", "popcondir=", "clustersdir=",
  123 + "weight=", "strategy="]
124 try: 124 try:
125 opts, args = getopt.getopt(sys.argv[1:], short_options, 125 opts, args = getopt.getopt(sys.argv[1:], short_options,
126 long_options) 126 long_options)
@@ -142,12 +142,6 @@ class Config(): @@ -142,12 +142,6 @@ class Config():
142 self.output = p 142 self.output = p
143 elif o in ("-c", "--config"): 143 elif o in ("-c", "--config"):
144 self.config = p 144 self.config = p
145 - elif o in ("-t", "--tagsdb"):  
146 - self.tags_db = p  
147 - elif o in ("-i", "--tagsindex"):  
148 - self.tags_index = p  
149 - elif o in ("-r", "--force-reindex"):  
150 - self.reindex = 1  
151 elif o in ("-a", "--axi"): 145 elif o in ("-a", "--axi"):
152 self.axi = p + "/index" 146 self.axi = p + "/index"
153 self.axi_values = p + "/values" 147 self.axi_values = p + "/values"
@@ -157,6 +151,8 @@ class Config(): @@ -157,6 +151,8 @@ class Config():
157 self.popcon_dir = p 151 self.popcon_dir = p
158 elif o in ("-l", "--clustersdir"): 152 elif o in ("-l", "--clustersdir"):
159 self.popcon_dir = p 153 self.popcon_dir = p
  154 + elif o in ("-w", "--weight"):
  155 + self.weight = p
160 elif o in ("-s", "--strategy"): 156 elif o in ("-s", "--strategy"):
161 self.strategy = p 157 self.strategy = p
162 else: 158 else:
@@ -35,29 +35,44 @@ from singleton import Singleton @@ -35,29 +35,44 @@ from singleton import Singleton
35 import cluster 35 import cluster
36 from dissimilarity import * 36 from dissimilarity import *
37 37
38 -#class Item:  
39 -# """  
40 -# Generic item definition.  
41 -# """  
42 -#  
43 -#class Package(Item):  
44 -# """  
45 -# Definition of a GNU/Linux application as a recommender item.  
46 -# """  
47 -# def __init__(self,package_name):  
48 -# """  
49 -# Set initial attributes.  
50 -# """  
51 -# self.package_name = package_name  
52 -#  
53 -#def normalize_tags(string):  
54 -# """  
55 -# Substitute string characters : by _ and - by '.  
56 -# Examples:  
57 -# admin::package-management -> admin__package'management  
58 -# implemented-in::c++ -> implemented-in__c++  
59 -# """  
60 -# return string.replace(':','_').replace('-','\'') 38 +def axi_search_pkgs(axi,pkgs_list):
  39 + terms = ["XP"+item for item in pkgs_list]
  40 + query = xapian.Query(xapian.Query.OP_OR, terms)
  41 + enquire = xapian.Enquire(axi)
  42 + enquire.set_query(query)
  43 + matches = enquire.get_mset(0,axi.get_doccount())
  44 + return matches
  45 +
  46 +def axi_search_pkg_tags(axi,pkg):
  47 + query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg)
  48 + enquire = xapian.Enquire(axi)
  49 + enquire.set_query(query)
  50 + matches = enquire.get_mset(0,1)
  51 + for m in matches:
  52 + tags = [term.term for term in axi.get_document(m.docid).termlist() if
  53 + term.term.startswith("XT")]
  54 + return tags
  55 +
  56 +class SampleAptXapianIndex(xapian.WritableDatabase):
  57 + """
  58 + Sample data source for packages information, mainly useful for tests.
  59 + """
  60 + def __init__(self,pkgs_list,axi):
  61 + xapian.WritableDatabase.__init__(self,".sample_axi",
  62 + xapian.DB_CREATE_OR_OVERWRITE)
  63 + sample = axi_search_pkgs(axi,pkgs_list)
  64 + self.all_docs = []
  65 + for package in sample:
  66 + doc_id = self.add_document(axi.get_document(package.docid))
  67 + self.all_docs.append(doc_id)
  68 +
  69 + def _print(self):
  70 + print "---"
  71 + print xapian.WritableDatabase.__repr__(self)
  72 + print "---"
  73 + for doc_id in self.all_docs:
  74 + print [term.term for term in self.get_document(doc_id).termlist()]
  75 + print "---"
61 76
62 #[FIXME] get pkg tags from axi and remove load_debtags_db method 77 #[FIXME] get pkg tags from axi and remove load_debtags_db method
63 def load_debtags_db(db_path): 78 def load_debtags_db(db_path):
@@ -75,106 +90,6 @@ def load_debtags_db(db_path): @@ -75,106 +90,6 @@ def load_debtags_db(db_path):
75 logging.error("Could not load DebtagsDB from '%s'." % self.db_path) 90 logging.error("Could not load DebtagsDB from '%s'." % self.db_path)
76 raise Error 91 raise Error
77 92
78 -#class TagsXapianIndex(xapian.WritableDatabase,Singleton):  
79 -# """  
80 -# Data source for tags info defined as a singleton xapian database.  
81 -# """  
82 -# def __init__(self,cfg):  
83 -# """  
84 -# Set initial attributes.  
85 -# """  
86 -# self.path = os.path.expanduser(cfg.tags_index)  
87 -# self.db_path = os.path.expanduser(cfg.tags_db)  
88 -# self.debtags_db = debtags.DB()  
89 -# try:  
90 -# db_file = open(self.db_path)  
91 -# except IOError:  
92 -# logging.error("Could not load DebtagsDB from '%s'." % self.db_path)  
93 -# raise Error  
94 -# md5 = hashlib.md5()  
95 -# md5.update(db_file.read())  
96 -# self.db_md5 = md5.hexdigest()  
97 -# db_file.close()  
98 -# self.load_index(cfg.reindex)  
99 -#  
100 -## def load_db(self):  
101 -## """  
102 -## Load debtags database from the source file.  
103 -## """  
104 -## tag_filter = re.compile(r"^special::.+$|^.+::TODO$")  
105 -## try:  
106 -## db_file = open(self.db_path, "r")  
107 -## self.debtags_db.read(db_file,lambda x: not tag_filter.match(x))  
108 -## db_file.close()  
109 -## except:  
110 -## logging.error("Could not load DebtagsDB from '%s'." % self.db_path)  
111 -## raise Error  
112 -#  
113 -# def relevant_tags_from_db(self,pkgs_list,qtd_of_tags):  
114 -# """  
115 -# Return most relevant tags considering a list of packages.  
116 -# """  
117 -# if not self.debtags_db.package_count():  
118 -# #print "index vazio"  
119 -# self.debtags_db = load_debtags_db(self.db_path)  
120 -# relevant_db = self.debtags_db.choose_packages(pkgs_list)  
121 -# relevance_index = debtags.relevance_index_function(self.debtags_db,  
122 -# relevant_db)  
123 -# sorted_relevant_tags = sorted(relevant_db.iter_tags(),  
124 -# lambda a, b: cmp(relevance_index(a),  
125 -# relevance_index(b)))  
126 -# return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:]))  
127 -#  
128 -# def load_index(self,reindex):  
129 -# """  
130 -# Load an existing debtags index.  
131 -# """  
132 -# if not reindex:  
133 -# try:  
134 -# logging.info("Opening existing debtags xapian index at \'%s\'"  
135 -# % self.path)  
136 -# xapian.Database.__init__(self,self.path)  
137 -# md5 = self.get_metadata("md5")  
138 -# if not md5 == self.db_md5:  
139 -# logging.info("Index must be updated.")  
140 -# reindex = 1  
141 -# except xapian.DatabaseError:  
142 -# logging.info("Could not open debtags index.")  
143 -# reindex =1  
144 -#  
145 -# if reindex:  
146 -# self.new_index()  
147 -#  
148 -# def new_index(self):  
149 -# """  
150 -# Create a xapian index for debtags info based on 'debtags_db' and  
151 -# place it at 'self.path'.  
152 -# """  
153 -# if not os.path.exists(self.path):  
154 -# os.makedirs(self.path)  
155 -#  
156 -# try:  
157 -# logging.info("Indexing debtags info from \'%s\'" %  
158 -# self.db_path)  
159 -# logging.info("Creating new xapian index at \'%s\'" %  
160 -# self.path)  
161 -# xapian.WritableDatabase.__init__(self,self.path,  
162 -# xapian.DB_CREATE_OR_OVERWRITE)  
163 -# except xapian.DatabaseError:  
164 -# logging.critical("Could not create xapian index.")  
165 -# raise Error  
166 -#  
167 -# self.debtags_db = load_debtags_db(self.db_path)  
168 -# self.set_metadata("md5",self.db_md5)  
169 -#  
170 -# for pkg,tags in self.debtags_db.iter_packages_tags():  
171 -# doc = xapian.Document()  
172 -# doc.set_data(pkg)  
173 -# for tag in tags:  
174 -# doc.add_term(normalize_tags(tag))  
175 -# doc_id = self.add_document(doc)  
176 -# logging.debug("Debtags Xapian: Indexing doc %d",doc_id)  
177 -  
178 class PopconXapianIndex(xapian.WritableDatabase,Singleton): 93 class PopconXapianIndex(xapian.WritableDatabase,Singleton):
179 """ 94 """
180 Data source for popcon submissions defined as a singleton xapian database. 95 Data source for popcon submissions defined as a singleton xapian database.
src/recommender.py
@@ -19,10 +19,10 @@ __license__ = """ @@ -19,10 +19,10 @@ __license__ = """
19 along with this program. If not, see <http://www.gnu.org/licenses/>. 19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 """ 20 """
21 21
22 -from operator import itemgetter  
23 -from data import *  
24 -from strategy import *  
25 -from error import Error 22 +import xapian
  23 +import operator
  24 +import data
  25 +import strategy
26 26
27 class RecommendationResult: 27 class RecommendationResult:
28 """ 28 """
@@ -40,7 +40,7 @@ class RecommendationResult: @@ -40,7 +40,7 @@ class RecommendationResult:
40 """ 40 """
41 result = self.get_prediction() 41 result = self.get_prediction()
42 str = "\n" 42 str = "\n"
43 - for i in range(len(result)): 43 + for i in range(len((list(result)))):
44 str += "%2d: %s\n" % (i,result[i][0]) 44 str += "%2d: %s\n" % (i,result[i][0])
45 return str 45 return str
46 46
@@ -48,8 +48,10 @@ class RecommendationResult: @@ -48,8 +48,10 @@ class RecommendationResult:
48 """ 48 """
49 Return prediction based on recommendation size (number of items). 49 Return prediction based on recommendation size (number of items).
50 """ 50 """
51 - sorted_result = sorted(self.item_score.items(), key=itemgetter(1))  
52 - return reversed(sorted_result[-size:]) 51 + if size > len(self.item_score): size = len(self.item_score)
  52 + sorted_result = sorted(self.item_score.items(),
  53 + key=operator.itemgetter(1))
  54 + return list(reversed(sorted_result[-size:]))
53 55
54 class Recommender: 56 class Recommender:
55 """ 57 """
@@ -59,47 +61,30 @@ class Recommender: @@ -59,47 +61,30 @@ class Recommender:
59 """ 61 """
60 Set initial parameters. 62 Set initial parameters.
61 """ 63 """
62 - try:  
63 - strategy = "self."+cfg.strategy+"(cfg)"  
64 - exec(strategy)  
65 - except (NameError, AttributeError, SyntaxError) as err:  
66 - print err  
67 - logging.critical("Could not perform recommendation strategy '%s'" %  
68 - cfg.strategy)  
69 - raise Error  
70 -  
71 - def ct(self,cfg):  
72 - """  
73 - Set recommender attributes to perform content-based recommendation  
74 - using tags index as source data.  
75 - """  
76 - self.items_repository = TagsXapianIndex(cfg)  
77 - self.strategy = ContentBasedStrategy()  
78 -  
79 - def cta(self,cfg):  
80 - """  
81 - Set recommender attributes to perform content-based recommendation  
82 - using apt-xapian-index as source data.  
83 - """  
84 self.items_repository = xapian.Database(cfg.axi) 64 self.items_repository = xapian.Database(cfg.axi)
85 - self.strategy = AxiContentBasedStrategy()  
86 -  
87 - def col(self,cfg):  
88 - """  
89 - Set recommender attributes to perform collaborative recommendation  
90 - using popcon-xapian-index as source data.  
91 - """  
92 - self.users_repository = PopconXapianIndex(cfg)  
93 - self.strategy = CollaborativeStrategy() 65 + self.users_repository = data.PopconXapianIndex(cfg) #[FIXME] only cfg fields
  66 + self.clustered_users_repository = data.PopconXapianIndex(cfg) #[FIXME]
  67 + self.set_strategy(cfg.strategy)
  68 + if cfg.weight == "bm25":
  69 + self.weight = xapian.BM25Weight()
  70 + else:
  71 + self.weight = xapian.TradWeight()
94 72
95 - def set_strategy(self,strategy): 73 + def set_strategy(self,strategy_str):
96 """ 74 """
97 Set the recommendation strategy. 75 Set the recommendation strategy.
98 """ 76 """
99 - self.strategy = strategy 77 + if strategy_str == "cb":
  78 + self.strategy = strategy.ContentBasedStrategy("full")
  79 + if strategy_str == "cbt":
  80 + self.strategy = strategy.ContentBasedStrategy("tag")
  81 + if strategy_str == "cbd":
  82 + self.strategy = strategy.ContentBasedStrategy("desc")
  83 + if strategy_str == "col":
  84 + self.strategy = strategy.CollaborativeStrategy(20)
100 85
101 - def get_recommendation(self,user): 86 + def get_recommendation(self,user,limit=20):
102 """ 87 """
103 Produces recommendation using previously loaded strategy. 88 Produces recommendation using previously loaded strategy.
104 """ 89 """
105 - return self.strategy.run(self,user) 90 + return self.strategy.run(self,user,limit)
src/strategy.py
@@ -20,54 +20,27 @@ __license__ = &quot;&quot;&quot; @@ -20,54 +20,27 @@ __license__ = &quot;&quot;&quot;
20 along with this program. If not, see <http://www.gnu.org/licenses/>. 20 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 """ 21 """
22 22
23 -import string  
24 -import os, re  
25 import xapian 23 import xapian
26 -from data import *  
27 from singleton import Singleton 24 from singleton import Singleton
28 import recommender 25 import recommender
29 -  
30 -class ReputationHeuristic(Singleton):  
31 - """  
32 - Abstraction for diferent reputation heuristics.  
33 - """  
34 - pass  
35 -  
36 -class BugsHeuristic(ReputationHeuristic):  
37 - """  
38 - Reputation heuristic based on quantity of open bugs.  
39 - """  
40 - pass  
41 -  
42 -class RCBugsHeuristic(ReputationHeuristic):  
43 - """  
44 - Reputation heuristic based on quantity of RC bugs.  
45 - """  
46 - pass  
47 -  
48 -class PopularityHeuristic(ReputationHeuristic):  
49 - """  
50 - Reputation heuristic based on popularity of packages.  
51 - """  
52 - pass 26 +from data import *
53 27
54 class PkgMatchDecider(xapian.MatchDecider): 28 class PkgMatchDecider(xapian.MatchDecider):
55 """ 29 """
56 Extend xapian.MatchDecider to not consider installed packages. 30 Extend xapian.MatchDecider to not consider installed packages.
57 """ 31 """
58 -  
59 - def __init__(self, installed_pkgs): 32 + def __init__(self, pkgs_list):
60 """ 33 """
61 Set initial parameters. 34 Set initial parameters.
62 """ 35 """
63 xapian.MatchDecider.__init__(self) 36 xapian.MatchDecider.__init__(self)
64 - self.installed_pkgs = installed_pkgs 37 + self.pkgs_list = pkgs_list
65 38
66 def __call__(self, doc): 39 def __call__(self, doc):
67 """ 40 """
68 True if the package is not already installed. 41 True if the package is not already installed.
69 """ 42 """
70 - return doc.get_data() not in self.installed_pkgs 43 + return doc.get_data() not in self.pkgs_list
71 44
72 class UserMatchDecider(xapian.MatchDecider): 45 class UserMatchDecider(xapian.MatchDecider):
73 """ 46 """
@@ -80,51 +53,35 @@ class UserMatchDecider(xapian.MatchDecider): @@ -80,51 +53,35 @@ class UserMatchDecider(xapian.MatchDecider):
80 """ 53 """
81 xapian.MatchDecider.__init__(self) 54 xapian.MatchDecider.__init__(self)
82 self.profile = profile 55 self.profile = profile
83 - print "mdecider:",profile  
84 56
85 def __call__(self, doc): 57 def __call__(self, doc):
86 """ 58 """
87 True if the user has at least half of the packages from profile. 59 True if the user has at least half of the packages from profile.
88 """ 60 """
89 - profile_size = len(self.profile)  
90 - pkg_match=0 61 + match=0
91 for term in doc: 62 for term in doc:
92 if term.term in self.profile: 63 if term.term in self.profile:
93 - pkg_match = pkg_match+1  
94 - print "id",doc.get_docid(),"match",pkg_match  
95 - return pkg_match >= profile_size/2 64 + match = match+1
  65 + return (match >= len(self.profile)/2)
96 66
97 class PkgExpandDecider(xapian.ExpandDecider): 67 class PkgExpandDecider(xapian.ExpandDecider):
98 """ 68 """
99 Extend xapian.ExpandDecider to consider packages only. 69 Extend xapian.ExpandDecider to consider packages only.
100 """ 70 """
101 -  
102 - def __init__(self):  
103 - """  
104 - Call base class init.  
105 - """  
106 - xapian.ExpandDecider.__init__(self)  
107 -  
108 def __call__(self, term): 71 def __call__(self, term):
109 """ 72 """
110 True if the term is a package. 73 True if the term is a package.
111 """ 74 """
  75 + # [FIXME] return term.startswith("XP")
112 return not term.startswith("XT") 76 return not term.startswith("XT")
113 77
114 class TagExpandDecider(xapian.ExpandDecider): 78 class TagExpandDecider(xapian.ExpandDecider):
115 """ 79 """
116 Extend xapian.ExpandDecider to consider tags only. 80 Extend xapian.ExpandDecider to consider tags only.
117 """ 81 """
118 -  
119 - def __init__(self, profile):  
120 - """  
121 - Call base class init.  
122 - """  
123 - xapian.ExpandDecider.__init__(self)  
124 -  
125 - def __call__(self, doc): 82 + def __call__(self, term):
126 """ 83 """
127 - True if the user has more the half of packages from profile. 84 + True if the term is a tag.
128 """ 85 """
129 return term.startswith("XT") 86 return term.startswith("XT")
130 87
@@ -134,65 +91,30 @@ class RecommendationStrategy: @@ -134,65 +91,30 @@ class RecommendationStrategy:
134 """ 91 """
135 pass 92 pass
136 93
137 -class ItemReputationStrategy(RecommendationStrategy):  
138 - """  
139 - Recommendation strategy based on items reputation.  
140 - """  
141 - def run(self,items_list,heuristic):  
142 - """  
143 - Perform recommendation strategy.  
144 - """  
145 - logging.critical("Item reputation recommendation strategy is not yet implemented.")  
146 - raise Error  
147 -  
148 -#class ContentBasedStrategy(RecommendationStrategy):  
149 -# """  
150 -# Content-based recommendation strategy.  
151 -# """  
152 -# def run(self,rec,user):  
153 -# """  
154 -# Perform recommendation strategy.  
155 -# """  
156 -# profile = user.txi_tag_profile(rec.items_repository,50)  
157 -# qp = xapian.QueryParser()  
158 -# query = qp.parse_query(profile)  
159 -# enquire = xapian.Enquire(rec.items_repository)  
160 -# enquire.set_query(query)  
161 -#  
162 -# try:  
163 -# mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))  
164 -# except xapian.DatabaseError as error:  
165 -# logging.critical(error.get_msg())  
166 -# raise Error  
167 -#  
168 -# item_score = {}  
169 -# for m in mset:  
170 -# item_score[m.document.get_data()] = m.rank  
171 -# return recommender.RecommendationResult(item_score,20)  
172 -  
173 -class AxiContentBasedStrategy(RecommendationStrategy): 94 +class ContentBasedStrategy(RecommendationStrategy):
174 """ 95 """
175 Content-based recommendation strategy based on Apt-xapian-index. 96 Content-based recommendation strategy based on Apt-xapian-index.
176 """ 97 """
177 - def __init__(self): 98 + def __init__(self,content):
178 self.description = "Content-based" 99 self.description = "Content-based"
  100 + self.content = content
179 101
180 - def run(self,rec,user): 102 + def run(self,rec,user,limit):
181 """ 103 """
182 Perform recommendation strategy. 104 Perform recommendation strategy.
183 """ 105 """
184 - profile = user.axi_tag_profile(rec.items_repository,50)  
185 - #profile_str = string.join(list(profile),' ')  
186 - query = xapian.Query(xapian.Query.OP_OR,list(profile)) 106 + profile = user.profile(rec.items_repository,self.content,50)
  107 + # prepare index for querying user profile
  108 + query = xapian.Query(xapian.Query.OP_OR,profile)
187 enquire = xapian.Enquire(rec.items_repository) 109 enquire = xapian.Enquire(rec.items_repository)
  110 + enquire.set_weighting_scheme(rec.weight)
188 enquire.set_query(query) 111 enquire.set_query(query)
189 -  
190 try: 112 try:
191 - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) 113 + # retrieve matching packages
  114 + mset = enquire.get_mset(0, limit, None, PkgMatchDecider(user.items()))
192 except xapian.DatabaseError as error: 115 except xapian.DatabaseError as error:
193 - logging.critical(error.get_msg())  
194 - raise Error  
195 - 116 + logging.critical("Content-based strategy: "+error.get_msg())
  117 + # compose result dictionary
196 item_score = {} 118 item_score = {}
197 for m in mset: 119 for m in mset:
198 item_score[m.document.get_data()] = m.weight 120 item_score[m.document.get_data()] = m.weight
@@ -202,66 +124,107 @@ class CollaborativeStrategy(RecommendationStrategy): @@ -202,66 +124,107 @@ class CollaborativeStrategy(RecommendationStrategy):
202 """ 124 """
203 Collaborative recommendation strategy. 125 Collaborative recommendation strategy.
204 """ 126 """
205 - def __init__(self): 127 + def __init__(self,k,clustering=1):
206 self.description = "Collaborative" 128 self.description = "Collaborative"
  129 + self.clustering = clustering
  130 + self.neighbours = k
207 131
208 - #def run(self,rec,user,similarity_measure):  
209 - def run(self,rec,user): 132 + def run(self,rec,user,limit):
210 """ 133 """
211 Perform recommendation strategy. 134 Perform recommendation strategy.
212 """ 135 """
213 - profile = user.maximal_pkg_profile()  
214 - #profile_str = string.join(list(profile),' ')  
215 - query = xapian.Query(xapian.Query.OP_OR,list(profile))  
216 - enquire = xapian.Enquire(rec.users_repository) 136 + profile = user.pkg_profile
  137 + # prepare index for querying user profile
  138 + query = xapian.Query(xapian.Query.OP_OR,profile)
  139 + if self.clustering:
  140 + enquire = xapian.Enquire(rec.clustered_users_repository)
  141 + else:
  142 + enquire = xapian.Enquire(rec.users_repository)
  143 + enquire.set_weighting_scheme(rec.weight)
217 enquire.set_query(query) 144 enquire.set_query(query)
218 -  
219 try: 145 try:
220 - #mset = enquire.get_mset(0, 182, None, UserMatchDecider(profile))  
221 - mset = enquire.get_mset(0, 20) 146 + # retrieve matching users
  147 + mset = enquire.get_mset(0, self.neighbours)
222 except xapian.DatabaseError as error: 148 except xapian.DatabaseError as error:
223 - logging.critical(error.get_msg())  
224 - raise Error  
225 - 149 + logging.critical("Collaborative strategy: "+error.get_msg())
226 rset = xapian.RSet() 150 rset = xapian.RSet()
  151 + logging.debug("Neighborhood composed by the following users (by hash)")
227 for m in mset: 152 for m in mset:
228 rset.add_document(m.document.get_docid()) 153 rset.add_document(m.document.get_docid())
229 - logging.debug("Counting as relevant submission %s" %  
230 - m.document.get_data())  
231 -  
232 - eset = enquire.get_eset(20,rset,PkgExpandDecider())  
233 - rank = 0 154 + logging.debug(m.document.get_data())
  155 + # retrieve most relevant packages
  156 + eset = enquire.get_eset(limit,rset,PkgExpandDecider())
  157 + # compose result dictionary
234 item_score = {} 158 item_score = {}
235 - for term in eset:  
236 - item_score[term.term] = rank  
237 - rank = rank+1  
238 - 159 + for package in eset:
  160 + item_score[package.term.lstrip("XP")] = package.weight
239 return recommender.RecommendationResult(item_score) 161 return recommender.RecommendationResult(item_score)
240 162
  163 +class DemographicStrategy(RecommendationStrategy):
  164 + """
  165 + Recommendation strategy based on demographic data.
  166 + """
  167 + def __init__(self):
  168 + self.description = "Demographic"
  169 + logging.debug("Demographic recommendation not yet implemented.")
  170 + raise Error
  171 +
  172 + def run(self,user,items_repository):
  173 + """
  174 + Perform recommendation strategy.
  175 + """
  176 + pass
  177 +
241 class KnowledgeBasedStrategy(RecommendationStrategy): 178 class KnowledgeBasedStrategy(RecommendationStrategy):
242 """ 179 """
243 Knowledge-based recommendation strategy. 180 Knowledge-based recommendation strategy.
244 """ 181 """
245 def __init__(self): 182 def __init__(self):
246 self.description = "Knowledge-based" 183 self.description = "Knowledge-based"
  184 + logging.debug("Knowledge-based recommendation not yet implemented.")
  185 + raise Error
247 186
248 def run(self,user,knowledge_repository): 187 def run(self,user,knowledge_repository):
249 """ 188 """
250 Perform recommendation strategy. 189 Perform recommendation strategy.
251 """ 190 """
252 - logging.critical("Knowledge-based recommendation strategy is not yet implemented.")  
253 - raise Error 191 + pass
254 192
255 -class DemographicStrategy(RecommendationStrategy): 193 +class ReputationHeuristic(Singleton):
256 """ 194 """
257 - Recommendation strategy based on demographic data. 195 + Abstraction for different reputation heuristics.
  196 + """
  197 + pass
  198 +
  199 +class BugsHeuristic(ReputationHeuristic):
  200 + """
  201 + Reputation heuristic based on quantity of open bugs.
  202 + """
  203 + pass
  204 +
  205 +class RCBugsHeuristic(ReputationHeuristic):
  206 + """
  207 + Reputation heuristic based on quantity of RC bugs.
  208 + """
  209 + pass
  210 +
  211 +class PopularityHeuristic(ReputationHeuristic):
  212 + """
  213 + Reputation heuristic based on popularity of packages.
  214 + """
  215 + pass
  216 +
  217 +class ItemReputationStrategy(RecommendationStrategy):
  218 + """
  219 + Recommendation strategy based on items reputation.
258 """ 220 """
259 def __init__(self): 221 def __init__(self):
260 - self.description = "Demographic" 222 + self.description = "Item reputation"
  223 + logging.debug("Item reputation recommendation not yet implemented.")
  224 + raise Error
261 225
262 - def run(self,user,items_repository): 226 + def run(self,items_list,heuristic):
263 """ 227 """
264 Perform recommendation strategy. 228 Perform recommendation strategy.
265 """ 229 """
266 - logging.critical("Demographic recommendation strategy is not yet implemented.")  
267 - raise Error 230 + pass
src/tests/package-xapian-index
@@ -1,10 +0,0 @@ @@ -1,10 +0,0 @@
1 -aaphoto: implemented-in::c, interface::commandline, role::program, use::editing, works-with::image  
2 -dia: implemented-in::c, interface::x11, role::program, scope::application, suite::gnu, uitoolkit::gtk, use::editing, works-with::image, works-with::image:vector, x11::application  
3 -eog: implemented-in::c, interface::x11, role::program, scope::application, suite::gnome, uitoolkit::gtk, use::viewing, works-with-format::jpg, works-with-format::png, works-with::image, works-with::image:raster, works-with::image:vector, x11::application  
4 -emacs: devel::editor, role::dummy, role::metapackage, special::meta, suite::emacs, suite::gnu, use::editing  
5 -ferret: devel::modelling, role::program, scope::application, suite::gnu, works-with::db  
6 -festival: accessibility::speech, devel::interpreter, implemented-in::scheme, interface::text-mode, network::client, network::server, role::program, sound::speech, uitoolkit::ncurses, works-with::audio  
7 -file: admin::forensics, implemented-in::c, interface::commandline, role::program, scope::utility, use::analysing, use::scanning, works-with::file  
8 -gimp: implemented-in::c, interface::x11, role::program, scope::application, suite::gimp, suite::gnu, uitoolkit::gtk, use::editing, works-with-format::gif, works-with-format::jpg, works-with-format::pdf, works-with-format::png, works-with-format::tiff, works-with::image, works-with::image:raster, works-with::text, x11::application  
9 -inkscape: implemented-in::c, implemented-in::c++, interface::x11, role::program, scope::application, uitoolkit::gtk, use::editing, works-with-format::pdf, works-with-format::postscript, works-with-format::svg, works-with-format::tex, works-with::image, works-with::image:vector, x11::application  
10 -xpdf: implemented-in::c++, interface::x11, role::program, scope::application, uitoolkit::motif, use::viewing, works-with-format::pdf, works-with::text, x11::application  
src/tests/recommender_tests.py 0 → 100755
@@ -0,0 +1,69 @@ @@ -0,0 +1,69 @@
  1 +#!/usr/bin/env python
  2 +"""
  3 + recommenderTests - Recommender class test case
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import unittest2
  23 +import sys
  24 +sys.path.insert(0,'../')
  25 +from recommender import RecommendationResult, Recommender
  26 +from user import User
  27 +from config import Config
  28 +from strategy import ContentBasedStrategy, CollaborativeStrategy
  29 +
class RecommendationResultTests(unittest2.TestCase):
    """
    Check RecommendationResult built from a fixed item/score mapping.
    """
    @classmethod
    def setUpClass(cls):
        # A single result object shared by the tests of this class.
        scores = {"gimp":1.5,"inkscape":3.0,"eog":1}
        cls.result = RecommendationResult(scores)

    def test_str(self):
        """
        The string form is expected to list items by descending score.
        """
        expected = "\n 0: inkscape\n 1: gimp\n 2: eog\n"
        self.assertEqual(self.result.__str__(),expected)

    def test_get_prediction(self):
        """
        The prediction is expected as (item, score) pairs, best score first.
        """
        expected = [("inkscape",3.0),("gimp",1.5),("eog",1)]
        self.assertEqual(self.result.get_prediction(),expected)
  42 +
class RecommenderTests(unittest2.TestCase):
    """
    Check strategy selection and recommendation retrieval of Recommender.
    """
    @classmethod
    def setUpClass(cls):
        # One recommender built from the default configuration.
        cls.rec = Recommender(Config())

    def test_set_strategy(self):
        """
        Each strategy option must install the matching strategy object.
        """
        # (option, expected 'content' attribute) for content-based options.
        content_based = (("cb","full"),("cbt","tag"),("cbd","desc"))
        for option,content in content_based:
            self.rec.set_strategy(option)
            self.assertIsInstance(self.rec.strategy,ContentBasedStrategy)
            self.assertEqual(self.rec.strategy.content,content)
        self.rec.set_strategy("col")
        self.assertIsInstance(self.rec.strategy,CollaborativeStrategy)

    def test_get_recommendation(self):
        """
        A recommendation for a small profile must be a non-empty result.
        """
        profile = {"inkscape": 1, "gimp": 1, "eog":1}
        recommendation = self.rec.get_recommendation(User(profile))
        self.assertIsInstance(recommendation, RecommendationResult)
        self.assertGreater(len(recommendation.item_score),0)
  67 +
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
    unittest2.main()
src/tests/runner.py
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 """ 2 """
3 - tests - execution of the whole set of tests suites. 3 + runner - Run the whole set of test case suites.
4 """ 4 """
5 __author__ = "Tassia Camoes Araujo <tassia@gmail.com>" 5 __author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
6 __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" 6 __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
@@ -20,9 +20,30 @@ __license__ = &quot;&quot;&quot; @@ -20,9 +20,30 @@ __license__ = &quot;&quot;&quot;
20 """ 20 """
21 21
22 import unittest2 22 import unittest2
23 -import user_tests  
24 -import singleton_tests 23 +from user_tests import UserTests, FilterTagTests, FilterDescriptionTests
  24 +from recommender_tests import RecommendationResultTests, RecommenderTests
  25 +from strategy_tests import (PkgMatchDeciderTests, UserMatchDeciderTests,
  26 + PkgExpandDeciderTests, TagExpandDeciderTests, ContentBasedStrategyTests,
  27 + CollaborativeStrategyTests, DemographicStrategyTests,
  28 + KnowledgeBasedStrategyTests, ItemReputationStrategyTests)
  29 +from singleton_tests import SingletonTests
  30 +
def load_tests(test_cases):
    """
    Return a single suite holding the tests of every class in 'test_cases'.

    NOTE(review): the name matches unittest's 'load_tests' module hook, which
    is called as load_tests(loader, tests, pattern); confirm this module is
    never loaded through test discovery.
    """
    loader = unittest2.TestLoader()
    combined = unittest2.TestSuite()
    for case_class in test_cases:
        combined.addTests(loader.loadTestsFromTestCase(case_class))
    return combined
  37 +
# Test case classes, one inner list per test module they are imported from.
test_lists = [
    [UserTests, FilterTagTests, FilterDescriptionTests],
    [RecommendationResultTests, RecommenderTests],
    [PkgMatchDeciderTests, UserMatchDeciderTests, PkgExpandDeciderTests,
     TagExpandDeciderTests, ContentBasedStrategyTests,
     CollaborativeStrategyTests, DemographicStrategyTests,
     KnowledgeBasedStrategyTests, ItemReputationStrategyTests],
    [SingletonTests],
]

# Every group is run as its own suite, so a separate report is printed for
# each of them.
runner = unittest2.TextTestRunner()
for module in test_lists:
    runner.run(load_tests(module))
src/tests/singleton_tests.py
@@ -24,9 +24,6 @@ import sys @@ -24,9 +24,6 @@ import sys
24 sys.path.insert(0,'../') 24 sys.path.insert(0,'../')
25 from singleton import Singleton 25 from singleton import Singleton
26 26
27 -def suite():  
28 - return unittest2.TestLoader().loadTestsFromTestCase(SingletonTests)  
29 -  
30 class SingletonTests(unittest2.TestCase): 27 class SingletonTests(unittest2.TestCase):
31 def test_creation(self): 28 def test_creation(self):
32 object_1 = Singleton() 29 object_1 = Singleton()
src/tests/strategy_tests.py 0 → 100755
@@ -0,0 +1,116 @@ @@ -0,0 +1,116 @@
  1 +#!/usr/bin/env python
  2 +"""
  3 + strategyTests - Recommendation strategies classes test case
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import unittest2
  23 +import xapian
  24 +import sys
  25 +sys.path.insert(0,'../')
  26 +from error import Error
  27 +from user import User
  28 +from recommender import RecommendationResult
  29 +from config import *
  30 +#from data import *
  31 +from strategy import (PkgMatchDecider, UserMatchDecider, PkgExpandDecider,
  32 + TagExpandDecider, ContentBasedStrategy,
  33 + CollaborativeStrategy, DemographicStrategy,
  34 + KnowledgeBasedStrategy, ItemReputationStrategy)
  35 +
class PkgMatchDeciderTests(unittest2.TestCase):
    """
    Check PkgMatchDecider against documents whose data is a package name.
    """
    @classmethod
    def setUpClass(cls):
        cls.decider = PkgMatchDecider(["gimp","eog","inkscape"])
        # The document is shared; each test overwrites its data first.
        cls.doc = xapian.Document()

    def test_match(self):
        """
        A package outside the decider's list is expected to be accepted.
        """
        self.doc.set_data("emacs")
        accepted = self.decider(self.doc)
        self.assertTrue(accepted)

    def test_no_match(self):
        """
        A package from the decider's list is expected to be rejected.
        """
        self.doc.set_data("gimp")
        accepted = self.decider(self.doc)
        self.assertFalse(accepted)
  50 +
class UserMatchDeciderTests(unittest2.TestCase):
    """
    Check UserMatchDecider against documents carrying package terms.
    """
    @classmethod
    def setUpClass(cls):
        profile = ["gimp","eog","inkscape", "emacs"]
        cls.decider = UserMatchDecider(profile)

    def setUp(self):
        # A fresh, empty document for every test.
        self.doc = xapian.Document()

    def test_match(self):
        """
        A document with three terms of the profile is expected to match.
        """
        for term in ("emacs","gimp","eog"):
            self.doc.add_term(term)
        self.assertTrue(self.decider(self.doc))

    def test_no_match(self):
        """
        A document with a single term of the profile must not match.
        """
        self.doc.add_term("gimp")
        self.assertFalse(self.decider(self.doc))
  69 +
class PkgExpandDeciderTests(unittest2.TestCase):
    """
    Check which term prefixes PkgExpandDecider accepts.
    """
    @classmethod
    def setUpClass(cls):
        cls.decider = PkgExpandDecider()

    def test_match(self):
        """
        An "XP"-prefixed term is expected to be accepted.
        """
        accepted = self.decider("XPgimp")
        self.assertTrue(accepted)

    def test_no_match(self):
        """
        An "XT"-prefixed term is expected to be rejected.
        """
        accepted = self.decider("XTgimp")
        self.assertFalse(accepted)
  80 +
class TagExpandDeciderTests(unittest2.TestCase):
    """
    Check which term prefixes TagExpandDecider accepts.
    """
    @classmethod
    def setUpClass(cls):
        cls.decider = TagExpandDecider()

    def test_match(self):
        """
        An "XT"-prefixed term is expected to be accepted.
        """
        accepted = self.decider("XTgimp")
        self.assertTrue(accepted)

    def test_no_match(self):
        """
        A term without prefix is expected to be rejected.
        """
        accepted = self.decider("gimp")
        self.assertFalse(accepted)
  91 +
class ContentBasedStrategyTests(unittest2.TestCase):
    """
    Placeholder test case: no test method is defined yet.
    """
    @classmethod
    def setUpClass(cls):
        """
        Nothing to prepare so far.
        """
  97 +
class CollaborativeStrategyTests(unittest2.TestCase):
    """
    Placeholder test case: no test method is defined yet.
    """
    @classmethod
    def setUpClass(cls):
        """
        Nothing to prepare so far.
        """
  102 +
class DemographicStrategyTests(unittest2.TestCase):
    """
    Check that DemographicStrategy cannot be instantiated.
    """
    def test_call(self):
        """
        Creating the strategy is expected to raise Error.
        """
        with self.assertRaises(Error):
            DemographicStrategy()
  106 +
class KnowledgeBasedStrategyTests(unittest2.TestCase):
    """
    Check that KnowledgeBasedStrategy cannot be instantiated.
    """
    def test_call(self):
        """
        Creating the strategy is expected to raise Error.
        """
        with self.assertRaises(Error):
            KnowledgeBasedStrategy()
  110 +
class ItemReputationStrategyTests(unittest2.TestCase):
    """
    Check that ItemReputationStrategy cannot be instantiated.
    """
    def test_call(self):
        """
        Creating the strategy is expected to raise Error.
        """
        with self.assertRaises(Error):
            ItemReputationStrategy()
  114 +
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
    unittest2.main()
src/tests/user_tests.py
@@ -19,26 +19,39 @@ __license__ = &quot;&quot;&quot; @@ -19,26 +19,39 @@ __license__ = &quot;&quot;&quot;
19 along with this program. If not, see <http://www.gnu.org/licenses/>. 19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 """ 20 """
21 21
22 -import operator  
23 -import math  
24 import unittest2 22 import unittest2
25 import xapian 23 import xapian
26 import sys 24 import sys
27 sys.path.insert(0,'../') 25 sys.path.insert(0,'../')
28 -from user import *  
29 -from config import *  
30 -from data import * 26 +from user import User, FilterTag, FilterDescription
  27 +from config import Config
  28 +from data import SampleAptXapianIndex
31 29
32 -def suite():  
class FilterTagTests(unittest2.TestCase):
    """
    Check which terms FilterTag accepts.
    """
    def test_call_true(self):
        """
        An "XT"-prefixed term is expected to be accepted.
        """
        tag_filter = FilterTag()
        self.assertTrue(tag_filter("XTrole::program"))

    def test_call_false(self):
        """
        The same tag without the "XT" prefix is expected to be rejected.
        """
        tag_filter = FilterTag()
        self.assertFalse(tag_filter("role::program"))
  36 +
class FilterDescriptionTests(unittest2.TestCase):
    """
    Check which terms FilterDescription accepts.
    """
    def test_call_true(self):
        """
        A plain lower-case term is expected to be accepted.
        """
        desc_filter = FilterDescription()
        self.assertTrue(desc_filter("program"))
        # "Z"-prefixed terms are not checked: the matching assertion,
        # assertTrue(FilterDescription()("Zprogram")), is disabled.

    def test_call_false(self):
        """
        An "XT"-prefixed term is expected to be rejected.
        """
        desc_filter = FilterDescription()
        self.assertFalse(desc_filter("XTprogram"))
34 44
35 class UserTests(unittest2.TestCase): 45 class UserTests(unittest2.TestCase):
36 @classmethod 46 @classmethod
37 def setUpClass(self): 47 def setUpClass(self):
38 cfg = Config() 48 cfg = Config()
39 - #self.axi = xapian.Database(cfg.axi) 49 + self.axi = xapian.Database(cfg.axi)
  50 + sample_packages = ["gimp","aaphoto","eog","emacs","dia","ferret",
  51 + "festival","file","inkscape","xpdf"]
  52 + self.sample_axi = SampleAptXapianIndex(sample_packages,self.axi)
40 self.user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1}) 53 self.user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1})
41 - self.pxi = PkgXapianIndex("package-xapian-index") 54 + #self.sample_axi._print()
42 55
43 def test_hash(self): 56 def test_hash(self):
44 new_user = User(dict()) 57 new_user = User(dict())
@@ -100,34 +113,34 @@ class UserTests(unittest2.TestCase): @@ -100,34 +113,34 @@ class UserTests(unittest2.TestCase):
100 self.assertEqual(self.user.demographic_profile,desktop_art_admin) 113 self.assertEqual(self.user.demographic_profile,desktop_art_admin)
101 114
102 def test_items(self): 115 def test_items(self):
103 - self.assertEqual(self.user.items(),set(["gimp","aaphoto","eog","emacs"]))  
104 -  
105 - def test_axi_tag_profile(self):  
106 - package_terms = ["XP"+package for package in self.user.items()]  
107 - enquire = xapian.Enquire(self.pxi)  
108 - enquire.set_query(xapian.Query(xapian.Query.OP_OR,package_terms))  
109 - user_packages = enquire.get_mset(0, self.pxi.get_doccount(), None, None)  
110 - tag_terms = []  
111 - for p in user_packages:  
112 - tag_terms = tag_terms + [x.term for x in p.document.termlist() \  
113 - if x.term.startswith("XT")]  
114 - relevant_count = dict([(tag,tag_terms.count(tag)) \  
115 - for tag in set(tag_terms)])  
116 - #rank = {}  
117 - #non_relevant_count = dict()  
118 - #for tag,count in relevant_count.items():  
119 - # non_relevant_count[tag] = self.pxi.get_termfreq(tag)-count  
120 - # if non_relevant_count[tag]>0:  
121 - # rank[tag] = relevant_count[tag]/float(non_relevant_count[tag])  
122 - #print "relevant",relevant_count  
123 - #print "non_relevant",non_relevant_count  
124 - #print sorted(rank.items(), key=operator.itemgetter(1))  
125 - #[FIXME] get ths value based on real ranking  
126 - #print set(self.user.axi_tag_profile(self.pxi,4))  
127 - self.assertEqual(set(self.user.axi_tag_profile(self.pxi,4)),  
128 - set(["XTuse::editing", "XTworks-with::image",  
129 - "XTworks-with-format::png",  
130 - "XTworks-with-format::jpg"])) 116 + self.assertEqual(set(self.user.items()),
  117 + set(["gimp","aaphoto","eog","emacs"]))
  118 +
  119 + def test_profile(self):
  120 + self.assertEqual(self.user.profile(self.sample_axi,"tag",10),
  121 + self.user.tag_profile(self.sample_axi,10))
  122 + self.assertEqual(self.user.profile(self.sample_axi,"desc",10),
  123 + self.user.desc_profile(self.sample_axi,10))
  124 + self.assertEqual(self.user.profile(self.sample_axi,"full",10),
  125 + self.user.full_profile(self.sample_axi,10))
  126 +
  127 + def test_tag_profile(self):
  128 + self.assertEqual(self.user.tag_profile(self.sample_axi,10),
  129 + ['XTuse::editing', 'XTworks-with::image:raster',
  130 + 'XTworks-with-format::png', 'XTworks-with-format::jpg',
  131 + 'XTworks-with::image','XTimplemented-in::c',
  132 + 'XTsuite::gnome', 'XTsuite::emacs',
  133 + 'XTrole::metapackage', 'XTdevel::editor'])
  134 +
  135 + def test_desc_profile(self):
  136 + self.assertEqual(self.user.desc_profile(self.sample_axi,10),
  137 + ['image', 'the', 'which', 'manipulation', 'program',
  138 + 'input', 'a', 'gnu', 'images', 'this'])
  139 +
  140 + def test_full_profile(self):
  141 + self.assertEqual(self.user.full_profile(self.sample_axi,10),
  142 + (self.user.tag_profile(self.sample_axi,5)+
  143 + self.user.desc_profile(self.sample_axi,5)))
131 144
132 def test_maximal_pkg_profile(self): 145 def test_maximal_pkg_profile(self):
133 old_pkg_profile = self.user.items() 146 old_pkg_profile = self.user.items()
@@ -25,6 +25,7 @@ import xapian @@ -25,6 +25,7 @@ import xapian
25 import logging 25 import logging
26 import apt 26 import apt
27 from singleton import Singleton 27 from singleton import Singleton
  28 +import data
28 29
29 class FilterTag(xapian.ExpandDecider): 30 class FilterTag(xapian.ExpandDecider):
30 """ 31 """
@@ -34,7 +35,17 @@ class FilterTag(xapian.ExpandDecider): @@ -34,7 +35,17 @@ class FilterTag(xapian.ExpandDecider):
34 """ 35 """
35 Return true if the term is a tag, else false. 36 Return true if the term is a tag, else false.
36 """ 37 """
37 - return term[:2] == "XT" 38 + return term.startswith("XT")
  39 +
class FilterDescription(xapian.ExpandDecider):
    """
    Extend xapian.ExpandDecider to consider only package description terms.
    """
    def __call__(self, term):
        """
        Return true if the term is a description term, else false. A term is
        taken as a description term when str.islower() holds for it, so terms
        with an upper-case prefix (such as "XT" or "Z") are rejected.
        """
        is_description_term = term.islower()
        return is_description_term
38 49
39 class DemographicProfile(Singleton): 50 class DemographicProfile(Singleton):
40 def __init__(self): 51 def __init__(self):
@@ -63,57 +74,83 @@ class User: @@ -63,57 +74,83 @@ class User:
63 """ 74 """
64 Define a user of a recommender. 75 Define a user of a recommender.
65 """ 76 """
66 - def __init__(self,item_score,user_id=0,profiles_set=0): 77 + def __init__(self,item_score,user_id=0,demo_profiles_set=0):
67 """ 78 """
68 - Set initial user attributes. If no user_id was passed as parameter, a  
69 - random md5-hash is generated for that purpose. If the demographic  
70 - profile was not defined, it defaults to 'desktop' 79 + Set initial user attributes. pkg_profile gets the whole set of items,
  80 + a random user_id is set if none was provided and the demographic
  81 + profile defaults to 'desktop'.
71 """ 82 """
72 self.item_score = item_score 83 self.item_score = item_score
  84 + self.pkg_profile = self.items()
  85 +
73 if user_id: 86 if user_id:
74 self.id = user_id 87 self.id = user_id
75 else: 88 else:
76 random.seed() 89 random.seed()
77 self.id = random.getrandbits(128) 90 self.id = random.getrandbits(128)
78 - self.pkg_profile = self.item_score.keys()  
79 - if not profiles_set: 91 +
  92 + if not demo_profiles_set:
80 profiles_set = set(["desktop"]) 93 profiles_set = set(["desktop"])
81 self.set_demographic_profile(profiles_set) 94 self.set_demographic_profile(profiles_set)
82 95
  96 + def items(self):
  97 + """
  98 + Return the set of user items.
  99 + """
  100 + return self.item_score.keys()
  101 +
83 def set_demographic_profile(self,profiles_set): 102 def set_demographic_profile(self,profiles_set):
  103 + """
  104 + Set demographic profle based on labels in 'profiles_set'.
  105 + """
84 self.demographic_profile = DemographicProfile()(profiles_set) 106 self.demographic_profile = DemographicProfile()(profiles_set)
85 107
86 - def items(self): 108 + def profile(self,items_repository,content,size):
87 """ 109 """
88 - Return the set of user items. 110 + Get user profile for a specific type of content: packages tags,
  111 + description or both (full_profile)
  112 + """
  113 + if content == "tag": return self.tag_profile(items_repository,size)
  114 + if content == "desc": return self.desc_profile(items_repository,size)
  115 + if content == "full": return self.full_profile(items_repository,size)
  116 +
  117 + def tag_profile(self,items_repository,size):
  118 + """
  119 + Return most relevant tags for a list of packages.
89 """ 120 """
90 - return set(self.item_score.keys())  
91 -  
92 - def axi_tag_profile(self,apt_xapian_index,profile_size):  
93 - """  
94 - Return most relevant tags for a list of packages based on axi.  
95 - """  
96 - terms = ["XP"+item for item in self.pkg_profile]  
97 - query = xapian.Query(xapian.Query.OP_OR, terms)  
98 - enquire = xapian.Enquire(apt_xapian_index)  
99 - enquire.set_query(query)  
100 - rset = xapian.RSet()  
101 - for m in enquire.get_mset(0,apt_xapian_index.get_doccount()):  
102 - rset.add_document(m.docid)  
103 - # statistically good differentiators between relevant and non-relevant  
104 - eset = enquire.get_eset(profile_size, rset, FilterTag())  
105 - profile = []  
106 - for res in eset:  
107 - profile.append(res.term)  
108 - logging.debug("%.2f %s" % (res.weight,res.term.lstrip("XT"))) 121 + enquire = xapian.Enquire(items_repository)
  122 + matches = data.axi_search_pkgs(items_repository,self.pkg_profile)
  123 + rset_packages = xapian.RSet()
  124 + for m in matches:
  125 + rset_packages.add_document(m.docid)
  126 + # statistically good differentiators
  127 + eset_tags = enquire.get_eset(size, rset_packages, FilterTag())
  128 + profile = [res.term for res in eset_tags]
109 return profile 129 return profile
110 130
111 - #def txi_tag_profile(self,tags_xapian_index,profile_size):  
112 - # """  
113 - # Return most relevant tags for a list of packages based on tags index.  
114 - # """  
115 - # return tags_xapian_index.relevant_tags_from_db(self.pkg_profile,  
116 - # profile_size) 131 + def desc_profile(self,items_repository,size):
  132 + """
  133 + Return most relevant keywords for a list of packages based on their
  134 + text descriptions.
  135 + """
  136 + enquire = xapian.Enquire(items_repository)
  137 + matches = data.axi_search_pkgs(items_repository,self.pkg_profile)
  138 + rset_packages = xapian.RSet()
  139 + for m in matches:
  140 + rset_packages.add_document(m.docid)
  141 + eset_keywords = enquire.get_eset(size, rset_packages,
  142 + FilterDescription())
  143 + profile = [res.term for res in eset_keywords]
  144 + return profile
  145 +
  146 + def full_profile(self,items_repository,size):
  147 + """
  148 + Return most relevant tags and keywords for a list of packages based
  149 + their tags and descriptions.
  150 + """
  151 + tag_profile = self.tag_profile(items_repository,size)[:size/2]
  152 + desc_profile = self.desc_profile(items_repository,size)[:size/2]
  153 + return tag_profile+desc_profile
117 154
118 def maximal_pkg_profile(self): 155 def maximal_pkg_profile(self):
119 """ 156 """
@@ -132,12 +169,11 @@ class User: @@ -132,12 +169,11 @@ class User:
132 if or_dep.name in self.pkg_profile: 169 if or_dep.name in self.pkg_profile:
133 self.pkg_profile.remove(or_dep.name) 170 self.pkg_profile.remove(or_dep.name)
134 except: 171 except:
135 - logging.debug("Disconsidering package not found in cache: %s"  
136 - % p) 172 + logging.debug("Package not found in cache: %s" % p)
137 profile_size = len(self.pkg_profile) 173 profile_size = len(self.pkg_profile)
138 - logging.info("Reduced packages profile size from %d to %d." %  
139 - (old_profile_size, profile_size))  
140 - return set(self.pkg_profile) 174 + logging.debug("Maximal package profile: reduced packages profile size \
  175 + from %d to %d." % (old_profile_size, profile_size))
  176 + return self.pkg_profile
141 177
142 class LocalSystem(User): 178 class LocalSystem(User):
143 """ 179 """
@@ -168,8 +204,9 @@ class LocalSystem(User): @@ -168,8 +204,9 @@ class LocalSystem(User):
168 if pkg.is_auto_installed: 204 if pkg.is_auto_installed:
169 self.pkg_profile.remove(p) 205 self.pkg_profile.remove(p)
170 except: 206 except:
171 - logging.debug("Disconsidering package not found in cache: %s"  
172 - % p) 207 + logging.debug("Package not found in cache: %s" % p)
173 profile_size = len(self.pkg_profile) 208 profile_size = len(self.pkg_profile)
174 - logging.info("Reduced packages profile size from %d to %d." %  
175 - (old_profile_size, profile_size)) 209 + logging.debug("No auto-intalled package profile: reduced packages \
  210 + profile size from %d to %d." %
  211 + (old_profile_size, profile_size))
  212 + return self.pkg_profile