Commit 2e9ab843a42a3a87c77db71ac63340d6baa20c3e

Authored by Tiago Bortoletto Vaz
2 parents 22d71862 c1675b12
Exists in master and in 1 other branch add_vagrant

Merge branch 'master' of github.com:tassia/AppRecommender

src/config.py
... ... @@ -46,8 +46,8 @@ class Config():
46 46 self.popcon_index = "~/.app-recommender/popcon_index"
47 47 self.popcon_dir = "~/.app-recommender/popcon_dir"
48 48 self.clusters_dir = "~/.app-recommender/clusters_dir"
49   - self.strategy = "cta" # defaults to the cheapest one
50   - self.reindex = 0
  49 + self.strategy = "cb" # defaults to the cheapest one
  50 + self.weight = "bm25"
51 51 self.load_options()
52 52 self.set_logger()
53 53  
... ... @@ -63,22 +63,24 @@ class Config():
63 63 print " -c, --config=PATH Path to configuration file."
64 64 print ""
65 65 print " [ recommender ]"
66   - print " -t, --tagsdb=PATH Path to debtags database."
67   - print " -i, --tagsindex=PATH Path to debtags dedicated index."
68   - print " -r, --force-reindex Force reindexing debtags database."
69 66 print " -a, --axi=PATH Path to Apt-xapian-index."
70 67 print " -p, --popconindex=PATH Path to popcon dedicated index."
71 68 print " -m, --popcondir=PATH Path to popcon submissions dir."
72 69 print " -l, --clustersdir=PATH Path to popcon clusters dir."
  70 + print " -w, --weight=OPTION Search weighting scheme."
73 71 print " -s, --strategy=OPTION Recommendation strategy."
74 72 print ""
  73 + print " [ weight options ] "
  74 + print " trad = traditional probabilistic weighting "
  75 + print " bm25 = bm25 weighting scheme "
  76 + print ""
75 77 print " [ strategy options ] "
76   - print " ct = content-based using tags "
77   - print " cta = content-based using tags via apt-xapian-index"
78   - print " cp = content-based using package descriptions "
  78 + print " cb = content-based "
  79 + print " cbt = content-based using only tags as content "
  80 + print " cbd = content-based using only package descriptions as content "
79 81 print " col = collaborative "
80   - print " colct = collaborative through tags content "
81   - print " colcp = collaborative through package descriptions content "
  82 + #print " colct = collaborative through tags content "
  83 + #print " colcp = collaborative through package descriptions content "
82 84  
83 85 def read_option(self, section, option):
84 86 """
... ... @@ -108,19 +110,17 @@ class Config():
108 110 self.output_filename = self.read_option('general', 'output')
109 111 self.config = self.read_option('general', 'config')
110 112  
111   - self.tags_db = self.read_option('recommender', 'tags_db')
112   - self.tags_index = self.read_option('recommender', 'tags_index')
113   - self.reindex = self.read_option('recommender', 'reindex')
114 113 self.axi = self.read_option('recommender', 'axi')
115 114 self.popcon_index = self.read_option('recommender', 'popcon_index')
116 115 self.popcon_dir = self.read_option('recommender', 'popcon_dir')
117 116 self.clusters_dir = self.read_option('recommender', 'clusters_dir')
  117 + self.weight = self.read_option('recommender', 'weight')
  118 + self.strategy = self.read_option('recommender', 'strategy')
118 119  
119   - short_options = "hdvo:c:t:i:ra:p:m:s:"
  120 + short_options = "hdvo:c:a:p:m:l:w:s:"
120 121 long_options = ["help", "debug", "verbose", "output=", "config=",
121   - "tagsdb=", "tagsindex=", "reindex", "axi=",
122   - "popconindex=", "popcondir=", "clustersdir=",
123   - "strategy="]
  122 + "axi=", "popconindex=", "popcondir=", "clustersdir=",
  123 + "weight=", "strategy="]
124 124 try:
125 125 opts, args = getopt.getopt(sys.argv[1:], short_options,
126 126 long_options)
... ... @@ -142,12 +142,6 @@ class Config():
142 142 self.output = p
143 143 elif o in ("-c", "--config"):
144 144 self.config = p
145   - elif o in ("-t", "--tagsdb"):
146   - self.tags_db = p
147   - elif o in ("-i", "--tagsindex"):
148   - self.tags_index = p
149   - elif o in ("-r", "--force-reindex"):
150   - self.reindex = 1
151 145 elif o in ("-a", "--axi"):
152 146 self.axi = p + "/index"
153 147 self.axi_values = p + "/values"
... ... @@ -157,6 +151,8 @@ class Config():
157 151 self.popcon_dir = p
158 152 elif o in ("-l", "--clustersdir"):
159 153 self.popcon_dir = p
  154 + elif o in ("-w", "--weight"):
  155 + self.weight = p
160 156 elif o in ("-s", "--strategy"):
161 157 self.strategy = p
162 158 else:
... ...
src/data.py
... ... @@ -35,29 +35,44 @@ from singleton import Singleton
35 35 import cluster
36 36 from dissimilarity import *
37 37  
38   -#class Item:
39   -# """
40   -# Generic item definition.
41   -# """
42   -#
43   -#class Package(Item):
44   -# """
45   -# Definition of a GNU/Linux application as a recommender item.
46   -# """
47   -# def __init__(self,package_name):
48   -# """
49   -# Set initial attributes.
50   -# """
51   -# self.package_name = package_name
52   -#
53   -#def normalize_tags(string):
54   -# """
55   -# Substitute string characters : by _ and - by '.
56   -# Examples:
57   -# admin::package-management -> admin__package'management
58   -# implemented-in::c++ -> implemented-in__c++
59   -# """
60   -# return string.replace(':','_').replace('-','\'')
  38 +def axi_search_pkgs(axi,pkgs_list):
  39 + terms = ["XP"+item for item in pkgs_list]
  40 + query = xapian.Query(xapian.Query.OP_OR, terms)
  41 + enquire = xapian.Enquire(axi)
  42 + enquire.set_query(query)
  43 + matches = enquire.get_mset(0,axi.get_doccount())
  44 + return matches
  45 +
  46 +def axi_search_pkg_tags(axi,pkg):
  47 + query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg)
  48 + enquire = xapian.Enquire(axi)
  49 + enquire.set_query(query)
  50 + matches = enquire.get_mset(0,1)
  51 + for m in matches:
  52 + tags = [term.term for term in axi.get_document(m.docid).termlist() if
  53 + term.term.startswith("XT")]
  54 + return tags
  55 +
  56 +class SampleAptXapianIndex(xapian.WritableDatabase):
  57 + """
  58 + Sample data source for packages information, mainly useful for tests.
  59 + """
  60 + def __init__(self,pkgs_list,axi):
  61 + xapian.WritableDatabase.__init__(self,".sample_axi",
  62 + xapian.DB_CREATE_OR_OVERWRITE)
  63 + sample = axi_search_pkgs(axi,pkgs_list)
  64 + self.all_docs = []
  65 + for package in sample:
  66 + doc_id = self.add_document(axi.get_document(package.docid))
  67 + self.all_docs.append(doc_id)
  68 +
  69 + def _print(self):
  70 + print "---"
  71 + print xapian.WritableDatabase.__repr__(self)
  72 + print "---"
  73 + for doc_id in self.all_docs:
  74 + print [term.term for term in self.get_document(doc_id).termlist()]
  75 + print "---"
61 76  
62 77 #[FIXME] get pkg tags from axi and remove load_debtags_db method
63 78 def load_debtags_db(db_path):
... ... @@ -75,106 +90,6 @@ def load_debtags_db(db_path):
75 90 logging.error("Could not load DebtagsDB from '%s'." % self.db_path)
76 91 raise Error
77 92  
78   -#class TagsXapianIndex(xapian.WritableDatabase,Singleton):
79   -# """
80   -# Data source for tags info defined as a singleton xapian database.
81   -# """
82   -# def __init__(self,cfg):
83   -# """
84   -# Set initial attributes.
85   -# """
86   -# self.path = os.path.expanduser(cfg.tags_index)
87   -# self.db_path = os.path.expanduser(cfg.tags_db)
88   -# self.debtags_db = debtags.DB()
89   -# try:
90   -# db_file = open(self.db_path)
91   -# except IOError:
92   -# logging.error("Could not load DebtagsDB from '%s'." % self.db_path)
93   -# raise Error
94   -# md5 = hashlib.md5()
95   -# md5.update(db_file.read())
96   -# self.db_md5 = md5.hexdigest()
97   -# db_file.close()
98   -# self.load_index(cfg.reindex)
99   -#
100   -## def load_db(self):
101   -## """
102   -## Load debtags database from the source file.
103   -## """
104   -## tag_filter = re.compile(r"^special::.+$|^.+::TODO$")
105   -## try:
106   -## db_file = open(self.db_path, "r")
107   -## self.debtags_db.read(db_file,lambda x: not tag_filter.match(x))
108   -## db_file.close()
109   -## except:
110   -## logging.error("Could not load DebtagsDB from '%s'." % self.db_path)
111   -## raise Error
112   -#
113   -# def relevant_tags_from_db(self,pkgs_list,qtd_of_tags):
114   -# """
115   -# Return most relevant tags considering a list of packages.
116   -# """
117   -# if not self.debtags_db.package_count():
118   -# #print "index vazio"
119   -# self.debtags_db = load_debtags_db(self.db_path)
120   -# relevant_db = self.debtags_db.choose_packages(pkgs_list)
121   -# relevance_index = debtags.relevance_index_function(self.debtags_db,
122   -# relevant_db)
123   -# sorted_relevant_tags = sorted(relevant_db.iter_tags(),
124   -# lambda a, b: cmp(relevance_index(a),
125   -# relevance_index(b)))
126   -# return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:]))
127   -#
128   -# def load_index(self,reindex):
129   -# """
130   -# Load an existing debtags index.
131   -# """
132   -# if not reindex:
133   -# try:
134   -# logging.info("Opening existing debtags xapian index at \'%s\'"
135   -# % self.path)
136   -# xapian.Database.__init__(self,self.path)
137   -# md5 = self.get_metadata("md5")
138   -# if not md5 == self.db_md5:
139   -# logging.info("Index must be updated.")
140   -# reindex = 1
141   -# except xapian.DatabaseError:
142   -# logging.info("Could not open debtags index.")
143   -# reindex =1
144   -#
145   -# if reindex:
146   -# self.new_index()
147   -#
148   -# def new_index(self):
149   -# """
150   -# Create a xapian index for debtags info based on 'debtags_db' and
151   -# place it at 'self.path'.
152   -# """
153   -# if not os.path.exists(self.path):
154   -# os.makedirs(self.path)
155   -#
156   -# try:
157   -# logging.info("Indexing debtags info from \'%s\'" %
158   -# self.db_path)
159   -# logging.info("Creating new xapian index at \'%s\'" %
160   -# self.path)
161   -# xapian.WritableDatabase.__init__(self,self.path,
162   -# xapian.DB_CREATE_OR_OVERWRITE)
163   -# except xapian.DatabaseError:
164   -# logging.critical("Could not create xapian index.")
165   -# raise Error
166   -#
167   -# self.debtags_db = load_debtags_db(self.db_path)
168   -# self.set_metadata("md5",self.db_md5)
169   -#
170   -# for pkg,tags in self.debtags_db.iter_packages_tags():
171   -# doc = xapian.Document()
172   -# doc.set_data(pkg)
173   -# for tag in tags:
174   -# doc.add_term(normalize_tags(tag))
175   -# doc_id = self.add_document(doc)
176   -# logging.debug("Debtags Xapian: Indexing doc %d",doc_id)
177   -
178 93 class PopconXapianIndex(xapian.WritableDatabase,Singleton):
179 94 """
180 95 Data source for popcon submissions defined as a singleton xapian database.
... ...
src/recommender.py
... ... @@ -19,10 +19,10 @@ __license__ = """
19 19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 20 """
21 21  
22   -from operator import itemgetter
23   -from data import *
24   -from strategy import *
25   -from error import Error
  22 +import xapian
  23 +import operator
  24 +import data
  25 +import strategy
26 26  
27 27 class RecommendationResult:
28 28 """
... ... @@ -40,7 +40,7 @@ class RecommendationResult:
40 40 """
41 41 result = self.get_prediction()
42 42 str = "\n"
43   - for i in range(len(result)):
  43 + for i in range(len((list(result)))):
44 44 str += "%2d: %s\n" % (i,result[i][0])
45 45 return str
46 46  
... ... @@ -48,8 +48,10 @@ class RecommendationResult:
48 48 """
49 49 Return prediction based on recommendation size (number of items).
50 50 """
51   - sorted_result = sorted(self.item_score.items(), key=itemgetter(1))
52   - return reversed(sorted_result[-size:])
  51 + if size > len(self.item_score): size = len(self.item_score)
  52 + sorted_result = sorted(self.item_score.items(),
  53 + key=operator.itemgetter(1))
  54 + return list(reversed(sorted_result[-size:]))
53 55  
54 56 class Recommender:
55 57 """
... ... @@ -59,47 +61,30 @@ class Recommender:
59 61 """
60 62 Set initial parameters.
61 63 """
62   - try:
63   - strategy = "self."+cfg.strategy+"(cfg)"
64   - exec(strategy)
65   - except (NameError, AttributeError, SyntaxError) as err:
66   - print err
67   - logging.critical("Could not perform recommendation strategy '%s'" %
68   - cfg.strategy)
69   - raise Error
70   -
71   - def ct(self,cfg):
72   - """
73   - Set recommender attributes to perform content-based recommendation
74   - using tags index as source data.
75   - """
76   - self.items_repository = TagsXapianIndex(cfg)
77   - self.strategy = ContentBasedStrategy()
78   -
79   - def cta(self,cfg):
80   - """
81   - Set recommender attributes to perform content-based recommendation
82   - using apt-xapian-index as source data.
83   - """
84 64 self.items_repository = xapian.Database(cfg.axi)
85   - self.strategy = AxiContentBasedStrategy()
86   -
87   - def col(self,cfg):
88   - """
89   - Set recommender attributes to perform collaborative recommendation
90   - using popcon-xapian-index as source data.
91   - """
92   - self.users_repository = PopconXapianIndex(cfg)
93   - self.strategy = CollaborativeStrategy()
  65 + self.users_repository = data.PopconXapianIndex(cfg) #[FIXME] only cfg fields
  66 + self.clustered_users_repository = data.PopconXapianIndex(cfg) #[FIXME]
  67 + self.set_strategy(cfg.strategy)
  68 + if cfg.weight == "bm25":
  69 + self.weight = xapian.BM25Weight()
  70 + else:
  71 + self.weight = xapian.TradWeight()
94 72  
95   - def set_strategy(self,strategy):
  73 + def set_strategy(self,strategy_str):
96 74 """
97 75 Set the recommendation strategy.
98 76 """
99   - self.strategy = strategy
  77 + if strategy_str == "cb":
  78 + self.strategy = strategy.ContentBasedStrategy("full")
  79 + if strategy_str == "cbt":
  80 + self.strategy = strategy.ContentBasedStrategy("tag")
  81 + if strategy_str == "cbd":
  82 + self.strategy = strategy.ContentBasedStrategy("desc")
  83 + if strategy_str == "col":
  84 + self.strategy = strategy.CollaborativeStrategy(20)
100 85  
101   - def get_recommendation(self,user):
  86 + def get_recommendation(self,user,limit=20):
102 87 """
103 88 Produces recommendation using previously loaded strategy.
104 89 """
105   - return self.strategy.run(self,user)
  90 + return self.strategy.run(self,user,limit)
... ...
src/strategy.py
... ... @@ -20,54 +20,27 @@ __license__ = """
20 20 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 21 """
22 22  
23   -import string
24   -import os, re
25 23 import xapian
26   -from data import *
27 24 from singleton import Singleton
28 25 import recommender
29   -
30   -class ReputationHeuristic(Singleton):
31   - """
32   - Abstraction for diferent reputation heuristics.
33   - """
34   - pass
35   -
36   -class BugsHeuristic(ReputationHeuristic):
37   - """
38   - Reputation heuristic based on quantity of open bugs.
39   - """
40   - pass
41   -
42   -class RCBugsHeuristic(ReputationHeuristic):
43   - """
44   - Reputation heuristic based on quantity of RC bugs.
45   - """
46   - pass
47   -
48   -class PopularityHeuristic(ReputationHeuristic):
49   - """
50   - Reputation heuristic based on popularity of packages.
51   - """
52   - pass
  26 +from data import *
53 27  
54 28 class PkgMatchDecider(xapian.MatchDecider):
55 29 """
56 30 Extend xapian.MatchDecider to not consider installed packages.
57 31 """
58   -
59   - def __init__(self, installed_pkgs):
  32 + def __init__(self, pkgs_list):
60 33 """
61 34 Set initial parameters.
62 35 """
63 36 xapian.MatchDecider.__init__(self)
64   - self.installed_pkgs = installed_pkgs
  37 + self.pkgs_list = pkgs_list
65 38  
66 39 def __call__(self, doc):
67 40 """
68 41 True if the package is not already installed.
69 42 """
70   - return doc.get_data() not in self.installed_pkgs
  43 + return doc.get_data() not in self.pkgs_list
71 44  
72 45 class UserMatchDecider(xapian.MatchDecider):
73 46 """
... ... @@ -80,51 +53,35 @@ class UserMatchDecider(xapian.MatchDecider):
80 53 """
81 54 xapian.MatchDecider.__init__(self)
82 55 self.profile = profile
83   - print "mdecider:",profile
84 56  
85 57 def __call__(self, doc):
86 58 """
87 59 True if the user has more the half of packages from profile.
88 60 """
89   - profile_size = len(self.profile)
90   - pkg_match=0
  61 + match=0
91 62 for term in doc:
92 63 if term.term in self.profile:
93   - pkg_match = pkg_match+1
94   - print "id",doc.get_docid(),"match",pkg_match
95   - return pkg_match >= profile_size/2
  64 + match = match+1
  65 + return (match >= len(self.profile)/2)
96 66  
97 67 class PkgExpandDecider(xapian.ExpandDecider):
98 68 """
99 69 Extend xapian.ExpandDecider to consider packages only.
100 70 """
101   -
102   - def __init__(self):
103   - """
104   - Call base class init.
105   - """
106   - xapian.ExpandDecider.__init__(self)
107   -
108 71 def __call__(self, term):
109 72 """
110 73 True if the term is a package.
111 74 """
  75 + # [FIXME] return term.startswith("XP")
112 76 return not term.startswith("XT")
113 77  
114 78 class TagExpandDecider(xapian.ExpandDecider):
115 79 """
116 80 Extend xapian.ExpandDecider to consider tags only.
117 81 """
118   -
119   - def __init__(self, profile):
120   - """
121   - Call base class init.
122   - """
123   - xapian.ExpandDecider.__init__(self)
124   -
125   - def __call__(self, doc):
  82 + def __call__(self, term):
126 83 """
127   - True if the user has more the half of packages from profile.
  84 + True if the term is a tag.
128 85 """
129 86 return term.startswith("XT")
130 87  
... ... @@ -134,65 +91,30 @@ class RecommendationStrategy:
134 91 """
135 92 pass
136 93  
137   -class ItemReputationStrategy(RecommendationStrategy):
138   - """
139   - Recommendation strategy based on items reputation.
140   - """
141   - def run(self,items_list,heuristic):
142   - """
143   - Perform recommendation strategy.
144   - """
145   - logging.critical("Item reputation recommendation strategy is not yet implemented.")
146   - raise Error
147   -
148   -#class ContentBasedStrategy(RecommendationStrategy):
149   -# """
150   -# Content-based recommendation strategy.
151   -# """
152   -# def run(self,rec,user):
153   -# """
154   -# Perform recommendation strategy.
155   -# """
156   -# profile = user.txi_tag_profile(rec.items_repository,50)
157   -# qp = xapian.QueryParser()
158   -# query = qp.parse_query(profile)
159   -# enquire = xapian.Enquire(rec.items_repository)
160   -# enquire.set_query(query)
161   -#
162   -# try:
163   -# mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
164   -# except xapian.DatabaseError as error:
165   -# logging.critical(error.get_msg())
166   -# raise Error
167   -#
168   -# item_score = {}
169   -# for m in mset:
170   -# item_score[m.document.get_data()] = m.rank
171   -# return recommender.RecommendationResult(item_score,20)
172   -
173   -class AxiContentBasedStrategy(RecommendationStrategy):
  94 +class ContentBasedStrategy(RecommendationStrategy):
174 95 """
175 96 Content-based recommendation strategy based on Apt-xapian-index.
176 97 """
177   - def __init__(self):
  98 + def __init__(self,content):
178 99 self.description = "Content-based"
  100 + self.content = content
179 101  
180   - def run(self,rec,user):
  102 + def run(self,rec,user,limit):
181 103 """
182 104 Perform recommendation strategy.
183 105 """
184   - profile = user.axi_tag_profile(rec.items_repository,50)
185   - #profile_str = string.join(list(profile),' ')
186   - query = xapian.Query(xapian.Query.OP_OR,list(profile))
  106 + profile = user.profile(rec.items_repository,self.content,50)
  107 + # prepair index for querying user profile
  108 + query = xapian.Query(xapian.Query.OP_OR,profile)
187 109 enquire = xapian.Enquire(rec.items_repository)
  110 + enquire.set_weighting_scheme(rec.weight)
188 111 enquire.set_query(query)
189   -
190 112 try:
191   - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
  113 + # retrieve matching packages
  114 + mset = enquire.get_mset(0, limit, None, PkgMatchDecider(user.items()))
192 115 except xapian.DatabaseError as error:
193   - logging.critical(error.get_msg())
194   - raise Error
195   -
  116 + logging.critical("Content-based strategy: "+error.get_msg())
  117 + # compose result dictionary
196 118 item_score = {}
197 119 for m in mset:
198 120 item_score[m.document.get_data()] = m.weight
... ... @@ -202,66 +124,107 @@ class CollaborativeStrategy(RecommendationStrategy):
202 124 """
203 125 Colaborative recommendation strategy.
204 126 """
205   - def __init__(self):
  127 + def __init__(self,k,clustering=1):
206 128 self.description = "Collaborative"
  129 + self.clustering = clustering
  130 + self.neighbours = k
207 131  
208   - #def run(self,rec,user,similarity_measure):
209   - def run(self,rec,user):
  132 + def run(self,rec,user,limit):
210 133 """
211 134 Perform recommendation strategy.
212 135 """
213   - profile = user.maximal_pkg_profile()
214   - #profile_str = string.join(list(profile),' ')
215   - query = xapian.Query(xapian.Query.OP_OR,list(profile))
216   - enquire = xapian.Enquire(rec.users_repository)
  136 + profile = user.pkg_profile
  137 + # prepair index for querying user profile
  138 + query = xapian.Query(xapian.Query.OP_OR,profile)
  139 + if self.clustering:
  140 + enquire = xapian.Enquire(rec.clustered_users_repository)
  141 + else:
  142 + enquire = xapian.Enquire(rec.users_repository)
  143 + enquire.set_weighting_scheme(rec.weight)
217 144 enquire.set_query(query)
218   -
219 145 try:
220   - #mset = enquire.get_mset(0, 182, None, UserMatchDecider(profile))
221   - mset = enquire.get_mset(0, 20)
  146 + # retrieve matching users
  147 + mset = enquire.get_mset(0, self.neighbours)
222 148 except xapian.DatabaseError as error:
223   - logging.critical(error.get_msg())
224   - raise Error
225   -
  149 + logging.critical("Collaborative strategy: "+error.get_msg())
226 150 rset = xapian.RSet()
  151 + logging.debug("Neighborhood composed by the following users (by hash)")
227 152 for m in mset:
228 153 rset.add_document(m.document.get_docid())
229   - logging.debug("Counting as relevant submission %s" %
230   - m.document.get_data())
231   -
232   - eset = enquire.get_eset(20,rset,PkgExpandDecider())
233   - rank = 0
  154 + logging.debug(m.document.get_data())
  155 + # retrieve most relevant packages
  156 + eset = enquire.get_eset(limit,rset,PkgExpandDecider())
  157 + # compose result dictionary
234 158 item_score = {}
235   - for term in eset:
236   - item_score[term.term] = rank
237   - rank = rank+1
238   -
  159 + for package in eset:
  160 + item_score[package.term.lstrip("XP")] = package.weight
239 161 return recommender.RecommendationResult(item_score)
240 162  
  163 +class DemographicStrategy(RecommendationStrategy):
  164 + """
  165 + Recommendation strategy based on demographic data.
  166 + """
  167 + def __init__(self):
  168 + self.description = "Demographic"
  169 + logging.debug("Demographic recommendation not yet implemented.")
  170 + raise Error
  171 +
  172 + def run(self,user,items_repository):
  173 + """
  174 + Perform recommendation strategy.
  175 + """
  176 + pass
  177 +
241 178 class KnowledgeBasedStrategy(RecommendationStrategy):
242 179 """
243 180 Knowledge-based recommendation strategy.
244 181 """
245 182 def __init__(self):
246 183 self.description = "Knowledge-based"
  184 + logging.debug("Knowledge-based recommendation not yet implemented.")
  185 + raise Error
247 186  
248 187 def run(self,user,knowledge_repository):
249 188 """
250 189 Perform recommendation strategy.
251 190 """
252   - logging.critical("Knowledge-based recommendation strategy is not yet implemented.")
253   - raise Error
  191 + pass
254 192  
255   -class DemographicStrategy(RecommendationStrategy):
  193 +class ReputationHeuristic(Singleton):
256 194 """
257   - Recommendation strategy based on demographic data.
  195 + Abstraction for diferent reputation heuristics.
  196 + """
  197 + pass
  198 +
  199 +class BugsHeuristic(ReputationHeuristic):
  200 + """
  201 + Reputation heuristic based on quantity of open bugs.
  202 + """
  203 + pass
  204 +
  205 +class RCBugsHeuristic(ReputationHeuristic):
  206 + """
  207 + Reputation heuristic based on quantity of RC bugs.
  208 + """
  209 + pass
  210 +
  211 +class PopularityHeuristic(ReputationHeuristic):
  212 + """
  213 + Reputation heuristic based on popularity of packages.
  214 + """
  215 + pass
  216 +
  217 +class ItemReputationStrategy(RecommendationStrategy):
  218 + """
  219 + Recommendation strategy based on items reputation.
258 220 """
259 221 def __init__(self):
260   - self.description = "Demographic"
  222 + self.description = "Item reputation"
  223 + logging.debug("Item reputation recommendation not yet implemented.")
  224 + raise Error
261 225  
262   - def run(self,user,items_repository):
  226 + def run(self,items_list,heuristic):
263 227 """
264 228 Perform recommendation strategy.
265 229 """
266   - logging.critical("Demographic recommendation strategy is not yet implemented.")
267   - raise Error
  230 + pass
... ...
src/tests/package-xapian-index
... ... @@ -1,10 +0,0 @@
1   -aaphoto: implemented-in::c, interface::commandline, role::program, use::editing, works-with::image
2   -dia: implemented-in::c, interface::x11, role::program, scope::application, suite::gnu, uitoolkit::gtk, use::editing, works-with::image, works-with::image:vector, x11::application
3   -eog: implemented-in::c, interface::x11, role::program, scope::application, suite::gnome, uitoolkit::gtk, use::viewing, works-with-format::jpg, works-with-format::png, works-with::image, works-with::image:raster, works-with::image:vector, x11::application
4   -emacs: devel::editor, role::dummy, role::metapackage, special::meta, suite::emacs, suite::gnu, use::editing
5   -ferret: devel::modelling, role::program, scope::application, suite::gnu, works-with::db
6   -festival: accessibility::speech, devel::interpreter, implemented-in::scheme, interface::text-mode, network::client, network::server, role::program, sound::speech, uitoolkit::ncurses, works-with::audio
7   -file: admin::forensics, implemented-in::c, interface::commandline, role::program, scope::utility, use::analysing, use::scanning, works-with::file
8   -gimp: implemented-in::c, interface::x11, role::program, scope::application, suite::gimp, suite::gnu, uitoolkit::gtk, use::editing, works-with-format::gif, works-with-format::jpg, works-with-format::pdf, works-with-format::png, works-with-format::tiff, works-with::image, works-with::image:raster, works-with::text, x11::application
9   -inkscape: implemented-in::c, implemented-in::c++, interface::x11, role::program, scope::application, uitoolkit::gtk, use::editing, works-with-format::pdf, works-with-format::postscript, works-with-format::svg, works-with-format::tex, works-with::image, works-with::image:vector, x11::application
10   -xpdf: implemented-in::c++, interface::x11, role::program, scope::application, uitoolkit::motif, use::viewing, works-with-format::pdf, works-with::text, x11::application
src/tests/recommender_tests.py 0 → 100755
... ... @@ -0,0 +1,69 @@
  1 +#!/usr/bin/env python
  2 +"""
  3 + recommenderTests - Recommender class test case
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import unittest2
  23 +import sys
  24 +sys.path.insert(0,'../')
  25 +from recommender import RecommendationResult, Recommender
  26 +from user import User
  27 +from config import Config
  28 +from strategy import ContentBasedStrategy, CollaborativeStrategy
  29 +
  30 +class RecommendationResultTests(unittest2.TestCase):
  31 + @classmethod
  32 + def setUpClass(self):
  33 + self.result = RecommendationResult({"gimp":1.5,"inkscape":3.0,"eog":1})
  34 +
  35 + def test_str(self):
  36 + string = "\n 0: inkscape\n 1: gimp\n 2: eog\n"
  37 + self.assertEqual(self.result.__str__(),string)
  38 +
  39 + def test_get_prediction(self):
  40 + prediction = [("inkscape",3.0),("gimp",1.5),("eog",1)]
  41 + self.assertEqual(self.result.get_prediction(),prediction)
  42 +
  43 +class RecommenderTests(unittest2.TestCase):
  44 + @classmethod
  45 + def setUpClass(self):
  46 + cfg = Config()
  47 + self.rec = Recommender(cfg)
  48 +
  49 + def test_set_strategy(self):
  50 + self.rec.set_strategy("cb")
  51 + self.assertIsInstance(self.rec.strategy,ContentBasedStrategy)
  52 + self.assertEqual(self.rec.strategy.content,"full")
  53 + self.rec.set_strategy("cbt")
  54 + self.assertIsInstance(self.rec.strategy,ContentBasedStrategy)
  55 + self.assertEqual(self.rec.strategy.content,"tag")
  56 + self.rec.set_strategy("cbd")
  57 + self.assertIsInstance(self.rec.strategy,ContentBasedStrategy)
  58 + self.assertEqual(self.rec.strategy.content,"desc")
  59 + self.rec.set_strategy("col")
  60 + self.assertIsInstance(self.rec.strategy,CollaborativeStrategy)
  61 +
  62 + def test_get_recommendation(self):
  63 + user = User({"inkscape": 1, "gimp": 1, "eog":1})
  64 + result = self.rec.get_recommendation(user)
  65 + self.assertIsInstance(result, RecommendationResult)
  66 + self.assertGreater(len(result.item_score),0)
  67 +
  68 +if __name__ == '__main__':
  69 + unittest2.main()
... ...
src/tests/runner.py
1 1 #!/usr/bin/env python
2 2 """
3   - tests - execution of the whole set of tests suites.
  3 + runner - Run the whole set of test cases suites.
4 4 """
5 5 __author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
6 6 __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
... ... @@ -20,9 +20,30 @@ __license__ = """
20 20 """
21 21  
22 22 import unittest2
23   -import user_tests
24   -import singleton_tests
  23 +from user_tests import UserTests, FilterTagTests, FilterDescriptionTests
  24 +from recommender_tests import RecommendationResultTests, RecommenderTests
  25 +from strategy_tests import (PkgMatchDeciderTests, UserMatchDeciderTests,
  26 + PkgExpandDeciderTests, TagExpandDeciderTests, ContentBasedStrategyTests,
  27 + CollaborativeStrategyTests, DemographicStrategyTests,
  28 + KnowledgeBasedStrategyTests, ItemReputationStrategyTests)
  29 +from singleton_tests import SingletonTests
  30 +
  31 +def load_tests(test_cases):
  32 + suite = unittest2.TestSuite()
  33 + for test_class in test_cases:
  34 + tests = unittest2.TestLoader().loadTestsFromTestCase(test_class)
  35 + suite.addTests(tests)
  36 + return suite
  37 +
  38 +test_lists = [[UserTests, FilterTagTests, FilterDescriptionTests],
  39 + [RecommendationResultTests, RecommenderTests],
  40 + [PkgMatchDeciderTests, UserMatchDeciderTests,
  41 + PkgExpandDeciderTests, TagExpandDeciderTests,
  42 + ContentBasedStrategyTests, CollaborativeStrategyTests,
  43 + DemographicStrategyTests, KnowledgeBasedStrategyTests,
  44 + ItemReputationStrategyTests],
  45 + [SingletonTests]]
25 46  
26 47 runner = unittest2.TextTestRunner()
27   -runner.run(user_tests.suite())
28   -runner.run(singleton_tests.suite())
  48 +for module in test_lists:
  49 + runner.run(load_tests(module))
... ...
src/tests/singleton_tests.py
... ... @@ -24,9 +24,6 @@ import sys
24 24 sys.path.insert(0,'../')
25 25 from singleton import Singleton
26 26  
27   -def suite():
28   - return unittest2.TestLoader().loadTestsFromTestCase(SingletonTests)
29   -
30 27 class SingletonTests(unittest2.TestCase):
31 28 def test_creation(self):
32 29 object_1 = Singleton()
... ...
src/tests/strategy_tests.py 0 → 100755
... ... @@ -0,0 +1,116 @@
  1 +#!/usr/bin/env python
  2 +"""
  3 + strategyTests - Recommendation strategies classes test case
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import unittest2
  23 +import xapian
  24 +import sys
  25 +sys.path.insert(0,'../')
  26 +from error import Error
  27 +from user import User
  28 +from recommender import RecommendationResult
  29 +from config import *
  30 +#from data import *
  31 +from strategy import (PkgMatchDecider, UserMatchDecider, PkgExpandDecider,
  32 + TagExpandDecider, ContentBasedStrategy,
  33 + CollaborativeStrategy, DemographicStrategy,
  34 + KnowledgeBasedStrategy, ItemReputationStrategy)
  35 +
class PkgMatchDeciderTests(unittest2.TestCase):
    """
    Check PkgMatchDecider built from a fixed package list: a document whose
    data is not in the list is expected to be accepted, one whose data is in
    the list is expected to be rejected.
    """
    @classmethod
    def setUpClass(cls):
        # The decider and the document are shared by all tests of the class.
        cls.decider = PkgMatchDecider(["gimp", "eog", "inkscape"])
        cls.doc = xapian.Document()

    def test_match(self):
        self.doc.set_data("emacs")
        accepted = self.decider(self.doc)
        self.assertTrue(accepted)

    def test_no_match(self):
        self.doc.set_data("gimp")
        accepted = self.decider(self.doc)
        self.assertFalse(accepted)
  50 +
class UserMatchDeciderTests(unittest2.TestCase):
    """
    Check UserMatchDecider built from a four-package user profile: a
    document carrying three of the profile terms is expected to match, a
    document carrying a single one is not.
    """
    @classmethod
    def setUpClass(cls):
        cls.decider = UserMatchDecider(["gimp", "eog", "inkscape", "emacs"])

    def setUp(self):
        # Every test starts from an empty document.
        self.doc = xapian.Document()

    def test_match(self):
        for term in ("emacs", "gimp", "eog"):
            self.doc.add_term(term)
        matched = self.decider(self.doc)
        self.assertTrue(matched)

    def test_no_match(self):
        self.doc.add_term("gimp")
        matched = self.decider(self.doc)
        self.assertFalse(matched)
  69 +
class PkgExpandDeciderTests(unittest2.TestCase):
    """
    Check PkgExpandDecider: the term "XPgimp" is expected to be accepted and
    the term "XTgimp" to be rejected.
    """
    @classmethod
    def setUpClass(cls):
        cls.decider = PkgExpandDecider()

    def test_match(self):
        accepted = self.decider("XPgimp")
        self.assertTrue(accepted)

    def test_no_match(self):
        accepted = self.decider("XTgimp")
        self.assertFalse(accepted)
  80 +
class TagExpandDeciderTests(unittest2.TestCase):
    """
    Check TagExpandDecider: the term "XTgimp" is expected to be accepted and
    the unprefixed term "gimp" to be rejected.
    """
    @classmethod
    def setUpClass(cls):
        cls.decider = TagExpandDecider()

    def test_match(self):
        accepted = self.decider("XTgimp")
        self.assertTrue(accepted)

    def test_no_match(self):
        accepted = self.decider("gimp")
        self.assertFalse(accepted)
  91 +
class ContentBasedStrategyTests(unittest2.TestCase):
    """
    Placeholder test case for ContentBasedStrategy; it defines no tests yet.
    """
    @classmethod
    def setUpClass(cls):
        # Nothing to prepare so far.
        pass
  97 +
class CollaborativeStrategyTests(unittest2.TestCase):
    """
    Placeholder test case for CollaborativeStrategy; it defines no tests yet.
    """
    @classmethod
    def setUpClass(cls):
        # Nothing to prepare so far.
        pass
  102 +
class DemographicStrategyTests(unittest2.TestCase):
    """
    Check that instantiating DemographicStrategy raises Error.
    """
    def test_call(self):
        with self.assertRaises(Error):
            DemographicStrategy()
  106 +
class KnowledgeBasedStrategyTests(unittest2.TestCase):
    """
    Check that instantiating KnowledgeBasedStrategy raises Error.
    """
    def test_call(self):
        with self.assertRaises(Error):
            KnowledgeBasedStrategy()
  110 +
class ItemReputationStrategyTests(unittest2.TestCase):
    """
    Check that instantiating ItemReputationStrategy raises Error.
    """
    def test_call(self):
        with self.assertRaises(Error):
            ItemReputationStrategy()
  114 +
# When executed directly (not imported by the runner), run the test cases
# of this module through unittest2's command-line entry point.
if __name__ == '__main__':
    unittest2.main()
... ...
src/tests/user_tests.py
... ... @@ -19,26 +19,39 @@ __license__ = &quot;&quot;&quot;
19 19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 20 """
21 21  
22   -import operator
23   -import math
24 22 import unittest2
25 23 import xapian
26 24 import sys
27 25 sys.path.insert(0,'../')
28   -from user import *
29   -from config import *
30   -from data import *
  26 +from user import User, FilterTag, FilterDescription
  27 +from config import Config
  28 +from data import SampleAptXapianIndex
31 29  
32   -def suite():
33   - return unittest2.TestLoader().loadTestsFromTestCase(UserTests)
class FilterTagTests(unittest2.TestCase):
    """
    Check FilterTag: an "XT"-prefixed term is expected to be accepted and the
    same term without the prefix to be rejected.
    """
    def test_call_true(self):
        tag_filter = FilterTag()
        self.assertTrue(tag_filter("XTrole::program"))

    def test_call_false(self):
        tag_filter = FilterTag()
        self.assertFalse(tag_filter("role::program"))
  36 +
class FilterDescriptionTests(unittest2.TestCase):
    """
    Check FilterDescription: a plain lowercase term is expected to be
    accepted and an "XT"-prefixed term to be rejected.
    """
    def test_call_true(self):
        description_filter = FilterDescription()
        self.assertTrue(description_filter("program"))
        # Disabled check for "Z"-prefixed terms:
        #self.assertTrue(FilterDescription()("Zprogram"))

    def test_call_false(self):
        description_filter = FilterDescription()
        self.assertFalse(description_filter("XTprogram"))
34 44  
35 45 class UserTests(unittest2.TestCase):
36 46 @classmethod
37 47 def setUpClass(self):
38 48 cfg = Config()
39   - #self.axi = xapian.Database(cfg.axi)
  49 + self.axi = xapian.Database(cfg.axi)
  50 + sample_packages = ["gimp","aaphoto","eog","emacs","dia","ferret",
  51 + "festival","file","inkscape","xpdf"]
  52 + self.sample_axi = SampleAptXapianIndex(sample_packages,self.axi)
40 53 self.user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1})
41   - self.pxi = PkgXapianIndex("package-xapian-index")
  54 + #self.sample_axi._print()
42 55  
43 56 def test_hash(self):
44 57 new_user = User(dict())
... ... @@ -100,34 +113,34 @@ class UserTests(unittest2.TestCase):
100 113 self.assertEqual(self.user.demographic_profile,desktop_art_admin)
101 114  
102 115 def test_items(self):
103   - self.assertEqual(self.user.items(),set(["gimp","aaphoto","eog","emacs"]))
104   -
105   - def test_axi_tag_profile(self):
106   - package_terms = ["XP"+package for package in self.user.items()]
107   - enquire = xapian.Enquire(self.pxi)
108   - enquire.set_query(xapian.Query(xapian.Query.OP_OR,package_terms))
109   - user_packages = enquire.get_mset(0, self.pxi.get_doccount(), None, None)
110   - tag_terms = []
111   - for p in user_packages:
112   - tag_terms = tag_terms + [x.term for x in p.document.termlist() \
113   - if x.term.startswith("XT")]
114   - relevant_count = dict([(tag,tag_terms.count(tag)) \
115   - for tag in set(tag_terms)])
116   - #rank = {}
117   - #non_relevant_count = dict()
118   - #for tag,count in relevant_count.items():
119   - # non_relevant_count[tag] = self.pxi.get_termfreq(tag)-count
120   - # if non_relevant_count[tag]>0:
121   - # rank[tag] = relevant_count[tag]/float(non_relevant_count[tag])
122   - #print "relevant",relevant_count
123   - #print "non_relevant",non_relevant_count
124   - #print sorted(rank.items(), key=operator.itemgetter(1))
125   - #[FIXME] get ths value based on real ranking
126   - #print set(self.user.axi_tag_profile(self.pxi,4))
127   - self.assertEqual(set(self.user.axi_tag_profile(self.pxi,4)),
128   - set(["XTuse::editing", "XTworks-with::image",
129   - "XTworks-with-format::png",
130   - "XTworks-with-format::jpg"]))
  116 + self.assertEqual(set(self.user.items()),
  117 + set(["gimp","aaphoto","eog","emacs"]))
  118 +
  119 + def test_profile(self):
  120 + self.assertEqual(self.user.profile(self.sample_axi,"tag",10),
  121 + self.user.tag_profile(self.sample_axi,10))
  122 + self.assertEqual(self.user.profile(self.sample_axi,"desc",10),
  123 + self.user.desc_profile(self.sample_axi,10))
  124 + self.assertEqual(self.user.profile(self.sample_axi,"full",10),
  125 + self.user.full_profile(self.sample_axi,10))
  126 +
  127 + def test_tag_profile(self):
  128 + self.assertEqual(self.user.tag_profile(self.sample_axi,10),
  129 + ['XTuse::editing', 'XTworks-with::image:raster',
  130 + 'XTworks-with-format::png', 'XTworks-with-format::jpg',
  131 + 'XTworks-with::image','XTimplemented-in::c',
  132 + 'XTsuite::gnome', 'XTsuite::emacs',
  133 + 'XTrole::metapackage', 'XTdevel::editor'])
  134 +
  135 + def test_desc_profile(self):
  136 + self.assertEqual(self.user.desc_profile(self.sample_axi,10),
  137 + ['image', 'the', 'which', 'manipulation', 'program',
  138 + 'input', 'a', 'gnu', 'images', 'this'])
  139 +
  140 + def test_full_profile(self):
  141 + self.assertEqual(self.user.full_profile(self.sample_axi,10),
  142 + (self.user.tag_profile(self.sample_axi,5)+
  143 + self.user.desc_profile(self.sample_axi,5)))
131 144  
132 145 def test_maximal_pkg_profile(self):
133 146 old_pkg_profile = self.user.items()
... ...
src/user.py
... ... @@ -25,6 +25,7 @@ import xapian
25 25 import logging
26 26 import apt
27 27 from singleton import Singleton
  28 +import data
28 29  
29 30 class FilterTag(xapian.ExpandDecider):
30 31 """
... ... @@ -34,7 +35,17 @@ class FilterTag(xapian.ExpandDecider):
34 35 """
35 36 Return true if the term is a tag, else false.
36 37 """
37   - return term[:2] == "XT"
  38 + return term.startswith("XT")
  39 +
class FilterDescription(xapian.ExpandDecider):
    """
    Extend xapian.ExpandDecider to consider only package description terms.
    """
    def __call__(self, term):
        """
        Return true if the term is a description term, else false.

        A term is accepted when term.islower() holds, i.e. it has at least
        one cased character and no uppercase ones; terms carrying an
        uppercase prefix such as "XT" are therefore rejected.
        """
        # The trailing comment is a disabled alternative that would also
        # accept terms starting with "Z".
        return term.islower() #or term.startswith("Z")
38 49  
39 50 class DemographicProfile(Singleton):
40 51 def __init__(self):
... ... @@ -63,57 +74,83 @@ class User:
63 74 """
64 75 Define a user of a recommender.
65 76 """
66   - def __init__(self,item_score,user_id=0,profiles_set=0):
  77 + def __init__(self,item_score,user_id=0,demo_profiles_set=0):
67 78 """
68   - Set initial user attributes. If no user_id was passed as parameter, a
69   - random md5-hash is generated for that purpose. If the demographic
70   - profile was not defined, it defaults to 'desktop'
  79 + Set initial user attributes. pkg_profile gets the whole set of items,
  80 + a random user_id is set if none was provided and the demographic
  81 + profile defaults to 'desktop'.
71 82 """
72 83 self.item_score = item_score
  84 + self.pkg_profile = self.items()
  85 +
73 86 if user_id:
74 87 self.id = user_id
75 88 else:
76 89 random.seed()
77 90 self.id = random.getrandbits(128)
78   - self.pkg_profile = self.item_score.keys()
79   - if not profiles_set:
  91 +
  92 + if not demo_profiles_set:
80 93 profiles_set = set(["desktop"])
81 94 self.set_demographic_profile(profiles_set)
82 95  
  96 + def items(self):
  97 + """
  98 + Return the set of user items.
  99 + """
  100 + return self.item_score.keys()
  101 +
83 102 def set_demographic_profile(self,profiles_set):
  103 + """
  104 + Set demographic profle based on labels in 'profiles_set'.
  105 + """
84 106 self.demographic_profile = DemographicProfile()(profiles_set)
85 107  
86   - def items(self):
  108 + def profile(self,items_repository,content,size):
87 109 """
88   - Return the set of user items.
  110 + Get user profile for a specific type of content: packages tags,
  111 + description or both (full_profile)
  112 + """
  113 + if content == "tag": return self.tag_profile(items_repository,size)
  114 + if content == "desc": return self.desc_profile(items_repository,size)
  115 + if content == "full": return self.full_profile(items_repository,size)
  116 +
  117 + def tag_profile(self,items_repository,size):
  118 + """
  119 + Return most relevant tags for a list of packages.
89 120 """
90   - return set(self.item_score.keys())
91   -
92   - def axi_tag_profile(self,apt_xapian_index,profile_size):
93   - """
94   - Return most relevant tags for a list of packages based on axi.
95   - """
96   - terms = ["XP"+item for item in self.pkg_profile]
97   - query = xapian.Query(xapian.Query.OP_OR, terms)
98   - enquire = xapian.Enquire(apt_xapian_index)
99   - enquire.set_query(query)
100   - rset = xapian.RSet()
101   - for m in enquire.get_mset(0,apt_xapian_index.get_doccount()):
102   - rset.add_document(m.docid)
103   - # statistically good differentiators between relevant and non-relevant
104   - eset = enquire.get_eset(profile_size, rset, FilterTag())
105   - profile = []
106   - for res in eset:
107   - profile.append(res.term)
108   - logging.debug("%.2f %s" % (res.weight,res.term.lstrip("XT")))
  121 + enquire = xapian.Enquire(items_repository)
  122 + matches = data.axi_search_pkgs(items_repository,self.pkg_profile)
  123 + rset_packages = xapian.RSet()
  124 + for m in matches:
  125 + rset_packages.add_document(m.docid)
  126 + # statistically good differentiators
  127 + eset_tags = enquire.get_eset(size, rset_packages, FilterTag())
  128 + profile = [res.term for res in eset_tags]
109 129 return profile
110 130  
111   - #def txi_tag_profile(self,tags_xapian_index,profile_size):
112   - # """
113   - # Return most relevant tags for a list of packages based on tags index.
114   - # """
115   - # return tags_xapian_index.relevant_tags_from_db(self.pkg_profile,
116   - # profile_size)
  131 + def desc_profile(self,items_repository,size):
  132 + """
  133 + Return most relevant keywords for a list of packages based on their
  134 + text descriptions.
  135 + """
  136 + enquire = xapian.Enquire(items_repository)
  137 + matches = data.axi_search_pkgs(items_repository,self.pkg_profile)
  138 + rset_packages = xapian.RSet()
  139 + for m in matches:
  140 + rset_packages.add_document(m.docid)
  141 + eset_keywords = enquire.get_eset(size, rset_packages,
  142 + FilterDescription())
  143 + profile = [res.term for res in eset_keywords]
  144 + return profile
  145 +
  146 + def full_profile(self,items_repository,size):
  147 + """
  148 + Return most relevant tags and keywords for a list of packages based
  149 + their tags and descriptions.
  150 + """
  151 + tag_profile = self.tag_profile(items_repository,size)[:size/2]
  152 + desc_profile = self.desc_profile(items_repository,size)[:size/2]
  153 + return tag_profile+desc_profile
117 154  
118 155 def maximal_pkg_profile(self):
119 156 """
... ... @@ -132,12 +169,11 @@ class User:
132 169 if or_dep.name in self.pkg_profile:
133 170 self.pkg_profile.remove(or_dep.name)
134 171 except:
135   - logging.debug("Disconsidering package not found in cache: %s"
136   - % p)
  172 + logging.debug("Package not found in cache: %s" % p)
137 173 profile_size = len(self.pkg_profile)
138   - logging.info("Reduced packages profile size from %d to %d." %
139   - (old_profile_size, profile_size))
140   - return set(self.pkg_profile)
  174 + logging.debug("Maximal package profile: reduced packages profile size \
  175 + from %d to %d." % (old_profile_size, profile_size))
  176 + return self.pkg_profile
141 177  
142 178 class LocalSystem(User):
143 179 """
... ... @@ -168,8 +204,9 @@ class LocalSystem(User):
168 204 if pkg.is_auto_installed:
169 205 self.pkg_profile.remove(p)
170 206 except:
171   - logging.debug("Disconsidering package not found in cache: %s"
172   - % p)
  207 + logging.debug("Package not found in cache: %s" % p)
173 208 profile_size = len(self.pkg_profile)
174   - logging.info("Reduced packages profile size from %d to %d." %
175   - (old_profile_size, profile_size))
  209 + logging.debug("No auto-intalled package profile: reduced packages \
  210 + profile size from %d to %d." %
  211 + (old_profile_size, profile_size))
  212 + return self.pkg_profile
... ...