Commit ea86d6ae4b509b088b98111c4d795f77eb046133
1 parent
bc5f760c
Exists in
master
and in
1 other branch
[data]
- axi_search_pkgs() returns docids instead of matches - popcon indexing considers pkgs filters [evaluation] - Added comments to cross-validation - cross_item_score now represents items_score (with respective ratings) [recommender] - Defined some more new strategies [strategies] - Now uses profile_size provided by config [user] - content_profile() replaced profile() - filter_pkg_profile() replaced app_pkg_profile() - new classes RandomPopcon and PopconSystem
Showing
5 changed files
with
128 additions
and
52 deletions
Show diff stats
src/data.py
@@ -31,6 +31,7 @@ import shutil | @@ -31,6 +31,7 @@ import shutil | ||
31 | from error import Error | 31 | from error import Error |
32 | from singleton import Singleton | 32 | from singleton import Singleton |
33 | from dissimilarity import * | 33 | from dissimilarity import * |
34 | +from config import Config | ||
34 | 35 | ||
35 | def axi_search_pkgs(axi,pkgs_list): | 36 | def axi_search_pkgs(axi,pkgs_list): |
36 | terms = ["XP"+item for item in pkgs_list] | 37 | terms = ["XP"+item for item in pkgs_list] |
@@ -38,19 +39,22 @@ def axi_search_pkgs(axi,pkgs_list): | @@ -38,19 +39,22 @@ def axi_search_pkgs(axi,pkgs_list): | ||
38 | enquire = xapian.Enquire(axi) | 39 | enquire = xapian.Enquire(axi) |
39 | enquire.set_query(query) | 40 | enquire.set_query(query) |
40 | matches = enquire.get_mset(0,axi.get_doccount()) | 41 | matches = enquire.get_mset(0,axi.get_doccount()) |
41 | - return matches | 42 | + return [m.docid for m in matches] |
42 | 43 | ||
43 | def axi_search_pkg_tags(axi,pkg): | 44 | def axi_search_pkg_tags(axi,pkg): |
44 | enquire = xapian.Enquire(axi) | 45 | enquire = xapian.Enquire(axi) |
45 | enquire.set_query(xapian.Query("XP"+pkg)) | 46 | enquire.set_query(xapian.Query("XP"+pkg)) |
46 | matches = enquire.get_mset(0,1) | 47 | matches = enquire.get_mset(0,1) |
47 | if not matches: | 48 | if not matches: |
48 | - #logging.debug("Package %s not found in items repository" % pkg) | ||
49 | - return [] | 49 | + logging.debug("Package %s not found in items repository" % pkg) |
50 | + return False | ||
50 | for m in matches: | 51 | for m in matches: |
51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if | 52 | tags = [term.term for term in axi.get_document(m.docid).termlist() if |
52 | term.term.startswith("XT")] | 53 | term.term.startswith("XT")] |
53 | - return tags | 54 | + if not tags: |
55 | + return "notags" | ||
56 | + else: | ||
57 | + return tags | ||
54 | 58 | ||
55 | def print_index(index): | 59 | def print_index(index): |
56 | output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n" | 60 | output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n" |
@@ -96,7 +100,7 @@ class SampleAptXapianIndex(xapian.WritableDatabase): | @@ -96,7 +100,7 @@ class SampleAptXapianIndex(xapian.WritableDatabase): | ||
96 | xapian.DB_CREATE_OR_OVERWRITE) | 100 | xapian.DB_CREATE_OR_OVERWRITE) |
97 | sample = axi_search_pkgs(axi,pkgs_list) | 101 | sample = axi_search_pkgs(axi,pkgs_list) |
98 | for package in sample: | 102 | for package in sample: |
99 | - doc_id = self.add_document(axi.get_document(package.docid)) | 103 | + doc_id = self.add_document(axi.get_document(package)) |
100 | 104 | ||
101 | def __str__(self): | 105 | def __str__(self): |
102 | return print_index(self) | 106 | return print_index(self) |
@@ -115,6 +119,14 @@ class PopconSubmission(): | @@ -115,6 +119,14 @@ class PopconSubmission(): | ||
115 | output += "\n "+pkg+": "+str(weight) | 119 | output += "\n "+pkg+": "+str(weight) |
116 | return output | 120 | return output |
117 | 121 | ||
122 | + def apps(self,axi): | ||
123 | + apps = {} | ||
124 | + for pkg in self.packages.keys(): | ||
125 | + tags = axi_search_pkg_tags(self.axi,pkg) | ||
126 | + if "XTrole::program" in tags: | ||
127 | + apps[pkg] = self.packages[pkg] | ||
128 | + return apps | ||
129 | + | ||
118 | def load(self,binary=1): | 130 | def load(self,binary=1): |
119 | """ | 131 | """ |
120 | Parse a popcon submission, generating the names of the valid packages | 132 | Parse a popcon submission, generating the names of the valid packages |
@@ -159,6 +171,16 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -159,6 +171,16 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
159 | self.path = os.path.expanduser(cfg.popcon_index) | 171 | self.path = os.path.expanduser(cfg.popcon_index) |
160 | self.source_dir = os.path.expanduser(cfg.popcon_dir) | 172 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
161 | self.max_popcon = cfg.max_popcon | 173 | self.max_popcon = cfg.max_popcon |
174 | + self.valid_pkgs = [] | ||
175 | + # file format: one pkg_name per line | ||
176 | + with open(os.path.join(cfg.filters,cfg.pkgs_filter)) as valid_pkgs: | ||
177 | + self.valid_pkgs = [line.strip() for line in valid_pkgs | ||
178 | + if not line.startswith("#")] | ||
179 | + logging.debug("Considering %d valid packages" % len(self.valid_pkgs)) | ||
180 | + with open(os.path.join(cfg.filters,"tags")) as valid_tags: | ||
181 | + self.valid_tags = [line.strip() for line in valid_tags | ||
182 | + if not line.startswith("#")] | ||
183 | + logging.debug("Considering %d valid tags" % len(self.valid_tags)) | ||
162 | if not cfg.index_mode == "old" or not self.load_index(): | 184 | if not cfg.index_mode == "old" or not self.load_index(): |
163 | if not os.path.exists(cfg.popcon_dir): | 185 | if not os.path.exists(cfg.popcon_dir): |
164 | os.makedirs(cfg.popcon_dir) | 186 | os.makedirs(cfg.popcon_dir) |
@@ -243,10 +265,16 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -243,10 +265,16 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
243 | logging.debug("Parsing popcon submission \'%s\'" % | 265 | logging.debug("Parsing popcon submission \'%s\'" % |
244 | submission.user_id) | 266 | submission.user_id) |
245 | for pkg, freq in submission.packages.items(): | 267 | for pkg, freq in submission.packages.items(): |
246 | - doc.add_term("XP"+pkg,freq) | ||
247 | - #if axi_search_pkg_tags(self.axi,pkg): | ||
248 | - # for tag in axi_search_pkg_tags(self.axi,pkg): | ||
249 | - # doc.add_term(tag,freq) | 268 | + if pkg in self.valid_pkgs: |
269 | + tags = axi_search_pkg_tags(self.axi,pkg) | ||
270 | + # if the package was found in axi | |
271 | + if tags: | ||
272 | + doc.add_term("XP"+pkg,freq) | ||
273 | + # if the package has tags associated with it | ||
274 | + if not tags == "notags": | ||
275 | + for tag in tags: | ||
276 | + if tag in self.valid_tags: | ||
277 | + doc.add_term(tag,freq) | ||
250 | doc_id = self.add_document(doc) | 278 | doc_id = self.add_document(doc) |
251 | doc_count += 1 | 279 | doc_count += 1 |
252 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) | 280 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
@@ -256,7 +284,7 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -256,7 +284,7 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
256 | try: | 284 | try: |
257 | self.commit() | 285 | self.commit() |
258 | except: | 286 | except: |
259 | - self.flush() # deprecated function, used for old lib version | 287 | + self.flush() # deprecated function, used for compatibility with old lib version |
260 | 288 | ||
261 | def get_submissions(self,submissions_dir): | 289 | def get_submissions(self,submissions_dir): |
262 | """ | 290 | """ |
@@ -288,9 +316,7 @@ class KMedoidsClustering(cluster.KMeansClustering): | @@ -288,9 +316,7 @@ class KMedoidsClustering(cluster.KMeansClustering): | ||
288 | data_sample = data | 316 | data_sample = data |
289 | else: | 317 | else: |
290 | data_sample = random.sample(data,max_data) | 318 | data_sample = random.sample(data,max_data) |
291 | - print data_sample | ||
292 | cluster.KMeansClustering.__init__(self, data_sample, distance) | 319 | cluster.KMeansClustering.__init__(self, data_sample, distance) |
293 | - # cluster.KMeansClustering.__init__(self, data, distance) | ||
294 | self.distanceMatrix = {} | 320 | self.distanceMatrix = {} |
295 | for submission in self._KMeansClustering__data: | 321 | for submission in self._KMeansClustering__data: |
296 | self.distanceMatrix[submission.user_id] = {} | 322 | self.distanceMatrix[submission.user_id] = {} |
src/evaluation.py
@@ -25,6 +25,7 @@ import random | @@ -25,6 +25,7 @@ import random | ||
25 | from collections import defaultdict | 25 | from collections import defaultdict |
26 | import logging | 26 | import logging |
27 | 27 | ||
28 | +from error import Error | ||
28 | from user import * | 29 | from user import * |
29 | from recommender import * | 30 | from recommender import * |
30 | from singleton import Singleton | 31 | from singleton import Singleton |
@@ -271,11 +272,15 @@ class CrossValidation: | @@ -271,11 +272,15 @@ class CrossValidation: | ||
271 | """ | 272 | """ |
272 | Perform cross-validation. | 273 | Perform cross-validation. |
273 | """ | 274 | """ |
274 | - # | ||
275 | - cross_item_score = dict.fromkeys(user.pkg_profile,1) | 275 | + # Extracting user profile scores from cross validation |
276 | + cross_item_score = {} | ||
277 | + for pkg in user.pkg_profile: | ||
278 | + cross_item_score[pkg] = user.item_score[pkg] | ||
276 | partition_size = int(len(cross_item_score)*self.partition_proportion) | 279 | partition_size = int(len(cross_item_score)*self.partition_proportion) |
280 | + # main iteration | ||
277 | for r in range(self.rounds): | 281 | for r in range(self.rounds): |
278 | round_partition = {} | 282 | round_partition = {} |
283 | + # move items from cross_item_score to round-partition | ||
279 | for j in range(partition_size): | 284 | for j in range(partition_size): |
280 | if len(cross_item_score)>0: | 285 | if len(cross_item_score)>0: |
281 | random_key = random.choice(cross_item_score.keys()) | 286 | random_key = random.choice(cross_item_score.keys()) |
@@ -283,20 +288,25 @@ class CrossValidation: | @@ -283,20 +288,25 @@ class CrossValidation: | ||
283 | logging.critical("Empty cross_item_score.") | 288 | logging.critical("Empty cross_item_score.") |
284 | raise Error | 289 | raise Error |
285 | round_partition[random_key] = cross_item_score.pop(random_key) | 290 | round_partition[random_key] = cross_item_score.pop(random_key) |
286 | - #logging.debug("Round partition: %s",str(round_partition)) | ||
287 | - #logging.debug("Cross item-score: %s",str(cross_item_score)) | 291 | + logging.debug("Round partition: %s",str(round_partition)) |
292 | + logging.debug("Cross item-score: %s",str(cross_item_score)) | ||
293 | + # round user is created with remaining items | ||
288 | round_user = User(cross_item_score) | 294 | round_user = User(cross_item_score) |
289 | result_size = int(self.recommender.items_repository.get_doccount()* | 295 | result_size = int(self.recommender.items_repository.get_doccount()* |
290 | self.result_proportion) | 296 | self.result_proportion) |
291 | predicted_result = self.recommender.get_recommendation(round_user,result_size) | 297 | predicted_result = self.recommender.get_recommendation(round_user,result_size) |
292 | - #print len(round_partition) | 298 | + if not predicted_result.size: |
299 | + logging.critical("No recommendation produced. Abort cross-validation.") | ||
300 | + raise Error | ||
301 | + # partition is considered the expected result | ||
293 | real_result = RecommendationResult(round_partition) | 302 | real_result = RecommendationResult(round_partition) |
294 | - #logging.debug("Predicted result: %s",predicted_result) | 303 | + logging.debug("Predicted result: %s",predicted_result) |
295 | evaluation = Evaluation(predicted_result,real_result, | 304 | evaluation = Evaluation(predicted_result,real_result, |
296 | self.recommender.items_repository.get_doccount()) | 305 | self.recommender.items_repository.get_doccount()) |
297 | for metric in self.metrics_list: | 306 | for metric in self.metrics_list: |
298 | result = evaluation.run(metric) | 307 | result = evaluation.run(metric) |
299 | self.cross_results[metric.desc].append(result) | 308 | self.cross_results[metric.desc].append(result) |
309 | + # moving back items from round_partition to cross_item_score | ||
300 | while len(round_partition)>0: | 310 | while len(round_partition)>0: |
301 | item,score = round_partition.popitem() | 311 | item,score = round_partition.popitem() |
302 | cross_item_score[item] = score | 312 | cross_item_score[item] = score |
src/recommender.py
@@ -78,15 +78,23 @@ class Recommender: | @@ -78,15 +78,23 @@ class Recommender: | ||
78 | """ | 78 | """ |
79 | Set the recommendation strategy. | 79 | Set the recommendation strategy. |
80 | """ | 80 | """ |
81 | - if strategy_str == "cb": | ||
82 | - self.strategy = strategy.ContentBasedStrategy("full") | ||
83 | - if strategy_str == "cbt": | ||
84 | - self.strategy = strategy.ContentBasedStrategy("tag") | ||
85 | - if strategy_str == "cbd": | ||
86 | - self.strategy = strategy.ContentBasedStrategy("desc") | ||
87 | - if strategy_str == "col": | 81 | + self.items_repository = xapian.Database(self.cfg.axi) |
82 | + if "desktop" in strategy_str: | ||
83 | + self.items_repository = xapian.Database("/root/.app-recommender/DesktopAxi") | ||
84 | + self.cfg.popcon_index = "/root/.app-recommender/popcon-index_desktop_1000" | ||
85 | + | ||
86 | + if strategy_str == "cb" or strategy_str == "cb_desktop": | ||
87 | + self.strategy = strategy.ContentBasedStrategy("full", | ||
88 | + self.cfg.profile_size) | ||
89 | + if strategy_str == "cbt" or strategy_str == "cbt_desktop": | ||
90 | + self.strategy = strategy.ContentBasedStrategy("tag", | ||
91 | + self.cfg.profile_size) | ||
92 | + if strategy_str == "cbd" or strategy_str == "cbd_desktop": | ||
93 | + self.strategy = strategy.ContentBasedStrategy("desc", | ||
94 | + self.cfg.profile_size) | ||
95 | + if "col" in strategy_str: | ||
88 | self.users_repository = data.PopconXapianIndex(self.cfg) | 96 | self.users_repository = data.PopconXapianIndex(self.cfg) |
89 | - self.strategy = strategy.CollaborativeStrategy(20) | 97 | + self.strategy = strategy.CollaborativeStrategy(self.cfg.k_neighbors) |
90 | 98 | ||
91 | def get_recommendation(self,user,result_size=100): | 99 | def get_recommendation(self,user,result_size=100): |
92 | """ | 100 | """ |
src/strategy.py
@@ -140,7 +140,7 @@ class ContentBasedStrategy(RecommendationStrategy): | @@ -140,7 +140,7 @@ class ContentBasedStrategy(RecommendationStrategy): | ||
140 | """ | 140 | """ |
141 | Content-based recommendation strategy based on Apt-xapian-index. | 141 | Content-based recommendation strategy based on Apt-xapian-index. |
142 | """ | 142 | """ |
143 | - def __init__(self,content,profile_size=50): | 143 | + def __init__(self,content,profile_size): |
144 | self.description = "Content-based" | 144 | self.description = "Content-based" |
145 | self.content = content | 145 | self.content = content |
146 | self.profile_size = profile_size | 146 | self.profile_size = profile_size |
@@ -149,8 +149,8 @@ class ContentBasedStrategy(RecommendationStrategy): | @@ -149,8 +149,8 @@ class ContentBasedStrategy(RecommendationStrategy): | ||
149 | """ | 149 | """ |
150 | Perform recommendation strategy. | 150 | Perform recommendation strategy. |
151 | """ | 151 | """ |
152 | - profile = user.profile(rec.items_repository,self.content, | ||
153 | - self.profile_size) | 152 | + profile = user.content_profile(rec.items_repository,self.content, |
153 | + self.profile_size) | ||
154 | # prepare index for querying user profile | 154 | # prepare index for querying user profile |
155 | query = xapian.Query(xapian.Query.OP_OR,profile) | 155 | query = xapian.Query(xapian.Query.OP_OR,profile) |
156 | enquire = xapian.Enquire(rec.items_repository) | 156 | enquire = xapian.Enquire(rec.items_repository) |
@@ -188,7 +188,8 @@ class CollaborativeStrategy(RecommendationStrategy): | @@ -188,7 +188,8 @@ class CollaborativeStrategy(RecommendationStrategy): | ||
188 | """ | 188 | """ |
189 | Perform recommendation strategy. | 189 | Perform recommendation strategy. |
190 | """ | 190 | """ |
191 | - profile = ["XP"+package for package in user.pkg_profile] | 191 | + profile = ["XP"+package for package in |
192 | + user.filter_pkg_profile("/root/.app-recommender/filters/program")] | ||
192 | # prepare index for querying user profile | 193 | # prepare index for querying user profile |
193 | query = xapian.Query(xapian.Query.OP_OR,profile) | 194 | query = xapian.Query(xapian.Query.OP_OR,profile) |
194 | enquire = xapian.Enquire(rec.users_repository) | 195 | enquire = xapian.Enquire(rec.users_repository) |
@@ -210,13 +211,15 @@ class CollaborativeStrategy(RecommendationStrategy): | @@ -210,13 +211,15 @@ class CollaborativeStrategy(RecommendationStrategy): | ||
210 | eset = enquire.get_eset(recommendation_size,rset,PkgExpandDecider()) | 211 | eset = enquire.get_eset(recommendation_size,rset,PkgExpandDecider()) |
211 | # compose result dictionary | 212 | # compose result dictionary |
212 | item_score = {} | 213 | item_score = {} |
214 | + ranking = [] | ||
213 | for e in eset: | 215 | for e in eset: |
214 | package = e.term.lstrip("XP") | 216 | package = e.term.lstrip("XP") |
215 | tags = axi_search_pkg_tags(rec.items_repository,package) | 217 | tags = axi_search_pkg_tags(rec.items_repository,package) |
216 | #[FIXME] set this constraint somehow | 218 | #[FIXME] set this constraint somehow |
217 | #if "XTrole::program" in tags: | 219 | #if "XTrole::program" in tags: |
218 | item_score[package] = e.weight | 220 | item_score[package] = e.weight |
219 | - return recommender.RecommendationResult(item_score) | 221 | + ranking.append(m.document.get_data()) |
222 | + return recommender.RecommendationResult(item_score, ranking) | ||
220 | 223 | ||
221 | class DemographicStrategy(RecommendationStrategy): | 224 | class DemographicStrategy(RecommendationStrategy): |
222 | """ | 225 | """ |
src/user.py
@@ -19,8 +19,10 @@ __license__ = """ | @@ -19,8 +19,10 @@ __license__ = """ | ||
19 | along with this program. If not, see <http://www.gnu.org/licenses/>. | 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
20 | """ | 20 | """ |
21 | 21 | ||
22 | +import os | ||
22 | import random | 23 | import random |
23 | import commands | 24 | import commands |
25 | +import datetime | ||
24 | import xapian | 26 | import xapian |
25 | import logging | 27 | import logging |
26 | import apt | 28 | import apt |
@@ -43,9 +45,10 @@ class FilterDescription(xapian.ExpandDecider): | @@ -43,9 +45,10 @@ class FilterDescription(xapian.ExpandDecider): | ||
43 | """ | 45 | """ |
44 | def __call__(self, term): | 46 | def __call__(self, term): |
45 | """ | 47 | """ |
46 | - Return true if the term is a tag, else false. | 48 | + Return true if the term or its stemmed version is part of a package |
49 | + description. | ||
47 | """ | 50 | """ |
48 | - return term.islower() #or term.startswith("Z") | 51 | + return term.islower() or term.startswith("Z") |
49 | 52 | ||
50 | class DemographicProfile(Singleton): | 53 | class DemographicProfile(Singleton): |
51 | def __init__(self): | 54 | def __init__(self): |
@@ -84,7 +87,7 @@ class User: | @@ -84,7 +87,7 @@ class User: | ||
84 | self.pkg_profile = self.items() | 87 | self.pkg_profile = self.items() |
85 | 88 | ||
86 | if user_id: | 89 | if user_id: |
87 | - self.id = user_id | 90 | + self.user_id = user_id |
88 | else: | 91 | else: |
89 | random.seed() | 92 | random.seed() |
90 | self.id = random.getrandbits(128) | 93 | self.id = random.getrandbits(128) |
@@ -105,7 +108,7 @@ class User: | @@ -105,7 +108,7 @@ class User: | ||
105 | """ | 108 | """ |
106 | self.demographic_profile = DemographicProfile()(profiles_set) | 109 | self.demographic_profile = DemographicProfile()(profiles_set) |
107 | 110 | ||
108 | - def profile(self,items_repository,content,size): | 111 | + def content_profile(self,items_repository,content,size): |
109 | """ | 112 | """ |
110 | Get user profile for a specific type of content: packages tags, | 113 | Get user profile for a specific type of content: packages tags, |
111 | description or both (full_profile) | 114 | description or both (full_profile) |
@@ -119,10 +122,10 @@ class User: | @@ -119,10 +122,10 @@ class User: | ||
119 | Return most relevant tags for a list of packages. | 122 | Return most relevant tags for a list of packages. |
120 | """ | 123 | """ |
121 | enquire = xapian.Enquire(items_repository) | 124 | enquire = xapian.Enquire(items_repository) |
122 | - matches = data.axi_search_pkgs(items_repository,self.pkg_profile) | 125 | + docs = data.axi_search_pkgs(items_repository,self.pkg_profile) |
123 | rset_packages = xapian.RSet() | 126 | rset_packages = xapian.RSet() |
124 | - for m in matches: | ||
125 | - rset_packages.add_document(m.docid) | 127 | + for docid in docs: |
128 | + rset_packages.add_document(docid) | ||
126 | # statistically good differentiators | 129 | # statistically good differentiators |
127 | eset_tags = enquire.get_eset(size, rset_packages, FilterTag()) | 130 | eset_tags = enquire.get_eset(size, rset_packages, FilterTag()) |
128 | profile = [res.term for res in eset_tags] | 131 | profile = [res.term for res in eset_tags] |
@@ -134,10 +137,10 @@ class User: | @@ -134,10 +137,10 @@ class User: | ||
134 | text descriptions. | 137 | text descriptions. |
135 | """ | 138 | """ |
136 | enquire = xapian.Enquire(items_repository) | 139 | enquire = xapian.Enquire(items_repository) |
137 | - matches = data.axi_search_pkgs(items_repository,self.pkg_profile) | 140 | + docs = data.axi_search_pkgs(items_repository,self.pkg_profile) |
138 | rset_packages = xapian.RSet() | 141 | rset_packages = xapian.RSet() |
139 | - for m in matches: | ||
140 | - rset_packages.add_document(m.docid) | 142 | + for docid in docs: |
143 | + rset_packages.add_document(docid) | ||
141 | eset_keywords = enquire.get_eset(size, rset_packages, | 144 | eset_keywords = enquire.get_eset(size, rset_packages, |
142 | FilterDescription()) | 145 | FilterDescription()) |
143 | profile = [res.term for res in eset_keywords] | 146 | profile = [res.term for res in eset_keywords] |
@@ -152,21 +155,19 @@ class User: | @@ -152,21 +155,19 @@ class User: | ||
152 | desc_profile = self.desc_profile(items_repository,size)[:size/2] | 155 | desc_profile = self.desc_profile(items_repository,size)[:size/2] |
153 | return tag_profile+desc_profile | 156 | return tag_profile+desc_profile |
154 | 157 | ||
155 | - def app_pkg_profile(self,axi): | 158 | + def filter_pkg_profile(self,filter_file): |
156 | """ | 159 | """ |
157 | - Return list of packages that are applications. | 160 | + Return list of packages from profile listed in the filter_file. |
158 | """ | 161 | """ |
159 | old_profile_size = len(self.pkg_profile) | 162 | old_profile_size = len(self.pkg_profile) |
160 | - for p in self.pkg_profile[:]: #iterate list copy | ||
161 | - tags = data.axi_search_pkg_tags(axi,p) | ||
162 | - try: | ||
163 | - | ||
164 | - if not "XTrole::program" in tags: | ||
165 | - self.pkg_profile.remove(p) | ||
166 | - except: | ||
167 | - logging.debug("Package not found in axi: %s" % p) | 163 | + with open(filter_file) as valid: |
164 | + valid_pkgs = [line.strip() for line in valid] | ||
165 | + for pkg in self.pkg_profile[:]: #iterate list copy | ||
166 | + if pkg not in valid_pkgs: | ||
167 | + self.pkg_profile.remove(pkg) | ||
168 | + logging.debug("Discarded package %s during profile filtering" % pkg) | ||
168 | profile_size = len(self.pkg_profile) | 169 | profile_size = len(self.pkg_profile) |
169 | - logging.debug("App package profile: reduced packages profile size \ | 170 | + logging.debug("Filtered package profile: reduced packages profile size \ |
170 | from %d to %d." % (old_profile_size, profile_size)) | 171 | from %d to %d." % (old_profile_size, profile_size)) |
171 | return self.pkg_profile | 172 | return self.pkg_profile |
172 | 173 | ||
@@ -193,6 +194,33 @@ class User: | @@ -193,6 +194,33 @@ class User: | ||
193 | from %d to %d." % (old_profile_size, profile_size)) | 194 | from %d to %d." % (old_profile_size, profile_size)) |
194 | return self.pkg_profile | 195 | return self.pkg_profile |
195 | 196 | ||
197 | +class RandomPopcon(User): | ||
198 | + def __init__(self,submissions_dir,pkgs_filter=0): | ||
199 | + """ | ||
200 | + Set initial parameters. | ||
201 | + """ | ||
202 | + item_score = {} | ||
203 | + len_profile = 0 | ||
204 | + while len_profile < 100: | ||
205 | + path = random.choice([os.path.join(root, submission) for | ||
206 | + root, dirs, files in os.walk(submissions_dir) | ||
207 | + for submission in files]) | ||
208 | + user = PopconSystem(path) | ||
209 | + if pkgs_filter: | ||
210 | + user.filter_pkg_profile(pkgs_filter) | ||
211 | + len_profile = len(user.pkg_profile) | ||
212 | + submission = data.PopconSubmission(path) | ||
213 | + User.__init__(self,submission.packages,submission.user_id) | ||
214 | + | ||
215 | +class PopconSystem(User): | ||
216 | + def __init__(self,path): | ||
217 | + """ | ||
218 | + Set initial parameters. | ||
219 | + """ | ||
220 | + item_score = {} | ||
221 | + submission = data.PopconSubmission(path) | ||
222 | + User.__init__(self,submission.packages,submission.user_id) | ||
223 | + | ||
196 | class LocalSystem(User): | 224 | class LocalSystem(User): |
197 | """ | 225 | """ |
198 | Extend the class User to consider the packages installed on the local | 226 | Extend the class User to consider the packages installed on the local |
@@ -207,6 +235,7 @@ class LocalSystem(User): | @@ -207,6 +235,7 @@ class LocalSystem(User): | ||
207 | for line in dpkg_output.splitlines(): | 235 | for line in dpkg_output.splitlines(): |
208 | pkg = line.split('\t')[0] | 236 | pkg = line.split('\t')[0] |
209 | item_score[pkg] = 1 | 237 | item_score[pkg] = 1 |
238 | + self.user_id = "local-"+str(datetime.datetime.now()) | ||
210 | User.__init__(self,item_score) | 239 | User.__init__(self,item_score) |
211 | 240 | ||
212 | def no_auto_pkg_profile(self): | 241 | def no_auto_pkg_profile(self): |