Commit 65be4b76c9e779b7f600b211e41649b0310b3eaa

Authored by Tássia Camões Araújo
2 parents e3176f2b 9e602af3
Exists in master and in 1 other branch add_vagrant

Merge remote branch 'upstream/master'

Conflicts:
	src/data.py
src/config.py
... ... @@ -44,7 +44,8 @@ class Config():
44 44 self.popcon_index = os.path.expanduser("~/.app-recommender/popcon_index")
45 45 self.popcon_dir = os.path.expanduser("~/.app-recommender/popcon_dir")
46 46 self.clusters_dir = os.path.expanduser("~/.app-recommender/clusters_dir")
47   - self.index_mode = "0" # use old index
  47 + self.k_medoids = 100
  48 + self.index_mode = "old"
48 49 self.strategy = "cb"
49 50 self.weight = "bm25"
50 51 self.load_options()
... ... @@ -65,8 +66,9 @@ class Config():
65 66 print " -a, --axi=PATH Path to Apt-xapian-index"
66 67 print " -p, --popconindex=PATH Path to popcon dedicated index"
67 68 print " -m, --popcondir=PATH Path to popcon submissions dir"
68   - print " -u, --index_mode= 0: old, 1:reindex, 11:clustered_index"
  69 + print " -u, --indexmode= old, reindex, cluster, recluster"
69 70 print " -l, --clustersdir=PATH Path to popcon clusters dir"
  71 + print " -e, --medoids=k Number of medoids for clustering"
70 72 print " -w, --weight=OPTION Search weighting scheme"
71 73 print " -s, --strategy=OPTION Recommendation strategy"
72 74 print ""
... ... @@ -115,13 +117,14 @@ class Config():
115 117 self.popcon_dir = self.read_option('recommender', 'popcon_dir')
116 118 self.index_mode = self.read_option('recommender', 'index_mode')
117 119 self.clusters_dir = self.read_option('recommender', 'clusters_dir')
  120 + self.k_medoids = self.read_option('recommender', 'k_medoids')
118 121 self.weight = self.read_option('recommender', 'weight')
119 122 self.strategy = self.read_option('recommender', 'strategy')
120 123  
121   - short_options = "hdvo:c:a:p:m:ul:w:s:"
  124 + short_options = "hdvo:c:a:p:m:ul:e:w:s:"
122 125 long_options = ["help", "debug", "verbose", "output=", "config=",
123   - "axi=", "popconindex=", "popcondir=", "index_mode=",
124   - "clusters_dir=", "weight=", "strategy="]
  126 + "axi=", "popconindex=", "popcondir=", "indexmode=",
  127 + "clustersdir=", "kmedoids=", "weight=", "strategy="]
125 128 try:
126 129 opts, args = getopt.getopt(sys.argv[1:], short_options,
127 130 long_options)
... ... @@ -154,6 +157,8 @@ class Config():
154 157 self.index_mode = p
155 158 elif o in ("-l", "--clustersdir"):
156 159 self.clusters_dir = p
  160 + elif o in ("-e", "--kmedoids"):
  161 + self.k_medoids = p
157 162 elif o in ("-w", "--weight"):
158 163 self.weight = p
159 164 elif o in ("-s", "--strategy"):
... ...
src/data.py
... ... @@ -129,31 +129,36 @@ class PopconXapianIndex(xapian.WritableDatabase):
129 129 """
130 130 self.axi = xapian.Database(cfg.axi)
131 131 self.path = os.path.expanduser(cfg.popcon_index)
132   - if cfg.index_mode.startswith("1") or not self.load_index():
  132 + if not cfg.index_mode == "old" or not self.load_index():
133 133 if not os.path.exists(cfg.popcon_dir):
134 134 os.makedirs(cfg.popcon_dir)
135 135 if not os.listdir(cfg.popcon_dir):
136 136 logging.critical("Popcon dir seems to be empty.")
137 137 raise Error
138   - if cfg.index_mode == "10":
  138 + if cfg.index_mode == "reindex":
139 139 self.source_dir = os.path.expanduser(cfg.popcon_dir)
140 140 else:
141 141 self.source_dir = os.path.expanduser(cfg.clusters_dir)
142 142 if not os.path.exists(cfg.clusters_dir):
143 143 os.makedirs(cfg.clusters_dir)
144   - if not os.listdir(cfg.clusters_dir):
145   - distance = JaccardDistance()
  144 + if not os.listdir(cfg.clusters_dir) or \
  145 + cfg.index_mode == "recluster":
  146 + shutil.rmtree(cfg.clusters_dir,1)
  147 + os.makedirs(cfg.clusters_dir)
146 148 logging.info("Clustering popcon submissions from \'%s\'"
147 149 % cfg.popcon_dir)
148 150 logging.info("Clusters will be placed at \'%s\'"
149 151 % cfg.clusters_dir)
  152 + distance = JaccardDistance()
150 153 data = self.get_submissions(cfg.popcon_dir)
151   - if cfg.clustering == "Hierarchical":
152   - self.hierarchical_clustering(data,cfg.clusters_dir,
153   - distance)
154   - else:
155   - self.kmedoids_clustering(data,cfg.clusters_dir,
156   - distance)
  154 + self.cluster_dispersion = \
  155 + self.kmedoids_clustering(data, cfg.clusters_dir,
  156 + distance, cfg.k_medoids)
  157 + logging.info("Clusters dispersion: %f.2",
  158 + self.cluster_dispersion)
  159 + else:
  160 + logging.info("Using clusters from \'%s\'" %
  161 + cfg.clusters_dir)
157 162 self.build_index()
158 163  
159 164 def __str__(self):
... ... @@ -167,10 +172,9 @@ class PopconXapianIndex(xapian.WritableDatabase):
167 172 logging.info("Opening existing popcon xapian index at \'%s\'"
168 173 % self.path)
169 174 xapian.Database.__init__(self,self.path)
170   - return True
  175 + return 1
171 176 except xapian.DatabaseError:
172 177 logging.info("Could not open popcon index.")
173   - return True
174 178 return 0
175 179  
176 180 def build_index(self):
... ... @@ -224,35 +228,23 @@ class PopconXapianIndex(xapian.WritableDatabase):
224 228 submissions.append(submission)
225 229 return submissions
226 230  
227   - def hierarchical_clustering(self,data,clusters_dir,distance,k=10):
228   - """
229   - Select popcon submissions from popcon_dir and place them at clusters_dir
230   - """
231   - cl = cluster.HierarchicalClustering(data, lambda x,y:
232   - distance(x.packages.keys(),
233   - y.packages.keys()))
234   - clusters = cl.getlevel(0.5)
235   - for c in clusters:
236   - print "cluster"
237   - for submission in c:
238   - print submission.user_id
239   -
240   - def kmedoids_clustering(self,data,clusters_dir,distance,k=10):
  231 + def kmedoids_clustering(self,data,clusters_dir,distance,k_medoids):
241 232 clusters = KMedoidsClustering(data,lambda x,y:
242 233 distance(x.packages.keys(),
243 234 y.packages.keys()))
244   - medoids = clusters.getMedoids(2)
  235 + medoids,dispersion = clusters.getMedoids(k_medoids)
245 236 for submission in medoids:
246 237 shutil.copyfile(submission.path,os.path.join(clusters_dir,
247 238 submission.user_id))
  239 + return dispersion
248 240  
249 241 class KMedoidsClustering(cluster.KMeansClustering):
250 242  
251   - def __init__(self,data,distance):
252   - if len(data)<100:
  243 + def __init__(self,data,distance,max_data=100):
  244 + if len(data)<max_data:
253 245 data_sample = data
254 246 else:
255   - data_sample = random.sample(data,100)
  247 + data_sample = random.sample(data,max_data)
256 248 cluster.KMeansClustering.__init__(self, data_sample, distance)
257 249 self.distanceMatrix = {}
258 250 for submission in self._KMeansClustering__data:
... ... @@ -287,7 +279,7 @@ class KMedoidsClustering(cluster.KMeansClustering):
287 279 logging.debug("medoidDistance: %f" % medoidDistance)
288 280 logging.debug("Cluster medoid: [%d] %s" % (medoid,
289 281 cluster[medoid].user_id))
290   - return cluster[medoid]
  282 + return (cluster[medoid],medoidDistance)
291 283  
292 284 def assign_item(self, item, origin):
293 285 """
... ... @@ -295,7 +287,8 @@ class KMedoidsClustering(cluster.KMeansClustering):
295 287 """
296 288 closest_cluster = origin
297 289 for cluster in self._KMeansClustering__clusters:
298   - if self.distance(item,self.getMedoid(cluster)) < self.distance(item,self.getMedoid(closest_cluster)):
  290 + if self.distance(item,self.getMedoid(cluster)[0]) < \
  291 + self.distance(item,self.getMedoid(closest_cluster)[0]):
299 292 closest_cluster = cluster
300 293  
301 294 if closest_cluster != origin:
... ... @@ -309,6 +302,8 @@ class KMedoidsClustering(cluster.KMeansClustering):
309 302 """
310 303 Generate n clusters and return their medoids.
311 304 """
312   - medoids = [self.getMedoid(cluster) for cluster in self.getclusters(n)]
313   - logging.info("Clustering completed and the following centroids were found: %s" % [c.user_id for c in medoids])
314   - return medoids
  305 + medoids_distances = [self.getMedoid(cluster) for cluster in self.getclusters(n)]
  306 + medoids = [m[0] for m in medoids_distances]
  307 + dispersion = sum([m[1] for m in medoids_distances])
  308 + logging.info("Clustering completed and the following medoids were found: %s" % [c.user_id for c in medoids])
  309 + return medoids,dispersion
... ...
src/evaluation.py
... ... @@ -49,6 +49,45 @@ class Metric(Singleton):
49 49 evaluation.real_item_scores[k]))
50 50 return errors
51 51  
  52 +
  53 +class SimpleAccuracy(Metric):
  54 + """
  55 + Classification accuracy metric which consider classes sizes.
  56 + """
  57 + def __init__(self):
  58 + """
  59 + Set metric description.
  60 + """
  61 + self.desc = " S_Accuracy "
  62 +
  63 + def run(self,evaluation):
  64 + """
  65 + Compute metric.
  66 + """
  67 + return float((evaluation.repository_size-
  68 + len(evaluation.false_positive))-
  69 + len(evaluation.false_negative))/evaluation.repository_size
  70 +
  71 +class Accuracy(Metric):
  72 + """
  73 + Classification accuracy metric which consider classes sizes.
  74 + """
  75 + def __init__(self):
  76 + """
  77 + Set metric description.
  78 + """
  79 + self.desc = " Accuracy "
  80 +
  81 + def run(self,evaluation):
  82 + """
  83 + Compute metric.
  84 + """
  85 + error_1 = (float(len(evaluation.false_positive))/
  86 + (evaluation.repository_size-len(evaluation.real_relevant)))
  87 + error_2 = (float(len(evaluation.false_negative))/len(evaluation.real_relevant))
  88 + accuracy = 1-(float(error_1+error_2)/2)
  89 + return accuracy
  90 +
52 91 class Precision(Metric):
53 92 """
54 93 Classification accuracy metric defined as the percentage of relevant itens
... ... @@ -64,7 +103,7 @@ class Precision(Metric):
64 103 """
65 104 Compute metric.
66 105 """
67   - return float(len(evaluation.predicted_real))/len(evaluation.predicted_relevant)
  106 + return float(len(evaluation.true_positive))/len(evaluation.predicted_relevant)
68 107  
69 108 class Recall(Metric):
70 109 """
... ... @@ -81,7 +120,7 @@ class Recall(Metric):
81 120 """
82 121 Compute metric.
83 122 """
84   - return float(len(evaluation.predicted_real))/len(evaluation.real_relevant)
  123 + return float(len(evaluation.true_positive))/len(evaluation.real_relevant)
85 124  
86 125 class F1(Metric):
87 126 """
... ... @@ -100,7 +139,10 @@ class F1(Metric):
100 139 """
101 140 p = Precision().run(evaluation)
102 141 r = Recall().run(evaluation)
103   - return float((2*p*r))/(p+r)
  142 + if (p+r)>0:
  143 + return float((2*p*r))/(p+r)
  144 + else:
  145 + return 0
104 146  
105 147 class MAE(Metric):
106 148 """
... ... @@ -158,43 +200,47 @@ class Coverage(Metric):
158 200 Evaluation metric defined as the percentage of itens covered by the
159 201 recommender (have been recommended at least once).
160 202 """
161   - def __init__(self,repository_size):
  203 + def __init__(self):
162 204 """
163 205 Set initial parameters.
164 206 """
165 207 self.desc = " Coverage "
166   - self.repository_size = repository_size
167   - self.covered = set()
168   -
169   - def save_covered(self,recommended_list):
170   - """
171   - Register that a list of itens has been recommended.
172   - """
173   - self.covered.update(set(recommended_list))
174 208  
175   - def run(self,evaluation):
  209 + def run(self,evaluations_set):
176 210 """
177 211 Compute metric.
178 212 """
179   - return float(self.covered.size)/self.repository_size
  213 + covered = set()
  214 + for evaluation in evaluations_set:
  215 + covered.update(set(evaluation.predicted_relevant))
  216 + return float(len(covered))/evaluation.repository_size
180 217  
181 218 class Evaluation:
182 219 """
183 220 Class designed to perform prediction evaluation, given data and metric.
184 221 """
185   - def __init__(self,predicted_result,real_result):
  222 + def __init__(self,predicted,real,repository_size):
186 223 """
187 224 Set initial parameters.
188 225 """
189   - self.predicted_item_scores = predicted_result.item_score
190   - self.predicted_relevant = predicted_result.get_prediction()
191   - self.real_item_scores = real_result.item_score
192   - self.real_relevant = real_result.get_prediction()
193   - self.predicted_real = [v for v in self.predicted_relevant if v in
194   - self.real_relevant]
195   - #print len(self.predicted_relevant)
196   - #print len(self.real_relevant)
197   - #print len(self.predicted_real)
  226 + self.repository_size = repository_size
  227 + self.predicted_item_scores = predicted.item_score
  228 + self.predicted_relevant = predicted.get_prediction()
  229 + self.real_item_scores = real.item_score
  230 + self.real_relevant = real.get_prediction()
  231 +
  232 + self.true_positive = [v[0] for v in self.predicted_relevant if v[0] in
  233 + [w[0] for w in self.real_relevant]]
  234 + self.false_positive = [v[0] for v in self.predicted_relevant if not v[0] in
  235 + [w[0] for w in self.real_relevant]]
  236 + self.false_negative = [v[0] for v in self.real_relevant if not v[0] in
  237 + [w[0] for w in self.predicted_relevant]]
  238 +
  239 + logging.debug("TP: %d" % len(self.true_positive))
  240 + logging.debug("FP: %d" % len(self.false_positive))
  241 + logging.debug("FN: %d" % len(self.false_negative))
  242 + logging.debug("Repo_size: %d" % self.repository_size)
  243 + logging.debug("Relevant: %d" % len(self.real_relevant))
198 244  
199 245 def run(self,metric):
200 246 """
... ... @@ -206,7 +252,7 @@ class CrossValidation:
206 252 """
207 253 Class designed to perform cross-validation process.
208 254 """
209   - def __init__(self,partition_proportion,rounds,rec,metrics_list):
  255 + def __init__(self,partition_proportion,rounds,rec,metrics_list,result_proportion):
210 256 """
211 257 Set initial parameters.
212 258 """
... ... @@ -219,34 +265,13 @@ class CrossValidation:
219 265 self.recommender = rec
220 266 self.metrics_list = metrics_list
221 267 self.cross_results = defaultdict(list)
222   -
223   - def __str__(self):
224   - """
225   - String representation of the object.
226   - """
227   - str = "\n"
228   - metrics_desc = ""
229   - for metric in self.metrics_list:
230   - metrics_desc += "%s|" % (metric.desc)
231   - str += "| Round |%s\n" % metrics_desc
232   - for r in range(self.rounds):
233   - metrics_result = ""
234   - for metric in self.metrics_list:
235   - metrics_result += (" %2.1f%% |" %
236   - (self.cross_results[metric.desc][r]*100))
237   - str += "| %d |%s\n" % (r,metrics_result)
238   - metrics_mean = ""
239   - for metric in self.metrics_list:
240   - mean = float(sum(self.cross_results[metric.desc]) /
241   - len(self.cross_results[metric.desc]))
242   - metrics_mean += " %2.1f%% |" % (mean*100)
243   - str += "| Mean |%s\n" % (metrics_mean)
244   - return str
  268 + self.result_proportion = result_proportion
245 269  
246 270 def run(self,user):
247 271 """
248 272 Perform cross-validation.
249 273 """
  274 + #
250 275 cross_item_score = dict.fromkeys(user.pkg_profile,1)
251 276 partition_size = int(len(cross_item_score)*self.partition_proportion)
252 277 for r in range(self.rounds):
... ... @@ -258,10 +283,17 @@ class CrossValidation:
258 283 logging.critical("Empty cross_item_score.")
259 284 raise Error
260 285 round_partition[random_key] = cross_item_score.pop(random_key)
  286 + #logging.debug("Round partition: %s",str(round_partition))
  287 + #logging.debug("Cross item-score: %s",str(cross_item_score))
261 288 round_user = User(cross_item_score)
262   - predicted_result = self.recommender.get_recommendation(round_user)
263   - real_result = RecommendationResult(round_partition,len(round_partition))
264   - evaluation = Evaluation(predicted_result,real_result)
  289 + result_size = int(self.recommender.items_repository.get_doccount()*
  290 + self.result_proportion)
  291 + predicted_result = self.recommender.get_recommendation(round_user,result_size)
  292 + print len(round_partition)
  293 + real_result = RecommendationResult(round_partition)
  294 + #logging.debug("Predicted result: %s",predicted_result)
  295 + evaluation = Evaluation(predicted_result,real_result,
  296 + self.recommender.items_repository.get_doccount())
265 297 for metric in self.metrics_list:
266 298 result = evaluation.run(metric)
267 299 self.cross_results[metric.desc].append(result)
... ... @@ -269,3 +301,26 @@ class CrossValidation:
269 301 item,score = round_partition.popitem()
270 302 cross_item_score[item] = score
271 303  
  304 + def __str__(self):
  305 + """
  306 + String representation of the object.
  307 + """
  308 + str = "\n"
  309 + metrics_desc = ""
  310 + for metric in self.metrics_list:
  311 + metrics_desc += "%s|" % (metric.desc)
  312 + str += "| Round |%s\n" % metrics_desc
  313 + for r in range(self.rounds):
  314 + metrics_result = ""
  315 + for metric in self.metrics_list:
  316 + metrics_result += (" %2.1f%% |" %
  317 + (self.cross_results[metric.desc][r]*100))
  318 + str += "| %d |%s\n" % (r,metrics_result)
  319 + metrics_mean = ""
  320 + for metric in self.metrics_list:
  321 + mean = float(sum(self.cross_results[metric.desc]) /
  322 + len(self.cross_results[metric.desc]))
  323 + metrics_mean += " %2.1f%% |" % (mean*100)
  324 + str += "| Mean |%s\n" % (metrics_mean)
  325 + return str
  326 +
... ...
src/recommender.py
... ... @@ -45,13 +45,15 @@ class RecommendationResult:
45 45 str += "%2d: %s\n" % (i,result[i][0])
46 46 return str
47 47  
48   - def get_prediction(self,limit=20):
  48 + def get_prediction(self,limit=0):
49 49 """
50 50 Return prediction based on recommendation size (number of items).
51 51 """
52   - if limit > self.size: limit = self.size
53 52 sorted_result = sorted(self.item_score.items(),
54 53 key=operator.itemgetter(1))
  54 + if not limit or limit > self.size:
  55 + limit = self.size
  56 +
55 57 return list(reversed(sorted_result[-limit:]))
56 58  
57 59 class Recommender:
... ... @@ -63,13 +65,12 @@ class Recommender:
63 65 Set initial parameters.
64 66 """
65 67 self.items_repository = xapian.Database(cfg.axi)
66   - self.users_repository = data.PopconXapianIndex(cfg)
67   - #self.clustered_users_repository = data.PopconXapianIndex(cfg)
68 68 self.set_strategy(cfg.strategy)
69 69 if cfg.weight == "bm25":
70 70 self.weight = xapian.BM25Weight()
71 71 else:
72 72 self.weight = xapian.TradWeight()
  73 + self.cfg = cfg
73 74  
74 75 def set_strategy(self,strategy_str):
75 76 """
... ... @@ -83,6 +84,7 @@ class Recommender:
83 84 self.strategy = strategy.ContentBasedStrategy("desc")
84 85 if strategy_str == "col":
85 86 self.strategy = strategy.CollaborativeStrategy(20)
  87 + self.users_repository = data.PopconXapianIndex(self.cfg)
86 88  
87 89 def get_recommendation(self,user,result_size=20):
88 90 """
... ...
src/tests/data_tests.py
... ... @@ -71,13 +71,13 @@ class PopconXapianIndexTests(unittest2.TestCase):
71 71  
72 72 def test_reindex(self):
73 73 # force reindex with no clustering
74   - self.cfg.index_mode = "10"
  74 + self.cfg.index_mode = "reindex"
75 75 pxi = PopconXapianIndex(self.cfg)
76 76 self.assertEqual(pxi.get_metadata("old"),"")
77 77  
78 78 def test_clustering(self):
79 79 # force reindex with clustering
80   - self.cfg.index_mode = "11"
  80 + self.cfg.index_mode = "cluster"
81 81 pxi = PopconXapianIndex(self.cfg)
82 82 self.assertEqual(pxi.source_dir,self.cfg.clusters_dir)
83 83 all_submissions = [submissions for (root, dirs, submissions) in
... ... @@ -95,6 +95,13 @@ class PopconXapianIndexTests(unittest2.TestCase):
95 95 sum([len(submissions) for submissions in
96 96 all_submissions]))
97 97  
  98 + def test_recluster(self):
  99 + # force reindexing and clustering
  100 + self.cfg.index_mode = "recluster"
  101 + self.cfg.k_medoids = 2
  102 + pxi = PopconXapianIndex(self.cfg)
  103 + self.assertEqual(pxi.source_dir,self.cfg.clusters_dir)
  104 + self.assertEqual(pxi.get_doccount(),2)
98 105  
99 106 if __name__ == '__main__':
100 107 unittest2.main()
... ...
src/tests/test_data/popcon_dir/test_popcon_0
1 1 POPULARITY-CONTEST-0 TIME:1309407492 ID:8b44fcdbcf676e711a153d5db0test_0 ARCH:i386 POPCONVER:1.52
2   -1309407475 1303670994 perl-base /usr/bin/perl
3   -1309407451 1303670982 libc6-i686 /lib/i686/cmov/libc-2.11.2.so
4   -1309407450 1303670973 libc6 /lib/ld-2.11.2.so
  2 +1309407475 1303670994 gimp /usr/bin/perl
  3 +1309407451 1303670982 inkscape /lib/i686/cmov/libc-2.11.2.so
  4 +1309407450 1303670973 imagination /lib/ld-2.11.2.so
5 5 1309407434 1295654294 dash /bin/dash
6 6 0 0 libusbmuxd1 <NOFILES>
7 7 END-POPULARITY-CONTEST-0 TIME:1309407492
... ...
src/tests/test_data/popcon_dir/test_popcon_1
1 1 POPULARITY-CONTEST-0 TIME:1309407492 ID:8b44fcdbcf676e711a153d5db0test_1 ARCH:i386 POPCONVER:1.52
2   -1309407475 1303670994 perl-base /usr/bin/perl
3   -1309407451 1303670982 libc6-i686 /lib/i686/cmov/libc-2.11.2.so
4   -1309407450 1303670973 libc6 /lib/ld-2.11.2.so
  2 +1309407475 1303670994 gimp /usr/bin/perl
5 3 1309407434 1295654294 dash /bin/dash
6 4 0 0 libusbmuxd1 <NOFILES>
7 5 END-POPULARITY-CONTEST-0 TIME:1309407492
... ...
src/tests/test_data/popcon_dir/test_popcon_2
1 1 POPULARITY-CONTEST-0 TIME:1309407492 ID:8b44fcdbcf676e711a153d5db0test_2 ARCH:i386 POPCONVER:1.52
2   -1309407475 1303670994 perl-base /usr/bin/perl
3   -1309407451 1303670982 libc6-i686 /lib/i686/cmov/libc-2.11.2.so
  2 +1309407475 1303670994 iceweasel /usr/bin/perl
  3 +1309407451 1303670982 python /lib/i686/cmov/libc-2.11.2.so
4 4 1309407450 1303670973 libc6 /lib/ld-2.11.2.so
5 5 1309407434 1295654294 dash /bin/dash
6 6 0 0 libusbmuxd1 <NOFILES>
... ...
src/tests/test_data/popcon_dir/test_popcon_3
1 1 POPULARITY-CONTEST-0 TIME:1309407492 ID:8b44fcdbcf676e711a153d5db0test_3 ARCH:i386 POPCONVER:1.52
2   -1309407475 1303670994 perl-base /usr/bin/perl
3   -1309407451 1303670982 libc6-i686 /lib/i686/cmov/libc-2.11.2.so
  2 +1309407475 1303670994 eog /usr/bin/perl
  3 +1309407451 1303670982 nautilus /lib/i686/cmov/libc-2.11.2.so
4 4 1309407450 1303670973 libc6 /lib/ld-2.11.2.so
5   -1309407434 1295654294 dash /bin/dash
6 5 0 0 libusbmuxd1 <NOFILES>
7 6 END-POPULARITY-CONTEST-0 TIME:1309407492
... ...
src/tests/test_data/popcon_dir/test_popcon_4
1 1 POPULARITY-CONTEST-0 TIME:1309407492 ID:8b44fcdbcf676e711a153d5db0test_4 ARCH:i386 POPCONVER:1.52
2   -1309407475 1303670994 perl-base /usr/bin/perl
3   -1309407451 1303670982 libc6-i686 /lib/i686/cmov/libc-2.11.2.so
  2 +1309407475 1303670994 konqueror /usr/bin/perl
  3 +1309407451 1303670982 kedit /lib/i686/cmov/libc-2.11.2.so
4 4 1309407450 1303670973 libc6 /lib/ld-2.11.2.so
5   -1309407434 1295654294 dash /bin/dash
6 5 0 0 libusbmuxd1 <NOFILES>
7 6 END-POPULARITY-CONTEST-0 TIME:1309407492
... ...
src/tests/test_data/popcon_dir/test_popcon_5
1 1 POPULARITY-CONTEST-0 TIME:1309407492 ID:8b44fcdbcf676e711a153d5db0test_5 ARCH:i386 POPCONVER:1.52
2   -1309407475 1303670994 perl-base /usr/bin/perl
3   -1309407451 1303670982 libc6-i686 /lib/i686/cmov/libc-2.11.2.so
4   -1309407450 1303670973 libc6 /lib/ld-2.11.2.so
  2 +1309407475 1303670994 konqueror /usr/bin/perl
5 3 1309407434 1295654294 dash /bin/dash
6 4 0 0 libusbmuxd1 <NOFILES>
7 5 END-POPULARITY-CONTEST-0 TIME:1309407492
... ...
src/tests/test_data/popcon_dir/test_popcon_6
1 1 POPULARITY-CONTEST-0 TIME:1309407492 ID:8b44fcdbcf676e711a153d5db0test_6 ARCH:i386 POPCONVER:1.52
2 2 1309407475 1303670994 perl-base /usr/bin/perl
3   -1309407451 1303670982 libc6-i686 /lib/i686/cmov/libc-2.11.2.so
4   -1309407450 1303670973 libc6 /lib/ld-2.11.2.so
  3 +1309407451 1303670982 eog /lib/i686/cmov/libc-2.11.2.so
  4 +1309407450 1303670973 nautilus /lib/ld-2.11.2.so
5 5 1309407434 1295654294 dash /bin/dash
6 6 0 0 libusbmuxd1 <NOFILES>
7 7 END-POPULARITY-CONTEST-0 TIME:1309407492
... ...
src/tests/test_data/popcon_dir/test_popcon_7
1 1 POPULARITY-CONTEST-0 TIME:1309407492 ID:8b44fcdbcf676e711a153d5db0test_7 ARCH:i386 POPCONVER:1.52
2   -1309407475 1303670994 perl-base /usr/bin/perl
3   -1309407451 1303670982 libc6-i686 /lib/i686/cmov/libc-2.11.2.so
4   -1309407450 1303670973 libc6 /lib/ld-2.11.2.so
  2 +1309407475 1303670994 apticron /usr/bin/perl
  3 +1309407451 1303670982 aptitude /lib/i686/cmov/libc-2.11.2.so
  4 +1309407450 1303670973 apt /lib/ld-2.11.2.so
5 5 1309407434 1295654294 dash /bin/dash
6 6 0 0 libusbmuxd1 <NOFILES>
7 7 END-POPULARITY-CONTEST-0 TIME:1309407492
... ...
src/tests/test_data/popcon_dir/test_popcon_8
1 1 POPULARITY-CONTEST-0 TIME:1309407492 ID:8b44fcdbcf676e711a153d5db0test_8 ARCH:i386 POPCONVER:1.52
2   -1309407475 1303670994 perl-base /usr/bin/perl
3   -1309407451 1303670982 libc6-i686 /lib/i686/cmov/libc-2.11.2.so
4   -1309407450 1303670973 libc6 /lib/ld-2.11.2.so
  2 +1309407475 1303670994 apticron /usr/bin/perl
  3 +1309407451 1303670982 eog /lib/i686/cmov/libc-2.11.2.so
  4 +1309407450 1303670973 nautilus /lib/ld-2.11.2.so
5 5 1309407434 1295654294 dash /bin/dash
6 6 0 0 libusbmuxd1 <NOFILES>
7 7 END-POPULARITY-CONTEST-0 TIME:1309407492
... ...
src/user.py
... ... @@ -152,6 +152,24 @@ class User:
152 152 desc_profile = self.desc_profile(items_repository,size)[:size/2]
153 153 return tag_profile+desc_profile
154 154  
  155 + def app_pkg_profile(self,axi):
  156 + """
  157 + Return list of packages that are applications.
  158 + """
  159 + old_profile_size = len(self.pkg_profile)
  160 + for p in self.pkg_profile[:]: #iterate list copy
  161 + tags = data.axi_search_pkg_tags(axi,p)
  162 + try:
  163 +
  164 + if not "XTrole::program" in tags:
  165 + self.pkg_profile.remove(p)
  166 + except:
  167 + logging.debug("Package not found in axi: %s" % p)
  168 + profile_size = len(self.pkg_profile)
  169 + logging.debug("App package profile: reduced packages profile size \
  170 + from %d to %d." % (old_profile_size, profile_size))
  171 + return self.pkg_profile
  172 +
155 173 def maximal_pkg_profile(self):
156 174 """
157 175 Return list of packages that are not dependence of any other package in
... ...