Commit 353b42add083cccd2d18764ef0ec6e6b6b1878b6
1 parent: 37e376c1
Exists in master and in 1 other branch
Data classes complete refactoring.
Showing 1 changed file with 134 additions and 171 deletions
Show diff stats
src/data.py
@@ -22,17 +22,14 @@ __license__ = """ | @@ -22,17 +22,14 @@ __license__ = """ | ||
22 | import os | 22 | import os |
23 | import sys | 23 | import sys |
24 | import gc | 24 | import gc |
25 | -import re | ||
26 | import xapian | 25 | import xapian |
27 | -import axi | ||
28 | -from debian import debtags | ||
29 | import logging | 26 | import logging |
30 | -import hashlib | ||
31 | import random | 27 | import random |
28 | +import cluster | ||
29 | +import shutil | ||
32 | 30 | ||
33 | from error import Error | 31 | from error import Error |
34 | from singleton import Singleton | 32 | from singleton import Singleton |
35 | -import cluster | ||
36 | from dissimilarity import * | 33 | from dissimilarity import * |
37 | 34 | ||
38 | def axi_search_pkgs(axi,pkgs_list): | 35 | def axi_search_pkgs(axi,pkgs_list): |
@@ -53,101 +50,114 @@ def axi_search_pkg_tags(axi,pkg): | @@ -53,101 +50,114 @@ def axi_search_pkg_tags(axi,pkg): | ||
53 | term.term.startswith("XT")] | 50 | term.term.startswith("XT")] |
54 | return tags | 51 | return tags |
55 | 52 | ||
53 | +def print_index(index): | ||
54 | + output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n" | ||
55 | + for term in index.allterms(): | ||
56 | + output += term.term+"\n" | ||
57 | + output += str([index.get_document(posting.docid).get_data() | ||
58 | + for posting in index.postlist(term.term)]) | ||
59 | + output += "\n---" | ||
60 | + return output | ||
61 | + | ||
56 | class SampleAptXapianIndex(xapian.WritableDatabase): | 62 | class SampleAptXapianIndex(xapian.WritableDatabase): |
57 | """ | 63 | """ |
58 | Sample data source for packages information, mainly useful for tests. | 64 | Sample data source for packages information, mainly useful for tests. |
59 | """ | 65 | """ |
60 | - def __init__(self,pkgs_list,axi): | ||
61 | - xapian.WritableDatabase.__init__(self,".sample_axi", | 66 | + def __init__(self,pkgs_list,axi,path): |
67 | + xapian.WritableDatabase.__init__(self,path, | ||
62 | xapian.DB_CREATE_OR_OVERWRITE) | 68 | xapian.DB_CREATE_OR_OVERWRITE) |
63 | sample = axi_search_pkgs(axi,pkgs_list) | 69 | sample = axi_search_pkgs(axi,pkgs_list) |
64 | - self.all_docs = [] | ||
65 | for package in sample: | 70 | for package in sample: |
66 | doc_id = self.add_document(axi.get_document(package.docid)) | 71 | doc_id = self.add_document(axi.get_document(package.docid)) |
67 | - self.all_docs.append(doc_id) | ||
68 | 72 | ||
69 | - def _print(self): | ||
70 | - print "---" | ||
71 | - print xapian.WritableDatabase.__repr__(self) | ||
72 | - print "---" | ||
73 | - for doc_id in self.all_docs: | ||
74 | - print [term.term for term in self.get_document(doc_id).termlist()] | ||
75 | - print "---" | 73 | + def __str__(self): |
74 | + return print_index(self) | ||
76 | 75 | ||
77 | class PopconSubmission(): | 76 | class PopconSubmission(): |
78 | - def __init__(self,submission_hash): | ||
79 | - self.hash = submission_hash | ||
80 | - self.pkgs_list = [] | 77 | + def __init__(self,path,user_id=0): |
78 | + self.packages = dict() | ||
79 | + self.path = path | ||
80 | + self.load() | ||
81 | + if user_id: | ||
82 | + self.user_id = user_id | ||
81 | 83 | ||
82 | - def add_pkg(self,pkg): | ||
83 | - self.pkgs_list.append(pkg) | 84 | + def __str__(self): |
85 | + output = "\nPopularity-contest submission ID "+self.user_id | ||
86 | + for pkg, weight in self.packages.items(): | ||
87 | + output += "\n "+pkg+": "+str(weight) | ||
88 | + return output | ||
84 | 89 | ||
85 | - def parse_submission(self,submission_path,binary=1): | 90 | + def load(self,binary=1): |
86 | """ | 91 | """ |
87 | Parse a popcon submission, generating the names of the valid packages | 92 | Parse a popcon submission, generating the names of the valid packages |
88 | in the vote. | 93 | in the vote. |
89 | """ | 94 | """ |
90 | - submission = open(submission_path) | ||
91 | - for line in submission: | ||
92 | - if not line.startswith("POPULARITY"): | ||
93 | - if not line.startswith("END-POPULARITY"): | ||
94 | - data = line[:-1].split(" ") | ||
95 | - if len(data) > 3: | ||
96 | - if binary: | ||
97 | - # every installed package has the same weight | ||
98 | - yield data[2], 1 | ||
99 | - elif data[3] == '<NOFILES>': | 95 | + with open(self.path) as submission: |
96 | + for line in submission: | ||
97 | + if line.startswith("POPULARITY"): | ||
98 | + self.user_id = line.split()[2].lstrip("ID:") | ||
99 | + elif not line.startswith("END-POPULARITY"): | ||
100 | + data = line.rstrip('\n').split() | ||
101 | + if len(data) > 2: | ||
102 | + pkg = data[2] | ||
103 | + if len(data) > 3: | ||
104 | + exec_file = data[3] | ||
105 | + # Binary weight | ||
106 | + if binary: | ||
107 | + self.packages[pkg] = 1 | ||
108 | + # Weights inherited from Enrico's anapop | ||
100 | # No executable files to track | 109 | # No executable files to track |
101 | - yield data[2], 1 | ||
102 | - elif len(data) == 4: | 110 | + elif exec_file == '<NOFILES>': |
111 | + self.packages[pkg] = 1 | ||
103 | # Recently used packages | 112 | # Recently used packages |
104 | - yield data[2], 10 | ||
105 | - elif data[4] == '<OLD>': | 113 | + elif len(data) == 4: |
114 | + self.packages[pkg] = 10 | ||
106 | # Unused packages | 115 | # Unused packages |
107 | - yield data[2], 3 | ||
108 | - elif data[4] == '<RECENT-CTIME>': | 116 | + elif data[4] == '<OLD>': |
117 | + self.packages[pkg] = 3 | ||
109 | # Recently installed packages | 118 | # Recently installed packages |
110 | - yield data[2], 8 | ||
111 | -class PopconXapianIndex(xapian.WritableDatabase,Singleton): | 119 | + elif data[4] == '<RECENT-CTIME>': |
120 | + self.packages[pkg] = 8 | ||
121 | + | ||
122 | +class PopconXapianIndex(xapian.WritableDatabase): | ||
112 | """ | 123 | """ |
113 | Data source for popcon submissions defined as a singleton xapian database. | 124 | Data source for popcon submissions defined as a singleton xapian database. |
114 | """ | 125 | """ |
115 | - def __init__(self,cfg): | 126 | + def __init__(self,cfg,reindex=0,recluster=0): |
116 | """ | 127 | """ |
117 | Set initial attributes. | 128 | Set initial attributes. |
118 | """ | 129 | """ |
119 | - self.path = os.path.expanduser(cfg.popcon_index) | ||
120 | - self.popcon_dir = os.path.expanduser(cfg.popcon_dir) | ||
121 | - #self.debtags_path = os.path.expanduser(cfg.tags_db) | ||
122 | self.axi = xapian.Database(cfg.axi) | 130 | self.axi = xapian.Database(cfg.axi) |
123 | - self.load_index() | 131 | + self.path = os.path.expanduser(cfg.popcon_index) |
132 | + if reindex or not self.load_index(): | ||
133 | + if not os.path.exists(cfg.popcon_dir): | ||
134 | + os.makedirs(cfg.popcon_dir) | ||
135 | + if not os.listdir(cfg.popcon_dir): | ||
136 | + logging.critical("Popcon dir seems to be empty.") | ||
137 | + raise Error | ||
138 | + if not cfg.clustering: | ||
139 | + self.source_dir = os.path.expanduser(cfg.popcon_dir) | ||
140 | + else: | ||
141 | + self.source_dir = os.path.expanduser(cfg.clusters_dir) | ||
142 | + if not os.path.exists(cfg.clusters_dir): | ||
143 | + os.makedirs(cfg.clusters_dir) | ||
144 | + if not os.listdir(cfg.clusters_dir): | ||
145 | + distance = JaccardDistance() | ||
146 | + logging.info("Clustering popcon submissions from \'%s\'" | ||
147 | + % cfg.popcon_dir) | ||
148 | + logging.info("Clusters will be placed at \'%s\'" | ||
149 | + % cfg.clusters_dir) | ||
150 | + data = self.get_submissions(cfg.popcon_dir) | ||
151 | + if cfg.clustering == "Hierarchical": | ||
152 | + self.hierarchical_clustering(data,cfg.clusters_dir, | ||
153 | + distance) | ||
154 | + else: | ||
155 | + self.kmedoids_clustering(data,cfg.clusters_dir, | ||
156 | + distance) | ||
157 | + self.build_index() | ||
124 | 158 | ||
125 | - def parse_submission(self,submission_path,binary=1): | ||
126 | - """ | ||
127 | - Parse a popcon submission, generating the names of the valid packages | ||
128 | - in the vote. | ||
129 | - """ | ||
130 | - submission = open(submission_path) | ||
131 | - for line in submission: | ||
132 | - if not line.startswith("POPULARITY"): | ||
133 | - if not line.startswith("END-POPULARITY"): | ||
134 | - data = line[:-1].split(" ") | ||
135 | - if len(data) > 3: | ||
136 | - if binary: | ||
137 | - # every installed package has the same weight | ||
138 | - yield data[2], 1 | ||
139 | - elif data[3] == '<NOFILES>': | ||
140 | - # No executable files to track | ||
141 | - yield data[2], 1 | ||
142 | - elif len(data) == 4: | ||
143 | - # Recently used packages | ||
144 | - yield data[2], 10 | ||
145 | - elif data[4] == '<OLD>': | ||
146 | - # Unused packages | ||
147 | - yield data[2], 3 | ||
148 | - elif data[4] == '<RECENT-CTIME>': | ||
149 | - # Recently installed packages | ||
150 | - yield data[2], 8 | 159 | + def __str__(self): |
160 | + return print_index(self) | ||
151 | 161 | ||
152 | def load_index(self): | 162 | def load_index(self): |
153 | """ | 163 | """ |
@@ -159,19 +169,19 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | @@ -159,19 +169,19 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | ||
159 | xapian.Database.__init__(self,self.path) | 169 | xapian.Database.__init__(self,self.path) |
160 | except xapian.DatabaseError: | 170 | except xapian.DatabaseError: |
161 | logging.info("Could not open popcon index.") | 171 | logging.info("Could not open popcon index.") |
162 | - self.new_index() | 172 | + return 0 |
163 | 173 | ||
164 | - def new_index(self): | 174 | + def build_index(self): |
165 | """ | 175 | """ |
166 | - Create a xapian index for popcon submissions at 'popcon_dir' and | 176 | + Create a xapian index for popcon submissions at 'source_dir' and |
167 | place it at 'self.path'. | 177 | place it at 'self.path'. |
168 | """ | 178 | """ |
169 | - if not os.path.exists(self.path): | ||
170 | - os.makedirs(self.path) | 179 | + shutil.rmtree(self.path,1) |
180 | + os.makedirs(self.path) | ||
171 | 181 | ||
172 | try: | 182 | try: |
173 | logging.info("Indexing popcon submissions from \'%s\'" % | 183 | logging.info("Indexing popcon submissions from \'%s\'" % |
174 | - self.popcon_dir) | 184 | + self.source_dir) |
175 | logging.info("Creating new xapian index at \'%s\'" % | 185 | logging.info("Creating new xapian index at \'%s\'" % |
176 | self.path) | 186 | self.path) |
177 | xapian.WritableDatabase.__init__(self,self.path, | 187 | xapian.WritableDatabase.__init__(self,self.path, |
@@ -180,123 +190,79 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | @@ -180,123 +190,79 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | ||
180 | logging.critical("Could not create popcon xapian index.") | 190 | logging.critical("Could not create popcon xapian index.") |
181 | raise Error | 191 | raise Error |
182 | 192 | ||
183 | - for root, dirs, files in os.walk(self.popcon_dir): | ||
184 | - for submission in files: | ||
185 | - submission_path = os.path.join(root, submission) | 193 | + for root, dirs, files in os.walk(self.source_dir): |
194 | + for popcon_file in files: | ||
195 | + submission = PopconSubmission(os.path.join(root, popcon_file)) | ||
186 | doc = xapian.Document() | 196 | doc = xapian.Document() |
187 | - doc.set_data(submission) | ||
188 | - logging.debug("Parsing popcon submission at \'%s\'" % | ||
189 | - submission_path) | ||
190 | - for pkg, freq in self.parse_submission(submission_path): | 197 | + doc.set_data(submission.user_id) |
198 | + logging.debug("Parsing popcon submission \'%s\'" % | ||
199 | + submission.user_id) | ||
200 | + for pkg, freq in submission.packages.items(): | ||
191 | doc.add_term("XP"+pkg,freq) | 201 | doc.add_term("XP"+pkg,freq) |
192 | for tag in axi_search_pkg_tags(self.axi,pkg): | 202 | for tag in axi_search_pkg_tags(self.axi,pkg): |
193 | - print tag | ||
194 | doc.add_term(tag,freq) | 203 | doc.add_term(tag,freq) |
195 | doc_id = self.add_document(doc) | 204 | doc_id = self.add_document(doc) |
196 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) | 205 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
197 | # python garbage collector | 206 | # python garbage collector |
198 | gc.collect() | 207 | gc.collect() |
199 | # flush to disk database changes | 208 | # flush to disk database changes |
200 | - self.flush() | 209 | + self.commit() |
201 | 210 | ||
202 | -class PopconClusteredData(Singleton): | ||
203 | - """ | ||
204 | - Data source for popcon submissions defined as a singleton xapian database. | ||
205 | - """ | ||
206 | - def __init__(self,cfg): | 211 | + def get_submissions(self,submissions_dir): |
207 | """ | 212 | """ |
208 | - Set initial attributes. | 213 | + Get popcon submissions from popcon_dir |
209 | """ | 214 | """ |
210 | - self.popcon_dir = os.path.expanduser(cfg.popcon_dir) | ||
211 | - self.clusters_dir = os.path.expanduser(cfg.clusters_dir) | ||
212 | - self.submissions = [] | ||
213 | - self.clustering() | 215 | + submissions = [] |
216 | + for root, dirs, files in os.walk(submissions_dir): | ||
217 | + for popcon_file in files: | ||
218 | + submission = PopconSubmission(os.path.join(root, popcon_file)) | ||
219 | + submissions.append(submission) | ||
220 | + return submissions | ||
214 | 221 | ||
215 | - def parse_submission(self,submission_path,binary=1): | ||
216 | - """ | ||
217 | - Parse a popcon submission, generating the names of the valid packages | ||
218 | - in the vote. | ||
219 | - """ | ||
220 | - submission_file = open(submission_path) | ||
221 | - for line in submission_file: | ||
222 | - if not line.startswith("POPULARITY"): | ||
223 | - if not line.startswith("END-POPULARITY"): | ||
224 | - data = line[:-1].split(" ") | ||
225 | - if len(data) > 3: | ||
226 | - if binary: | ||
227 | - # every installed package has the same weight | ||
228 | - yield data[2], 1 | ||
229 | - elif data[3] == '<NOFILES>': | ||
230 | - # No executable files to track | ||
231 | - yield data[2], 1 | ||
232 | - elif len(data) == 4: | ||
233 | - # Recently used packages | ||
234 | - yield data[2], 10 | ||
235 | - elif data[4] == '<OLD>': | ||
236 | - # Unused packages | ||
237 | - yield data[2], 3 | ||
238 | - elif data[4] == '<RECENT-CTIME>': | ||
239 | - # Recently installed packages | ||
240 | - yield data[2], 8 | ||
241 | - | ||
242 | - def clustering(self): | 222 | + def hierarchical_clustering(self,data,clusters_dir,distance,k=10): |
243 | """ | 223 | """ |
244 | - called by init | ||
245 | - Create a xapian index for popcon submissions at 'popcon_dir' and | ||
246 | - place it at 'self.path'. | 224 | + Select popcon submissions from popcon_dir and place them at clusters_dir |
247 | """ | 225 | """ |
248 | - if not os.path.exists(self.clusters_dir): | ||
249 | - os.makedirs(self.clusters_dir) | ||
250 | - | ||
251 | - logging.info("Clustering popcon submissions from \'%s\'" % | ||
252 | - self.popcon_dir) | ||
253 | - logging.info("Clusters will be placed at \'%s\'" % self.clusters_dir) | 226 | + cl = cluster.HierarchicalClustering(data, lambda x,y: |
227 | + distance(x.packages.keys(), | ||
228 | + y.packages.keys())) | ||
229 | + clusters = cl.getlevel(0.5) | ||
230 | + for c in clusters: | ||
231 | + print "cluster" | ||
232 | + for submission in c: | ||
233 | + print submission.user_id | ||
254 | 234 | ||
255 | - for root, dirs, files in os.walk(self.popcon_dir): | ||
256 | - for submission_hash in files: | ||
257 | - s = PopconSubmission(submission_hash) | ||
258 | - submission_path = os.path.join(root, submission_hash) | ||
259 | - logging.debug("Parsing popcon submission \'%s\'" % | ||
260 | - submission_hash) | ||
261 | - for pkg, freq in self.parse_submission(submission_path): | ||
262 | - s.add_pkg(pkg) | ||
263 | - self.submissions.append(s) | ||
264 | - | ||
265 | - distanceFunction = JaccardDistance() | ||
266 | - # cl = cluster.HierarchicalClustering(self.submissions,lambda x,y: distanceFunction(x.pkgs_list,y.pkgs_list)) | ||
267 | - # clusters = cl.getlevel(0.5) | ||
268 | - # for c in clusters: | ||
269 | - # print "cluster" | ||
270 | - # for submission in c: | ||
271 | - # print submission.hash | ||
272 | - cl = KMedoidsClusteringPopcon(self.submissions, lambda x,y: \ | ||
273 | - distanceFunction(x.pkgs_list,y.pkgs_list)) | ||
274 | - #clusters = cl.getclusters(2) | ||
275 | - medoids = cl.getMedoids(2) | ||
276 | - print "medoids" | ||
277 | - for m in medoids: | ||
278 | - print m.hash | 235 | + def kmedoids_clustering(self,data,clusters_dir,distance,k=10): |
236 | + clusters = KMedoidsClustering(data,lambda x,y: | ||
237 | + distance(x.packages.keys(), | ||
238 | + y.packages.keys())) | ||
239 | + medoids = clusters.getMedoids(2) | ||
240 | + for submission in medoids: | ||
241 | + shutil.copyfile(submission.path,os.path.join(clusters_dir, | ||
242 | + submission.user_id)) | ||
279 | 243 | ||
280 | -class KMedoidsClusteringPopcon(cluster.KMeansClustering): | 244 | +class KMedoidsClustering(cluster.KMeansClustering): |
281 | 245 | ||
282 | def __init__(self,data,distance): | 246 | def __init__(self,data,distance): |
283 | - if len(data)>100: | 247 | + if len(data)<100: |
248 | + data_sample = data | ||
249 | + else: | ||
284 | data_sample = random.sample(data,100) | 250 | data_sample = random.sample(data,100) |
285 | cluster.KMeansClustering.__init__(self, data_sample, distance) | 251 | cluster.KMeansClustering.__init__(self, data_sample, distance) |
286 | self.distanceMatrix = {} | 252 | self.distanceMatrix = {} |
287 | for submission in self._KMeansClustering__data: | 253 | for submission in self._KMeansClustering__data: |
288 | - self.distanceMatrix[submission.hash] = {} | 254 | + self.distanceMatrix[submission.user_id] = {} |
289 | 255 | ||
290 | def loadDistanceMatrix(self,cluster): | 256 | def loadDistanceMatrix(self,cluster): |
291 | for i in range(len(cluster)-1): | 257 | for i in range(len(cluster)-1): |
292 | for j in range(i+1,len(cluster)): | 258 | for j in range(i+1,len(cluster)): |
293 | try: | 259 | try: |
294 | - d = self.distanceMatrix[cluster[i].hash][cluster[j].hash] | 260 | + d = self.distanceMatrix[cluster[i].user_id][cluster[j].user_id] |
295 | logging.debug("Using d[%d,%d]" % (i,j)) | 261 | logging.debug("Using d[%d,%d]" % (i,j)) |
296 | except: | 262 | except: |
297 | d = self.distance(cluster[i],cluster[j]) | 263 | d = self.distance(cluster[i],cluster[j]) |
298 | - self.distanceMatrix[cluster[i].hash][cluster[j].hash] = d | ||
299 | - self.distanceMatrix[cluster[j].hash][cluster[i].hash] = d | 264 | + self.distanceMatrix[cluster[i].user_id][cluster[j].user_id] = d |
265 | + self.distanceMatrix[cluster[j].user_id][cluster[i].user_id] = d | ||
300 | logging.debug("d[%d,%d] = %.2f" % (i,j,d)) | 266 | logging.debug("d[%d,%d] = %.2f" % (i,j,d)) |
301 | 267 | ||
302 | def getMedoid(self,cluster): | 268 | def getMedoid(self,cluster): |
@@ -308,22 +274,19 @@ class KMedoidsClusteringPopcon(cluster.KMeansClustering): | @@ -308,22 +274,19 @@ class KMedoidsClusteringPopcon(cluster.KMeansClustering): | ||
308 | self.loadDistanceMatrix(cluster) | 274 | self.loadDistanceMatrix(cluster) |
309 | medoidDistance = sys.maxint | 275 | medoidDistance = sys.maxint |
310 | for i in range(len(cluster)): | 276 | for i in range(len(cluster)): |
311 | - totalDistance = sum(self.distanceMatrix[cluster[i].hash].values()) | 277 | + totalDistance = sum(self.distanceMatrix[cluster[i].user_id].values()) |
312 | print "totalDistance[",i,"]=",totalDistance | 278 | print "totalDistance[",i,"]=",totalDistance |
313 | if totalDistance < medoidDistance: | 279 | if totalDistance < medoidDistance: |
314 | medoidDistance = totalDistance | 280 | medoidDistance = totalDistance |
315 | medoid = i | 281 | medoid = i |
316 | print "medoidDistance:",medoidDistance | 282 | print "medoidDistance:",medoidDistance |
317 | - logging.debug("Cluster medoid: [%d] %s" % (medoid, cluster[medoid].hash)) | 283 | + logging.debug("Cluster medoid: [%d] %s" % (medoid, |
284 | + cluster[medoid].user_id)) | ||
318 | return cluster[medoid] | 285 | return cluster[medoid] |
319 | 286 | ||
320 | def assign_item(self, item, origin): | 287 | def assign_item(self, item, origin): |
321 | """ | 288 | """ |
322 | Assigns an item from a given cluster to the closest located cluster | 289 | Assigns an item from a given cluster to the closest located cluster |
323 | - | ||
324 | - PARAMETERS | ||
325 | - item - the item to be moved | ||
326 | - origin - the originating cluster | ||
327 | """ | 290 | """ |
328 | closest_cluster = origin | 291 | closest_cluster = origin |
329 | for cluster in self._KMeansClustering__clusters: | 292 | for cluster in self._KMeansClustering__clusters: |
@@ -332,7 +295,7 @@ class KMedoidsClusteringPopcon(cluster.KMeansClustering): | @@ -332,7 +295,7 @@ class KMedoidsClusteringPopcon(cluster.KMeansClustering): | ||
332 | 295 | ||
333 | if closest_cluster != origin: | 296 | if closest_cluster != origin: |
334 | self.move_item(item, origin, closest_cluster) | 297 | self.move_item(item, origin, closest_cluster) |
335 | - logging.debug("Item changed cluster: %s" % item.hash) | 298 | + logging.debug("Item changed cluster: %s" % item.user_id) |
336 | return True | 299 | return True |
337 | else: | 300 | else: |
338 | return False | 301 | return False |
@@ -342,5 +305,5 @@ class KMedoidsClusteringPopcon(cluster.KMeansClustering): | @@ -342,5 +305,5 @@ class KMedoidsClusteringPopcon(cluster.KMeansClustering): | ||
342 | Generate n clusters and return their medoids. | 305 | Generate n clusters and return their medoids. |
343 | """ | 306 | """ |
344 | medoids = [self.getMedoid(cluster) for cluster in self.getclusters(n)] | 307 | medoids = [self.getMedoid(cluster) for cluster in self.getclusters(n)] |
345 | - logging.info("Clustering completed and the following centroids were found: %s" % [c.hash for c in medoids]) | 308 | + logging.info("Clustering completed and the following centroids were found: %s" % [c.user_id for c in medoids]) |
346 | return medoids | 309 | return medoids |