Commit b7634993d7d6976821e2a9b2b24d39d8726ea3f0
1 parent
165d3a03
Exists in
master
and in
1 other branch
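Removing data size constraint (KMedoidsClustering no longer samples the input down to max_data items; it clusters the full data set).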
Showing
1 changed file
with
13 additions
and
6 deletions
Show diff stats
src/data.py
... | ... | @@ -271,11 +271,12 @@ class PopconXapianIndex(xapian.WritableDatabase): |
271 | 271 | class KMedoidsClustering(cluster.KMeansClustering): |
272 | 272 | |
273 | 273 | def __init__(self,data,distance,max_data=100): |
274 | - if len(data)<max_data: | |
275 | - data_sample = data | |
276 | - else: | |
277 | - data_sample = random.sample(data,max_data) | |
278 | - cluster.KMeansClustering.__init__(self, data_sample, distance) | |
274 | + # if len(data)<max_data: | |
275 | + # data_sample = data | |
276 | + # else: | |
277 | + # data_sample = random.sample(data,max_data) | |
278 | + # cluster.KMeansClustering.__init__(self, data_sample, distance) | |
279 | + cluster.KMeansClustering.__init__(self, data, distance) | |
279 | 280 | self.distanceMatrix = {} |
280 | 281 | for submission in self._KMeansClustering__data: |
281 | 282 | self.distanceMatrix[submission.user_id] = {} |
... | ... | @@ -332,7 +333,13 @@ class KMedoidsClustering(cluster.KMeansClustering): |
332 | 333 | """ |
333 | 334 | Generate n clusters and return their medoids. |
334 | 335 | """ |
335 | - medoids_distances = [self.getMedoid(cluster) for cluster in self.getclusters(n)] | |
336 | + #medoids_distances = [self.getMedoid(cluster) for cluster in self.getclusters(n)] | |
337 | + medoids_distances = [] | |
338 | + for cluster in self.getclusters(n): | |
339 | + type(cluster) | |
340 | + print cluster | |
341 | + medoids_distances.append(self.getMedoid(cluster)) | |
342 | + print medoids_distances | |
336 | 343 | medoids = [m[0] for m in medoids_distances] |
337 | 344 | dispersion = sum([m[1] for m in medoids_distances]) |
338 | 345 | logging.info("Clustering completed and the following medoids were found: %s" % [c.user_id for c in medoids]) | ... | ... |