Commit b7634993d7d6976821e2a9b2b24d39d8726ea3f0
1 parent
165d3a03
Exists in master and in 1 other branch
Removing data size constraint.
Showing 1 changed file with 13 additions and 6 deletions
Show diff stats
src/data.py
| ... | ... | @@ -271,11 +271,12 @@ class PopconXapianIndex(xapian.WritableDatabase): |
| 271 | 271 | class KMedoidsClustering(cluster.KMeansClustering): |
| 272 | 272 | |
| 273 | 273 | def __init__(self,data,distance,max_data=100): |
| 274 | - if len(data)<max_data: | |
| 275 | - data_sample = data | |
| 276 | - else: | |
| 277 | - data_sample = random.sample(data,max_data) | |
| 278 | - cluster.KMeansClustering.__init__(self, data_sample, distance) | |
| 274 | + # if len(data)<max_data: | |
| 275 | + # data_sample = data | |
| 276 | + # else: | |
| 277 | + # data_sample = random.sample(data,max_data) | |
| 278 | + # cluster.KMeansClustering.__init__(self, data_sample, distance) | |
| 279 | + cluster.KMeansClustering.__init__(self, data, distance) | |
| 279 | 280 | self.distanceMatrix = {} |
| 280 | 281 | for submission in self._KMeansClustering__data: |
| 281 | 282 | self.distanceMatrix[submission.user_id] = {} |
| ... | ... | @@ -332,7 +333,13 @@ class KMedoidsClustering(cluster.KMeansClustering): |
| 332 | 333 | """ |
| 333 | 334 | Generate n clusters and return their medoids. |
| 334 | 335 | """ |
| 335 | - medoids_distances = [self.getMedoid(cluster) for cluster in self.getclusters(n)] | |
| 336 | + #medoids_distances = [self.getMedoid(cluster) for cluster in self.getclusters(n)] | |
| 337 | + medoids_distances = [] | |
| 338 | + for cluster in self.getclusters(n): | |
| 339 | + type(cluster) | |
| 340 | + print cluster | |
| 341 | + medoids_distances.append(self.getMedoid(cluster)) | |
| 342 | + print medoids_distances | |
| 336 | 343 | medoids = [m[0] for m in medoids_distances] |
| 337 | 344 | dispersion = sum([m[1] for m in medoids_distances]) |
| 338 | 345 | logging.info("Clustering completed and the following medoids were found: %s" % [c.user_id for c in medoids]) | ... | ... |