Commit b7634993d7d6976821e2a9b2b24d39d8726ea3f0

Authored by Tássia Camões Araújo
1 parent 165d3a03
Exists in master and in 1 other branch: add_vagrant

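Remove the data size constraint: KMedoidsClustering now clusters all of the data instead of a random sample of at most max_data items. Also prints each cluster and the collected medoid distances while generating medoids.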

Showing 1 changed file with 13 additions and 6 deletions   Show diff stats
src/data.py
... ... @@ -271,11 +271,12 @@ class PopconXapianIndex(xapian.WritableDatabase):
271 271 class KMedoidsClustering(cluster.KMeansClustering):
272 272  
273 273 def __init__(self,data,distance,max_data=100):
274   - if len(data)<max_data:
275   - data_sample = data
276   - else:
277   - data_sample = random.sample(data,max_data)
278   - cluster.KMeansClustering.__init__(self, data_sample, distance)
  274 + # if len(data)<max_data:
  275 + # data_sample = data
  276 + # else:
  277 + # data_sample = random.sample(data,max_data)
  278 + # cluster.KMeansClustering.__init__(self, data_sample, distance)
  279 + cluster.KMeansClustering.__init__(self, data, distance)
279 280 self.distanceMatrix = {}
280 281 for submission in self._KMeansClustering__data:
281 282 self.distanceMatrix[submission.user_id] = {}
... ... @@ -332,7 +333,13 @@ class KMedoidsClustering(cluster.KMeansClustering):
332 333 """
333 334 Generate n clusters and return their medoids.
334 335 """
335   - medoids_distances = [self.getMedoid(cluster) for cluster in self.getclusters(n)]
  336 + #medoids_distances = [self.getMedoid(cluster) for cluster in self.getclusters(n)]
  337 + medoids_distances = []
  338 + for cluster in self.getclusters(n):
  339 + type(cluster)
  340 + print cluster
  341 + medoids_distances.append(self.getMedoid(cluster))
  342 + print medoids_distances
336 343 medoids = [m[0] for m in medoids_distances]
337 344 dispersion = sum([m[1] for m in medoids_distances])
338 345 logging.info("Clustering completed and the following medoids were found: %s" % [c.user_id for c in medoids])
... ...