Commit 3ca461fae1bcd5c4d7b76b15689d01254874303e

Authored by Tássia Camões Araújo
1 parent 32cf4ed6
Exists in master and in 1 other branch add_vagrant

Added popcon experiments.

Showing 1 changed file with 68 additions and 0 deletions   Show diff stats
src/experiments/misc-popcon.py 0 → 100755
... ... @@ -0,0 +1,68 @@
  1 +#! /usr/bin/env python
  2 +"""
  3 + misc_popcon - misc experiments with popcon data
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import Gnuplot
  23 +import xapian
  24 +
def _profile_size_histogram(profile_sizes):
    """Return sorted ``(size, number_of_profiles)`` pairs for *profile_sizes*.

    Only sizes that actually occur are listed, in ascending size order.
    """
    counts = {}
    for size in profile_sizes:
        counts[size] = counts.get(size, 0) + 1
    return sorted(counts.items())


def _population_by_range(histogram, total, bounds):
    """Aggregate *histogram* into the ranges delimited by *bounds*.

    Each consecutive pair ``(low, high)`` of *bounds* defines a range that
    holds the profiles whose size satisfies ``low < size <= high``.  Sizes
    outside every range (at or below the first bound, above the last one)
    are not counted anywhere.

    Returns ``(populations, percentages)``: two lists of tuples keyed by the
    upper bound of each range, holding respectively the absolute number of
    profiles and that number divided by *total*.
    """
    populations = []
    percentages = []
    for low, high in zip(bounds[:-1], bounds[1:]):
        in_range = sum(count for size, count in histogram
                       if low < size <= high)
        populations.append((high, in_range))
        percentages.append((high, in_range / float(total)))
    return populations, percentages


def _plot_and_save(plotter, data, xlabel, ylabel, basename):
    """Plot *data* and write it to ``<basename>.png`` and ``<basename>.ps``."""
    plotter.xlabel(xlabel)
    plotter.ylabel(ylabel)
    plotter.plot(data)
    plotter.hardcopy(basename + '.png', terminal="png")
    plotter.hardcopy(basename + '.ps', terminal="postscript",
                     enhanced=1, color=1)


def profile_population(
        popcon_path="/home/tassia/.app-recommender/popcon_desktopapps"):
    """Plot how the sizes of popcon user profiles are distributed.

    Opens the Xapian database at *popcon_path*, takes the number of terms of
    each document as the size of that user's profile, and writes three plots
    (PNG and PostScript each) into the current working directory:

    * ``profile_population``: number of profiles for every observed size;
    * ``ranges_profile_population``: number of profiles per range of 50
      sizes, up to 600;
    * ``ranges_profile_percentage``: the same ranges as a fraction of the
      number of documents in the database.

    The number of documents is printed to standard output.  Nothing is
    plotted when the database holds no documents.
    """
    popcon = xapian.Database(popcon_path)
    popcon_size = popcon.get_doccount()
    print("User repository size: %d" % popcon_size)

    # The posting list of the empty term enumerates every document that
    # exists, so the last document is included and gaps in the document ids
    # (left by deletions) are skipped instead of raising.
    profile_sizes = []
    for posting in popcon.postlist(""):
        user = popcon.get_document(posting.docid)
        profile_sizes.append(sum(1 for _ in user.termlist()))
    if not profile_sizes:
        # Empty database: there is nothing to plot.
        return

    size_histogram = _profile_size_histogram(profile_sizes)
    ranges_population, ranges_percentage = _population_by_range(
        size_histogram, popcon_size, list(range(0, 601, 50)))

    g = Gnuplot.Gnuplot()
    g('set style data points')  # give gnuplot an arbitrary command
    _plot_and_save(g, size_histogram,
                   'Desktop profile size', 'Population size',
                   'profile_population')
    g.reset()
    _plot_and_save(g, ranges_population,
                   'Range Desktop profile size', 'Population size',
                   'ranges_profile_population')
    g.reset()
    _plot_and_save(g, ranges_percentage,
                   'Range Desktop profile size', 'Population percentage',
                   'ranges_profile_percentage')
# Run the experiment only when this file is executed as a script.
if __name__ == "__main__":
    profile_population()
  68 +
... ...