Commit 3ca461fae1bcd5c4d7b76b15689d01254874303e
1 parent
32cf4ed6
Exists in
master
and in
1 other branch
Added popcon experiments.
Showing
1 changed file
with
68 additions
and
0 deletions
Show diff stats
| ... | ... | @@ -0,0 +1,68 @@ |
| 1 | +#! /usr/bin/env python | |
| 2 | +""" | |
| 3 | + misc_popcon - misc experiments with popcon data | |
| 4 | +""" | |
| 5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
| 6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
| 7 | +__license__ = """ | |
| 8 | + This program is free software: you can redistribute it and/or modify | |
| 9 | + it under the terms of the GNU General Public License as published by | |
| 10 | + the Free Software Foundation, either version 3 of the License, or | |
| 11 | + (at your option) any later version. | |
| 12 | + | |
| 13 | + This program is distributed in the hope that it will be useful, | |
| 14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 16 | + GNU General Public License for more details. | |
| 17 | + | |
| 18 | + You should have received a copy of the GNU General Public License | |
| 19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| 20 | +""" | |
| 21 | + | |
| 22 | +import Gnuplot | |
| 23 | +import xapian | |
| 24 | + | |
def _count_sizes(sizes):
    """Return a sorted list of ``(size, occurrences)`` pairs for *sizes*."""
    counts = {}
    for size in sizes:
        counts[size] = counts.get(size, 0) + 1
    return sorted(counts.items())


def _bucket_population(size_counts, bounds, total):
    """Aggregate *size_counts* into the buckets ``(low, high]`` of *bounds*.

    Returns two parallel lists keyed by each bucket's upper bound: the
    absolute population, and that population divided by *total*.
    """
    population = []
    percentage = []
    for low, high in zip(bounds[:-1], bounds[1:]):
        members = sum(count for size, count in size_counts
                      if low < size <= high)
        population.append((high, members))
        percentage.append((high, members / float(total)))
    return population, percentage


def _save_plot(plot, xlabel, ylabel, data, basename):
    """Plot *data* with the given labels; save PNG and PostScript copies."""
    plot.xlabel(xlabel)
    plot.ylabel(ylabel)
    plot.plot(data)
    plot.hardcopy(basename + '.png', terminal="png")
    plot.hardcopy(basename + '.ps', terminal="postscript",
                  enhanced=1, color=1)


def profile_population(
        popcon_path="/home/tassia/.app-recommender/popcon_desktopapps",
        ranges=None):
    """Plot how popcon user profiles are distributed by profile size.

    The size of a profile is the number of terms indexed for the
    corresponding document of the Xapian database at *popcon_path*.
    Three plots are written, each as ``.png`` and ``.ps``, using
    relative file names:

    * ``profile_population``: number of users per exact profile size;
    * ``ranges_profile_population``: number of users per size bucket;
    * ``ranges_profile_percentage``: fraction of users per size bucket.

    Parameters:
        popcon_path: path of the Xapian database to read.
        ranges: increasing sequence of bucket bounds; consecutive values
            ``(low, high)`` define the bucket ``low < size <= high``.
            Defaults to ``range(0, 601, 50)``. Sizes outside every bucket
            are left out of the two bucketed plots.

    Returns None. If the database holds no documents, a message is
    printed and no plot is produced.
    """
    if ranges is None:
        bounds = list(range(0, 601, 50))
    else:
        bounds = list(ranges)

    popcon = xapian.Database(popcon_path)
    popcon_size = popcon.get_doccount()
    print("User repository size: %d" % popcon_size)
    if popcon_size == 0:
        # An empty repository has no distribution to plot.
        print("No user profiles found; nothing to plot.")
        return

    # The posting list of the empty term enumerates every document in a
    # Xapian database.  Document ids start at 1 and may have gaps, so
    # walking range(1, doccount) would miss the last document and could
    # request ids that no longer exist.
    profiles_size = [
        sum(1 for _ in popcon.get_document(posting.docid).termlist())
        for posting in popcon.postlist("")
    ]

    size_counts = _count_sizes(profiles_size)
    ranges_population, ranges_percentage = _bucket_population(
        size_counts, bounds, popcon_size)

    g = Gnuplot.Gnuplot()
    g('set style data points')  # give gnuplot an arbitrary command
    _save_plot(g, 'Desktop profile size', 'Population size',
               size_counts, 'profile_population')
    g.reset()
    _save_plot(g, 'Range Desktop profile size', 'Population size',
               ranges_population, 'ranges_profile_population')
    g.reset()
    _save_plot(g, 'Range Desktop profile size', 'Population percentage',
               ranges_percentage, 'ranges_profile_percentage')
# Run the experiment only when this file is executed as a script.
if __name__ == "__main__":
    profile_population()
| 68 | + | ... | ... |