Commit 19f1de42244a512b3f9de2c90fd93b5578a002e8

Authored by Tássia Camões Araújo
1 parent caf1024a
Exists in master and in 1 other branch add_vagrant

Minor changes, mainly documentation

src/bin/get_desktop.sh
1 #!/usr/bin/env bash 1 #!/usr/bin/env bash
  2 +#
  3 +# get_desktop.sh - get packages which have desktop files
  4 +
2 cd /usr/share/app-install/desktop 5 cd /usr/share/app-install/desktop
3 sed -ne 's/X-AppInstall-Package=//p' * | sort -u 6 sed -ne 's/X-AppInstall-Package=//p' * | sort -u
src/bin/get_pkgs_inst.py
1 #!/usr/bin/env python 1 #!/usr/bin/env python
  2 +#
  3 +# get_pkgs_inst.py - get tuple (package,installation) from popcon results file
2 4
3 from operator import itemgetter 5 from operator import itemgetter
4 if __name__ == '__main__': 6 if __name__ == '__main__':
src/bin/get_program.sh
1 #!/usr/bin/env bash 1 #!/usr/bin/env bash
  2 +#
  3 +# get_program.sh - get packages which have the tag 'role::program' associated
  4 +
2 cat /var/lib/debtags/package-tags |grep "role::program" | awk -F: '{ print $1 }' 5 cat /var/lib/debtags/package-tags |grep "role::program" | awk -F: '{ print $1 }'
src/bin/get_tags.sh
1 #!/usr/bin/env bash 1 #!/usr/bin/env bash
  2 +#
  3 +# get_tags.sh - get meaningful tags for recommendation purposes
  4 +
2 cat /var/lib/debtags/vocabulary | grep "Tag:" | egrep -v "culture::|devel::lang|hardware::|implemented-in::|interface::|iso15924::|made-of::|network::|protocol::|role::|scope::|secteam::|special::|uitoolkit::|x11::|TODO" | awk '{print $2}' 5 cat /var/lib/debtags/vocabulary | grep "Tag:" | egrep -v "culture::|devel::lang|hardware::|implemented-in::|interface::|iso15924::|made-of::|network::|protocol::|role::|scope::|secteam::|special::|uitoolkit::|x11::|TODO" | awk '{print $2}'
src/bin/pkgindex.py
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 """ 2 """
3 - Clustering - A python script to perform clustering of popcon data. 3 + pkgindex.py - generate a pkgs index to be used by the recommender as the
  4 + items repository, based on the pkgs filter provided by config
4 """ 5 """
5 __author__ = "Tassia Camoes Araujo <tassia@gmail.com>" 6 __author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
6 __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" 7 __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
@@ -38,7 +39,7 @@ if __name__ == '__main__': @@ -38,7 +39,7 @@ if __name__ == '__main__':
38 pkgs_list = [line.strip() for line in valid] 39 pkgs_list = [line.strip() for line in valid]
39 logging.info("Packages list length: %d" % len(pkgs_list)) 40 logging.info("Packages list length: %d" % len(pkgs_list))
40 41
41 - # use config file or command line options 42 + # use config file or command line options
42 pkgindex = data.SampleAptXapianIndex(pkgs_list,xapian.Database(cfg.axi), 43 pkgindex = data.SampleAptXapianIndex(pkgs_list,xapian.Database(cfg.axi),
43 cfg.axi+"-"+cfg.pkgs_filter) 44 cfg.axi+"-"+cfg.pkgs_filter)
44 end_time = datetime.datetime.now() 45 end_time = datetime.datetime.now()
src/bin/popindex.py
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 """ 2 """
3 - Clustering - A python script to perform clustering of popcon data. 3 + popindex.py - generate a popcon index to be used by the recommender as the
  4 + users repository, based on filters provided by config
4 """ 5 """
5 __author__ = "Tassia Camoes Araujo <tassia@gmail.com>" 6 __author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
6 __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" 7 __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
src/bin/rank_terms.py
1 #!/usr/bin/env python 1 #!/usr/bin/env python
  2 +#
  3 +# rank_terms.py - rank index terms by frequency
2 4
3 import xapian 5 import xapian
4 import os 6 import os
@@ -67,7 +67,7 @@ def print_index(index): @@ -67,7 +67,7 @@ def print_index(index):
67 67
68 class AppAptXapianIndex(xapian.WritableDatabase): 68 class AppAptXapianIndex(xapian.WritableDatabase):
69 """ 69 """
70 - Sample data source for packages information, mainly useful for tests. 70 + Data source for application packages information
71 """ 71 """
72 def __init__(self,axi_path,path): 72 def __init__(self,axi_path,path):
73 xapian.WritableDatabase.__init__(self,path, 73 xapian.WritableDatabase.__init__(self,path,
@@ -93,7 +93,8 @@ class AppAptXapianIndex(xapian.WritableDatabase): @@ -93,7 +93,8 @@ class AppAptXapianIndex(xapian.WritableDatabase):
93 93
94 class SampleAptXapianIndex(xapian.WritableDatabase): 94 class SampleAptXapianIndex(xapian.WritableDatabase):
95 """ 95 """
96 - Sample data source for packages information, mainly useful for tests. 96 + Sample data source for packages information, generated from a list of
  97 + packages.
97 """ 98 """
98 def __init__(self,pkgs_list,axi,path): 99 def __init__(self,pkgs_list,axi,path):
99 xapian.WritableDatabase.__init__(self,path, 100 xapian.WritableDatabase.__init__(self,path,
@@ -106,9 +107,10 @@ class SampleAptXapianIndex(xapian.WritableDatabase): @@ -106,9 +107,10 @@ class SampleAptXapianIndex(xapian.WritableDatabase):
106 return print_index(self) 107 return print_index(self)
107 108
108 class PopconSubmission(): 109 class PopconSubmission():
109 - def __init__(self,path,user_id=0): 110 + def __init__(self,path,user_id=0,binary=1):
110 self.packages = dict() 111 self.packages = dict()
111 self.path = path 112 self.path = path
  113 + self.binary = binary
112 self.load() 114 self.load()
113 if user_id: 115 if user_id:
114 self.user_id = user_id 116 self.user_id = user_id
@@ -142,7 +144,7 @@ class PopconSubmission(): @@ -142,7 +144,7 @@ class PopconSubmission():
142 if len(data) > 3: 144 if len(data) > 3:
143 exec_file = data[3] 145 exec_file = data[3]
144 # Binary weight 146 # Binary weight
145 - if binary: 147 + if self.binary:
146 self.packages[pkg] = 1 148 self.packages[pkg] = 1
147 # Weights inherited from Enrico's anapop 149 # Weights inherited from Enrico's anapop
148 # No executable files to track 150 # No executable files to track
@@ -171,7 +173,7 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -171,7 +173,7 @@ class PopconXapianIndex(xapian.WritableDatabase):
171 self.source_dir = os.path.expanduser(cfg.popcon_dir) 173 self.source_dir = os.path.expanduser(cfg.popcon_dir)
172 self.max_popcon = cfg.max_popcon 174 self.max_popcon = cfg.max_popcon
173 self.valid_pkgs = [] 175 self.valid_pkgs = []
174 - # file format: one pkg_name per line 176 + # file format for filter: one package name per line
175 with open(os.path.join(cfg.filters,cfg.pkgs_filter)) as valid_pkgs: 177 with open(os.path.join(cfg.filters,cfg.pkgs_filter)) as valid_pkgs:
176 self.valid_pkgs = [line.strip() for line in valid_pkgs 178 self.valid_pkgs = [line.strip() for line in valid_pkgs
177 if not line.startswith("#")] 179 if not line.startswith("#")]