Commit 19f1de42244a512b3f9de2c90fd93b5578a002e8
1 parent
caf1024a
Exists in
master
and in
1 other branch
Minor changes, mainly documentation
Showing
8 changed files
with
25 additions
and
8 deletions
Show diff stats
src/bin/get_desktop.sh
src/bin/get_pkgs_inst.py
src/bin/get_program.sh
| 1 | #!/usr/bin/env bash | 1 | #!/usr/bin/env bash |
| 2 | +# | ||
| 3 | +# get_program.sh - get packages which have the tag 'role::program' associated | ||
| 4 | + | ||
| 2 | cat /var/lib/debtags/package-tags |grep "role::program" | awk -F: '{ print $1 }' | 5 | cat /var/lib/debtags/package-tags |grep "role::program" | awk -F: '{ print $1 }' |
src/bin/get_tags.sh
| 1 | #!/usr/bin/env bash | 1 | #!/usr/bin/env bash |
| 2 | +# | ||
| 3 | +# get_tags.sh - get meaningful tags for recommendation purposes | ||
| 4 | + | ||
| 2 | cat /var/lib/debtags/vocabulary | grep "Tag:" | egrep -v "culture::|devel::lang|hardware::|implemented-in::|interface::|iso15924::|made-of::|network::|protocol::|role::|scope::|secteam::|special::|uitoolkit::|x11::|TODO" | awk '{print $2}' | 5 | cat /var/lib/debtags/vocabulary | grep "Tag:" | egrep -v "culture::|devel::lang|hardware::|implemented-in::|interface::|iso15924::|made-of::|network::|protocol::|role::|scope::|secteam::|special::|uitoolkit::|x11::|TODO" | awk '{print $2}' |
src/bin/pkgindex.py
| 1 | #!/usr/bin/env python | 1 | #!/usr/bin/env python |
| 2 | """ | 2 | """ |
| 3 | - Clustering - A python script to perform clustering of popcon data. | 3 | + pkgindex.py - generate a pkgs index to be used by the recommender as the |
| 4 | + items repository, based on the pkgs filter provided by config | ||
| 4 | """ | 5 | """ |
| 5 | __author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | 6 | __author__ = "Tassia Camoes Araujo <tassia@gmail.com>" |
| 6 | __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | 7 | __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" |
| @@ -38,7 +39,7 @@ if __name__ == '__main__': | @@ -38,7 +39,7 @@ if __name__ == '__main__': | ||
| 38 | pkgs_list = [line.strip() for line in valid] | 39 | pkgs_list = [line.strip() for line in valid] |
| 39 | logging.info("Packages list length: %d" % len(pkgs_list)) | 40 | logging.info("Packages list length: %d" % len(pkgs_list)) |
| 40 | 41 | ||
| 41 | - # use config file or command line options | 42 | + # use config file or command line options |
| 42 | pkgindex = data.SampleAptXapianIndex(pkgs_list,xapian.Database(cfg.axi), | 43 | pkgindex = data.SampleAptXapianIndex(pkgs_list,xapian.Database(cfg.axi), |
| 43 | cfg.axi+"-"+cfg.pkgs_filter) | 44 | cfg.axi+"-"+cfg.pkgs_filter) |
| 44 | end_time = datetime.datetime.now() | 45 | end_time = datetime.datetime.now() |
src/bin/popindex.py
| 1 | #!/usr/bin/env python | 1 | #!/usr/bin/env python |
| 2 | """ | 2 | """ |
| 3 | - Clustering - A python script to perform clustering of popcon data. | 3 | + popindex.py - generate a popcon index to be used by the recommender as the |
| 4 | + users repository, based on filters provided by config | ||
| 4 | """ | 5 | """ |
| 5 | __author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | 6 | __author__ = "Tassia Camoes Araujo <tassia@gmail.com>" |
| 6 | __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | 7 | __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" |
src/bin/rank_terms.py
src/data.py
| @@ -67,7 +67,7 @@ def print_index(index): | @@ -67,7 +67,7 @@ def print_index(index): | ||
| 67 | 67 | ||
| 68 | class AppAptXapianIndex(xapian.WritableDatabase): | 68 | class AppAptXapianIndex(xapian.WritableDatabase): |
| 69 | """ | 69 | """ |
| 70 | - Sample data source for packages information, mainly useful for tests. | 70 | + Data source for application packages information |
| 71 | """ | 71 | """ |
| 72 | def __init__(self,axi_path,path): | 72 | def __init__(self,axi_path,path): |
| 73 | xapian.WritableDatabase.__init__(self,path, | 73 | xapian.WritableDatabase.__init__(self,path, |
| @@ -93,7 +93,8 @@ class AppAptXapianIndex(xapian.WritableDatabase): | @@ -93,7 +93,8 @@ class AppAptXapianIndex(xapian.WritableDatabase): | ||
| 93 | 93 | ||
| 94 | class SampleAptXapianIndex(xapian.WritableDatabase): | 94 | class SampleAptXapianIndex(xapian.WritableDatabase): |
| 95 | """ | 95 | """ |
| 96 | - Sample data source for packages information, mainly useful for tests. | 96 | + Sample data source for packages information, generated from a list of |
| 97 | + packages. | ||
| 97 | """ | 98 | """ |
| 98 | def __init__(self,pkgs_list,axi,path): | 99 | def __init__(self,pkgs_list,axi,path): |
| 99 | xapian.WritableDatabase.__init__(self,path, | 100 | xapian.WritableDatabase.__init__(self,path, |
| @@ -106,9 +107,10 @@ class SampleAptXapianIndex(xapian.WritableDatabase): | @@ -106,9 +107,10 @@ class SampleAptXapianIndex(xapian.WritableDatabase): | ||
| 106 | return print_index(self) | 107 | return print_index(self) |
| 107 | 108 | ||
| 108 | class PopconSubmission(): | 109 | class PopconSubmission(): |
| 109 | - def __init__(self,path,user_id=0): | 110 | + def __init__(self,path,user_id=0,binary=1): |
| 110 | self.packages = dict() | 111 | self.packages = dict() |
| 111 | self.path = path | 112 | self.path = path |
| 113 | + self.binary = binary | ||
| 112 | self.load() | 114 | self.load() |
| 113 | if user_id: | 115 | if user_id: |
| 114 | self.user_id = user_id | 116 | self.user_id = user_id |
| @@ -142,7 +144,7 @@ class PopconSubmission(): | @@ -142,7 +144,7 @@ class PopconSubmission(): | ||
| 142 | if len(data) > 3: | 144 | if len(data) > 3: |
| 143 | exec_file = data[3] | 145 | exec_file = data[3] |
| 144 | # Binary weight | 146 | # Binary weight |
| 145 | - if binary: | 147 | + if self.binary: |
| 146 | self.packages[pkg] = 1 | 148 | self.packages[pkg] = 1 |
| 147 | # Weights inherited from Enrico's anapop | 149 | # Weights inherited from Enrico's anapop |
| 148 | # No executable files to track | 150 | # No executable files to track |
| @@ -171,7 +173,7 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -171,7 +173,7 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
| 171 | self.source_dir = os.path.expanduser(cfg.popcon_dir) | 173 | self.source_dir = os.path.expanduser(cfg.popcon_dir) |
| 172 | self.max_popcon = cfg.max_popcon | 174 | self.max_popcon = cfg.max_popcon |
| 173 | self.valid_pkgs = [] | 175 | self.valid_pkgs = [] |
| 174 | - # file format: one pkg_name per line | 176 | + # file format for filter: one package name per line |
| 175 | with open(os.path.join(cfg.filters,cfg.pkgs_filter)) as valid_pkgs: | 177 | with open(os.path.join(cfg.filters,cfg.pkgs_filter)) as valid_pkgs: |
| 176 | self.valid_pkgs = [line.strip() for line in valid_pkgs | 178 | self.valid_pkgs = [line.strip() for line in valid_pkgs |
| 177 | if not line.startswith("#")] | 179 | if not line.startswith("#")] |