Commit 7f0ce65a882066a510888dcd87d1efa2e9dbef63
1 parent
98f794f3
Exists in
master
and in
1 other branch
Using Python variables __author__, __copyright__ and __license__.
Showing
16 changed files
with
360 additions
and
423 deletions
Show diff stats
src/app_recommender.py
1 | -#!/usr/bin/python | |
2 | - | |
3 | -# AppRecommender - a GNU/Linux application recommender. | |
4 | -# | |
5 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
6 | -# | |
7 | -# This program is free software: you can redistribute it and/or modify | |
8 | -# it under the terms of the GNU General Public License as published by | |
9 | -# the Free Software Foundation, either version 3 of the License, or | |
10 | -# (at your option) any later version. | |
11 | -# | |
12 | -# This program is distributed in the hope that it will be useful, | |
13 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | -# GNU General Public License for more details. | |
16 | -# | |
17 | -# You should have received a copy of the GNU General Public License | |
18 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + AppRecommender - A GNU/Linux application recommender | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
12 | + | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
19 | 21 | |
20 | 22 | import os |
21 | 23 | import sys | ... | ... |
src/clustering.py
1 | -#!/usr/bin/python | |
2 | - | |
3 | -# Clustering - a python script to perform clustering of popcon data. | |
4 | -# | |
5 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
6 | -# | |
7 | -# This program is free software: you can redistribute it and/or modify | |
8 | -# it under the terms of the GNU General Public License as published by | |
9 | -# the Free Software Foundation, either version 3 of the License, or | |
10 | -# (at your option) any later version. | |
11 | -# | |
12 | -# This program is distributed in the hope that it will be useful, | |
13 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | -# GNU General Public License for more details. | |
16 | -# | |
17 | -# You should have received a copy of the GNU General Public License | |
18 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | - | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + Clustering - A python script to perform clustering of popcon data. | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
12 | + | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
20 | 21 | import os |
21 | 22 | import sys |
22 | 23 | import logging | ... | ... |
src/config.py
1 | -#!/usr/bin/python | |
2 | - | |
3 | -# config - python module for configuration options. | |
4 | -# | |
5 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
6 | -# | |
7 | -# This program is free software: you can redistribute it and/or modify | |
8 | -# it under the terms of the GNU General Public License as published by | |
9 | -# the Free Software Foundation, either version 3 of the License, or | |
10 | -# (at your option) any later version. | |
11 | -# | |
12 | -# This program is distributed in the hope that it will be useful, | |
13 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | -# GNU General Public License for more details. | |
16 | -# | |
17 | -# You should have received a copy of the GNU General Public License | |
18 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + config - python module for configuration options. | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
12 | + | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
19 | 21 | |
20 | 22 | import getopt |
21 | 23 | import sys | ... | ... |
src/cross_validation.py
1 | -#!/usr/bin/python | |
2 | - | |
3 | -# CrossValidation - python module for classes and methods related to | |
4 | -# recommenders evaluation. | |
5 | -# | |
6 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
7 | -# | |
8 | -# This program is free software: you can redistribute it and/or modify | |
9 | -# it under the terms of the GNU General Public License as published by | |
10 | -# the Free Software Foundation, either version 3 of the License, or | |
11 | -# (at your option) any later version. | |
12 | -# | |
13 | -# This program is distributed in the hope that it will be useful, | |
14 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | -# GNU General Public License for more details. | |
17 | -# | |
18 | -# You should have received a copy of the GNU General Public License | |
19 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + CrossValidation - python module for classes and methods related to | |
4 | + recommenders evaluation. | |
5 | +""" | |
6 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
7 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
8 | +__license__ = """ | |
9 | + This program is free software: you can redistribute it and/or modify | |
10 | + it under the terms of the GNU General Public License as published by | |
11 | + the Free Software Foundation, either version 3 of the License, or | |
12 | + (at your option) any later version. | |
13 | + | |
14 | + This program is distributed in the hope that it will be useful, | |
15 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | + GNU General Public License for more details. | |
18 | + | |
19 | + You should have received a copy of the GNU General Public License | |
20 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
21 | +""" | |
20 | 22 | |
21 | 23 | import os |
22 | 24 | import sys | ... | ... |
src/data.py
1 | -#!/usr/bin/python | |
2 | - | |
3 | -# data - python module for data sources classes and methods. | |
4 | -# | |
5 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
6 | -# | |
7 | -# This program is free software: you can redistribute it and/or modify | |
8 | -# it under the terms of the GNU General Public License as published by | |
9 | -# the Free Software Foundation, either version 3 of the License, or | |
10 | -# (at your option) any later version. | |
11 | -# | |
12 | -# This program is distributed in the hope that it will be useful, | |
13 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | -# GNU General Public License for more details. | |
16 | -# | |
17 | -# You should have received a copy of the GNU General Public License | |
18 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + data - python module for data sources classes and methods. | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
12 | + | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
19 | 21 | |
20 | 22 | import os |
21 | 23 | import sys | ... | ... |
src/demo_rec.py
... | ... | @@ -1,118 +0,0 @@ |
1 | -#!/usr/bin/python | |
2 | - | |
3 | -# DemoRecommender - demonstration of a GNU/Linux application recommender. | |
4 | -# | |
5 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
6 | -# | |
7 | -# This program is free software: you can redistribute it and/or modify | |
8 | -# it under the terms of the GNU General Public License as published by | |
9 | -# the Free Software Foundation, either version 3 of the License, or | |
10 | -# (at your option) any later version. | |
11 | -# | |
12 | -# This program is distributed in the hope that it will be useful, | |
13 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | -# GNU General Public License for more details. | |
16 | -# | |
17 | -# You should have received a copy of the GNU General Public License | |
18 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | - | |
20 | -import os | |
21 | -import sys | |
22 | -import commands | |
23 | -import re | |
24 | - | |
25 | -import xapian | |
26 | -from debian import debtags | |
27 | -from strategy import PkgMatchDecider | |
28 | - | |
29 | -DB_PATH = "/var/lib/debtags/package-tags" | |
30 | -INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index") | |
31 | - | |
32 | -def load_debtags_db(path): | |
33 | - """ Load debtags database. """ | |
34 | - debtags_db = debtags.DB() | |
35 | - tag_filter = re.compile(r"^special::.+$|^.+::TODO$") | |
36 | - try: | |
37 | - debtags_db.read(open(path, "r"), lambda x: not tag_filter.match(x)) | |
38 | - except IOError: | |
39 | - print >> sys.stderr, ("IOError: could not open debtags file \'%s\'" % | |
40 | - path) | |
41 | - exit(1) | |
42 | - return debtags_db | |
43 | - | |
44 | -def get_system_pkgs(): | |
45 | - """ Return set of system packages. """ | |
46 | - dpkg_output = commands.getoutput('/usr/bin/dpkg --get-selections') | |
47 | - return dpkg_output.replace('install','\t').split() | |
48 | - | |
49 | -def get_most_relevant_tags(debtags_db,pkgs_list): | |
50 | - """ Return most relevant tags considering a list of packages. """ | |
51 | - relevant_db = debtags_db.choose_packages(pkgs_list) | |
52 | - relevance_index = debtags.relevance_index_function(debtags_db,relevant_db) | |
53 | - sorted_relevant_tags = sorted(relevant_db.iter_tags(), | |
54 | - lambda a, b: cmp(relevance_index(a), | |
55 | - relevance_index(b))) | |
56 | - return normalize_tags(' '.join(sorted_relevant_tags[-50:])) | |
57 | - | |
58 | -def normalize_tags(string): | |
59 | - """ Normalize tag string so that it can be indexed and retrieved. """ | |
60 | - return string.replace(':','_').replace('-','\'') | |
61 | - | |
62 | -def create_debtags_index(debtags_db,index_path): | |
63 | - """ Create a xapian index for debtags info based on file 'debtags_db' and | |
64 | - place it at 'index_path'. | |
65 | - """ | |
66 | - if not os.path.exists(index_path): | |
67 | - os.makedirs(index_path) | |
68 | - print "Creating new debtags xapian index at \'%s\'" % index_path | |
69 | - debtags_index = xapian.WritableDatabase(index_path, | |
70 | - xapian.DB_CREATE_OR_OVERWRITE) | |
71 | - for pkg,tags in debtags_db.iter_packages_tags(): | |
72 | - doc = xapian.Document() | |
73 | - doc.set_data(pkg) | |
74 | - for tag in tags: | |
75 | - doc.add_term(normalize_tags(tag)) | |
76 | - print "indexing ",debtags_index.add_document(doc) | |
77 | - return debtags_index | |
78 | - | |
79 | -def load_debtags_index(debtags_db,reindex): | |
80 | - """ Load an existing or new debtags index, based on boolean reindex. """ | |
81 | - if not reindex: | |
82 | - try: | |
83 | - print ("Opening existing debtags xapian index at \'%s\'" % | |
84 | - INDEX_PATH) | |
85 | - debtags_index = xapian.Database(INDEX_PATH) | |
86 | - except DatabaseError: | |
87 | - print "Could not open debtags xapian index" | |
88 | - reindex = 1 | |
89 | - if reindex: | |
90 | - debtags_index = create_debtags_index(debtags_db,INDEX_PATH) | |
91 | - return debtags_index | |
92 | - | |
93 | - | |
94 | -if __name__ == '__main__': | |
95 | - | |
96 | - reindex = 0 | |
97 | - if len(sys.argv) == 2: | |
98 | - DB_PATH = sys.argv[1] | |
99 | - reindex = 1 | |
100 | - print "reindex true" | |
101 | - elif len(sys.argv) > 2: | |
102 | - print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % | |
103 | - sys.argv[0]) | |
104 | - sys.exit(1) | |
105 | - | |
106 | - debtags_db = load_debtags_db(DB_PATH) | |
107 | - installed_pkgs = get_system_pkgs() | |
108 | - best_tags = get_most_relevant_tags(debtags_db,installed_pkgs) | |
109 | - | |
110 | - debtags_index = load_debtags_index(debtags_db,reindex) | |
111 | - qp = xapian.QueryParser() | |
112 | - query = qp.parse_query(best_tags) | |
113 | - enquire = xapian.Enquire(debtags_index) | |
114 | - enquire.set_query(query) | |
115 | - | |
116 | - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(installed_pkgs)) | |
117 | - for m in mset: | |
118 | - print "%2d: %s" % (m.rank, m.document.get_data()) |
src/dissimilarity.py
1 | -#!/usr/bin/python | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + similarity - python module for classes and methods related to similarity | |
4 | + measuring between two sets of data. | |
5 | +""" | |
6 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
7 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
8 | +__license__ = """ | |
9 | + This program is free software: you can redistribute it and/or modify | |
10 | + it under the terms of the GNU General Public License as published by | |
11 | + the Free Software Foundation, either version 3 of the License, or | |
12 | + (at your option) any later version. | |
2 | 13 | |
3 | -# similarity - python module for classes and methods related to similarity | |
4 | -# measuring between two sets of data. | |
5 | -# | |
6 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
7 | -# | |
8 | -# This program is free software: you can redistribute it and/or modify | |
9 | -# it under the terms of the GNU General Public License as published by | |
10 | -# the Free Software Foundation, either version 3 of the License, or | |
11 | -# (at your option) any later version. | |
12 | -# | |
13 | -# This program is distributed in the hope that it will be useful, | |
14 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | -# GNU General Public License for more details. | |
17 | -# | |
18 | -# You should have received a copy of the GNU General Public License | |
19 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
14 | + This program is distributed in the hope that it will be useful, | |
15 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | + GNU General Public License for more details. | |
18 | + | |
19 | + You should have received a copy of the GNU General Public License | |
20 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
21 | +""" | |
20 | 22 | |
21 | 23 | import math |
22 | 24 | import stats | ... | ... |
src/error.py
1 | -#!/usr/bin/python | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + error.py - python module for error definition. | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
2 | 12 | |
3 | -# error.py - python module for error definition. | |
4 | -# | |
5 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
6 | -# | |
7 | -# This program is free software: you can redistribute it and/or modify | |
8 | -# it under the terms of the GNU General Public License as published by | |
9 | -# the Free Software Foundation, either version 3 of the License, or | |
10 | -# (at your option) any later version. | |
11 | -# | |
12 | -# This program is distributed in the hope that it will be useful, | |
13 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | -# GNU General Public License for more details. | |
16 | -# | |
17 | -# You should have received a copy of the GNU General Public License | |
18 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
19 | 21 | |
20 | 22 | class Error(Exception): |
21 | 23 | """ | ... | ... |
src/evaluation.py
1 | -#!/usr/bin/python | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + evaluation - python module for classes and methods related to recommenders | |
4 | + evaluation. | |
5 | +""" | |
6 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
7 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
8 | +__license__ = """ | |
9 | + This program is free software: you can redistribute it and/or modify | |
10 | + it under the terms of the GNU General Public License as published by | |
11 | + the Free Software Foundation, either version 3 of the License, or | |
12 | + (at your option) any later version. | |
2 | 13 | |
3 | -# evaluation - python module for classes and methods related to recommenders | |
4 | -# evaluation. | |
5 | -# | |
6 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
7 | -# | |
8 | -# This program is free software: you can redistribute it and/or modify | |
9 | -# it under the terms of the GNU General Public License as published by | |
10 | -# the Free Software Foundation, either version 3 of the License, or | |
11 | -# (at your option) any later version. | |
12 | -# | |
13 | -# This program is distributed in the hope that it will be useful, | |
14 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | -# GNU General Public License for more details. | |
17 | -# | |
18 | -# You should have received a copy of the GNU General Public License | |
19 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
14 | + This program is distributed in the hope that it will be useful, | |
15 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | + GNU General Public License for more details. | |
18 | + | |
19 | + You should have received a copy of the GNU General Public License | |
20 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
21 | +""" | |
20 | 22 | |
21 | 23 | import math |
22 | 24 | import random | ... | ... |
... | ... | @@ -0,0 +1,120 @@ |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + DemoRecommender - demonstration of a GNU/Linux application recommender. | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
12 | + | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
21 | + | |
22 | +import os | |
23 | +import sys | |
24 | +import commands | |
25 | +import re | |
26 | + | |
27 | +import xapian | |
28 | +from debian import debtags | |
29 | +from strategy import PkgMatchDecider | |
30 | + | |
31 | +DB_PATH = "/var/lib/debtags/package-tags" | |
32 | +INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index") | |
33 | + | |
34 | +def load_debtags_db(path): | |
35 | + """ Load debtags database. """ | |
36 | + debtags_db = debtags.DB() | |
37 | + tag_filter = re.compile(r"^special::.+$|^.+::TODO$") | |
38 | + try: | |
39 | + debtags_db.read(open(path, "r"), lambda x: not tag_filter.match(x)) | |
40 | + except IOError: | |
41 | + print >> sys.stderr, ("IOError: could not open debtags file \'%s\'" % | |
42 | + path) | |
43 | + exit(1) | |
44 | + return debtags_db | |
45 | + | |
46 | +def get_system_pkgs(): | |
47 | + """ Return set of system packages. """ | |
48 | + dpkg_output = commands.getoutput('/usr/bin/dpkg --get-selections') | |
49 | + return dpkg_output.replace('install','\t').split() | |
50 | + | |
51 | +def get_most_relevant_tags(debtags_db,pkgs_list): | |
52 | + """ Return most relevant tags considering a list of packages. """ | |
53 | + relevant_db = debtags_db.choose_packages(pkgs_list) | |
54 | + relevance_index = debtags.relevance_index_function(debtags_db,relevant_db) | |
55 | + sorted_relevant_tags = sorted(relevant_db.iter_tags(), | |
56 | + lambda a, b: cmp(relevance_index(a), | |
57 | + relevance_index(b))) | |
58 | + return normalize_tags(' '.join(sorted_relevant_tags[-50:])) | |
59 | + | |
60 | +def normalize_tags(string): | |
61 | + """ Normalize tag string so that it can be indexed and retrieved. """ | |
62 | + return string.replace(':','_').replace('-','\'') | |
63 | + | |
64 | +def create_debtags_index(debtags_db,index_path): | |
65 | + """ Create a xapian index for debtags info based on file 'debtags_db' and | |
66 | + place it at 'index_path'. | |
67 | + """ | |
68 | + if not os.path.exists(index_path): | |
69 | + os.makedirs(index_path) | |
70 | + print "Creating new debtags xapian index at \'%s\'" % index_path | |
71 | + debtags_index = xapian.WritableDatabase(index_path, | |
72 | + xapian.DB_CREATE_OR_OVERWRITE) | |
73 | + for pkg,tags in debtags_db.iter_packages_tags(): | |
74 | + doc = xapian.Document() | |
75 | + doc.set_data(pkg) | |
76 | + for tag in tags: | |
77 | + doc.add_term(normalize_tags(tag)) | |
78 | + print "indexing ",debtags_index.add_document(doc) | |
79 | + return debtags_index | |
80 | + | |
81 | +def load_debtags_index(debtags_db,reindex): | |
82 | + """ Load an existing or new debtags index, based on boolean reindex. """ | |
83 | + if not reindex: | |
84 | + try: | |
85 | + print ("Opening existing debtags xapian index at \'%s\'" % | |
86 | + INDEX_PATH) | |
87 | + debtags_index = xapian.Database(INDEX_PATH) | |
88 | + except DatabaseError: | |
89 | + print "Could not open debtags xapian index" | |
90 | + reindex = 1 | |
91 | + if reindex: | |
92 | + debtags_index = create_debtags_index(debtags_db,INDEX_PATH) | |
93 | + return debtags_index | |
94 | + | |
95 | + | |
96 | +if __name__ == '__main__': | |
97 | + | |
98 | + reindex = 0 | |
99 | + if len(sys.argv) == 2: | |
100 | + DB_PATH = sys.argv[1] | |
101 | + reindex = 1 | |
102 | + print "reindex true" | |
103 | + elif len(sys.argv) > 2: | |
104 | + print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % | |
105 | + sys.argv[0]) | |
106 | + sys.exit(1) | |
107 | + | |
108 | + debtags_db = load_debtags_db(DB_PATH) | |
109 | + installed_pkgs = get_system_pkgs() | |
110 | + best_tags = get_most_relevant_tags(debtags_db,installed_pkgs) | |
111 | + | |
112 | + debtags_index = load_debtags_index(debtags_db,reindex) | |
113 | + qp = xapian.QueryParser() | |
114 | + query = qp.parse_query(best_tags) | |
115 | + enquire = xapian.Enquire(debtags_index) | |
116 | + enquire.set_query(query) | |
117 | + | |
118 | + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(installed_pkgs)) | |
119 | + for m in mset: | |
120 | + print "%2d: %s" % (m.rank, m.document.get_data()) | ... | ... |
src/generate_doc.sh
src/recommender.py
1 | -#!/usr/bin/python | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + recommender - python module for classes related to recommenders. | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
2 | 12 | |
3 | -# recommender - python module for classes related to recommenders. | |
4 | -# | |
5 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
6 | -# | |
7 | -# This program is free software: you can redistribute it and/or modify | |
8 | -# it under the terms of the GNU General Public License as published by | |
9 | -# the Free Software Foundation, either version 3 of the License, or | |
10 | -# (at your option) any later version. | |
11 | -# | |
12 | -# This program is distributed in the hope that it will be useful, | |
13 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | -# GNU General Public License for more details. | |
16 | -# | |
17 | -# You should have received a copy of the GNU General Public License | |
18 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
19 | 21 | |
20 | 22 | from operator import itemgetter |
21 | 23 | from data import * | ... | ... |
src/similarity.py
... | ... | @@ -1,89 +0,0 @@ |
1 | -#!/usr/bin/python | |
2 | - | |
3 | -# similarity - python module for classes and methods related to similarity | |
4 | -# measuring between two sets of data. | |
5 | -# | |
6 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
7 | -# | |
8 | -# This program is free software: you can redistribute it and/or modify | |
9 | -# it under the terms of the GNU General Public License as published by | |
10 | -# the Free Software Foundation, either version 3 of the License, or | |
11 | -# (at your option) any later version. | |
12 | -# | |
13 | -# This program is distributed in the hope that it will be useful, | |
14 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | -# GNU General Public License for more details. | |
17 | -# | |
18 | -# You should have received a copy of the GNU General Public License | |
19 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | - | |
21 | -import math | |
22 | -import stats | |
23 | - | |
24 | -def norm(x): | |
25 | - """ | |
26 | - Return vector norm. | |
27 | - """ | |
28 | - return math.sqrt(sum([x_i**2 for x_i in x])) | |
29 | - | |
30 | -def dot_product(x,y): | |
31 | - """ | |
32 | - Return dot product of vectors 'x' and 'y'. | |
33 | - """ | |
34 | - return sum([(x[i] * y[i]) for i in range(len(x))]) | |
35 | - | |
36 | -class SimilarityMeasure: | |
37 | - """ | |
38 | - Abstraction for diferent similarity measure approaches. | |
39 | - """ | |
40 | - | |
41 | -class Distance(SimilarityMeasure): | |
42 | - """ | |
43 | - Euclidian distance measure. | |
44 | - """ | |
45 | - def __call__(self,x,y): | |
46 | - """ | |
47 | - Return euclidian distance between vectors 'x' and 'y'. | |
48 | - """ | |
49 | - sum_pow = sum([((x[i] - y[i]) ** 2) for i in range(len(x))]) | |
50 | - return math.sqrt(sum_pow) | |
51 | - | |
52 | -class Cosine(SimilarityMeasure): | |
53 | - """ | |
54 | - Cosine similarity measure. | |
55 | - """ | |
56 | - def __call__(self,x,y): | |
57 | - """ | |
58 | - Return cosine of angle between vectors 'x' and 'y'. | |
59 | - """ | |
60 | - return float(dot_product(x,y)/(norm(x)*norm(y))) | |
61 | - | |
62 | -class Pearson(SimilarityMeasure): | |
63 | - """ | |
64 | - Pearson coeficient measure. | |
65 | - """ | |
66 | - def __call__(self,x,y): | |
67 | - """ Return Pearson coeficient between vectors 'x' and 'y'. """ | |
68 | - return stats.pearsonr(x,y) # FIXME: ZeroDivisionError | |
69 | - | |
70 | -class Spearman(SimilarityMeasure): | |
71 | - """ | |
72 | - Spearman correlation measure. | |
73 | - """ | |
74 | - def __call__(self,x,y): | |
75 | - """ | |
76 | - Return Spearman correlation between vectors 'x' and 'y'. | |
77 | - """ | |
78 | - return stats.spearmanr(x,y) # FIXME: ZeroDivisionError | |
79 | - | |
80 | -class Tanimoto(SimilarityMeasure): | |
81 | - """ | |
82 | - Tanimoto coeficient measure. | |
83 | - """ | |
84 | - def __call__(self,x,y): | |
85 | - """ | |
86 | - Return Tanimoto coeficient between vectors 'x' and 'y'. | |
87 | - """ | |
88 | - z = [v for v in x if v in y] | |
89 | - return float(len(z))/(len(x)+len(y)-len(z)) |
src/singleton.py
1 | -#!/usr/bin/python | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + singleton - python class that implements singleton design pattern. | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
12 | + | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
2 | 21 | |
3 | -# singleton - python class that implements singleton design pattern. | |
4 | -# | |
5 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
6 | -# | |
7 | -# This program is free software: you can redistribute it and/or modify | |
8 | -# it under the terms of the GNU General Public License as published by | |
9 | -# the Free Software Foundation, either version 3 of the License, or | |
10 | -# (at your option) any later version. | |
11 | -# | |
12 | -# This program is distributed in the hope that it will be useful, | |
13 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | -# GNU General Public License for more details. | |
16 | -# | |
17 | -# You should have received a copy of the GNU General Public License | |
18 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | 22 | class Singleton(object): |
20 | 23 | """ |
21 | 24 | Base class for inheritance of only-one-instance classes. | ... | ... |
src/strategy.py
1 | -#!/usr/bin/python | |
2 | - | |
3 | -# strategy - python module for classes and methods related to recommendation | |
4 | -# strategies. | |
5 | -# | |
6 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
7 | -# | |
8 | -# This program is free software: you can redistribute it and/or modify | |
9 | -# it under the terms of the GNU General Public License as published by | |
10 | -# the Free Software Foundation, either version 3 of the License, or | |
11 | -# (at your option) any later version. | |
12 | -# | |
13 | -# This program is distributed in the hope that it will be useful, | |
14 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | -# GNU General Public License for more details. | |
17 | -# | |
18 | -# You should have received a copy of the GNU General Public License | |
19 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + strategy - python module for classes and methods related to recommendation | |
4 | + strategies. | |
5 | +""" | |
6 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
7 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
8 | +__license__ = """ | |
9 | + This program is free software: you can redistribute it and/or modify | |
10 | + it under the terms of the GNU General Public License as published by | |
11 | + the Free Software Foundation, either version 3 of the License, or | |
12 | + (at your option) any later version. | |
13 | + | |
14 | + This program is distributed in the hope that it will be useful, | |
15 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | + GNU General Public License for more details. | |
18 | + | |
19 | + You should have received a copy of the GNU General Public License | |
20 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
21 | +""" | |
20 | 22 | |
21 | 23 | import os, re |
22 | 24 | import xapian | ... | ... |
src/user.py
1 | -#!/usr/bin/python | |
1 | +#!/usr/bin/env python | |
2 | +""" | |
3 | + user - python module for classes and methods related to recommenders' users. | |
4 | +""" | |
5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
7 | +__license__ = """ | |
8 | + This program is free software: you can redistribute it and/or modify | |
9 | + it under the terms of the GNU General Public License as published by | |
10 | + the Free Software Foundation, either version 3 of the License, or | |
11 | + (at your option) any later version. | |
2 | 12 | |
3 | -# user - python module for classes and methods related to recommenders' users. | |
4 | -# | |
5 | -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com> | |
6 | -# | |
7 | -# This program is free software: you can redistribute it and/or modify | |
8 | -# it under the terms of the GNU General Public License as published by | |
9 | -# the Free Software Foundation, either version 3 of the License, or | |
10 | -# (at your option) any later version. | |
11 | -# | |
12 | -# This program is distributed in the hope that it will be useful, | |
13 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | -# GNU General Public License for more details. | |
16 | -# | |
17 | -# You should have received a copy of the GNU General Public License | |
18 | -# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
13 | + This program is distributed in the hope that it will be useful, | |
14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | + GNU General Public License for more details. | |
17 | + | |
18 | + You should have received a copy of the GNU General Public License | |
19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | +""" | |
19 | 21 | |
20 | 22 | import commands |
21 | 23 | import xapian | ... | ... |