Commit f8016ea20ccb7fd1397c4b43919d51846b83968a

Authored by Tássia Camões Araújo
1 parent 702d4c94
Exists in master and in 1 other branch add_vagrant

Refactored demo recommender.

Showing 2 changed files with 128 additions and 103 deletions   Show diff stats
apprec.py
@@ -1,103 +0,0 @@ @@ -1,103 +0,0 @@
1 -#!/usr/bin/python  
2 -  
3 -# AppRecomender - A GNU/Linux application recommender  
4 -#  
5 -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>  
6 -#  
7 -# This program is free software: you can redistribute it and/or modify  
8 -# it under the terms of the GNU General Public License as published by  
9 -# the Free Software Foundation, either version 3 of the License, or  
10 -# (at your option) any later version.  
11 -#  
12 -# This program is distributed in the hope that it will be useful,  
13 -# but WITHOUT ANY WARRANTY; without even the implied warranty of  
14 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
15 -# GNU General Public License for more details.  
16 -#  
17 -# You should have received a copy of the GNU General Public License  
18 -# along with this program. If not, see <http://www.gnu.org/licenses/>.  
19 -  
20 -import xapian  
21 -from debian import debtags  
22 -import re  
23 -import sys  
24 -import os  
25 -import commands  
26 -  
27 -DB_PATH = "/var/lib/debtags/package-tags"  
28 -INDEX_PATH = "~/.app-recommender/debtags_index"  
29 -  
30 -INDEX_PATH = os.path.expanduser(INDEX_PATH)  
31 -  
32 -def normalize_tags(string):  
33 - return string.replace(':','_').replace('-','\'')  
34 -  
35 -def createDebtagsIndex(debtags_db,index_path):  
36 - if not os.path.exists(index_path):  
37 - os.makedirs(index_path)  
38 - print "Creating new debtags xapian index at \'%s\'" % index_path  
39 - debtags_index = xapian.WritableDatabase(index_path, xapian.DB_CREATE_OR_OVERWRITE)  
40 - for pkg,tags in debtags_db.iter_packages_tags():  
41 - doc = xapian.Document()  
42 - doc.set_data(pkg)  
43 - for tag in tags:  
44 - doc.add_term(normalize_tags(tag))  
45 - print "indexing ",debtags_index.add_document(doc)  
46 - return debtags_index  
47 -  
48 -# MatchDecider to disconsider installed packages  
49 -class pkgmatchdecider(xapian.MatchDecider):  
50 - def __init__(self, installed_pkgs):  
51 - xapian.MatchDecider.__init__(self)  
52 - self.installed_pkgs = installed_pkgs  
53 -  
54 - def __call__(self, doc):  
55 - return doc.get_data() not in self.installed_pkgs  
56 -  
57 -# Handle input arguments  
58 -REINDEX = 0  
59 -if len(sys.argv) == 2:  
60 - DB_PATH = sys.argv[1]  
61 - REINDEX = 1  
62 - print "REINDEX true"  
63 -elif len(sys.argv) > 2:  
64 - print >> sys.stderr, "Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % sys.argv[0]  
65 - sys.exit(1)  
66 -  
67 -# Load debtags database  
68 -debtags_db = debtags.DB()  
69 -tag_filter = re.compile(r"^special::.+$|^.+::TODO$")  
70 -try:  
71 - debtags_db.read(open(DB_PATH, "r"), lambda x: not tag_filter.match(x))  
72 -except IOError:  
73 - print >> sys.stderr, "IOError: could not open debtags file \'%s\'" % DB_PATH  
74 - exit(1)  
75 -  
76 -# Set of installed packages  
77 -installed_pkgs = commands.getoutput('/usr/bin/dpkg --get-selections').replace('install','\t').split()  
78 -installed_pkgs_tags = debtags_db.choose_packages(installed_pkgs)  
79 -  
80 -# Most relevant tags  
81 -rel_index = debtags.relevance_index_function(debtags_db, installed_pkgs_tags)  
82 -relevant_tags = sorted(installed_pkgs_tags.iter_tags(), lambda a, b: cmp(rel_index(a), rel_index(b)))  
83 -normalized_relevant_tags = normalize_tags(' '.join(relevant_tags[-50:]))  
84 -  
85 -if not REINDEX:  
86 - try:  
87 - print "Opening existing debtags xapian index at \'%s\'" % INDEX_PATH  
88 - debtags_index = xapian.Database(INDEX_PATH)  
89 - except DatabaseError:  
90 - print "Could not open debtags xapian index"  
91 - REINDEX = 1  
92 -  
93 -if REINDEX:  
94 - debtags_index = createDebtagsIndex(debtags_db,INDEX_PATH)  
95 -  
96 -qp = xapian.QueryParser()  
97 -query = qp.parse_query(normalized_relevant_tags)  
98 -enquire = xapian.Enquire(debtags_index)  
99 -enquire.set_query(query)  
100 -  
101 -mset = enquire.get_mset(0, 20, None, pkgmatchdecider(installed_pkgs))  
102 -for m in mset:  
103 - print "%2d: %s" % (m.rank, m.document.get_data())  
demo_rec.py 0 → 100644
@@ -0,0 +1,128 @@ @@ -0,0 +1,128 @@
  1 +#!/usr/bin/python
  2 +
  3 +# AppRecommender - A GNU/Linux application recommender
  4 +#
  5 +# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
  6 +#
  7 +# This program is free software: you can redistribute it and/or modify
  8 +# it under the terms of the GNU General Public License as published by
  9 +# the Free Software Foundation, either version 3 of the License, or
  10 +# (at your option) any later version.
  11 +#
  12 +# This program is distributed in the hope that it will be useful,
  13 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 +# GNU General Public License for more details.
  16 +#
  17 +# You should have received a copy of the GNU General Public License
  18 +# along with this program. If not, see <http://www.gnu.org/licenses/>.
  19 +
  20 +import os
  21 +import sys
  22 +import commands
  23 +import re
  24 +
  25 +import xapian
  26 +from debian import debtags
  27 +
  28 +DB_PATH = "/var/lib/debtags/package-tags"
  29 +INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index")
  30 +
def load_debtags_db(path):
    """ Load the debtags database stored in the file at 'path'.

    A filter is handed to the debtags reader that rejects tags of the form
    'special::<something>' and '<something>::TODO'.

    Returns the populated debtags.DB object. If the file cannot be opened or
    read (IOError), a message is written to stderr and the program exits
    with status 1.
    """
    tag_filter = re.compile(r"^special::.+$|^.+::TODO$")
    try:
        # The context manager closes the file even when parsing fails; the
        # handle used to stay open until garbage collection.
        with open(path, "r") as tags_file:
            debtags_db = debtags.DB()
            debtags_db.read(tags_file, lambda tag: not tag_filter.match(tag))
    except IOError:
        sys.stderr.write("IOError: could not open debtags file '%s'\n" % path)
        # sys.exit instead of the bare exit(): the latter is only injected by
        # the 'site' module and is absent when python runs with -S.
        sys.exit(1)
    return debtags_db
  42 +
def get_system_pkgs(dpkg_output=None):
    """ Return the list of packages selected on this system.

    'dpkg_output' may supply the text printed by 'dpkg --get-selections'
    (one "<package> <state>" pair per line). When it is None the command is
    executed to obtain that text.

    Only lines whose state is 'install' or 'hold' contribute a package name;
    the names are returned in the order they were listed.
    """
    if dpkg_output is None:
        # Imported here because the module header does not import it.
        import subprocess
        try:
            dpkg = subprocess.Popen(['/usr/bin/dpkg', '--get-selections'],
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
            dpkg_output = dpkg.communicate()[0]
        except OSError:
            # Best effort: without dpkg there is simply nothing to report.
            sys.stderr.write("Could not run /usr/bin/dpkg\n")
            dpkg_output = ""

    # Parse column by column. Blindly replacing the substring 'install' would
    # also mangle package names such as 'debian-installer' and leave stray
    # tokens behind for the 'deinstall', 'hold' and 'purge' states.
    # NOTE(review): held packages are assumed to be present on the system.
    wanted_states = ('install', 'hold')
    pkgs = []
    for line in dpkg_output.splitlines():
        fields = line.split()
        if len(fields) == 2 and fields[1] in wanted_states:
            pkgs.append(fields[0])
    return pkgs
  47 +
def get_most_relevant_tags(debtags_db, pkgs_list, max_tags=50):
    """ Return the most relevant tags considering a list of packages.

    'debtags_db' is the full debtags database and 'pkgs_list' the packages
    used to select a sub-collection of it. The tags of that sub-collection
    are ranked with the function returned by
    debtags.relevance_index_function and the 'max_tags' highest ranked ones
    are kept (none when 'max_tags' is not positive).

    The result is a single string: the kept tags in ascending rank order,
    joined by spaces and passed through normalize_tags.
    """
    relevant_db = debtags_db.choose_packages(pkgs_list)
    relevance_index = debtags.relevance_index_function(debtags_db,
                                                       relevant_db)
    # key= evaluates the index once per tag and, unlike a positional cmp
    # function, is accepted by both Python 2 and Python 3.
    sorted_relevant_tags = sorted(relevant_db.iter_tags(),
                                  key=relevance_index)
    # A slice of [-0:] would return every tag, so zero is handled explicitly.
    if max_tags > 0:
        best_tags = sorted_relevant_tags[-max_tags:]
    else:
        best_tags = []
    return normalize_tags(' '.join(best_tags))
  56 +
def normalize_tags(string):
    """ Rewrite a tag string into the form used for indexing and querying.

    Every ':' becomes '_' and every '-' becomes an apostrophe; all other
    characters are returned untouched.
    """
    substitutions = ((':', '_'), ('-', '\''))
    normalized = string
    for old, new in substitutions:
        normalized = normalized.replace(old, new)
    return normalized
  60 +
def create_debtags_index(debtags_db, index_path):
    """ Create a xapian index for the debtags info held in 'debtags_db' and
    place it at 'index_path'.

    The directory is created when missing and any index already stored there
    is overwritten. One document is added per package: its data is the
    package name and its terms are the package's normalized tags.

    Returns the xapian.WritableDatabase that was filled.
    """
    # Imported here because the module header does not import it.
    import errno
    # Create first and tolerate "already exists": checking os.path.exists
    # beforehand leaves a window in which the directory can appear.
    try:
        os.makedirs(index_path)
    except OSError as error:
        if error.errno != errno.EEXIST:
            raise
    print("Creating new debtags xapian index at '%s'" % index_path)
    debtags_index = xapian.WritableDatabase(index_path,
                                            xapian.DB_CREATE_OR_OVERWRITE)
    for pkg, tags in debtags_db.iter_packages_tags():
        doc = xapian.Document()
        doc.set_data(pkg)
        for tag in tags:
            doc.add_term(normalize_tags(tag))
        # add_document returns the id assigned to the new document.
        print("indexing  %s" % debtags_index.add_document(doc))
    return debtags_index
  77 +
def load_debtags_index(debtags_db, reindex):
    """ Load an existing or new debtags index, based on boolean 'reindex'.

    When 'reindex' is false the index at INDEX_PATH is opened; if xapian
    cannot open it, or when 'reindex' is true, a fresh index is built from
    'debtags_db' at INDEX_PATH with create_debtags_index.

    Returns the opened or newly created xapian database.
    """
    if not reindex:
        print("Opening existing debtags xapian index at '%s'" % INDEX_PATH)
        try:
            return xapian.Database(INDEX_PATH)
        # The exception class lives in the xapian module; a bare
        # 'DatabaseError' is undefined here and raised NameError instead of
        # falling back to reindexing.
        except xapian.DatabaseError:
            print("Could not open debtags xapian index")
    return create_debtags_index(debtags_db, INDEX_PATH)
  91 +
  92 +
class PkgMatchDecider(xapian.MatchDecider):
    """ Extends xapian.MatchDecider to disregard installed packages. """

    def __init__(self, installed_pkgs):
        """ Remember the names of the packages that must not be matched.

        'installed_pkgs' is any iterable of package names.
        """
        xapian.MatchDecider.__init__(self)
        # __call__ does one membership test per candidate document, so the
        # names are kept in a hash-based set instead of the caller's list.
        self.installed_pkgs = frozenset(installed_pkgs)

    def __call__(self, doc):
        """ Return True when the package stored as the data of 'doc' is not
        among the installed packages.
        """
        return doc.get_data() not in self.installed_pkgs
  102 +
  103 +
if __name__ == '__main__':

    # At most one optional argument is accepted: the path of the debtags
    # database. Passing it also forces the xapian index to be rebuilt.
    extra_args = sys.argv[1:]
    if len(extra_args) > 1:
        sys.stderr.write("Usage: %s [PATH_TO_DEBTAGS_DATABASE]\n" %
                         sys.argv[0])
        sys.exit(1)

    reindex = 0
    if len(extra_args) == 1:
        DB_PATH = extra_args[0]
        reindex = 1
        print("reindex true")

    # Profile the system: the tags that best describe what is installed.
    debtags_db = load_debtags_db(DB_PATH)
    installed_pkgs = get_system_pkgs()
    best_tags = get_most_relevant_tags(debtags_db, installed_pkgs)

    # Query the index with those tags.
    debtags_index = load_debtags_index(debtags_db, reindex)
    parser = xapian.QueryParser()
    enquire = xapian.Enquire(debtags_index)
    enquire.set_query(parser.parse_query(best_tags))

    # Print the first 20 matches, skipping packages already installed.
    decider = PkgMatchDecider(installed_pkgs)
    matches = enquire.get_mset(0, 20, None, decider)
    for match in matches:
        print("%2d: %s" % (match.rank, match.document.get_data()))