Commit f8016ea20ccb7fd1397c4b43919d51846b83968a

Authored by Tássia Camões Araújo
1 parent 702d4c94
Exists in master and in 1 other branch add_vagrant

Refactored demo recommender.

Showing 2 changed files with 128 additions and 103 deletions   Show diff stats
apprec.py
... ... @@ -1,103 +0,0 @@
1   -#!/usr/bin/python
2   -
3   -# AppRecomender - A GNU/Linux application recommender
4   -#
5   -# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6   -#
7   -# This program is free software: you can redistribute it and/or modify
8   -# it under the terms of the GNU General Public License as published by
9   -# the Free Software Foundation, either version 3 of the License, or
10   -# (at your option) any later version.
11   -#
12   -# This program is distributed in the hope that it will be useful,
13   -# but WITHOUT ANY WARRANTY; without even the implied warranty of
14   -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15   -# GNU General Public License for more details.
16   -#
17   -# You should have received a copy of the GNU General Public License
18   -# along with this program. If not, see <http://www.gnu.org/licenses/>.
19   -
20   -import xapian
21   -from debian import debtags
22   -import re
23   -import sys
24   -import os
25   -import commands
26   -
27   -DB_PATH = "/var/lib/debtags/package-tags"
28   -INDEX_PATH = "~/.app-recommender/debtags_index"
29   -
30   -INDEX_PATH = os.path.expanduser(INDEX_PATH)
31   -
32   -def normalize_tags(string):
33   - return string.replace(':','_').replace('-','\'')
34   -
35   -def createDebtagsIndex(debtags_db,index_path):
36   - if not os.path.exists(index_path):
37   - os.makedirs(index_path)
38   - print "Creating new debtags xapian index at \'%s\'" % index_path
39   - debtags_index = xapian.WritableDatabase(index_path, xapian.DB_CREATE_OR_OVERWRITE)
40   - for pkg,tags in debtags_db.iter_packages_tags():
41   - doc = xapian.Document()
42   - doc.set_data(pkg)
43   - for tag in tags:
44   - doc.add_term(normalize_tags(tag))
45   - print "indexing ",debtags_index.add_document(doc)
46   - return debtags_index
47   -
48   -# MatchDecider to disconsider installed packages
49   -class pkgmatchdecider(xapian.MatchDecider):
50   - def __init__(self, installed_pkgs):
51   - xapian.MatchDecider.__init__(self)
52   - self.installed_pkgs = installed_pkgs
53   -
54   - def __call__(self, doc):
55   - return doc.get_data() not in self.installed_pkgs
56   -
57   -# Handle input arguments
58   -REINDEX = 0
59   -if len(sys.argv) == 2:
60   - DB_PATH = sys.argv[1]
61   - REINDEX = 1
62   - print "REINDEX true"
63   -elif len(sys.argv) > 2:
64   - print >> sys.stderr, "Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % sys.argv[0]
65   - sys.exit(1)
66   -
67   -# Load debtags database
68   -debtags_db = debtags.DB()
69   -tag_filter = re.compile(r"^special::.+$|^.+::TODO$")
70   -try:
71   - debtags_db.read(open(DB_PATH, "r"), lambda x: not tag_filter.match(x))
72   -except IOError:
73   - print >> sys.stderr, "IOError: could not open debtags file \'%s\'" % DB_PATH
74   - exit(1)
75   -
76   -# Set of installed packages
77   -installed_pkgs = commands.getoutput('/usr/bin/dpkg --get-selections').replace('install','\t').split()
78   -installed_pkgs_tags = debtags_db.choose_packages(installed_pkgs)
79   -
80   -# Most relevant tags
81   -rel_index = debtags.relevance_index_function(debtags_db, installed_pkgs_tags)
82   -relevant_tags = sorted(installed_pkgs_tags.iter_tags(), lambda a, b: cmp(rel_index(a), rel_index(b)))
83   -normalized_relevant_tags = normalize_tags(' '.join(relevant_tags[-50:]))
84   -
85   -if not REINDEX:
86   - try:
87   - print "Opening existing debtags xapian index at \'%s\'" % INDEX_PATH
88   - debtags_index = xapian.Database(INDEX_PATH)
89   - except DatabaseError:
90   - print "Could not open debtags xapian index"
91   - REINDEX = 1
92   -
93   -if REINDEX:
94   - debtags_index = createDebtagsIndex(debtags_db,INDEX_PATH)
95   -
96   -qp = xapian.QueryParser()
97   -query = qp.parse_query(normalized_relevant_tags)
98   -enquire = xapian.Enquire(debtags_index)
99   -enquire.set_query(query)
100   -
101   -mset = enquire.get_mset(0, 20, None, pkgmatchdecider(installed_pkgs))
102   -for m in mset:
103   - print "%2d: %s" % (m.rank, m.document.get_data())
demo_rec.py 0 → 100644
... ... @@ -0,0 +1,128 @@
  1 +#!/usr/bin/python
  2 +
  3 +# AppRecommender - A GNU/Linux application recommender
  4 +#
  5 +# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
  6 +#
  7 +# This program is free software: you can redistribute it and/or modify
  8 +# it under the terms of the GNU General Public License as published by
  9 +# the Free Software Foundation, either version 3 of the License, or
  10 +# (at your option) any later version.
  11 +#
  12 +# This program is distributed in the hope that it will be useful,
  13 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 +# GNU General Public License for more details.
  16 +#
  17 +# You should have received a copy of the GNU General Public License
  18 +# along with this program. If not, see <http://www.gnu.org/licenses/>.
  19 +
  20 +import os
  21 +import sys
  22 +import commands
  23 +import re
  24 +
  25 +import xapian
  26 +from debian import debtags
  27 +
# Debtags database read when no path is given on the command line.
DB_PATH = "/var/lib/debtags/package-tags"

# Location of the xapian index, kept under the user's home directory.
_INDEX_LOCATION = "~/.app-recommender/debtags_index"
INDEX_PATH = os.path.expanduser(_INDEX_LOCATION)
  30 +
def load_debtags_db(path):
    """ Load the debtags database stored in the file 'path'.

    A filter rejecting tags of the form 'special::*' and '*::TODO' is
    handed to debtags.DB.read(). If the file cannot be opened or read, a
    message is written to stderr and the program exits with status 1.
    Return the populated debtags.DB object.
    """
    tag_filter = re.compile(r"^special::.+$|^.+::TODO$")
    try:
        # The context manager closes the file even when parsing fails;
        # the previous bare open() left the handle to the garbage collector.
        with open(path, "r") as tags_file:
            debtags_db = debtags.DB()
            debtags_db.read(tags_file, lambda tag: not tag_filter.match(tag))
    except IOError:
        sys.stderr.write(("IOError: could not open debtags file \'%s\'" %
                          path) + "\n")
        # sys.exit() instead of the exit() builtin: the latter is injected
        # by the site module for interactive use and is not always present.
        sys.exit(1)
    return debtags_db
  42 +
def get_system_pkgs(dpkg_output=None):
    """ Return the list of packages installed on the system.

    'dpkg_output' is text in the format printed by
    'dpkg --get-selections' (one "<package> <state>" pair per line). When
    it is omitted, the command is run to obtain it. Only packages whose
    selection state is 'install' or 'hold' are returned, in the order
    they appear; lines that do not have exactly two fields are ignored.
    """
    if dpkg_output is None:
        dpkg_output = commands.getoutput('/usr/bin/dpkg --get-selections')
    installed = []
    for line in dpkg_output.splitlines():
        fields = line.split()
        # Split each line into fields instead of blanking the word
        # 'install' in the whole output: that mangled package names such
        # as 'installation-report' and leaked state words into the result.
        if len(fields) == 2 and fields[1] in ('install', 'hold'):
            installed.append(fields[0])
    return installed
  47 +
def get_most_relevant_tags(debtags_db,pkgs_list):
    """ Return most relevant tags considering a list of packages.

    The tags of the packages in 'pkgs_list' are ordered by the function
    built with debtags.relevance_index_function(); the (at most) 50 tags
    with the highest values are joined with spaces and returned as one
    string, already passed through normalize_tags().
    """
    relevant_db = debtags_db.choose_packages(pkgs_list)
    relevance_index = debtags.relevance_index_function(debtags_db,relevant_db)
    # A key function yields the same ascending order as the former
    # cmp(relevance_index(a), relevance_index(b)) comparator, but computes
    # the relevance once per tag instead of on every comparison.
    sorted_relevant_tags = sorted(relevant_db.iter_tags(),
                                  key=relevance_index)
    return normalize_tags(' '.join(sorted_relevant_tags[-50:]))
  56 +
def normalize_tags(string):
    """ Rewrite a tag string so that it can be indexed and retrieved.

    Every ':' is turned into '_' and every '-' into an apostrophe; all
    other characters are kept unchanged.
    """
    substitutions = {':': '_', '-': '\''}
    return ''.join(substitutions.get(char, char) for char in string)
  60 +
def create_debtags_index(debtags_db,index_path):
    """ Build a xapian index of the debtags data in 'debtags_db' and
        place it at 'index_path'.

    The directory is created when missing and the database is opened with
    xapian.DB_CREATE_OR_OVERWRITE. One document is added per package: its
    data is the package name and its terms are the normalized tags.
    Return the writable xapian database.
    """
    index_dir_missing = not os.path.exists(index_path)
    if index_dir_missing:
        os.makedirs(index_path)
    print("Creating new debtags xapian index at \'%s\'" % index_path)
    index = xapian.WritableDatabase(index_path,
                                    xapian.DB_CREATE_OR_OVERWRITE)
    for pkg_name, pkg_tags in debtags_db.iter_packages_tags():
        pkg_doc = xapian.Document()
        pkg_doc.set_data(pkg_name)
        for term in (normalize_tags(tag) for tag in pkg_tags):
            pkg_doc.add_term(term)
        doc_id = index.add_document(pkg_doc)
        # Prints the label, a separating space and the new document id.
        print("%s %s" % ("indexing ", doc_id))
    return index
  77 +
def load_debtags_index(debtags_db,reindex):
    """ Load an existing or new debtags index, based on boolean reindex.

    When 'reindex' is false the index at INDEX_PATH is opened and
    returned. When 'reindex' is true, or when opening fails with a xapian
    database error, a new index is built from 'debtags_db' at INDEX_PATH
    and returned instead.
    """
    if not reindex:
        try:
            print("Opening existing debtags xapian index at \'%s\'" %
                  INDEX_PATH)
            return xapian.Database(INDEX_PATH)
        except xapian.DatabaseError:
            # The exception class must be qualified with the module name:
            # a bare 'DatabaseError' is undefined in this file, so a
            # missing index raised NameError instead of being rebuilt.
            print("Could not open debtags xapian index")
    return create_debtags_index(debtags_db,INDEX_PATH)
  91 +
  92 +
class PkgMatchDecider(xapian.MatchDecider):
    """ Extends xapian.MatchDecider to disregard installed packages.

    The names in 'installed_pkgs' are copied into a frozenset when the
    decider is created, so later changes to the original collection are
    not seen by the decider.
    """

    def __init__(self, installed_pkgs):
        xapian.MatchDecider.__init__(self)
        self.installed_pkgs = installed_pkgs
        # The decider is called once per candidate document; a hashed
        # lookup avoids scanning the whole package list each time.
        self._installed_lookup = frozenset(installed_pkgs)

    def __call__(self, doc):
        """ Return True when the data of 'doc' is not an installed package. """
        return doc.get_data() not in self._installed_lookup
  102 +
  103 +
if __name__ == '__main__':

    # At most one optional argument is accepted: the debtags database path.
    cli_args = sys.argv[1:]
    if len(cli_args) > 1:
        sys.stderr.write(("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" %
                          sys.argv[0]) + "\n")
        sys.exit(1)

    # Passing a database path also forces the index to be rebuilt.
    reindex = 0
    if len(cli_args) == 1:
        DB_PATH = cli_args[0]
        reindex = 1
        print("reindex true")

    debtags_db = load_debtags_db(DB_PATH)
    installed_pkgs = get_system_pkgs()
    best_tags = get_most_relevant_tags(debtags_db,installed_pkgs)

    # Query the index with the most relevant tags of the installed packages.
    debtags_index = load_debtags_index(debtags_db,reindex)
    qp = xapian.QueryParser()
    query = qp.parse_query(best_tags)
    enquire = xapian.Enquire(debtags_index)
    enquire.set_query(query)

    # The decider drops packages that are already installed.
    decider = PkgMatchDecider(installed_pkgs)
    mset = enquire.get_mset(0, 20, None, decider)
    for match in mset:
        print("%2d: %s" % (match.rank, match.document.get_data()))
... ...