Commit 4674b436dfbe5afed2ac1f40c72385a3f18405a3

Authored by Tássia Camões Araújo
1 parent 67b79c04
Exists in master and in 1 other branch add_vagrant

Content-based recommender using debtags info only.

Showing 2 changed files with 90 additions and 0 deletions   Show diff stats
README
... ... @@ -0,0 +1,6 @@
  1 +apprec.py Depends on:
  2 +
  3 +python
  4 +python-debian
  5 +
  6 +#apt-xapian-index
... ...
apprec.py 0 → 100644
... ... @@ -0,0 +1,84 @@
  1 +import xapian
  2 +from debian import debtags
  3 +import re
  4 +import sys
  5 +import os
  6 +import commands
  7 +
# Debtags database that is read unless a path is given on the command line.
DB_PATH = "/var/lib/debtags/package-tags"

# Directory of the xapian index, with "~" expanded to the user's home.
INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index")
  12 +
def normalize_tags(string):
    """Return `string` with every ':' turned into '_' and every '-' into "'".

    NOTE(review): presumably this keeps each debtags tag (e.g. "role::program")
    as a single term when the text is later fed to the xapian query parser --
    confirm against the query parser's word-character rules.
    """
    without_colons = "_".join(string.split(":"))
    return "'".join(without_colons.split("-"))
  15 +
def createDebtagsIndex(debtags_db,index_path):
    """Build a fresh xapian index from a debtags database and return it.

    The directory `index_path` is created when it does not exist yet, then a
    writable xapian database is opened there with DB_CREATE_OR_OVERWRITE.
    For every (package, tags) pair yielded by
    `debtags_db.iter_packages_tags()` one document is added: the package name
    is stored as the document data and each tag, passed through
    normalize_tags(), becomes a term of that document.

    Progress messages are printed to stdout. Returns the writable database.
    """
    index_dir_missing = not os.path.exists(index_path)
    if index_dir_missing:
        os.makedirs(index_path)

    print("Creating new debtags xapian index at \'%s\'" % index_path)
    writable_index = xapian.WritableDatabase(index_path,
                                             xapian.DB_CREATE_OR_OVERWRITE)

    for package_name, package_tags in debtags_db.iter_packages_tags():
        document = xapian.Document()
        document.set_data(package_name)
        for raw_tag in package_tags:
            term = normalize_tags(raw_tag)
            document.add_term(term)
        # The value returned by add_document() is echoed as a progress marker.
        added = writable_index.add_document(document)
        print("indexing  %s" % added)

    return writable_index
  28 +
class pkgmatchdecider(xapian.MatchDecider):
    """MatchDecider that disregards packages which are already installed.

    A document is accepted only when its data (the value returned by
    `doc.get_data()`) is not one of the names given in `installed_pkgs`.
    """

    def __init__(self, installed_pkgs):
        """Remember the installed package names.

        `installed_pkgs` may be any iterable of hashable package names.
        """
        xapian.MatchDecider.__init__(self)
        # The decider is consulted once per candidate document, so keep the
        # names in a frozenset: membership is then a hash lookup instead of a
        # linear scan over the caller's sequence.
        self.installed_pkgs = frozenset(installed_pkgs)

    def __call__(self, doc):
        """Return True when `doc` should be kept in the result set."""
        return doc.get_data() not in self.installed_pkgs
  37 +
# Handle input arguments: an optional path to a debtags database. Giving one
# also forces the xapian index to be rebuilt from that database.
REINDEX = False
if len(sys.argv) == 2:
    DB_PATH = sys.argv[1]
    REINDEX = True
    print("REINDEX true")
elif len(sys.argv) > 2:
    sys.stderr.write("Usage: %s [PATH_TO_DEBTAGS_DATABASE]\n" % sys.argv[0])
    sys.exit(1)

# Load debtags database, leaving out "special::*" and "*::TODO" tags.
debtags_db = debtags.DB()
tag_filter = re.compile(r"^special::.+$|^.+::TODO$")
try:
    # The context manager closes the file even if parsing fails.
    with open(DB_PATH, "r") as debtags_file:
        debtags_db.read(debtags_file, lambda x: not tag_filter.match(x))
except IOError:
    sys.stderr.write("IOError: could not open debtags file \'%s\'\n" % DB_PATH)
    sys.exit(1)

# Set of installed packages.
# Each line of `dpkg --get-selections` is "<package> <state>". Parse the two
# fields instead of editing the raw text, so that package names containing
# "install" stay intact and packages selected for removal ("deinstall",
# "purge") are not treated as installed. Packages on "hold" are kept.
installed_pkgs = set()
for selection in commands.getoutput('/usr/bin/dpkg --get-selections').splitlines():
    fields = selection.split()
    if len(fields) == 2 and fields[1] in ('install', 'hold'):
        # Drop an architecture qualifier such as ":amd64" if dpkg prints one.
        installed_pkgs.add(fields[0].split(':')[0])
installed_pkgs_tags = debtags_db.choose_packages(installed_pkgs)

# Most relevant tags: sort ascending by relevance and keep the last 50.
rel_index = debtags.relevance_index_function(debtags_db, installed_pkgs_tags)
relevant_tags = sorted(installed_pkgs_tags.iter_tags(), key=rel_index)
normalized_relevant_tags = normalize_tags(' '.join(relevant_tags[-50:]))

# Reuse the existing index when possible; rebuild it when it cannot be opened.
if not REINDEX:
    print("Opening existing debtags xapian index at \'%s\'" % INDEX_PATH)
    try:
        debtags_index = xapian.Database(INDEX_PATH)
    except xapian.DatabaseError:
        print("Could not open debtags xapian index")
        REINDEX = True

if REINDEX:
    debtags_index = createDebtagsIndex(debtags_db,INDEX_PATH)

# Query the index with the user's most relevant tags.
qp = xapian.QueryParser()
query = qp.parse_query(normalized_relevant_tags)
enquire = xapian.Enquire(debtags_index)
enquire.set_query(query)

# Print up to 20 matches, skipping packages that are already installed.
mset = enquire.get_mset(0, 20, None, pkgmatchdecider(installed_pkgs))
for m in mset:
    print("%2d: %s" % (m.rank, m.document.get_data()))
... ...