diff --git a/README b/README index e69de29..3d8f20a 100644 --- a/README +++ b/README @@ -0,0 +1,6 @@ +apprec.py Depends on: + +python +debian-python + +#apt-xapian-index diff --git a/apprec.py b/apprec.py new file mode 100644 index 0000000..0f90467 --- /dev/null +++ b/apprec.py @@ -0,0 +1,84 @@ +import xapian +from debian import debtags +import re +import sys +import os +import commands + +DB_PATH = "/var/lib/debtags/package-tags" +INDEX_PATH = "~/.app-recommender/debtags_index" + +INDEX_PATH = os.path.expanduser(INDEX_PATH) + +def normalize_tags(string): + return string.replace(':','_').replace('-','\'') + +def createDebtagsIndex(debtags_db,index_path): + if not os.path.exists(index_path): + os.makedirs(index_path) + print "Creating new debtags xapian index at \'%s\'" % index_path + debtags_index = xapian.WritableDatabase(index_path, xapian.DB_CREATE_OR_OVERWRITE) + for pkg,tags in debtags_db.iter_packages_tags(): + doc = xapian.Document() + doc.set_data(pkg) + for tag in tags: + doc.add_term(normalize_tags(tag)) + print "indexing ",debtags_index.add_document(doc) + return debtags_index + +# MatchDecider to disconsider installed packages +class pkgmatchdecider(xapian.MatchDecider): + def __init__(self, installed_pkgs): + xapian.MatchDecider.__init__(self) + self.installed_pkgs = installed_pkgs + + def __call__(self, doc): + return doc.get_data() not in self.installed_pkgs + +# Handle input arguments +REINDEX = 0 +if len(sys.argv) == 2: + DB_PATH = sys.argv[1] + REINDEX = 1 + print "REINDEX true" +elif len(sys.argv) > 2: + print >> sys.stderr, "Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % sys.argv[0] + sys.exit(1) + +# Load debtags database +debtags_db = debtags.DB() +tag_filter = re.compile(r"^special::.+$|^.+::TODO$") +try: + debtags_db.read(open(DB_PATH, "r"), lambda x: not tag_filter.match(x)) +except IOError: + print >> sys.stderr, "IOError: could not open debtags file \'%s\'" % DB_PATH + exit(1) + +# Set of installed packages +installed_pkgs = commands.getoutput('/usr/bin/dpkg --get-selections').replace('install','\t').split() +installed_pkgs_tags = debtags_db.choose_packages(installed_pkgs) + +# Most relevant tags +rel_index = debtags.relevance_index_function(debtags_db, installed_pkgs_tags) +relevant_tags = sorted(installed_pkgs_tags.iter_tags(), lambda a, b: cmp(rel_index(a), rel_index(b))) +normalized_relevant_tags = normalize_tags(' '.join(relevant_tags[-50:])) + +if not REINDEX: + try: + print "Opening existing debtags xapian index at \'%s\'" % INDEX_PATH + debtags_index = xapian.Database(INDEX_PATH) + except DatabaseError: + print "Could not open debtags xapian index" + REINDEX = 1 + +if REINDEX: + debtags_index = createDebtagsIndex(debtags_db,INDEX_PATH) + +qp = xapian.QueryParser() +query = qp.parse_query(normalized_relevant_tags) +enquire = xapian.Enquire(debtags_index) +enquire.set_query(query) + +mset = enquire.get_mset(0, 20, None, pkgmatchdecider(installed_pkgs)) +for m in mset: + print "%2d: %s" % (m.rank, m.document.get_data()) -- libgit2 0.21.2