Commit 4674b436dfbe5afed2ac1f40c72385a3f18405a3
1 parent
67b79c04
Exists in
master
and in
1 other branch
Content-based recommender using debtags info only.
Showing
2 changed files
with
90 additions
and
0 deletions
Show diff stats
README
@@ -0,0 +1,84 @@ | @@ -0,0 +1,84 @@ | ||
1 | +import xapian | ||
2 | +from debian import debtags | ||
3 | +import re | ||
4 | +import sys | ||
5 | +import os | ||
6 | +import commands | ||
7 | + | ||
# Default location of the debtags package/tag database; the script replaces
# it with the first command-line argument when one is given.
DB_PATH = "/var/lib/debtags/package-tags"

# Directory holding the debtags xapian index, with '~' expanded through
# os.path.expanduser.
INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index")
12 | + | ||
def normalize_tags(string):
    """Return `string` with every ':' replaced by '_' and every '-' by "'".

    The same mapping is applied to the terms stored in the index and to the
    query text, so both sides stay comparable.
    NOTE(review): presumably ':' and '-' are avoided because the xapian
    query parser gives them a special meaning -- confirm.
    """
    without_colons = string.replace(':', '_')
    return without_colons.replace('-', '\'')
15 | + | ||
def createDebtagsIndex(debtags_db,index_path):
    """Build a new xapian index of debtags data at `index_path`.

    debtags_db -- object whose iter_packages_tags() yields (package, tags)
                  pairs.
    index_path -- directory of the index; it is created when missing and the
                  database is opened with DB_CREATE_OR_OVERWRITE.

    One document is added per package: the package name is stored as the
    document data and each normalized tag as a term.  The id returned by
    add_document() is printed for every package.

    Returns the xapian.WritableDatabase, which is not closed here.
    """
    index_dir_missing = not os.path.exists(index_path)
    if index_dir_missing:
        os.makedirs(index_path)
    print("Creating new debtags xapian index at \'%s\'" % index_path)
    index = xapian.WritableDatabase(index_path, xapian.DB_CREATE_OR_OVERWRITE)
    for package, package_tags in debtags_db.iter_packages_tags():
        document = xapian.Document()
        document.set_data(package)
        for term in map(normalize_tags, package_tags):
            document.add_term(term)
        doc_id = index.add_document(document)
        # Same text as a two-item print statement: the label, a separating
        # space, then the document id.
        print("%s %s" % ("indexing ", doc_id))
    return index
28 | + | ||
# MatchDecider that leaves already-installed packages out of the results
class pkgmatchdecider(xapian.MatchDecider):
    """Accept a document only when its data is not in `installed_pkgs`.

    installed_pkgs -- container of package names, checked with `in`.
    """

    def __init__(self, installed_pkgs):
        xapian.MatchDecider.__init__(self)
        self.installed_pkgs = installed_pkgs

    def __call__(self, doc):
        """Return True when the document's data is not an installed package."""
        already_installed = doc.get_data() in self.installed_pkgs
        return not already_installed
37 | + | ||
# Handle input arguments: an optional path to a debtags database.  Giving
# one also forces the xapian index to be rebuilt.
REINDEX = 0
if len(sys.argv) == 2:
    DB_PATH = sys.argv[1]
    REINDEX = 1
    print("REINDEX true")
elif len(sys.argv) > 2:
    sys.stderr.write("Usage: %s [PATH_TO_DEBTAGS_DATABASE]\n" % sys.argv[0])
    sys.exit(1)

# Load debtags database, leaving out "special::*" and "*::TODO" tags
debtags_db = debtags.DB()
tag_filter = re.compile(r"^special::.+$|^.+::TODO$")
try:
    # The context manager closes the file even when reading fails.
    with open(DB_PATH, "r") as debtags_file:
        debtags_db.read(debtags_file, lambda x: not tag_filter.match(x))
except IOError:
    sys.stderr.write("IOError: could not open debtags file \'%s\'\n" % DB_PATH)
    sys.exit(1)

# Set of installed packages.  Each line of `dpkg --get-selections` is
# "<package>[:<arch>] <state>".  The lines are parsed field by field so that
# package names containing "install" are not mangled and packages selected
# for removal ("deinstall"/"purge") are not counted as installed.  Packages
# on "hold" are still installed, so they are kept.
installed_pkgs = []
for selection in commands.getoutput('/usr/bin/dpkg --get-selections').splitlines():
    fields = selection.split()
    if len(fields) == 2 and fields[1] in ('install', 'hold'):
        # Drop a multiarch qualifier such as ":amd64"; debtags names have none.
        installed_pkgs.append(fields[0].split(':')[0])
installed_pkgs_tags = debtags_db.choose_packages(installed_pkgs)

# Most relevant tags: sort ascending by relevance and keep the last 50
rel_index = debtags.relevance_index_function(debtags_db, installed_pkgs_tags)
relevant_tags = sorted(installed_pkgs_tags.iter_tags(), key=rel_index)
normalized_relevant_tags = normalize_tags(' '.join(relevant_tags[-50:]))

if not REINDEX:
    try:
        print("Opening existing debtags xapian index at \'%s\'" % INDEX_PATH)
        debtags_index = xapian.Database(INDEX_PATH)
    except xapian.DatabaseError:
        # The exception class must be qualified: a bare DatabaseError is
        # undefined here and would raise NameError instead of falling back.
        print("Could not open debtags xapian index")
        REINDEX = 1

if REINDEX:
    debtags_index = createDebtagsIndex(debtags_db,INDEX_PATH)

# Query the index with the normalized relevant tags
qp = xapian.QueryParser()
query = qp.parse_query(normalized_relevant_tags)
enquire = xapian.Enquire(debtags_index)
enquire.set_query(query)

# Ask for at most 20 matches from offset 0; the decider rejects documents
# whose package is already installed.
mset = enquire.get_mset(0, 20, None, pkgmatchdecider(installed_pkgs))
for m in mset:
    print("%2d: %s" % (m.rank, m.document.get_data()))