Commit 4674b436dfbe5afed2ac1f40c72385a3f18405a3
1 parent
67b79c04
Exists in
master
and in
1 other branch
Content-based recommender using debtags info only.
Showing
2 changed files
with
90 additions
and
0 deletions
Show diff stats
README
... | ... | @@ -0,0 +1,84 @@ |
1 | +import xapian | |
2 | +from debian import debtags | |
3 | +import re | |
4 | +import sys | |
5 | +import os | |
6 | +import commands | |
7 | + | |
8 | +DB_PATH = "/var/lib/debtags/package-tags" | |
9 | +INDEX_PATH = "~/.app-recommender/debtags_index" | |
10 | + | |
11 | +INDEX_PATH = os.path.expanduser(INDEX_PATH) | |
12 | + | |
def normalize_tags(string):
    """Return *string* with ':' turned into '_' and '-' into an apostrophe.

    The same transformation is applied to the terms stored in the index and
    to the query text, so both sides agree on the spelling of a tag.
    NOTE(review): presumably these characters are replaced so that the xapian
    query parser keeps each tag as a single term -- confirm.
    """
    normalized = string
    for old, new in ((':', '_'), ('-', '\'')):
        normalized = normalized.replace(old, new)
    return normalized
15 | + | |
16 | +def createDebtagsIndex(debtags_db,index_path): | |
17 | + if not os.path.exists(index_path): | |
18 | + os.makedirs(index_path) | |
19 | + print "Creating new debtags xapian index at \'%s\'" % index_path | |
20 | + debtags_index = xapian.WritableDatabase(index_path, xapian.DB_CREATE_OR_OVERWRITE) | |
21 | + for pkg,tags in debtags_db.iter_packages_tags(): | |
22 | + doc = xapian.Document() | |
23 | + doc.set_data(pkg) | |
24 | + for tag in tags: | |
25 | + doc.add_term(normalize_tags(tag)) | |
26 | + print "indexing ",debtags_index.add_document(doc) | |
27 | + return debtags_index | |
28 | + | |
# MatchDecider that excludes already-installed packages from the results
class pkgmatchdecider(xapian.MatchDecider):
    """Match decider that rejects documents whose data is an installed package.

    installed_pkgs: iterable of package names; it is kept as given in the
    ``installed_pkgs`` attribute, and a frozenset snapshot taken at
    construction time is what __call__ actually consults.
    """

    def __init__(self, installed_pkgs):
        xapian.MatchDecider.__init__(self)
        self.installed_pkgs = installed_pkgs
        # The decider is invoked once per candidate document, so use a hashed
        # lookup instead of scanning the whole package list on every call.
        self._installed_lookup = frozenset(installed_pkgs)

    def __call__(self, doc):
        """Return True when the document's data is not an installed package."""
        return doc.get_data() not in self._installed_lookup
37 | + | |
38 | +# Handle input arguments | |
39 | +REINDEX = 0 | |
40 | +if len(sys.argv) == 2: | |
41 | + DB_PATH = sys.argv[1] | |
42 | + REINDEX = 1 | |
43 | + print "REINDEX true" | |
44 | +elif len(sys.argv) > 2: | |
45 | + print >> sys.stderr, "Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % sys.argv[0] | |
46 | + sys.exit(1) | |
47 | + | |
# Load debtags database
debtags_db = debtags.DB()
# Matches "special::<anything>" and "<anything>::TODO" tags; the predicate
# handed to read() returns False for them (presumably dropping them -- confirm
# against the debtags.DB.read documentation).
tag_filter = re.compile(r"^special::.+$|^.+::TODO$")
try:
    # The with-block closes the file as soon as reading finishes or fails,
    # rather than leaving the handle open for the rest of the run.
    with open(DB_PATH, "r") as debtags_file:
        debtags_db.read(debtags_file, lambda x: not tag_filter.match(x))
except IOError:
    print >> sys.stderr, "IOError: could not open debtags file \'%s\'" % DB_PATH
    # sys.exit() rather than the bare exit() helper, which is only injected
    # by the site module and is not guaranteed to exist.
    sys.exit(1)
56 | + | |
# Set of installed packages
def _parse_dpkg_selections(selections_output):
    """Return the package names listed as installed in dpkg selections text.

    selections_output: text with one "<package> <state>" pair per line, as
    printed by `dpkg --get-selections`.

    Each line is split into fields instead of blanking the substring
    "install" across the whole output: that approach corrupted package names
    containing "install" (e.g. "install-info") and left state tokens such as
    "de" (from "deinstall") or "hold" in the resulting list.

    Only the "install" and "hold" states are kept.
    NOTE(review): "hold" is treated as installed and "deinstall"/"purge" as
    not installed -- confirm this matches the intended meaning.
    Lines that do not have exactly two fields (e.g. error text) are skipped.
    """
    packages = []
    for line in selections_output.splitlines():
        fields = line.split()
        if len(fields) == 2 and fields[1] in ('install', 'hold'):
            packages.append(fields[0])
    return packages

installed_pkgs = _parse_dpkg_selections(
    commands.getoutput('/usr/bin/dpkg --get-selections'))
installed_pkgs_tags = debtags_db.choose_packages(installed_pkgs)
60 | + | |
# Most relevant tags: order the tags of the installed packages by ascending
# relevance index, then keep the last 50 (the highest-scoring ones) as a
# space-separated string of normalized tags.
rel_index = debtags.relevance_index_function(debtags_db, installed_pkgs_tags)
relevant_tags = sorted(installed_pkgs_tags.iter_tags(), key=rel_index)
normalized_relevant_tags = ' '.join(
    normalize_tags(tag) for tag in relevant_tags[-50:])
65 | + | |
66 | +if not REINDEX: | |
67 | + try: | |
68 | + print "Opening existing debtags xapian index at \'%s\'" % INDEX_PATH | |
69 | + debtags_index = xapian.Database(INDEX_PATH) | |
70 | + except DatabaseError: | |
71 | + print "Could not open debtags xapian index" | |
72 | + REINDEX = 1 | |
73 | + | |
74 | +if REINDEX: | |
75 | + debtags_index = createDebtagsIndex(debtags_db,INDEX_PATH) | |
76 | + | |
77 | +qp = xapian.QueryParser() | |
78 | +query = qp.parse_query(normalized_relevant_tags) | |
79 | +enquire = xapian.Enquire(debtags_index) | |
80 | +enquire.set_query(query) | |
81 | + | |
82 | +mset = enquire.get_mset(0, 20, None, pkgmatchdecider(installed_pkgs)) | |
83 | +for m in mset: | |
84 | + print "%2d: %s" % (m.rank, m.document.get_data()) | ... | ... |