Commit f2264959dbe61cedb82c446cdb1623393180836a
1 parent: 759bc928
Exists in master and in 1 other branch.
Added AppAptXapianIndex, an app-only index.
Showing 1 changed file with 35 additions and 5 deletions.
Show diff stats
src/data.py
@@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list): | @@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list): | ||
41 | return matches | 41 | return matches |
42 | 42 | ||
43 | def axi_search_pkg_tags(axi,pkg): | 43 | def axi_search_pkg_tags(axi,pkg): |
44 | - query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg) | ||
45 | enquire = xapian.Enquire(axi) | 44 | enquire = xapian.Enquire(axi) |
46 | - enquire.set_query(query) | 45 | + enquire.set_query(xapian.Query("XP"+pkg)) |
47 | matches = enquire.get_mset(0,1) | 46 | matches = enquire.get_mset(0,1) |
47 | + if not matches: | ||
48 | + logging.debug("Package %s not found in items repository" % pkg) | ||
49 | + return [] | ||
48 | for m in matches: | 50 | for m in matches: |
49 | tags = [term.term for term in axi.get_document(m.docid).termlist() if | 51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if |
50 | term.term.startswith("XT")] | 52 | term.term.startswith("XT")] |
51 | - return tags | 53 | + return tags |
52 | 54 | ||
53 | def print_index(index): | 55 | def print_index(index): |
54 | output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n" | 56 | output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n" |
@@ -59,6 +61,32 @@ def print_index(index): | @@ -59,6 +61,32 @@ def print_index(index): | ||
59 | output += "\n---" | 61 | output += "\n---" |
60 | return output | 62 | return output |
61 | 63 | ||
64 | +class AppAptXapianIndex(xapian.WritableDatabase): | ||
65 | + """ | ||
66 | + Sample data source for packages information, mainly useful for tests. | ||
67 | + """ | ||
68 | + def __init__(self,axi_path,path): | ||
69 | + xapian.WritableDatabase.__init__(self,path, | ||
70 | + xapian.DB_CREATE_OR_OVERWRITE) | ||
71 | + axi = xapian.Database(axi_path) | ||
72 | + logging.info("AptXapianIndex size: %d" % axi.get_doccount()) | ||
73 | + for docid in range(1,axi.get_lastdocid()+1): | ||
74 | + try: | ||
75 | + doc = axi.get_document(docid) | ||
76 | + allterms = [term.term for term in doc.termlist()] | ||
77 | + if "XTrole::program" in allterms: | ||
78 | + self.add_document(doc) | ||
79 | + logging.info("Added doc %d." % docid) | ||
80 | + else: | ||
81 | + logging.info("Discarded doc %d." % docid) | ||
82 | + except: | ||
83 | + logging.info("Doc %d not found in axi." % docid) | ||
84 | + logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." % | ||
85 | + self.get_doccount(), self.get_lastdocid()) | ||
86 | + | ||
87 | + def __str__(self): | ||
88 | + return print_index(self) | ||
89 | + | ||
62 | class SampleAptXapianIndex(xapian.WritableDatabase): | 90 | class SampleAptXapianIndex(xapian.WritableDatabase): |
63 | """ | 91 | """ |
64 | Sample data source for packages information, mainly useful for tests. | 92 | Sample data source for packages information, mainly useful for tests. |
@@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
129 | """ | 157 | """ |
130 | self.axi = xapian.Database(cfg.axi) | 158 | self.axi = xapian.Database(cfg.axi) |
131 | self.path = os.path.expanduser(cfg.popcon_index) | 159 | self.path = os.path.expanduser(cfg.popcon_index) |
160 | + self.source_dir = os.path.expanduser(cfg.popcon_dir) | ||
132 | if not cfg.index_mode == "old" or not self.load_index(): | 161 | if not cfg.index_mode == "old" or not self.load_index(): |
133 | if not os.path.exists(cfg.popcon_dir): | 162 | if not os.path.exists(cfg.popcon_dir): |
134 | os.makedirs(cfg.popcon_dir) | 163 | os.makedirs(cfg.popcon_dir) |
@@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase): | @@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase): | ||
205 | submission.user_id) | 234 | submission.user_id) |
206 | for pkg, freq in submission.packages.items(): | 235 | for pkg, freq in submission.packages.items(): |
207 | doc.add_term("XP"+pkg,freq) | 236 | doc.add_term("XP"+pkg,freq) |
208 | - for tag in axi_search_pkg_tags(self.axi,pkg): | ||
209 | - doc.add_term(tag,freq) | 237 | + if axi_search_pkg_tags(self.axi,pkg): |
238 | + for tag in axi_search_pkg_tags(self.axi,pkg): | ||
239 | + doc.add_term(tag,freq) | ||
210 | doc_id = self.add_document(doc) | 240 | doc_id = self.add_document(doc) |
211 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) | 241 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
212 | # python garbage collector | 242 | # python garbage collector |