Commit f2264959dbe61cedb82c446cdb1623393180836a
1 parent
759bc928
Exists in
master
and in
1 other branch
Added AppAptXapianIndex, an app-only index.
Showing
1 changed file
with
35 additions
and
5 deletions
Show diff stats
src/data.py
... | ... | @@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list): |
41 | 41 | return matches |
42 | 42 | |
43 | 43 | def axi_search_pkg_tags(axi,pkg): |
44 | - query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg) | |
45 | 44 | enquire = xapian.Enquire(axi) |
46 | - enquire.set_query(query) | |
45 | + enquire.set_query(xapian.Query("XP"+pkg)) | |
47 | 46 | matches = enquire.get_mset(0,1) |
47 | + if not matches: | |
48 | + logging.debug("Package %s not found in items repository" % pkg) | |
49 | + return [] | |
48 | 50 | for m in matches: |
49 | 51 | tags = [term.term for term in axi.get_document(m.docid).termlist() if |
50 | 52 | term.term.startswith("XT")] |
51 | - return tags | |
53 | + return tags | |
52 | 54 | |
53 | 55 | def print_index(index): |
54 | 56 | output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n" |
... | ... | @@ -59,6 +61,32 @@ def print_index(index): |
59 | 61 | output += "\n---" |
60 | 62 | return output |
61 | 63 | |
64 | +class AppAptXapianIndex(xapian.WritableDatabase): | |
65 | + """ | |
66 | + Application-only index: copies every axi document tagged role::program into a new database created (or overwritten) at path. | |
67 | + """ | |
68 | + def __init__(self,axi_path,path): | |
69 | + xapian.WritableDatabase.__init__(self,path, | |
70 | + xapian.DB_CREATE_OR_OVERWRITE) | |
71 | + axi = xapian.Database(axi_path) | |
72 | + logging.info("AptXapianIndex size: %d" % axi.get_doccount()) | |
73 | + for docid in range(1,axi.get_lastdocid()+1): | |
74 | + try: | |
75 | + doc = axi.get_document(docid) | |
76 | + allterms = [term.term for term in doc.termlist()] | |
77 | + if "XTrole::program" in allterms: | |
78 | + self.add_document(doc) | |
79 | + logging.info("Added doc %d." % docid) | |
80 | + else: | |
81 | + logging.info("Discarded doc %d." % docid) | |
82 | + except xapian.DocNotFoundError: # skip gaps in the docid range; any other error propagates | |
83 | + logging.info("Doc %d not found in axi." % docid) | |
84 | + logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." % | |
85 | + (self.get_doccount(), self.get_lastdocid())) | |
86 | + | |
87 | + def __str__(self): | |
88 | + return print_index(self) | |
89 | + | |
62 | 90 | class SampleAptXapianIndex(xapian.WritableDatabase): |
63 | 91 | """ |
64 | 92 | Sample data source for packages information, mainly useful for tests. |
... | ... | @@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase): |
129 | 157 | """ |
130 | 158 | self.axi = xapian.Database(cfg.axi) |
131 | 159 | self.path = os.path.expanduser(cfg.popcon_index) |
160 | + self.source_dir = os.path.expanduser(cfg.popcon_dir) | |
132 | 161 | if not cfg.index_mode == "old" or not self.load_index(): |
133 | 162 | if not os.path.exists(cfg.popcon_dir): |
134 | 163 | os.makedirs(cfg.popcon_dir) |
... | ... | @@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase): |
205 | 234 | submission.user_id) |
206 | 235 | for pkg, freq in submission.packages.items(): |
207 | 236 | doc.add_term("XP"+pkg,freq) |
208 | - for tag in axi_search_pkg_tags(self.axi,pkg): | |
209 | - doc.add_term(tag,freq) | |
237 | + if axi_search_pkg_tags(self.axi,pkg): | |
238 | + for tag in axi_search_pkg_tags(self.axi,pkg): | |
239 | + doc.add_term(tag,freq) | |
210 | 240 | doc_id = self.add_document(doc) |
211 | 241 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
212 | 242 | # python garbage collector | ... | ... |