Commit f2264959dbe61cedb82c446cdb1623393180836a
1 parent
759bc928
Exists in
master
and in
1 other branch
Added AppAptXapianIndex, an app-only index.
Showing
1 changed file
with
35 additions
and
5 deletions
Show diff stats
src/data.py
| ... | ... | @@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list): |
| 41 | 41 | return matches |
| 42 | 42 | |
def axi_search_pkg_tags(axi,pkg):
    """
    Return the tag terms (terms prefixed with "XT") of package 'pkg' as
    stored in the 'axi' index, or an empty list when the package has no
    document in the index.
    """
    lookup = xapian.Enquire(axi)
    lookup.set_query(xapian.Query("XP"+pkg))
    # Only the best match is requested, so there is at most one hit.
    hits = lookup.get_mset(0,1)
    if not hits:
        logging.debug("Package %s not found in items repository" % pkg)
        return []
    for hit in hits:
        pkg_doc = axi.get_document(hit.docid)
        return [entry.term for entry in pkg_doc.termlist()
                if entry.term.startswith("XT")]
| 52 | 54 | |
| 53 | 55 | def print_index(index): |
| 54 | 56 | output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n" |
| ... | ... | @@ -59,6 +61,32 @@ def print_index(index): |
| 59 | 61 | output += "\n---" |
| 60 | 62 | return output |
| 61 | 63 | |
class AppAptXapianIndex(xapian.WritableDatabase):
    """
    Application-only copy of an apt-xapian-index.

    Only the documents of the source index that carry the term
    "XTrole::program" are copied; every other document is discarded.
    """
    def __init__(self,axi_path,path):
        """
        Build the index at 'path' from the apt-xapian-index at 'axi_path'.

        The database at 'path' is opened with DB_CREATE_OR_OVERWRITE, so
        any index already stored there is replaced.
        """
        xapian.WritableDatabase.__init__(self,path,
                                         xapian.DB_CREATE_OR_OVERWRITE)
        axi = xapian.Database(axi_path)
        logging.info("AptXapianIndex size: %d" % axi.get_doccount())
        for docid in range(1,axi.get_lastdocid()+1):
            # Doc ids are walked up to the last one and some may be
            # missing; only that case is treated as skippable, so other
            # failures are no longer reported as "not found".
            try:
                doc = axi.get_document(docid)
            except xapian.DocNotFoundError:
                logging.info("Doc %d not found in axi." % docid)
                continue
            is_program = any(term.term == "XTrole::program"
                             for term in doc.termlist())
            if is_program:
                self.add_document(doc)
                logging.info("Added doc %d." % docid)
            else:
                logging.info("Discarded doc %d." % docid)
        # Both values must be passed to '%' as one tuple; without the
        # parentheses the format string only received the first one and
        # raised TypeError.
        logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." %
                     (self.get_doccount(), self.get_lastdocid()))

    def __str__(self):
        """Return the textual dump produced by print_index()."""
        return print_index(self)
| 89 | + | |
| 62 | 90 | class SampleAptXapianIndex(xapian.WritableDatabase): |
| 63 | 91 | """ |
| 64 | 92 | Sample data source for packages information, mainly useful for tests. |
| ... | ... | @@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase): |
| 129 | 157 | """ |
| 130 | 158 | self.axi = xapian.Database(cfg.axi) |
| 131 | 159 | self.path = os.path.expanduser(cfg.popcon_index) |
| 160 | + self.source_dir = os.path.expanduser(cfg.popcon_dir) | |
| 132 | 161 | if not cfg.index_mode == "old" or not self.load_index(): |
| 133 | 162 | if not os.path.exists(cfg.popcon_dir): |
| 134 | 163 | os.makedirs(cfg.popcon_dir) |
| ... | ... | @@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase): |
| 205 | 234 | submission.user_id) |
| 206 | 235 | for pkg, freq in submission.packages.items(): |
| 207 | 236 | doc.add_term("XP"+pkg,freq) |
| 208 | - for tag in axi_search_pkg_tags(self.axi,pkg): | |
| 209 | - doc.add_term(tag,freq) | |
| 237 | + if axi_search_pkg_tags(self.axi,pkg): | |
| 238 | + for tag in axi_search_pkg_tags(self.axi,pkg): | |
| 239 | + doc.add_term(tag,freq) | |
| 210 | 240 | doc_id = self.add_document(doc) |
| 211 | 241 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
| 212 | 242 | # python garbage collector | ... | ... |