Commit af80bfe9a4cd6cf70ed228b73461092d124242e6
1 parent: 010147f6
Exists in master and in 1 other branch
Adding PopconSubmission class.
Showing 1 changed file with 39 additions and 29 deletions
Show diff stats
src/data.py
| ... | ... | @@ -74,22 +74,40 @@ class SampleAptXapianIndex(xapian.WritableDatabase): |
| 74 | 74 | print [term.term for term in self.get_document(doc_id).termlist()] |
| 75 | 75 | print "---" |
| 76 | 76 | |
| 77 | -#[FIXME] get pkg tags from axi and remove load_debtags_db method | |
| 78 | -def load_debtags_db(db_path): | |
| 79 | - """ | |
| 80 | - Load debtags database from the source file. | |
| 81 | - """ | |
| 82 | - tag_filter = re.compile(r"^special::.+$|^.+::TODO$") | |
| 83 | - try: | |
| 84 | - db_file = open(db_path, "r") | |
| 85 | - debtags_db = debtags.DB() | |
| 86 | - debtags_db.read(db_file,lambda x: not tag_filter.match(x)) | |
| 87 | - db_file.close() | |
| 88 | - return debtags_db | |
| 89 | - except: | |
| 90 | - logging.error("Could not load DebtagsDB from '%s'." % self.db_path) | |
| 91 | - raise Error | |
| 77 | +class PopconSubmission(): | |
| 78 | + def __init__(self,submission_hash): | |
| 79 | + self.hash = submission_hash | |
| 80 | + self.pkgs_list = [] | |
| 92 | 81 | |
| 82 | + def add_pkg(self,pkg): | |
| 83 | + self.pkgs_list.append(pkg) | |
| 84 | + | |
| 85 | + def parse_submission(self,submission_path,binary=1): | |
| 86 | + """ | |
| 87 | + Parse a popcon submission, generating the names of the valid packages | |
| 88 | + in the vote. | |
| 89 | + """ | |
| 90 | + submission = open(submission_path) | |
| 91 | + for line in submission: | |
| 92 | + if not line.startswith("POPULARITY"): | |
| 93 | + if not line.startswith("END-POPULARITY"): | |
| 94 | + data = line[:-1].split(" ") | |
| 95 | + if len(data) > 3: | |
| 96 | + if binary: | |
| 97 | + # every installed package has the same weight | |
| 98 | + yield data[2], 1 | |
| 99 | + elif data[3] == '<NOFILES>': | |
| 100 | + # No executable files to track | |
| 101 | + yield data[2], 1 | |
| 102 | + elif len(data) == 4: | |
| 103 | + # Recently used packages | |
| 104 | + yield data[2], 10 | |
| 105 | + elif data[4] == '<OLD>': | |
| 106 | + # Unused packages | |
| 107 | + yield data[2], 3 | |
| 108 | + elif data[4] == '<RECENT-CTIME>': | |
| 109 | + # Recently installed packages | |
| 110 | + yield data[2], 8 | |
| 93 | 111 | class PopconXapianIndex(xapian.WritableDatabase,Singleton): |
| 94 | 112 | """ |
| 95 | 113 | Data source for popcon submissions defined as a singleton xapian database. |
| ... | ... | @@ -100,7 +118,8 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): |
| 100 | 118 | """ |
| 101 | 119 | self.path = os.path.expanduser(cfg.popcon_index) |
| 102 | 120 | self.popcon_dir = os.path.expanduser(cfg.popcon_dir) |
| 103 | - self.debtags_path = os.path.expanduser(cfg.tags_db) | |
| 121 | + #self.debtags_path = os.path.expanduser(cfg.tags_db) | |
| 122 | + self.axi = xapian.Database(cfg.axi) | |
| 104 | 123 | self.load_index() |
| 105 | 124 | |
| 106 | 125 | def parse_submission(self,submission_path,binary=1): |
| ... | ... | @@ -149,7 +168,6 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): |
| 149 | 168 | """ |
| 150 | 169 | if not os.path.exists(self.path): |
| 151 | 170 | os.makedirs(self.path) |
| 152 | - debtags_db = load_debtags_db(self.debtags_path) #[FIXME] | |
| 153 | 171 | |
| 154 | 172 | try: |
| 155 | 173 | logging.info("Indexing popcon submissions from \'%s\'" % |
| ... | ... | @@ -170,10 +188,10 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): |
| 170 | 188 | logging.debug("Parsing popcon submission at \'%s\'" % |
| 171 | 189 | submission_path) |
| 172 | 190 | for pkg, freq in self.parse_submission(submission_path): |
| 173 | - doc.add_term(pkg,freq) | |
| 174 | - #[FIXME] get tags from axi | |
| 175 | - for tag in debtags_db.tags_of_package(pkg): | |
| 176 | - doc.add_term("XT"+tag,freq) | |
| 191 | + doc.add_term("XP"+pkg,freq) | |
| 192 | + for tag in axi_search_pkg_tags(self.axi,pkg): | |
| 193 | + print tag | |
| 194 | + doc.add_term(tag,freq) | |
| 177 | 195 | doc_id = self.add_document(doc) |
| 178 | 196 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
| 179 | 197 | # python garbage collector |
| ... | ... | @@ -181,14 +199,6 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): |
| 181 | 199 | # flush to disk database changes |
| 182 | 200 | self.flush() |
| 183 | 201 | |
| 184 | -class PopconSubmission(): | |
| 185 | - def __init__(self,submission_hash): | |
| 186 | - self.hash = submission_hash | |
| 187 | - self.pkgs_list = [] | |
| 188 | - | |
| 189 | - def add_pkg(self,pkg): | |
| 190 | - self.pkgs_list.append(pkg) | |
| 191 | - | |
| 192 | 202 | class PopconClusteredData(Singleton): |
| 193 | 203 | """ |
| 194 | 204 | Data source for popcon submissions defined as a singleton xapian database. | ... | ... |