Commit af80bfe9a4cd6cf70ed228b73461092d124242e6
1 parent
010147f6
Exists in
master
and in
1 other branch
Adding PopconSubmission class.
Showing
1 changed file
with
39 additions
and
29 deletions
Show diff stats
src/data.py
| @@ -74,22 +74,40 @@ class SampleAptXapianIndex(xapian.WritableDatabase): | @@ -74,22 +74,40 @@ class SampleAptXapianIndex(xapian.WritableDatabase): | ||
| 74 | print [term.term for term in self.get_document(doc_id).termlist()] | 74 | print [term.term for term in self.get_document(doc_id).termlist()] |
| 75 | print "---" | 75 | print "---" |
| 76 | 76 | ||
| 77 | -#[FIXME] get pkg tags from axi and remove load_debtags_db method | ||
| 78 | -def load_debtags_db(db_path): | ||
| 79 | - """ | ||
| 80 | - Load debtags database from the source file. | ||
| 81 | - """ | ||
| 82 | - tag_filter = re.compile(r"^special::.+$|^.+::TODO$") | ||
| 83 | - try: | ||
| 84 | - db_file = open(db_path, "r") | ||
| 85 | - debtags_db = debtags.DB() | ||
| 86 | - debtags_db.read(db_file,lambda x: not tag_filter.match(x)) | ||
| 87 | - db_file.close() | ||
| 88 | - return debtags_db | ||
| 89 | - except: | ||
| 90 | - logging.error("Could not load DebtagsDB from '%s'." % self.db_path) | ||
| 91 | - raise Error | 77 | +class PopconSubmission(): |
| 78 | + def __init__(self,submission_hash): | ||
| 79 | + self.hash = submission_hash | ||
| 80 | + self.pkgs_list = [] | ||
| 92 | 81 | ||
| 82 | + def add_pkg(self,pkg): | ||
| 83 | + self.pkgs_list.append(pkg) | ||
| 84 | + | ||
| 85 | + def parse_submission(self,submission_path,binary=1): | ||
| 86 | + """ | ||
| 87 | + Parse a popcon submission, generating the names of the valid packages | ||
| 88 | + in the vote. | ||
| 89 | + """ | ||
| 90 | + submission = open(submission_path) | ||
| 91 | + for line in submission: | ||
| 92 | + if not line.startswith("POPULARITY"): | ||
| 93 | + if not line.startswith("END-POPULARITY"): | ||
| 94 | + data = line[:-1].split(" ") | ||
| 95 | + if len(data) > 3: | ||
| 96 | + if binary: | ||
| 97 | + # every installed package has the same weight | ||
| 98 | + yield data[2], 1 | ||
| 99 | + elif data[3] == '<NOFILES>': | ||
| 100 | + # No executable files to track | ||
| 101 | + yield data[2], 1 | ||
| 102 | + elif len(data) == 4: | ||
| 103 | + # Recently used packages | ||
| 104 | + yield data[2], 10 | ||
| 105 | + elif data[4] == '<OLD>': | ||
| 106 | + # Unused packages | ||
| 107 | + yield data[2], 3 | ||
| 108 | + elif data[4] == '<RECENT-CTIME>': | ||
| 109 | + # Recently installed packages | ||
| 110 | + yield data[2], 8 | ||
| 93 | class PopconXapianIndex(xapian.WritableDatabase,Singleton): | 111 | class PopconXapianIndex(xapian.WritableDatabase,Singleton): |
| 94 | """ | 112 | """ |
| 95 | Data source for popcon submissions defined as a singleton xapian database. | 113 | Data source for popcon submissions defined as a singleton xapian database. |
| @@ -100,7 +118,8 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | @@ -100,7 +118,8 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | ||
| 100 | """ | 118 | """ |
| 101 | self.path = os.path.expanduser(cfg.popcon_index) | 119 | self.path = os.path.expanduser(cfg.popcon_index) |
| 102 | self.popcon_dir = os.path.expanduser(cfg.popcon_dir) | 120 | self.popcon_dir = os.path.expanduser(cfg.popcon_dir) |
| 103 | - self.debtags_path = os.path.expanduser(cfg.tags_db) | 121 | + #self.debtags_path = os.path.expanduser(cfg.tags_db) |
| 122 | + self.axi = xapian.Database(cfg.axi) | ||
| 104 | self.load_index() | 123 | self.load_index() |
| 105 | 124 | ||
| 106 | def parse_submission(self,submission_path,binary=1): | 125 | def parse_submission(self,submission_path,binary=1): |
| @@ -149,7 +168,6 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | @@ -149,7 +168,6 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | ||
| 149 | """ | 168 | """ |
| 150 | if not os.path.exists(self.path): | 169 | if not os.path.exists(self.path): |
| 151 | os.makedirs(self.path) | 170 | os.makedirs(self.path) |
| 152 | - debtags_db = load_debtags_db(self.debtags_path) #[FIXME] | ||
| 153 | 171 | ||
| 154 | try: | 172 | try: |
| 155 | logging.info("Indexing popcon submissions from \'%s\'" % | 173 | logging.info("Indexing popcon submissions from \'%s\'" % |
| @@ -170,10 +188,10 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | @@ -170,10 +188,10 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | ||
| 170 | logging.debug("Parsing popcon submission at \'%s\'" % | 188 | logging.debug("Parsing popcon submission at \'%s\'" % |
| 171 | submission_path) | 189 | submission_path) |
| 172 | for pkg, freq in self.parse_submission(submission_path): | 190 | for pkg, freq in self.parse_submission(submission_path): |
| 173 | - doc.add_term(pkg,freq) | ||
| 174 | - #[FIXME] get tags from axi | ||
| 175 | - for tag in debtags_db.tags_of_package(pkg): | ||
| 176 | - doc.add_term("XT"+tag,freq) | 191 | + doc.add_term("XP"+pkg,freq) |
| 192 | + for tag in axi_search_pkg_tags(self.axi,pkg): | ||
| 193 | + print tag | ||
| 194 | + doc.add_term(tag,freq) | ||
| 177 | doc_id = self.add_document(doc) | 195 | doc_id = self.add_document(doc) |
| 178 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) | 196 | logging.debug("Popcon Xapian: Indexing doc %d" % doc_id) |
| 179 | # python garbage collector | 197 | # python garbage collector |
| @@ -181,14 +199,6 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | @@ -181,14 +199,6 @@ class PopconXapianIndex(xapian.WritableDatabase,Singleton): | ||
| 181 | # flush to disk database changes | 199 | # flush to disk database changes |
| 182 | self.flush() | 200 | self.flush() |
| 183 | 201 | ||
| 184 | -class PopconSubmission(): | ||
| 185 | - def __init__(self,submission_hash): | ||
| 186 | - self.hash = submission_hash | ||
| 187 | - self.pkgs_list = [] | ||
| 188 | - | ||
| 189 | - def add_pkg(self,pkg): | ||
| 190 | - self.pkgs_list.append(pkg) | ||
| 191 | - | ||
| 192 | class PopconClusteredData(Singleton): | 202 | class PopconClusteredData(Singleton): |
| 193 | """ | 203 | """ |
| 194 | Data source for popcon submissions defined as a singleton xapian database. | 204 | Data source for popcon submissions defined as a singleton xapian database. |