indexer_axi.py
2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python
"""
indexer.py - generate Xapian indexes to be used as item and user
repositories
"""
__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
__license__ = """
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import os
import sys
sys.path.insert(0,'../')
import datetime
from config import Config
from error import Error
import data
import xapian
if __name__ == '__main__':
    # Command-line driver: builds a derived index from the system
    # apt-xapian-index and prints size and timing information.
    #
    # Two modes are recognised. The mode keyword is looked up anywhere in
    # sys.argv, while its argument(s) are always read from sys.argv[2] on:
    #   sample PKGS_SAMPLE_FILE   - index restricted to the packages listed
    #                               (one per line) in PKGS_SAMPLE_FILE
    #   filter TERM [TERM ...]    - index restricted by the given terms
    # If both keywords are present, both branches run. If neither is
    # present, only the timing lines are printed.
    #
    # NOTE(review): the usage messages mention "axi_sample"/"axi_filter"
    # while the code tests for "sample"/"filter" - confirm which spelling
    # callers actually use before changing either.
    axi_path = "/var/lib/apt-xapian-index/index"
    axi = xapian.Database(axi_path)
    base_dir = os.path.expanduser("~/.app-recommender/")

    begin_time = datetime.datetime.now()

    # axi sample based on the pkgs sample provided by command line
    if "sample" in sys.argv:
        print("Sample package indexing started at %s" % begin_time)
        if len(sys.argv) <= 2:
            # Call-form print keeps the file parseable on Python 2 and 3;
            # sys.exit does not depend on the site-provided exit() helper.
            print("Usage: indexer axi_sample pkgs_sample_file")
            sys.exit(1)
        pkgs_filter = sys.argv[2]
        with open(pkgs_filter) as valid:
            pkgs_list = [line.strip() for line in valid]
        # The output index is named after the last "/"-separated component
        # of the sample file path.
        filter_str = pkgs_filter.split("/")[-1]
        index = data.SampleAptXapianIndex(
            pkgs_list, axi, os.path.join(base_dir, "axi_" + filter_str))
        print("Axi size: %d" % axi.get_doccount())
        print("Packages list length: %d" % len(pkgs_list))
        print("Sample index size: %d" % index.get_doccount())

    # axi filtered by terms provided by command line
    if "filter" in sys.argv:
        print("Filtered package indexing started at %s" % begin_time)
        if len(sys.argv) <= 2:
            print("Usage: indexer axi_filter term [additional terms]")
            sys.exit(1)
        terms = sys.argv[2:]
        # Keep only the part after the last "::" of each term so the
        # output directory name stays short (e.g. "a::b" contributes "b").
        terms_str = "_".join([t.split("::")[-1] for t in terms])
        index = data.FilteredXapianIndex(
            terms, axi, os.path.join(base_dir, "axi_" + terms_str))
        print("Axi size: %d" % axi.get_doccount())
        print("Terms filter: %s" % terms)
        print("Filtered index size: %d" % index.get_doccount())

    end_time = datetime.datetime.now()
    print("Indexing completed at %s" % end_time)
    delta = end_time - begin_time
    # timedelta.seconds alone excludes whole days; fold them in so runs
    # longer than 24 hours are reported correctly.
    elapsed_seconds = delta.days * 86400 + delta.seconds
    print("Time elapsed: %d seconds." % elapsed_seconds)