Commit cb2464a226ce9992f1b6a902b5acb3d612ac6f7f (1 parent: 2188f43d). Exists in master and in 1 other branch.
Code cleaning and refactoring.
Showing 4 changed files with 83 additions and 115 deletions.
Show diff stats
src/bin/apprec.py
... | ... | @@ -23,38 +23,27 @@ import os |
23 | 23 | import sys |
24 | 24 | sys.path.insert(0,'../') |
25 | 25 | import logging |
26 | -import datetime | |
27 | 26 | import random |
28 | -from datetime import timedelta | |
27 | +import datetime | |
29 | 28 | |
30 | -from config import * | |
31 | -from data import * | |
32 | -from evaluation import * | |
33 | -from dissimilarity import * | |
34 | -from recommender import * | |
35 | -from strategy import * | |
36 | -from user import * | |
37 | -from error import Error | |
29 | +from config import Config | |
30 | +from recommender import Recommender | |
31 | +from user import LocalSystem, RandomPopcon | |
38 | 32 | |
39 | 33 | if __name__ == '__main__': |
40 | - try: | |
41 | - begin_time = datetime.datetime.now() | |
42 | - logging.debug("Computation started at %s" % begin_time) | |
43 | - cfg = Config() | |
44 | - rec = Recommender(cfg) | |
45 | - user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop")) | |
46 | - #user = LocalSystem() | |
47 | - user.filter_pkg_profile(os.path.join(cfg.filters,"desktop")) | |
48 | - user.maximal_pkg_profile() | |
49 | - | |
50 | - logging.info("Recommending applications for user %s" % user.user_id) | |
51 | - logging.info(rec.get_recommendation(user,20)) | |
52 | - | |
53 | - end_time = datetime.datetime.now() | |
54 | - logging.debug("Computation completed at %s" % end_time) | |
55 | - delta = end_time - begin_time | |
56 | - logging.info("Time elapsed: %d seconds." % delta.seconds) | |
57 | - | |
58 | - except Error: | |
59 | - logging.critical("Aborting proccess. Use '--debug' for more details.") | |
60 | - | |
34 | + begin_time = datetime.datetime.now() | |
35 | + cfg = Config() | |
36 | + rec = Recommender(cfg) | |
37 | + logging.info("Computation started at %s" % begin_time) | |
38 | + user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop")) | |
39 | + #user = LocalSystem() | |
40 | + user.filter_pkg_profile(os.path.join(cfg.filters,"desktop")) | |
41 | + user.maximal_pkg_profile() | |
42 | + | |
43 | + logging.info("Recommending applications for user %s" % user.user_id) | |
44 | + logging.info(rec.get_recommendation(user,20)) | |
45 | + | |
46 | + end_time = datetime.datetime.now() | |
47 | + logging.info("Computation completed at %s" % end_time) | |
48 | + delta = end_time - begin_time | |
49 | + logging.info("Time elapsed: %d seconds." % delta.seconds) | ... | ... |
src/bin/cross_validation.py
... | ... | @@ -25,43 +25,34 @@ import sys |
25 | 25 | sys.path.insert(0,'../') |
26 | 26 | import logging |
27 | 27 | import datetime |
28 | -from datetime import timedelta | |
29 | 28 | |
30 | -from config import * | |
31 | -from data import * | |
32 | -from evaluation import * | |
33 | -from dissimilarity import * | |
34 | -from recommender import * | |
35 | -from strategy import * | |
36 | -from user import * | |
37 | -from error import Error | |
29 | +from config import Config | |
30 | +from evaluation import CrossValidation, Precision, Recall, F1, Accuracy, SimpleAccuracy | |
31 | +from recommender import Recommender | |
32 | +from user import RandomPopcon | |
38 | 33 | |
39 | 34 | if __name__ == '__main__': |
40 | - try: | |
41 | - cfg = Config() | |
42 | - rec = Recommender(cfg) | |
43 | - user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop")) | |
44 | - user.filter_pkg_profile(os.path.join(cfg.filters,"desktop")) | |
45 | - user.maximal_pkg_profile() | |
46 | - begin_time = datetime.datetime.now() | |
47 | - | |
48 | - metrics = [] | |
49 | - metrics.append(Precision()) | |
50 | - metrics.append(Recall()) | |
51 | - metrics.append(F1()) | |
52 | - metrics.append(Accuracy()) | |
53 | - metrics.append(SimpleAccuracy()) | |
54 | - validation = CrossValidation(0.9,10,rec,metrics,0.1) | |
55 | - validation.run(user) | |
56 | - print validation | |
57 | - | |
58 | - end_time = datetime.datetime.now() | |
59 | - delta = end_time - begin_time | |
60 | - logging.info("Cross-validation for user %s" % user.user_id) | |
61 | - logging.info("Recommender strategy: %s" % rec.strategy.description) | |
62 | - logging.debug("Cross-validation started at %s" % begin_time) | |
63 | - logging.debug("Cross-validation completed at %s" % end_time) | |
64 | - logging.info("Time elapsed: %d seconds." % delta.seconds) | |
65 | - | |
66 | - except Error: | |
67 | - logging.critical("Aborting proccess. Use '--debug' for more details.") | |
35 | + cfg = Config() | |
36 | + rec = Recommender(cfg) | |
37 | + user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop")) | |
38 | + user.filter_pkg_profile(os.path.join(cfg.filters,"desktop")) | |
39 | + user.maximal_pkg_profile() | |
40 | + begin_time = datetime.datetime.now() | |
41 | + | |
42 | + metrics = [] | |
43 | + metrics.append(Precision()) | |
44 | + metrics.append(Recall()) | |
45 | + metrics.append(F1()) | |
46 | + metrics.append(Accuracy()) | |
47 | + metrics.append(SimpleAccuracy()) | |
48 | + validation = CrossValidation(0.9,10,rec,metrics,0.01) | |
49 | + validation.run(user) | |
50 | + print validation | |
51 | + | |
52 | + end_time = datetime.datetime.now() | |
53 | + delta = end_time - begin_time | |
54 | + logging.info("Cross-validation for user %s" % user.user_id) | |
55 | + logging.info("Recommender strategy: %s" % rec.strategy.description) | |
56 | + logging.debug("Cross-validation started at %s" % begin_time) | |
57 | + logging.debug("Cross-validation completed at %s" % end_time) | |
58 | + logging.info("Time elapsed: %d seconds." % delta.seconds) | ... | ... |
src/bin/pkgindex.py
... | ... | @@ -18,11 +18,12 @@ __license__ = """ |
18 | 18 | You should have received a copy of the GNU General Public License |
19 | 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
20 | 20 | """ |
21 | + | |
22 | +import os | |
21 | 23 | import sys |
22 | 24 | sys.path.insert(0,'../') |
23 | 25 | import logging |
24 | 26 | import datetime |
25 | -from datetime import timedelta | |
26 | 27 | |
27 | 28 | from config import Config |
28 | 29 | from error import Error |
... | ... | @@ -32,27 +33,18 @@ import xapian |
32 | 33 | if __name__ == '__main__': |
33 | 34 | cfg = Config() |
34 | 35 | begin_time = datetime.datetime.now() |
35 | - if len(sys.argv) >= 3: | |
36 | - try: | |
37 | - with open(sys.argv[2]) as valid: | |
38 | - pkgs_list = [line.strip() for line in valid] | |
39 | - logging.info("Packages list length: %d" % len(pkgs_list)) | |
40 | - except: | |
41 | - logging.critical("File %s does not seem to be a package \ | |
42 | - list" % sys.argv[2]) | |
43 | - raise Error | |
44 | - pkgs_index = data.SampleAptXapianIndex(pkgs_list,xapian.Database(cfg.axi), | |
45 | - sys.argv[1]) | |
46 | - try: | |
47 | - logging.info("Sample package indexing started at %s" % begin_time) | |
48 | - except: | |
49 | - logging.critical("Could not create the index at %s" % sys.argv[1]) | |
50 | - raise Error | |
36 | + logging.info("Sample package indexing started at %s" % begin_time) | |
37 | + with open(os.path.join(cfg.filters,cfg.pkgs_filter)) as valid: | |
38 | + pkgs_list = [line.strip() for line in valid] | |
39 | + logging.info("Packages list length: %d" % len(pkgs_list)) | |
40 | + | |
41 | + # use config file or command line options | |
42 | + pkgindex = data.SampleAptXapianIndex(pkgs_list,xapian.Database(cfg.axi), | |
43 | + cfg.axi+"-"+cfg.pkgs_filter) | |
44 | + end_time = datetime.datetime.now() | |
45 | + logging.info("Sample package indexing completed at %s" % end_time) | |
46 | + logging.info("Number of documents (packages): %d" % | |
47 | + pkgindex.get_doccount()) | |
51 | 48 | |
52 | - end_time = datetime.datetime.now() | |
53 | - print("Sample package indexing completed at %s" % end_time) | |
54 | - print("Number of documents: %d" % pkgs_index.get_doccount()) | |
55 | - delta = end_time - begin_time | |
56 | - logging.info("Time elapsed: %d seconds." % delta.seconds) | |
57 | - else: | |
58 | - logging.critical("Usage: pkgindex.py INDEX_PATH PKGS_LIST") | |
49 | + delta = end_time - begin_time | |
50 | + logging.info("Time elapsed: %d seconds." % delta.seconds) | ... | ... |
src/bin/popindex.py
... | ... | @@ -23,29 +23,25 @@ import sys |
23 | 23 | sys.path.insert(0,'../') |
24 | 24 | import logging |
25 | 25 | import datetime |
26 | -from datetime import timedelta | |
27 | 26 | |
28 | -from config import * | |
29 | -from data import * | |
30 | -from dissimilarity import * | |
31 | -from error import Error | |
27 | +from config import Config | |
28 | +from data import PopconXapianIndex | |
32 | 29 | |
33 | 30 | if __name__ == '__main__': |
34 | - try: | |
35 | - cfg = Config() | |
36 | - begin_time = datetime.datetime.now() | |
37 | - logging.info("Popcon indexing started at %s" % begin_time) | |
38 | - | |
39 | - pxi = PopconXapianIndex(cfg) | |
40 | - | |
41 | - end_time = datetime.datetime.now() | |
42 | - logging.info("Popcon indexing completed at %s" % end_time) | |
43 | - delta = end_time - begin_time | |
44 | - logging.info("Time elapsed: %d seconds." % delta.seconds) | |
45 | - if cfg.index_mode=="cluster" or cfg.index_mode=="recluster": | |
46 | - logging.info("Medoids: %d\tDispersion:%f" % | |
47 | - (cfg.k_medoids,pxi.cluster_dispersion)) | |
48 | - | |
49 | - except Error: | |
50 | - logging.critical("Aborting proccess. Use '--debug' for more details.") | |
51 | - | |
31 | + cfg = Config() | |
32 | + begin_time = datetime.datetime.now() | |
33 | + logging.info("Popcon indexing started at %s" % begin_time) | |
34 | + | |
35 | + # use config file or command line options | |
36 | + popindex = PopconXapianIndex(cfg) | |
37 | + | |
38 | + end_time = datetime.datetime.now() | |
39 | + logging.info("Popcon indexing completed at %s" % end_time) | |
40 | + logging.info("Number of documents (submissions): %d" % | |
41 | + popindex.get_doccount()) | |
42 | + | |
43 | + delta = end_time - begin_time | |
44 | + logging.info("Time elapsed: %d seconds." % delta.seconds) | |
45 | + if cfg.index_mode=="cluster" or cfg.index_mode=="recluster": | |
46 | + logging.info("Medoids: %d\tDispersion:%f" % | |
47 | + (cfg.k_medoids,popindex.cluster_dispersion)) | ... | ... |