Commit cb2464a226ce9992f1b6a902b5acb3d612ac6f7f
1 parent: 2188f43d
Exists in master and in 1 other branch
Code cleaning and refactoring.
Showing 4 changed files with 83 additions and 115 deletions
Show diff stats
src/bin/apprec.py
| @@ -23,38 +23,27 @@ import os | @@ -23,38 +23,27 @@ import os | ||
| 23 | import sys | 23 | import sys |
| 24 | sys.path.insert(0,'../') | 24 | sys.path.insert(0,'../') |
| 25 | import logging | 25 | import logging |
| 26 | -import datetime | ||
| 27 | import random | 26 | import random |
| 28 | -from datetime import timedelta | 27 | +import datetime |
| 29 | 28 | ||
| 30 | -from config import * | ||
| 31 | -from data import * | ||
| 32 | -from evaluation import * | ||
| 33 | -from dissimilarity import * | ||
| 34 | -from recommender import * | ||
| 35 | -from strategy import * | ||
| 36 | -from user import * | ||
| 37 | -from error import Error | 29 | +from config import Config |
| 30 | +from recommender import Recommender | ||
| 31 | +from user import LocalSystem, RandomPopcon | ||
| 38 | 32 | ||
| 39 | if __name__ == '__main__': | 33 | if __name__ == '__main__': |
| 40 | - try: | ||
| 41 | - begin_time = datetime.datetime.now() | ||
| 42 | - logging.debug("Computation started at %s" % begin_time) | ||
| 43 | - cfg = Config() | ||
| 44 | - rec = Recommender(cfg) | ||
| 45 | - user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop")) | ||
| 46 | - #user = LocalSystem() | ||
| 47 | - user.filter_pkg_profile(os.path.join(cfg.filters,"desktop")) | ||
| 48 | - user.maximal_pkg_profile() | ||
| 49 | - | ||
| 50 | - logging.info("Recommending applications for user %s" % user.user_id) | ||
| 51 | - logging.info(rec.get_recommendation(user,20)) | ||
| 52 | - | ||
| 53 | - end_time = datetime.datetime.now() | ||
| 54 | - logging.debug("Computation completed at %s" % end_time) | ||
| 55 | - delta = end_time - begin_time | ||
| 56 | - logging.info("Time elapsed: %d seconds." % delta.seconds) | ||
| 57 | - | ||
| 58 | - except Error: | ||
| 59 | - logging.critical("Aborting proccess. Use '--debug' for more details.") | ||
| 60 | - | 34 | + begin_time = datetime.datetime.now() |
| 35 | + cfg = Config() | ||
| 36 | + rec = Recommender(cfg) | ||
| 37 | + logging.info("Computation started at %s" % begin_time) | ||
| 38 | + user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop")) | ||
| 39 | + #user = LocalSystem() | ||
| 40 | + user.filter_pkg_profile(os.path.join(cfg.filters,"desktop")) | ||
| 41 | + user.maximal_pkg_profile() | ||
| 42 | + | ||
| 43 | + logging.info("Recommending applications for user %s" % user.user_id) | ||
| 44 | + logging.info(rec.get_recommendation(user,20)) | ||
| 45 | + | ||
| 46 | + end_time = datetime.datetime.now() | ||
| 47 | + logging.info("Computation completed at %s" % end_time) | ||
| 48 | + delta = end_time - begin_time | ||
| 49 | + logging.info("Time elapsed: %d seconds." % delta.seconds) |
src/bin/cross_validation.py
| @@ -25,43 +25,34 @@ import sys | @@ -25,43 +25,34 @@ import sys | ||
| 25 | sys.path.insert(0,'../') | 25 | sys.path.insert(0,'../') |
| 26 | import logging | 26 | import logging |
| 27 | import datetime | 27 | import datetime |
| 28 | -from datetime import timedelta | ||
| 29 | 28 | ||
| 30 | -from config import * | ||
| 31 | -from data import * | ||
| 32 | -from evaluation import * | ||
| 33 | -from dissimilarity import * | ||
| 34 | -from recommender import * | ||
| 35 | -from strategy import * | ||
| 36 | -from user import * | ||
| 37 | -from error import Error | 29 | +from config import Config |
| 30 | +from evaluation import CrossValidation, Precision, Recall, F1, Accuracy, SimpleAccuracy | ||
| 31 | +from recommender import Recommender | ||
| 32 | +from user import RandomPopcon | ||
| 38 | 33 | ||
| 39 | if __name__ == '__main__': | 34 | if __name__ == '__main__': |
| 40 | - try: | ||
| 41 | - cfg = Config() | ||
| 42 | - rec = Recommender(cfg) | ||
| 43 | - user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop")) | ||
| 44 | - user.filter_pkg_profile(os.path.join(cfg.filters,"desktop")) | ||
| 45 | - user.maximal_pkg_profile() | ||
| 46 | - begin_time = datetime.datetime.now() | ||
| 47 | - | ||
| 48 | - metrics = [] | ||
| 49 | - metrics.append(Precision()) | ||
| 50 | - metrics.append(Recall()) | ||
| 51 | - metrics.append(F1()) | ||
| 52 | - metrics.append(Accuracy()) | ||
| 53 | - metrics.append(SimpleAccuracy()) | ||
| 54 | - validation = CrossValidation(0.9,10,rec,metrics,0.1) | ||
| 55 | - validation.run(user) | ||
| 56 | - print validation | ||
| 57 | - | ||
| 58 | - end_time = datetime.datetime.now() | ||
| 59 | - delta = end_time - begin_time | ||
| 60 | - logging.info("Cross-validation for user %s" % user.user_id) | ||
| 61 | - logging.info("Recommender strategy: %s" % rec.strategy.description) | ||
| 62 | - logging.debug("Cross-validation started at %s" % begin_time) | ||
| 63 | - logging.debug("Cross-validation completed at %s" % end_time) | ||
| 64 | - logging.info("Time elapsed: %d seconds." % delta.seconds) | ||
| 65 | - | ||
| 66 | - except Error: | ||
| 67 | - logging.critical("Aborting proccess. Use '--debug' for more details.") | 35 | + cfg = Config() |
| 36 | + rec = Recommender(cfg) | ||
| 37 | + user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop")) | ||
| 38 | + user.filter_pkg_profile(os.path.join(cfg.filters,"desktop")) | ||
| 39 | + user.maximal_pkg_profile() | ||
| 40 | + begin_time = datetime.datetime.now() | ||
| 41 | + | ||
| 42 | + metrics = [] | ||
| 43 | + metrics.append(Precision()) | ||
| 44 | + metrics.append(Recall()) | ||
| 45 | + metrics.append(F1()) | ||
| 46 | + metrics.append(Accuracy()) | ||
| 47 | + metrics.append(SimpleAccuracy()) | ||
| 48 | + validation = CrossValidation(0.9,10,rec,metrics,0.01) | ||
| 49 | + validation.run(user) | ||
| 50 | + print validation | ||
| 51 | + | ||
| 52 | + end_time = datetime.datetime.now() | ||
| 53 | + delta = end_time - begin_time | ||
| 54 | + logging.info("Cross-validation for user %s" % user.user_id) | ||
| 55 | + logging.info("Recommender strategy: %s" % rec.strategy.description) | ||
| 56 | + logging.debug("Cross-validation started at %s" % begin_time) | ||
| 57 | + logging.debug("Cross-validation completed at %s" % end_time) | ||
| 58 | + logging.info("Time elapsed: %d seconds." % delta.seconds) |
src/bin/pkgindex.py
| @@ -18,11 +18,12 @@ __license__ = """ | @@ -18,11 +18,12 @@ __license__ = """ | ||
| 18 | You should have received a copy of the GNU General Public License | 18 | You should have received a copy of the GNU General Public License |
| 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. | 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 20 | """ | 20 | """ |
| 21 | + | ||
| 22 | +import os | ||
| 21 | import sys | 23 | import sys |
| 22 | sys.path.insert(0,'../') | 24 | sys.path.insert(0,'../') |
| 23 | import logging | 25 | import logging |
| 24 | import datetime | 26 | import datetime |
| 25 | -from datetime import timedelta | ||
| 26 | 27 | ||
| 27 | from config import Config | 28 | from config import Config |
| 28 | from error import Error | 29 | from error import Error |
| @@ -32,27 +33,18 @@ import xapian | @@ -32,27 +33,18 @@ import xapian | ||
| 32 | if __name__ == '__main__': | 33 | if __name__ == '__main__': |
| 33 | cfg = Config() | 34 | cfg = Config() |
| 34 | begin_time = datetime.datetime.now() | 35 | begin_time = datetime.datetime.now() |
| 35 | - if len(sys.argv) >= 3: | ||
| 36 | - try: | ||
| 37 | - with open(sys.argv[2]) as valid: | ||
| 38 | - pkgs_list = [line.strip() for line in valid] | ||
| 39 | - logging.info("Packages list length: %d" % len(pkgs_list)) | ||
| 40 | - except: | ||
| 41 | - logging.critical("File %s does not seem to be a package \ | ||
| 42 | - list" % sys.argv[2]) | ||
| 43 | - raise Error | ||
| 44 | - pkgs_index = data.SampleAptXapianIndex(pkgs_list,xapian.Database(cfg.axi), | ||
| 45 | - sys.argv[1]) | ||
| 46 | - try: | ||
| 47 | - logging.info("Sample package indexing started at %s" % begin_time) | ||
| 48 | - except: | ||
| 49 | - logging.critical("Could not create the index at %s" % sys.argv[1]) | ||
| 50 | - raise Error | 36 | + logging.info("Sample package indexing started at %s" % begin_time) |
| 37 | + with open(os.path.join(cfg.filters,cfg.pkgs_filter)) as valid: | ||
| 38 | + pkgs_list = [line.strip() for line in valid] | ||
| 39 | + logging.info("Packages list length: %d" % len(pkgs_list)) | ||
| 40 | + | ||
| 41 | + # use config file or command line options | ||
| 42 | + pkgindex = data.SampleAptXapianIndex(pkgs_list,xapian.Database(cfg.axi), | ||
| 43 | + cfg.axi+"-"+cfg.pkgs_filter) | ||
| 44 | + end_time = datetime.datetime.now() | ||
| 45 | + logging.info("Sample package indexing completed at %s" % end_time) | ||
| 46 | + logging.info("Number of documents (packages): %d" % | ||
| 47 | + pkgindex.get_doccount()) | ||
| 51 | 48 | ||
| 52 | - end_time = datetime.datetime.now() | ||
| 53 | - print("Sample package indexing completed at %s" % end_time) | ||
| 54 | - print("Number of documents: %d" % pkgs_index.get_doccount()) | ||
| 55 | - delta = end_time - begin_time | ||
| 56 | - logging.info("Time elapsed: %d seconds." % delta.seconds) | ||
| 57 | - else: | ||
| 58 | - logging.critical("Usage: pkgindex.py INDEX_PATH PKGS_LIST") | 49 | + delta = end_time - begin_time |
| 50 | + logging.info("Time elapsed: %d seconds." % delta.seconds) |
src/bin/popindex.py
| @@ -23,29 +23,25 @@ import sys | @@ -23,29 +23,25 @@ import sys | ||
| 23 | sys.path.insert(0,'../') | 23 | sys.path.insert(0,'../') |
| 24 | import logging | 24 | import logging |
| 25 | import datetime | 25 | import datetime |
| 26 | -from datetime import timedelta | ||
| 27 | 26 | ||
| 28 | -from config import * | ||
| 29 | -from data import * | ||
| 30 | -from dissimilarity import * | ||
| 31 | -from error import Error | 27 | +from config import Config |
| 28 | +from data import PopconXapianIndex | ||
| 32 | 29 | ||
| 33 | if __name__ == '__main__': | 30 | if __name__ == '__main__': |
| 34 | - try: | ||
| 35 | - cfg = Config() | ||
| 36 | - begin_time = datetime.datetime.now() | ||
| 37 | - logging.info("Popcon indexing started at %s" % begin_time) | ||
| 38 | - | ||
| 39 | - pxi = PopconXapianIndex(cfg) | ||
| 40 | - | ||
| 41 | - end_time = datetime.datetime.now() | ||
| 42 | - logging.info("Popcon indexing completed at %s" % end_time) | ||
| 43 | - delta = end_time - begin_time | ||
| 44 | - logging.info("Time elapsed: %d seconds." % delta.seconds) | ||
| 45 | - if cfg.index_mode=="cluster" or cfg.index_mode=="recluster": | ||
| 46 | - logging.info("Medoids: %d\tDispersion:%f" % | ||
| 47 | - (cfg.k_medoids,pxi.cluster_dispersion)) | ||
| 48 | - | ||
| 49 | - except Error: | ||
| 50 | - logging.critical("Aborting proccess. Use '--debug' for more details.") | ||
| 51 | - | 31 | + cfg = Config() |
| 32 | + begin_time = datetime.datetime.now() | ||
| 33 | + logging.info("Popcon indexing started at %s" % begin_time) | ||
| 34 | + | ||
| 35 | + # use config file or command line options | ||
| 36 | + popindex = PopconXapianIndex(cfg) | ||
| 37 | + | ||
| 38 | + end_time = datetime.datetime.now() | ||
| 39 | + logging.info("Popcon indexing completed at %s" % end_time) | ||
| 40 | + logging.info("Number of documents (submissions): %d" % | ||
| 41 | + popindex.get_doccount()) | ||
| 42 | + | ||
| 43 | + delta = end_time - begin_time | ||
| 44 | + logging.info("Time elapsed: %d seconds." % delta.seconds) | ||
| 45 | + if cfg.index_mode=="cluster" or cfg.index_mode=="recluster": | ||
| 46 | + logging.info("Medoids: %d\tDispersion:%f" % | ||
| 47 | + (cfg.k_medoids,popindex.cluster_dispersion)) |