Commit cb2464a226ce9992f1b6a902b5acb3d612ac6f7f

Authored by Tássia Camões Araújo
1 parent 2188f43d
Exists in master and in 1 other branch: add_vagrant

Code cleaning and refactoring.

src/bin/apprec.py
@@ -23,38 +23,27 @@ import os @@ -23,38 +23,27 @@ import os
23 import sys 23 import sys
24 sys.path.insert(0,'../') 24 sys.path.insert(0,'../')
25 import logging 25 import logging
26 -import datetime  
27 import random 26 import random
28 -from datetime import timedelta 27 +import datetime
29 28
30 -from config import *  
31 -from data import *  
32 -from evaluation import *  
33 -from dissimilarity import *  
34 -from recommender import *  
35 -from strategy import *  
36 -from user import *  
37 -from error import Error 29 +from config import Config
  30 +from recommender import Recommender
  31 +from user import LocalSystem, RandomPopcon
38 32
39 if __name__ == '__main__': 33 if __name__ == '__main__':
40 - try:  
41 - begin_time = datetime.datetime.now()  
42 - logging.debug("Computation started at %s" % begin_time)  
43 - cfg = Config()  
44 - rec = Recommender(cfg)  
45 - user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop"))  
46 - #user = LocalSystem()  
47 - user.filter_pkg_profile(os.path.join(cfg.filters,"desktop"))  
48 - user.maximal_pkg_profile()  
49 -  
50 - logging.info("Recommending applications for user %s" % user.user_id)  
51 - logging.info(rec.get_recommendation(user,20))  
52 -  
53 - end_time = datetime.datetime.now()  
54 - logging.debug("Computation completed at %s" % end_time)  
55 - delta = end_time - begin_time  
56 - logging.info("Time elapsed: %d seconds." % delta.seconds)  
57 -  
58 - except Error:  
59 - logging.critical("Aborting proccess. Use '--debug' for more details.")  
60 - 34 + begin_time = datetime.datetime.now()
  35 + cfg = Config()
  36 + rec = Recommender(cfg)
  37 + logging.info("Computation started at %s" % begin_time)
  38 + user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop"))
  39 + #user = LocalSystem()
  40 + user.filter_pkg_profile(os.path.join(cfg.filters,"desktop"))
  41 + user.maximal_pkg_profile()
  42 +
  43 + logging.info("Recommending applications for user %s" % user.user_id)
  44 + logging.info(rec.get_recommendation(user,20))
  45 +
  46 + end_time = datetime.datetime.now()
  47 + logging.info("Computation completed at %s" % end_time)
  48 + delta = end_time - begin_time
  49 + logging.info("Time elapsed: %d seconds." % delta.seconds)
src/bin/cross_validation.py
@@ -25,43 +25,34 @@ import sys @@ -25,43 +25,34 @@ import sys
25 sys.path.insert(0,'../') 25 sys.path.insert(0,'../')
26 import logging 26 import logging
27 import datetime 27 import datetime
28 -from datetime import timedelta  
29 28
30 -from config import *  
31 -from data import *  
32 -from evaluation import *  
33 -from dissimilarity import *  
34 -from recommender import *  
35 -from strategy import *  
36 -from user import *  
37 -from error import Error 29 +from config import Config
  30 +from evaluation import CrossValidation, Precision, Recall, F1, Accuracy, SimpleAccuracy
  31 +from recommender import Recommender
  32 +from user import RandomPopcon
38 33
39 if __name__ == '__main__': 34 if __name__ == '__main__':
40 - try:  
41 - cfg = Config()  
42 - rec = Recommender(cfg)  
43 - user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop"))  
44 - user.filter_pkg_profile(os.path.join(cfg.filters,"desktop"))  
45 - user.maximal_pkg_profile()  
46 - begin_time = datetime.datetime.now()  
47 -  
48 - metrics = []  
49 - metrics.append(Precision())  
50 - metrics.append(Recall())  
51 - metrics.append(F1())  
52 - metrics.append(Accuracy())  
53 - metrics.append(SimpleAccuracy())  
54 - validation = CrossValidation(0.9,10,rec,metrics,0.1)  
55 - validation.run(user)  
56 - print validation  
57 -  
58 - end_time = datetime.datetime.now()  
59 - delta = end_time - begin_time  
60 - logging.info("Cross-validation for user %s" % user.user_id)  
61 - logging.info("Recommender strategy: %s" % rec.strategy.description)  
62 - logging.debug("Cross-validation started at %s" % begin_time)  
63 - logging.debug("Cross-validation completed at %s" % end_time)  
64 - logging.info("Time elapsed: %d seconds." % delta.seconds)  
65 -  
66 - except Error:  
67 - logging.critical("Aborting proccess. Use '--debug' for more details.") 35 + cfg = Config()
  36 + rec = Recommender(cfg)
  37 + user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters,"desktop"))
  38 + user.filter_pkg_profile(os.path.join(cfg.filters,"desktop"))
  39 + user.maximal_pkg_profile()
  40 + begin_time = datetime.datetime.now()
  41 +
  42 + metrics = []
  43 + metrics.append(Precision())
  44 + metrics.append(Recall())
  45 + metrics.append(F1())
  46 + metrics.append(Accuracy())
  47 + metrics.append(SimpleAccuracy())
  48 + validation = CrossValidation(0.9,10,rec,metrics,0.01)
  49 + validation.run(user)
  50 + print validation
  51 +
  52 + end_time = datetime.datetime.now()
  53 + delta = end_time - begin_time
  54 + logging.info("Cross-validation for user %s" % user.user_id)
  55 + logging.info("Recommender strategy: %s" % rec.strategy.description)
  56 + logging.debug("Cross-validation started at %s" % begin_time)
  57 + logging.debug("Cross-validation completed at %s" % end_time)
  58 + logging.info("Time elapsed: %d seconds." % delta.seconds)
src/bin/pkgindex.py
@@ -18,11 +18,12 @@ __license__ = """ @@ -18,11 +18,12 @@ __license__ = """
18 You should have received a copy of the GNU General Public License 18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. 19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 """ 20 """
  21 +
  22 +import os
21 import sys 23 import sys
22 sys.path.insert(0,'../') 24 sys.path.insert(0,'../')
23 import logging 25 import logging
24 import datetime 26 import datetime
25 -from datetime import timedelta  
26 27
27 from config import Config 28 from config import Config
28 from error import Error 29 from error import Error
@@ -32,27 +33,18 @@ import xapian @@ -32,27 +33,18 @@ import xapian
32 if __name__ == '__main__': 33 if __name__ == '__main__':
33 cfg = Config() 34 cfg = Config()
34 begin_time = datetime.datetime.now() 35 begin_time = datetime.datetime.now()
35 - if len(sys.argv) >= 3:  
36 - try:  
37 - with open(sys.argv[2]) as valid:  
38 - pkgs_list = [line.strip() for line in valid]  
39 - logging.info("Packages list length: %d" % len(pkgs_list))  
40 - except:  
41 - logging.critical("File %s does not seem to be a package \  
42 - list" % sys.argv[2])  
43 - raise Error  
44 - pkgs_index = data.SampleAptXapianIndex(pkgs_list,xapian.Database(cfg.axi),  
45 - sys.argv[1])  
46 - try:  
47 - logging.info("Sample package indexing started at %s" % begin_time)  
48 - except:  
49 - logging.critical("Could not create the index at %s" % sys.argv[1])  
50 - raise Error 36 + logging.info("Sample package indexing started at %s" % begin_time)
  37 + with open(os.path.join(cfg.filters,cfg.pkgs_filter)) as valid:
  38 + pkgs_list = [line.strip() for line in valid]
  39 + logging.info("Packages list length: %d" % len(pkgs_list))
  40 +
  41 + # use config file or command line options
  42 + pkgindex = data.SampleAptXapianIndex(pkgs_list,xapian.Database(cfg.axi),
  43 + cfg.axi+"-"+cfg.pkgs_filter)
  44 + end_time = datetime.datetime.now()
  45 + logging.info("Sample package indexing completed at %s" % end_time)
  46 + logging.info("Number of documents (packages): %d" %
  47 + pkgindex.get_doccount())
51 48
52 - end_time = datetime.datetime.now()  
53 - print("Sample package indexing completed at %s" % end_time)  
54 - print("Number of documents: %d" % pkgs_index.get_doccount())  
55 - delta = end_time - begin_time  
56 - logging.info("Time elapsed: %d seconds." % delta.seconds)  
57 - else:  
58 - logging.critical("Usage: pkgindex.py INDEX_PATH PKGS_LIST") 49 + delta = end_time - begin_time
  50 + logging.info("Time elapsed: %d seconds." % delta.seconds)
src/bin/popindex.py
@@ -23,29 +23,25 @@ import sys @@ -23,29 +23,25 @@ import sys
23 sys.path.insert(0,'../') 23 sys.path.insert(0,'../')
24 import logging 24 import logging
25 import datetime 25 import datetime
26 -from datetime import timedelta  
27 26
28 -from config import *  
29 -from data import *  
30 -from dissimilarity import *  
31 -from error import Error 27 +from config import Config
  28 +from data import PopconXapianIndex
32 29
33 if __name__ == '__main__': 30 if __name__ == '__main__':
34 - try:  
35 - cfg = Config()  
36 - begin_time = datetime.datetime.now()  
37 - logging.info("Popcon indexing started at %s" % begin_time)  
38 -  
39 - pxi = PopconXapianIndex(cfg)  
40 -  
41 - end_time = datetime.datetime.now()  
42 - logging.info("Popcon indexing completed at %s" % end_time)  
43 - delta = end_time - begin_time  
44 - logging.info("Time elapsed: %d seconds." % delta.seconds)  
45 - if cfg.index_mode=="cluster" or cfg.index_mode=="recluster":  
46 - logging.info("Medoids: %d\tDispersion:%f" %  
47 - (cfg.k_medoids,pxi.cluster_dispersion))  
48 -  
49 - except Error:  
50 - logging.critical("Aborting proccess. Use '--debug' for more details.")  
51 - 31 + cfg = Config()
  32 + begin_time = datetime.datetime.now()
  33 + logging.info("Popcon indexing started at %s" % begin_time)
  34 +
  35 + # use config file or command line options
  36 + popindex = PopconXapianIndex(cfg)
  37 +
  38 + end_time = datetime.datetime.now()
  39 + logging.info("Popcon indexing completed at %s" % end_time)
  40 + logging.info("Number of documents (submissions): %d" %
  41 + popindex.get_doccount())
  42 +
  43 + delta = end_time - begin_time
  44 + logging.info("Time elapsed: %d seconds." % delta.seconds)
  45 + if cfg.index_mode=="cluster" or cfg.index_mode=="recluster":
  46 + logging.info("Medoids: %d\tDispersion:%f" %
  47 + (cfg.k_medoids,popindex.cluster_dispersion))