Documentation improved (using python docstring).

Tássia Camões Araújo
1 parent 7c99a2c6
Showing 12 changed files with 247 additions and 59 deletions Show diff stats
src/app_recommender.py
src/config.py
src/cross_validation.py
src/data.py
src/demo_rec.py
src/error.py
src/evaluation.py
src/generate_doc.sh
src/recommender.py
src/similarity_measure.py
src/strategy.py
src/user.py
 #!/usr/bin/python
-#  AppRecommender - A GNU/Linux application recommender
+#  AppRecommender - a GNU/Linux application recommender.
 #
 #  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
 #
 #!/usr/bin/python
-#  AppRecommender - A GNU/Linux application recommender
+#  config - python module for configuration options.
 #
 #  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
 #
@@ -144,6 +144,9 @@ class Config():
                 assert False, "unhandled option"
     def set_logger(self):
+        """
+        Configure application logger and log level.
+        """
         self.logger = getLogger('')  # root logger is used by default
         self.logger.setLevel(DEBUG)
 #!/usr/bin/python
-#  AppRecommender - A GNU/Linux application recommender
+#  CrossValidation - python module for classes and methods related to
+#                    recommenders evaluation.
 #
 #  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
 #
@@ -47,6 +48,7 @@ if __name__ == '__main__':
         metrics.append(Recall())
         validation = CrossValidation(0.3,10,rec,metrics)
         validation.run(user)
+        print validation
         end_time = datetime.datetime.now()
         logging.debug("Cross-validation completed at %s" % end_time)
 #!/usr/bin/python
-#  AppRecommender - A GNU/Linux application recommender
+#  data - python module for data sources classes and methods.
 #
 #  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
 #
@@ -29,32 +29,50 @@ import hashlib
 from error import Error
 class Item:
-    """  """
+    """
+    Generic item definition.
+    """
 class Package(Item):
-    """  """
+    """
+    Definition of a GNU/Linux application as a recommender item.
+    """
     def __init__(self,package_name):
-        """  """
+        """
+        Set initial attributes.
+        """
         self.package_name  = package_name
-    def load_package_info(self):
-        """  """
-        print "debian pkg",self.id
-
 def normalize_tags(string):
     """
-    Normalize tag string so that it can be indexed and retrieved.
+    Substitute string characters : by _ and - by '.
+    Examples:
+        admin::package-management   ->   admin__package'management
+        implemented-in::c++         ->   implemented'in__c++
     """
     return string.replace(':','_').replace('-','\'')
 class Singleton(object):
+    """
+    Base class for inheritance of only-one-instance classes.
+    Singleton design pattern.
+    """
     def __new__(cls, *args, **kwargs):
+        """
+        Creates a new instance of the class only if none already exists.
+        """
         if '_inst' not in vars(cls):
             cls._inst = object.__new__(cls)
         return cls._inst
 class TagsXapianIndex(xapian.WritableDatabase,Singleton):
+    """
+    Data source for tags info defined as a singleton xapian database.
+    """
     def __init__(self,cfg):
+        """
+        Set initial attributes.
+        """
         self.path = os.path.expanduser(cfg.tags_index)
         self.db_path = os.path.expanduser(cfg.tags_db)
         self.debtags_db = debtags.DB()
@@ -67,6 +85,9 @@ class TagsXapianIndex(xapian.WritableDatabase,Singleton):
         self.load_index(cfg.reindex)
     def load_db(self):
+        """
+        Load debtags database from the source file.
+        """
         tag_filter = re.compile(r"^special::.+$|^.+::TODO$")
         try:
             db_file = open(self.db_path, "r")
 #!/usr/bin/python
-#  AppRecommender - A GNU/Linux application recommender
+#  DemoRecommender - demonstration of a GNU/Linux application recommender.
 #
 #  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
 #
+#!/usr/bin/python
+
+#  error.py - python module for error definition.
+#
+#  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
+#
+#  This program is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 3 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 class Error(Exception):
-    """Base class for exceptions."""
+    """
+    Base class for exceptions.
+    """
     pass
 #!/usr/bin/python
-#  AppRecommender - A GNU/Linux application recommender
+#  evaluation - python module for classes and methods related to recommenders
+#               evaluation.
 #
 #  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
 #
@@ -25,30 +26,57 @@ from user import *
 from recommender import *
 class Metric:
-    """  """
+    """
+    Base class for metrics. Strategy design pattern.
+    """
+    pass
 class Precision(Metric):
-    """  """
+    """
+    Accuracy evaluation metric defined as the percentage of relevant items
+    among the predicted ones.
+    """
     def __init__(self):
+        """
+        Set metric description.
+        """
         self.desc = " Precision "
     def run(self,evaluation):
+        """
+        Compute metric.
+        """
         return float(len(evaluation.predicted_real))/len(evaluation.predicted_relevant)
 class Recall(Metric):
-    """  """
+    """
+    Accuracy evaluation metric defined as the percentage of relevant items
+    which were predicted as so.
+    """
     def __init__(self):
+        """
+        Set metric description.
+        """
         self.desc = "   Recall  "
     def run(self,evaluation):
+        """
+        Compute metric.
+        """
         return float(len(evaluation.predicted_real))/len(evaluation.real_relevant)
 class F1(Metric):
     """  """
     def __init__(self):
+        """
+        Set metric description.
+        """
         self.desc = "     F1    "
     def run(self,evaluation):
+        """
+        Compute metric.
+        """
         p = Precision().run(evaluation)
         r = Recall().run(evaluation)
         return float((2*p*r)/(p+r))
@@ -56,80 +84,110 @@ class F1(Metric):
 class MAE(Metric):
     """  """
     def __init__(self):
+        """
+        Set metric description.
+        """
         self.desc = "    MAE    "
     def run(self,evaluation):
-        print "run"
+        """
+        Compute metric.
+        """
+        print "---" #FIXME
 class MSE(Metric):
     """  """
     def __init__(self):
+        """
+        Set metric description.
+        """
         self.desc = "    MSE    "
     def run(self,evaluation):
-        print "run"
+        """
+        Compute metric.
+        """
+        print "---" #FIXME
 class Coverage(Metric):
     """  """
     def __init__(self):
+        """
+        Set metric description.
+        """
         self.desc = "  Coverage "
     def run(self,evaluation):
-        print "run"
+        """
+        Compute metric.
+        """
+        print "---" #FIXME
 class Evaluation:
-    """  """
+    """
+    Class designed to perform prediction evaluation, given data and metric.
+    """
     def __init__(self,predicted_result,real_result):
-        """  """
+        """
+        Set initial parameters.
+        """
         self.predicted_item_scores = predicted_result.item_score
         self.predicted_relevant = predicted_result.get_prediction()
         self.real_item_scores = real_result.item_score
         self.real_relevant = real_result.get_prediction()
         self.predicted_real = [v for v in self.predicted_relevant if v in
                                self.real_relevant]
-        print len(self.predicted_relevant)
-        print len(self.real_relevant)
-        print len(self.predicted_real)
+        #print len(self.predicted_relevant)
+        #print len(self.real_relevant)
+        #print len(self.predicted_real)
     def run(self,metric):
+        """
+        Perform the evaluation with the given metric.
+        """
         return metric.run(self)
 class CrossValidation:
     """
-    Cross-validation method
+    Class designed to perform cross-validation process.
     """
     def __init__(self,partition_proportion,rounds,rec,metrics_list):
         """
-        Set defaults: partition_size, rounds, recommender and metrics_list
+        Set initial parameters.
         """
         if partition_proportion<1 and partition_proportion>0:
             self.partition_proportion = partition_proportion
         else:
-            logging.critical("A proporcao de particao deve ser um avalor ente 0 e 1.")
+            logging.critical("Partition proportion must be a value in the
+                              interval [0,1].")
             raise Error
         self.rounds = rounds
         self.recommender = rec
         self.metrics_list = metrics_list
         self.cross_results = defaultdict(list)
-    def print_result(self):
-        print ""
+    def __str__(self):
+        """
+        String representation of the object.
+        """
+        str = "\n"
         metrics_desc = ""
         for metric in self.metrics_list:
             metrics_desc += "%s|" % (metric.desc)
-        print "| Round |%s" % metrics_desc
+        str += "| Round |%s\n" % metrics_desc
         for r in range(self.rounds):
             metrics_result = ""
             for metric in self.metrics_list:
                 metrics_result += ("    %.2f   |" %
                                    (self.cross_results[metric.desc][r]))
-            print "|   %d   |%s" % (r,metrics_result)
+            str += "|   %d   |%s\n" % (r,metrics_result)
         metrics_mean = ""
         for metric in self.metrics_list:
             mean = float(sum(self.cross_results[metric.desc]) /
                          len(self.cross_results[metric.desc]))
             metrics_mean += "    %.2f   |" % (mean)
-        print "|  Mean |%s" % (metrics_mean)
+        str += "|  Mean |%s\n" % (metrics_mean)
+        return str
     def run(self,user):
         """
@@ -144,7 +202,7 @@ class CrossValidation:
                 if len(cross_item_score)>0:
                     random_key = random.choice(cross_item_score.keys())
                 else:
-                    logging.critical("cross_item_score vazio")
+                    logging.critical("Empty cross_item_score.")
                     raise Error
                 round_partition[random_key] = cross_item_score.pop(random_key)
             round_user = User(cross_item_score)
@@ -157,5 +215,4 @@ class CrossValidation:
             while len(round_partition)>0:
                 item,score = round_partition.popitem()
                 cross_item_score[item] = score
-        self.print_result()
 #!/bin/bash
+#
+#  generate_doc.sh - shell script to generate documentation using doxygen.
+#
+#  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
+#
+#  This program is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 3 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# Get project version from git repository
 TAG=$(git describe --tags --abbrev=0)
 sed -i "s/^PROJECT_NUMBER.*$/PROJECT_NUMBER\t\t= $TAG/" ../doc/doxy_config
 rm -Rf ../doc/html
 #!/usr/bin/python
-#  AppRecommender - A GNU/Linux application recommender
+#  recommender - python module for classes related to recommenders.
 #
 #  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
 #
@@ -23,11 +23,20 @@ from strategy import *
 from error import Error
 class RecommendationResult:
+    """
+    Class designed to describe a recommendation result: items and scores.
+    """
     def __init__(self,item_score,size):
+        """
+        Set initial parameters.
+        """
         self.item_score = item_score
         self.size = size
     def __str__(self):
+        """
+        String representation of the object.
+        """
         result = self.get_prediction()
         str = "\n"
         for i in range(len(result)):
@@ -35,12 +44,20 @@ class RecommendationResult:
         return str
     def get_prediction(self):
+        """
+        Return prediction based on recommendation size (number of items).
+        """
         sorted_result = sorted(self.item_score.items(), key=itemgetter(1))
         return sorted_result[:self.size]
 class Recommender:
-    """  """
+    """
+    Class designed to play the role of recommender.
+    """
     def __init__(self,cfg):
+        """
+        Set initial parameters.
+        """
         try:
             strategy = "self."+cfg.strategy+"(cfg)"
             exec(strategy)
@@ -50,17 +67,28 @@ class Recommender:
             raise Error
     def ct(self,cfg):
+        """
+        Perform content-based recommendation using tags index as source data.
+        """
         self.items_repository = TagsXapianIndex(cfg)
         self.strategy = ContentBasedStrategy()
     def cta(self,cfg):
+        """
+        Perform content-based recommendation using apt-xapian-index as source
+        data.
+        """
         self.items_repository = xapian.Database(cfg.axi)
         self.strategy = AxiContentBasedStrategy()
     def set_strategy(self,strategy):
-        """  """
+        """
+        Set the recommendation strategy.
+        """
         self.strategy = strategy
     def get_recommendation(self,user):
-        """  """
+        """
+        Produces recommendation using previously loaded strategy.
+        """
         return self.strategy.run(self,user)
 #!/usr/bin/python
-#  AppRecommender - A GNU/Linux application recommender
+#  similarity-measure - python module for classes and methods related to
+#                       measuring similarity between two sets of data.
 #
 #  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
 #
 #!/usr/bin/python
-#  AppRecommender - A GNU/Linux application recommender
+#  strategy - python module for classes and methods related to recommendation
+#             strategies.
 #
 #  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
 #
@@ -26,40 +27,51 @@ class ReputationHeuristic:
     """
     Abstraction for different reputation heuristics.
     """
+    pass
 class BugsHeuristic(ReputationHeuristic):
     """
     Reputation heuristic based on quantity of open bugs.
     """
+    pass
 class RCBugsHeuristic(ReputationHeuristic):
     """
     Reputation heuristic based on quantity of RC bugs.
     """
+    pass
 class PopularityHeuristic(ReputationHeuristic):
     """
     Reputation heuristic based on popularity of packages.
     """
+    pass
 class PkgMatchDecider(xapian.MatchDecider):
     """
-    Extends xapian.MatchDecider to disconsider installed packages.
+    Extend xapian.MatchDecider to not consider installed packages.
     """
     def __init__(self, installed_pkgs):
+        """
+        Set initial parameters.
+        """
         xapian.MatchDecider.__init__(self)
         self.installed_pkgs = installed_pkgs
     def __call__(self, doc):
+        """
+        True if the package is not already installed.
+        """
         return doc.get_data() not in self.installed_pkgs
 class RecommendationStrategy:
     """
-    Abstraction for diferent recommendation strategy.
+    Base class for recommendation strategies.
     """
+    pass
 class ItemReputationStrategy(RecommendationStrategy):
     """
 #!/usr/bin/python
-#  AppRecommender - A GNU/Linux application recommender
+#  user - python module for classes and methods related to recommenders' users.
 #
 #  Copyright (C) 2010  Tassia Camoes <tassia@gmail.com>
 #
@@ -23,6 +23,9 @@ import logging
 import apt
 class FilterTag(xapian.ExpandDecider):
+    """
+    Extend xapian.ExpandDecider to consider only tag terms.
+    """
     def __call__(self, term):
         """
         Return true if the term is a tag, else false.
@@ -30,29 +33,28 @@ class FilterTag(xapian.ExpandDecider):
         return term[:2] == "XT"
 class User:
-    """  """
+    """
+    Define a user of a recommender.
+    """
     def __init__(self,item_score,user_id=0,demographic_profile=0):
-        """  """
+        """
+        Set initial parameters.
+        """
         self.id = user_id
         self.item_score = item_score
         self.pkg_profile = self.item_score.keys()
         self.demographic_profile = demographic_profile
     def items(self):
+        """
+        Return dictionary relating items and respective scores.
+        """
         return self.item_score.keys()
-    def maximal_pkg_profile(self):
-        cache = apt.Cache()
-        old_profile_size = len(self.pkg_profile)
-        for p in self.pkg_profile[:]:     #iterate list copy
-            pkg = cache[p]
-            if pkg.is_auto_installed:
-                self.pkg_profile.remove(p)
-        profile_size = len(self.pkg_profile)
-        logging.info("Reduced packages profile size from %d to %d." %
-                     (old_profile_size, profile_size))
-
     def axi_tag_profile(self,apt_xapian_index,profile_size):
+        """
+        Return most relevant tags for a list of packages based on axi.
+        """
         terms = []
         for item in self.pkg_profile:
             terms.append("XP"+item)
@@ -70,15 +72,38 @@ class User:
         return profile
     def txi_tag_profile(self,tags_xapian_index,profile_size):
+        """
+        Return most relevant tags for a list of packages based on tags index.
+        """
         return tags_xapian_index.relevant_tags_from_db(self.pkg_profile,
                                                        profile_size)
 class LocalSystem(User):
-    """  """
+    """
+    Extend the class User to consider the packages installed on the local
+    system as the set of selected items.
+    """
     def __init__(self):
+        """
+        Set initial parameters.
+        """
         item_score = {}
         dpkg_output = commands.getoutput('/usr/bin/dpkg --get-selections')
         for line in dpkg_output.splitlines():
             pkg = line.split('\t')[0]
             item_score[pkg] = 1
         User.__init__(self,item_score)
+
+    def maximal_pkg_profile(self):
+        """
+        Return list of packages voluntarily installed.
+        """
+        cache = apt.Cache()
+        old_profile_size = len(self.pkg_profile)
+        for p in self.pkg_profile[:]:     #iterate list copy
+            pkg = cache[p]
+            if pkg.is_auto_installed:
+                self.pkg_profile.remove(p)
+        profile_size = len(self.pkg_profile)
+        logging.info("Reduced packages profile size from %d to %d." %
+                     (old_profile_size, profile_size))
1	#!/usr/bin/python	1	#!/usr/bin/python
2		2
3	-# AppRecommender - A GNU/Linux application recommender	3	+# AppRecommender - a GNU/Linux application recommender.
4	#	4	#
5	# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>	5	# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
6	#	6	#
		1	+#!/usr/bin/python
		2	+
		3	+# error.py - python module for error definition.
		4	+#
		5	+# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
		6	+#
		7	+# This program is free software: you can redistribute it and/or modify
		8	+# it under the terms of the GNU General Public License as published by
		9	+# the Free Software Foundation, either version 3 of the License, or
		10	+# (at your option) any later version.
		11	+#
		12	+# This program is distributed in the hope that it will be useful,
		13	+# but WITHOUT ANY WARRANTY; without even the implied warranty of
		14	+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		15	+# GNU General Public License for more details.
		16	+#
		17	+# You should have received a copy of the GNU General Public License
		18	+# along with this program. If not, see <http://www.gnu.org/licenses/>.
		19	+
1	class Error(Exception):	20	class Error(Exception):
2	- """Base class for exceptions."""	21	+ """
		22	+ Base class for exceptions.
		23	+ """
3	pass	24	pass
1	#!/bin/bash	1	#!/bin/bash
		2	+#
		3	+# generate_doc.sh - shell script to generate documentation using doxygen.
		4	+#
		5	+# Copyright (C) 2010 Tassia Camoes <tassia@gmail.com>
		6	+#
		7	+# This program is free software: you can redistribute it and/or modify
		8	+# it under the terms of the GNU General Public License as published by
		9	+# the Free Software Foundation, either version 3 of the License, or
		10	+# (at your option) any later version.
		11	+#
		12	+# This program is distributed in the hope that it will be useful,
		13	+# but WITHOUT ANY WARRANTY; without even the implied warranty of
		14	+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		15	+# GNU General Public License for more details.
		16	+#
		17	+# You should have received a copy of the GNU General Public License
		18	+# along with this program. If not, see <http://www.gnu.org/licenses/>.
2		19
		20	+# Get project version from git repository
3	TAG=$(git describe --tags --abbrev=0)	21	TAG=$(git describe --tags --abbrev=0)
4	sed -i "s/^PROJECT_NUMBER.*$/PROJECT_NUMBER\t\t= $TAG/" ../doc/doxy_config	22	sed -i "s/^PROJECT_NUMBER.*$/PROJECT_NUMBER\t\t= $TAG/" ../doc/doxy_config
5	rm -Rf ../doc/html	23	rm -Rf ../doc/html