Merge branch 'master' of github.com:tassia/AppRecommender

Tiago Bortoletto Vaz
2 parents 2f49eb5c c4327ec0
Showing 15 changed files with 516 additions and 55 deletions Show diff stats
src/data.py
src/evaluation.py
src/examples/cross_validation.py
src/experiments/README
src/experiments/experiments.cfg
src/experiments/runner.py
src/recommender.py
src/strategy.py
src/tests/data_tests.py
src/tests/evaluation_tests.py
src/web/templates/about.html
src/web/templates/apprec.html
src/web/templates/index.html
src/web/templates/layout.html
src/web/templates/survey_index.html
@@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list):
     return matches
  
 def axi_search_pkg_tags(axi,pkg):
-    query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg)
     enquire = xapian.Enquire(axi)
-    enquire.set_query(query)
+    enquire.set_query(xapian.Query("XP"+pkg))
     matches = enquire.get_mset(0,1)
+    if not matches:
+        logging.debug("Package %s not found in items repository" % pkg)
+        return []
     for m in matches:
         tags = [term.term for term in axi.get_document(m.docid).termlist() if
                 term.term.startswith("XT")]
-    return tags
+        return tags
  
 def print_index(index):
     output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n"
@@ -59,6 +61,32 @@ def print_index(index):
         output += "\n---"
     return output
  
+class AppAptXapianIndex(xapian.WritableDatabase):
+    """
+    Data source for applications information, built as a subset of an
+    existing apt-xapian-index.
+
+    Only the documents that carry the "XTrole::program" term are copied.
+    """
+    def __init__(self,axi_path,path):
+        """
+        Create (or overwrite) the index at 'path' and fill it with every
+        document of the index at 'axi_path' that has the "XTrole::program"
+        term.
+        """
+        xapian.WritableDatabase.__init__(self,path,
+                                         xapian.DB_CREATE_OR_OVERWRITE)
+        axi = xapian.Database(axi_path)
+        logging.info("AptXapianIndex size: %d" % axi.get_doccount())
+        for docid in range(1,axi.get_lastdocid()+1):
+            try:
+                doc = axi.get_document(docid)
+            except xapian.DocNotFoundError:
+                # a missing docid is skipped, not treated as fatal
+                logging.info("Doc %d not found in axi." % docid)
+                continue
+            allterms = [term.term for term in doc.termlist()]
+            if "XTrole::program" in allterms:
+                self.add_document(doc)
+                logging.info("Added doc %d." % docid)
+            else:
+                logging.info("Discarded doc %d." % docid)
+        # both values must reach the % operator as a single tuple
+        logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." %
+                     (self.get_doccount(), self.get_lastdocid()))
+
+    def __str__(self):
+        """
+        Return the textual dump produced by print_index().
+        """
+        return print_index(self)
+
 class SampleAptXapianIndex(xapian.WritableDatabase):
     """
     Sample data source for packages information, mainly useful for tests.
@@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase):
         """
         self.axi = xapian.Database(cfg.axi)
         self.path = os.path.expanduser(cfg.popcon_index)
+        self.source_dir = os.path.expanduser(cfg.popcon_dir)
         if not cfg.index_mode == "old" or not self.load_index():
             if not os.path.exists(cfg.popcon_dir):
                 os.makedirs(cfg.popcon_dir)
@@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase):
                               submission.user_id)
                 for pkg, freq in submission.packages.items():
                     doc.add_term("XP"+pkg,freq)
-                    for tag in axi_search_pkg_tags(self.axi,pkg):
-                        doc.add_term(tag,freq)
+                    if axi_search_pkg_tags(self.axi,pkg):
+                        for tag in axi_search_pkg_tags(self.axi,pkg):
+                            doc.add_term(tag,freq)
                 doc_id = self.add_document(doc)
                 logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
             # python garbage collector
@@ -140,7 +140,7 @@ class F1(Metric):
         p = Precision().run(evaluation)
         r = Recall().run(evaluation)
         if (p+r)>0:
-            return float((2*p*r))/(p+r)
+            return float(2*((p*r)/(p+r)))
         else:
             return 0
  
@@ -289,7 +289,7 @@ class CrossValidation:
             result_size = int(self.recommender.items_repository.get_doccount()*
                               self.result_proportion)
             predicted_result = self.recommender.get_recommendation(round_user,result_size)
-            print len(round_partition)
+            #print len(round_partition)
             real_result = RecommendationResult(round_partition)
             #logging.debug("Predicted result: %s",predicted_result)
             evaluation = Evaluation(predicted_result,real_result,
@@ -40,16 +40,20 @@ if __name__ == &#39;__main__&#39;:
     try:
         cfg = Config()
         rec = Recommender(cfg)
+        print "\nRecommender strategy: ",rec.strategy.description
         user = LocalSystem()
-        user.maximal_pkg_profile()
-
+        #user.app_pkg_profile(rec.items_repository)
+        user.no_auto_pkg_profile()
         begin_time = datetime.datetime.now()
         logging.debug("Cross-validation started at %s" % begin_time)
  
         metrics = []
         metrics.append(Precision())
         metrics.append(Recall())
-        validation = CrossValidation(0.3,10,rec,metrics)
+        metrics.append(F1())
+        metrics.append(Accuracy())
+        metrics.append(SimpleAccuracy())
+        validation = CrossValidation(0.3,10,rec,metrics,0.005)
         validation.run(user)
         print validation
  
@@ -0,0 +1,2 @@
+Experiments handled by expsuite:
+https://github.com/rueckstiess/expsuite
@@ -0,0 +1,26 @@
+[DEFAULT]
+repetitions = 1
+iterations = 10
+path = 'results'
+experiment = 'grid'
+weight = ['bm25', 'trad']
+;profile_size = range(10,100,10)
+sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+
+[content]
+strategy = ['cb','cbt','cbd']
+
+[clustering]
+experiment = 'single'
+;iterations = 4
+;medoids = range(2,6)
+iterations = 6
+medoids = [100,500,1000,5000,10000,50000]
+;disabled for this experiment
+weight = 0
+profile_size = 0
+sample = 0
+
+[colaborative]
+users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
+neighbors = range(10,1010,50)
@@ -0,0 +1,173 @@
+#!/usr/bin/env python
+"""
+    recommender suite - recommender experiments suite 
+"""
+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
+__license__ = """
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import expsuite
+import sys
+sys.path.insert(0,'../')
+from config import Config
+from data import PopconXapianIndex, PopconSubmission
+from recommender import Recommender
+from user import LocalSystem, User
+from evaluation import *
+import logging
+import random
+import Gnuplot
+
+class ClusteringSuite(expsuite.PyExperimentSuite):
+    """
+    Experiments suite that builds a PopconXapianIndex per iteration, varying
+    the number of medoids, and records the resulting cluster dispersion.
+    """
+    def reset(self, params, rep):
+        """
+        Create a fresh Config pointing at the test data directories; for the
+        "clustering" experiment, also set the index mode to "recluster".
+        """
+        self.cfg = Config()
+        self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
+        self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
+        self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
+
+        if params['name'] == "clustering":
+            logging.info("Starting 'clustering' experiments suite...")
+            self.cfg.index_mode = "recluster"
+
+    def iterate(self, params, rep, n):
+        """
+        Run iteration n: build the index with params['medoids'][n] medoids
+        and return a dict with 'k_medoids' and 'dispersion'. Any experiment
+        other than "clustering" yields an empty dict.
+        """
+        if params['name'] == "clustering":
+            logging.info("Running iteration %d" % params['medoids'][n])
+            self.cfg.k_medoids = params['medoids'][n]
+            pxi = PopconXapianIndex(self.cfg)
+            result = {'k_medoids': params['medoids'][n],
+                   'dispersion': pxi.cluster_dispersion}
+        else:
+            result = {}
+        return result
+
+class ContentBasedSuite(expsuite.PyExperimentSuite):
+    """
+    Experiments suite for the content-based recommendation strategies.
+    """
+    def reset(self, params, rep):
+        """
+        Set up the recommender, the user profile and the sample size for
+        experiments whose name starts with "content".
+        """
+        if params['name'].startswith("content"):
+            cfg = Config()
+            #if the index was not built yet
+            #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
+            cfg.axi = "data/AppAxi"
+            cfg.index_mode = "old"
+            cfg.weight = params['weight']
+            self.rec = Recommender(cfg)
+            self.rec.set_strategy(params['strategy'])
+            self.repo_size = self.rec.items_repository.get_doccount()
+            self.user = LocalSystem()
+            self.user.app_pkg_profile(self.rec.items_repository)
+            self.user.no_auto_pkg_profile()
+            self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
+            # iteration should be set to 10 in config file
+            #self.profile_size = range(10,101,10)
+
+    def iterate(self, params, rep, n):
+        """
+        Hold out a sample of the user profile, recommend from the remaining
+        items, write the ranks of the held-out packages to a log file and
+        plot accuracy, precision, recall and F1 by recommendation size.
+
+        Return a dict with the metrics summary, or an empty dict if the
+        experiment name does not start with "content".
+        """
+        if not params['name'].startswith("content"):
+            return {}
+        # Get full recommendation
+        item_score = dict.fromkeys(self.user.pkg_profile,1)
+        sample = {}
+        for _ in range(self.sample_size):
+            item, score = item_score.popitem()
+            sample[item] = score
+        user = User(item_score)
+        recommendation = self.rec.get_recommendation(user,self.repo_size)
+        # RecommendationResult only sets 'ranking' when it is not empty
+        ranking = getattr(recommendation,'ranking',[])
+        # first position of each package, computed once instead of calling
+        # ranking.index() for every sampled package
+        position = {}
+        for rank, pkg in enumerate(ranking):
+            position.setdefault(pkg,rank)
+        ranks = sorted(position[pkg] for pkg in sample if pkg in position)
+        notfound = [pkg for pkg in sample if pkg not in position]
+        # Write recall log
+        recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
+                      (params['strategy'],params['weight'],params['sample'],n)
+        with open(recall_file,'w') as output:
+            output.write("# weight=%s\n" % params['weight'])
+            output.write("# strategy=%s\n" % params['strategy'])
+            output.write("# sample=%f\n" % params['sample'])
+            output.write("\n%d %d %d\n" % \
+                         (self.repo_size,len(item_score),self.sample_size))
+            for r in ranks:
+                output.write(str(r)+"\n")
+            if notfound:
+                output.write("Out of recommendation:\n")
+                for pkg in notfound:
+                    output.write(pkg+"\n")
+        # Compute metrics for growing recommendation sizes
+        accuracy = []
+        precision = []
+        recall = []
+        f1 = []
+        real = RecommendationResult(sample)
+        for size in range(1,len(ranking)+1,100):
+            predicted = RecommendationResult(dict.fromkeys(ranking[:size],1))
+            evaluation = Evaluation(predicted,real,self.repo_size)
+            accuracy.append([size,evaluation.run(Accuracy())])
+            precision.append([size,evaluation.run(Precision())])
+            recall.append([size,evaluation.run(Recall())])
+            f1.append([size,evaluation.run(F1())])
+        result = {'weight': params['weight'],
+                  'strategy': params['strategy']}
+        if accuracy:
+            # Plot metrics summary
+            g = Gnuplot.Gnuplot()
+            g('set style data lines')
+            g.xlabel('Recommendation size')
+            g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
+                   Gnuplot.Data(precision,title="Precision"),
+                   Gnuplot.Data(recall,title="Recall"),
+                   Gnuplot.Data(f1,title="F1"))
+            g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
+            # summary point: the 21st measurement (size 2001), or the last
+            # one when the ranking is shorter than that
+            point = min(20,len(accuracy)-1)
+            result['accuracy'] = accuracy[point]
+            result['precision'] = precision[point]
+            result['recall'] = recall[point]
+            result['f1'] = f1[point]
+        return result
+
+#class CollaborativeSuite(expsuite.PyExperimentSuite):
+#    def reset(self, params, rep):
+#        if params['name'].startswith("collaborative"):
+#
+#    def iterate(self, params, rep, n):
+#        if params['name'].startswith("collaborative"):
+#            for root, dirs, files in os.walk(self.source_dir):
+#                for popcon_file in files:
+#                    submission = PopconSubmission(os.path.join(root,popcon_file))
+#                    user = User(submission.packages)
+#                    user.maximal_pkg_profile()
+#                    rec.get_recommendation(user)
+#                    precision = 0
+#                    result = {'weight': params['weight'],
+#                              'strategy': params['strategy'],
+#                              'profile_size': self.profile_size[n],
+#                              'accuracy': accuracy,
+#                              'precision': precision,
+#                              'recall:': recall,
+#                              'f1': }
+#        else:
+#            result = {}
+#        return result
+
+if __name__ == '__main__':
+
+    # Each suite starts when its name is on the command line, or when fewer
+    # than three arguments are given.
+    # NOTE(review): "len(sys.argv)<3" is also true when a single suite name
+    # is given (argv length 2), so both suites start in that case -- confirm
+    # whether "<2" was intended.
+    if "clustering" in sys.argv or len(sys.argv)<3:
+        ClusteringSuite().start()
+    if "content" in sys.argv or len(sys.argv)<3:
+        ContentBasedSuite().start()
+    #if "collaborative" in sys.argv or len(sys.argv)<3:
+    #CollaborativeSuite().start()
@@ -28,12 +28,14 @@ class RecommendationResult:
     """
     Class designed to describe a recommendation result: items and scores.
     """
-    def __init__(self,item_score):
+    def __init__(self,item_score,ranking=0):
         """
         Set initial parameters.
         """
         self.item_score = item_score
         self.size = len(item_score)
+        if ranking:
+            self.ranking = ranking
  
     def __str__(self):
         """
@@ -64,13 +66,13 @@ class Recommender:
         """
         Set initial parameters.
         """
+        self.cfg = cfg
         self.items_repository = xapian.Database(cfg.axi)
         self.set_strategy(cfg.strategy)
         if cfg.weight == "bm25":
             self.weight = xapian.BM25Weight()
         else:
             self.weight = xapian.TradWeight()
-        self.cfg = cfg
  
     def set_strategy(self,strategy_str):
         """
@@ -83,10 +85,10 @@ class Recommender:
         if strategy_str == "cbd":
             self.strategy = strategy.ContentBasedStrategy("desc")
         if strategy_str == "col":
-            self.strategy = strategy.CollaborativeStrategy(20)
             self.users_repository = data.PopconXapianIndex(self.cfg)
+            self.strategy = strategy.CollaborativeStrategy(20)
  
-    def get_recommendation(self,user,result_size=20):
+    def get_recommendation(self,user,result_size=100):
         """
         Produces recommendation using previously loaded strategy.
         """
@@ -42,6 +42,26 @@ class PkgMatchDecider(xapian.MatchDecider):
         """
         return doc.get_data() not in self.pkgs_list
  
+class AppMatchDecider(xapian.MatchDecider):
+    """
+    Extend xapian.MatchDecider to consider only application packages that
+    are not in a given list.
+    """
+    def __init__(self, pkgs_list, axi):
+        """
+        Set initial parameters.
+
+        pkgs_list -- packages to be rejected (see __call__)
+        axi -- index passed to axi_search_pkg_tags() to fetch package tags
+        """
+        xapian.MatchDecider.__init__(self)
+        self.pkgs_list = pkgs_list
+        self.axi = axi
+
+    def __call__(self, doc):
+        """
+        True if the document data (used as the package name) has the
+        "XTrole::program" tag and is not in pkgs_list.
+        """
+        tags = axi_search_pkg_tags(self.axi,doc.get_data())
+        return (("XTrole::program" in tags) and
+                (doc.get_data() not in self.pkgs_list))
+
 class UserMatchDecider(xapian.MatchDecider):
     """
     Extend xapian.MatchDecider to match similar profiles.
@@ -73,7 +93,32 @@ class PkgExpandDecider(xapian.ExpandDecider):
         True if the term is a package.
         """
         # [FIXME] return term.startswith("XP")
-        return not term.startswith("XT")
+        #return not term.startswith("XT")
+        return term.startswith("XP")
+
+class AppExpandDecider(xapian.ExpandDecider):
+    """
+    Extend xapian.ExpandDecider to consider applications only.
+    """
+    def __init__(self,axi):
+        """
+        Keep the index used to look up package tags.
+        """
+        xapian.ExpandDecider.__init__(self)
+        self.axi = axi
+
+    def __call__(self, term):
+        """
+        True if the term is a package that has the "XTrole::program" tag.
+        """
+        # only "XP"-prefixed terms name packages
+        if not term.startswith("XP"):
+            return False
+        # slice the prefix off: lstrip("XP") would remove any leading run
+        # of 'X'/'P' characters, not just the literal prefix
+        package = term[2:]
+        tags = axi_search_pkg_tags(self.axi,package)
+        return "XTrole::program" in tags
  
 class TagExpandDecider(xapian.ExpandDecider):
     """
@@ -100,7 +145,7 @@ class ContentBasedStrategy(RecommendationStrategy):
         self.content = content
         self.profile_size = profile_size
  
-    def run(self,rec,user,limit):
+    def run(self,rec,user,recommendation_size):
         """
         Perform recommendation strategy.
         """
@@ -113,35 +158,40 @@ class ContentBasedStrategy(RecommendationStrategy):
         enquire.set_query(query)
         try:
             # retrieve matching packages
-            mset = enquire.get_mset(0, limit, None, PkgMatchDecider(user.items()))
+            mset = enquire.get_mset(0, recommendation_size, None,
+                                    PkgMatchDecider(user.items()))
+                                    #AppMatchDecider(user.items(),
+                                    #                rec.items_repository))
         except xapian.DatabaseError as error:
             logging.critical("Content-based strategy: "+error.get_msg())
         # compose result dictionary
         item_score = {}
+        ranking = []
         for m in mset:
+            #[FIXME] set this constraint somehow
+            #tags = axi_search_pkg_tags(rec.items_repository,m.document.get_data())
+            #if "XTrole::program" in tags:
             item_score[m.document.get_data()] = m.weight
-        return recommender.RecommendationResult(item_score)
+            ranking.append(m.document.get_data())
+
+        return recommender.RecommendationResult(item_score,ranking)
  
 class CollaborativeStrategy(RecommendationStrategy):
     """
     Colaborative recommendation strategy.
     """
-    def __init__(self,k,clustering=1):
+    def __init__(self,k):
         self.description = "Collaborative"
-        self.clustering = clustering
         self.neighbours = k
  
-    def run(self,rec,user,result_size):
+    def run(self,rec,user,recommendation_size):
         """
         Perform recommendation strategy.
         """
-        profile = user.pkg_profile
+        profile = ["XP"+package for package in user.pkg_profile]
         # prepair index for querying user profile
         query = xapian.Query(xapian.Query.OP_OR,profile)
-        if self.clustering:
-            enquire = xapian.Enquire(rec.clustered_users_repository)
-        else:
-            enquire = xapian.Enquire(rec.users_repository)
+        enquire = xapian.Enquire(rec.users_repository)
         enquire.set_weighting_scheme(rec.weight)
         enquire.set_query(query)
         try:
@@ -155,27 +205,39 @@ class CollaborativeStrategy(RecommendationStrategy):
             rset.add_document(m.document.get_docid())
             logging.debug(m.document.get_data())
         # retrieve most relevant packages
-        eset = enquire.get_eset(result_size,rset,PkgExpandDecider())
+        #eset = enquire.get_eset(recommendation_size,rset,
+        #                        AppExpandDecider(rec.items_repository))
+        eset = enquire.get_eset(recommendation_size,rset,PkgExpandDecider())
         # compose result dictionary
         item_score = {}
-        for package in eset:
-            item_score[package.term.lstrip("XP")] = package.weight
+        for e in eset:
+            package = e.term.lstrip("XP")
+            tags = axi_search_pkg_tags(rec.items_repository,package)
+            #[FIXME] set this constraint somehow
+            #if "XTrole::program" in tags:
+            item_score[package] = e.weight
         return recommender.RecommendationResult(item_score)
  
 class DemographicStrategy(RecommendationStrategy):
     """
     Recommendation strategy based on demographic data.
     """
+    #def __init__(self, result):
+        #self.result = result
     def __init__(self):
         self.description = "Demographic"
         logging.debug("Demographic recommendation not yet implemented.")
         raise Error
  
-    def run(self,user,items_repository):
+    def run(self,rec,user,recommendation_size):
         """
         Perform recommendation strategy.
         """
-        pass
+        ordered_result = self.result.get_prediction()
+
+        for item,weight in ordered_result:
+            pass
+
  
 class KnowledgeBasedStrategy(RecommendationStrategy):
     """
@@ -22,14 +22,29 @@ __license__ = &quot;&quot;&quot;
 import unittest2
 import shutil
 import os
+import xapian
 import sys
 sys.path.insert(0,'../')
-from data import PopconSubmission, PopconXapianIndex
+from data import PopconSubmission, PopconXapianIndex, axi_search_pkg_tags
 from config import Config
  
 def suite():
     return unittest2.TestLoader().loadTestsFromTestCase(PopconSubmissionTests)
  
+class AxiSearchTests(unittest2.TestCase):
+    """
+    Tests for the axi search helpers, run against the index at cfg.axi.
+    """
+    @classmethod
+    def setUpClass(self):
+        """
+        Open the index configured in Config().axi once for the whole class.
+        """
+        cfg = Config()
+        self.axi = xapian.Database(cfg.axi)
+
+    def test_search_pkg_tags(self):
+        """
+        The tags returned for 'apticron' must match the expected set,
+        regardless of order.
+        """
+        tags = axi_search_pkg_tags(self.axi,'apticron')
+        self.assertEqual(set(tags),set(['XTadmin::package-management',
+                                        'XTinterface::daemon',
+                                        'XTnetwork::server', 'XTrole::program',
+                                        'XTsuite::debian', 'XTuse::monitor',
+                                        'XTworks-with::mail']))
+
 class PopconSubmissionTests(unittest2.TestCase):
     @classmethod
     def setUpClass(self):
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+"""
+    evaluationTests - Evaluation metrics test case
+"""
+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
+__license__ = """
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import xapian
+import unittest2
+import sys
+sys.path.insert(0,'../')
+from evaluation import (Accuracy, Precision, Recall, F1, Coverage,
+                        Evaluation, CrossValidation)
+from recommender import RecommendationResult
+from config import Config
+from recommender import Recommender
+from user import User
+from data import SampleAptXapianIndex
+
+class MetricsTests(unittest2.TestCase):
+    """
+    Tests for the evaluation metrics and for the cross-validation run.
+    """
+    @classmethod
+    def setUpClass(self):
+        """
+        Build one Evaluation shared by the metric tests: a repository of
+        6 items, 4 real items and 3 predicted items, 2 of which are real.
+        """
+        repository = ['apple','grape','pineaple','melon','watermelon','orange']
+        real = RecommendationResult(dict.fromkeys(['apple','grape','pineaple','melon'],1))
+        predicted = RecommendationResult(dict.fromkeys(['apple','grape','orange'],1))
+        self.evaluation = Evaluation(predicted,real,len(repository))
+
+    def test_class_accuracy(self):
+        """Accuracy of the shared evaluation must be 0.5."""
+        accuracy = Accuracy().run(self.evaluation)
+        self.assertEqual(accuracy,0.5)
+
+    def test_precision(self):
+        """Precision must be 0.67 when rounded to two decimal places."""
+        precision = Precision().run(self.evaluation)
+        self.assertEqual("%.2f" % precision,"0.67")
+
+    def test_recall(self):
+        """Recall of the shared evaluation must be 0.5."""
+        recall = Recall().run(self.evaluation)
+        self.assertEqual(recall,0.5)
+
+    def test_f1(self):
+        """F1 must be 0.57 when rounded to two decimal places."""
+        f1 = F1().run(self.evaluation)
+        self.assertEqual("%.2f" % f1,"0.57")
+
+    def test_coverage(self):
+        """Coverage over a set holding only the shared evaluation is 0.5."""
+        evaluations_set = set()
+        evaluations_set.add(self.evaluation)
+        coverage = Coverage().run(evaluations_set)
+        self.assertEqual(coverage,0.5)
+
+    def test_evaluation(self):
+        """Check the true positive, false positive and false negative lists."""
+        self.assertEqual(self.evaluation.true_positive, ['apple','grape'])
+        self.assertEqual(self.evaluation.false_positive, ['orange'])
+        self.assertEqual(self.evaluation.false_negative, ['pineaple','melon'])
+
+    def test_cross_validation(self):
+        """
+        Run a cross-validation over a sample index and print the outcome.
+        There is no assertion here: the test only fails if the run raises.
+        """
+        cfg = Config()
+        axi = xapian.Database(cfg.axi)
+        packages = ["gimp","aaphoto","eog","emacs","dia","ferret",
+                    "festival","file","inkscape","xpdf"]
+        path = "test_data/.sample_axi"
+        sample_axi = SampleAptXapianIndex(packages,axi,path)
+        rec = Recommender(cfg)
+        # replace the configured repository with the small sample index
+        rec.items_repository = sample_axi
+        user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1})
+
+        metrics = []
+        metrics.append(Precision())
+        metrics.append(Recall())
+        metrics.append(F1())
+
+        validation = CrossValidation(0.3,5,rec,metrics,0.5)
+        validation.run(user)
+        print validation
+
+if __name__ == '__main__':
+        unittest2.main()
@@ -3,23 +3,23 @@ $var mod = &#39;about&#39;;
 $var cssfiles:
 $var jsfiles:
  
-
 <div id="maincontent">
 <div class="innertube">
  
 <img style="float: right;" alt="AppRecommender logo" src="/static/images/logo.png" width="150px" />
  
-<h1>What is this?</h1>
-
-<p>
-AppRecommender is a project in development that aims to provide solutions
-for application recommendation at the GNU/Linux world. It was initially thought
-as a Debian package recommender, but considering the multi-distro effort in
-providing platform independent solutions, it should also follow this
-principle.
-</p>
+<h1>About</h1>
  
+<p>This experiment aims to compare and validate automated application
+recommendations produced by various strategies and algorithm tunings. Asking
+real users about the relevance of the recommendations is the closest we can get
+to the real accuracy of the recommender system.</p>
  
+<p>The engine being tested is a piece of free software called <a
+href="http://github.com/tassia/AppRecommender">AppRecommender</a>. It was
+initially developed using the Debian Project infrastructure, but the solution
+is essentially distro-independent and could even be adapted to non-GNU/Linux
+systems, provided that data is available for them.</p>
  
 </div><!-- id="innertube" -->
 </div><!-- id="maincontent" -->
@@ -41,7 +41,6 @@ $$(document).ready(function() {
 });
 </script>
  
-
 <div id="sidebar">
 <div class="innertube">
  
@@ -670,18 +670,17 @@ function showtags(tagid) {
 <h1>You might also like...</h1>
  
 <p>Provide a list of packages or upload a popcon submission file and you'll get
-a list of suggested packages automatically computed by AppRecommender. You can
-customize the recommender setup or let it randomly choose one.</p>
+a list of suggested packages automatically computed by AppRecommender.<!-- You can
+customize the recommender setup or let it randomly choose one.--></p> 
  
-<p>Please fill the form that follows the recommendation results. Your
+<p>Given the recommendation result, please evaluate each application and
+choose if you want to continue with another round of suggestions.
+<!--fill the form that follows the recommendation results.--> </p><p>Your
 feedback is very much appreciated!</p>
  
-
-<p>Enjoy it :)</p>
 </div>
  
 </div><!-- class="innertube" -->
 </div><!-- id="maincontent" -->
  
  
-
@@ -59,7 +59,7 @@ if (x==null || x==&quot;Write your list App here or send a file list this icon:&quot;)
       <fieldset>
 	<div id="submit-box">
 	<input type="submit" value="RECOMMENDER" id="submit-button"><br />
-        <a id="advanced-button">advanced query?</a>
+        <!--<a id="advanced-button">advanced query?</a>-->
 	</div>
 	<div id="input-box">
 	  <a href="#attachfile" rel="facebox" id="upfile"><span style="display: none;">Upload a file.</span></a>
@@ -165,16 +165,15 @@ $:content
  <div id="footer">
  
   <div id="navbar">
-     <ul> 
+     <ul>
 	<li><a href="$url_base">Home</a></li>
 	<li><a href="$url_base/about">About</a></li>
-	<li><a href="$url_base/support">Support</a></li>
 	<li><a href="http://github.com/tassia/AppRecommender">Development</a></li>
     </ul>
    </div><!-- id="navbar" -->
-  <p id="copyright">
-   Copyright © 2011 AppRecommender.  Debian is a registered trademark of Software in the Public Interest, Inc.
-  </p>
+<!--  <p id="copyright">
+   Copyright © 2011 AppRecommender team.
+  </p> -->
  </div><!-- id="footer" -->
  
  
@@ -0,0 +1,60 @@
+$var title: Survey
+$var mod = 'index';
+$var cssfiles: static/css/tabs.css static/css/debtags.css static/css/facebox.css 
+$var jsfiles: static/js/facebox.js
+
+
+<!-- Dynamic form -->
+<script type="application/x-javascript">
+window.onload = function() {
+    setupDependencies('weboptions'); //name of form(s). Separate each with a comma (e.g. 'weboptions', 'myotherform' )
+  };
+</script>
+
+<script type="application/x-javascript">
+$$(document).ready(function() {
+ $$('a[rel*=facebox]').facebox({
+ loadingImage : '/static/images/loading.gif',
+ closeImage   : '/static/images/closelabel.png'
+ });
+ $$("#tags-box").click(function () {
+ $$("#tags-box").hide(1000);
+ });
+
+});
+</script>
+
+
+<div id="sidebar">
+<div class="innertube">
+
+
+<br style="clear: both" />
+</div><!-- class="innertube" -->
+</div><!-- id="sidebar" -->
+
+<div id="maincontent">
+<div class="innertube">
+
+<div class="textbox">
+<h1>Help us learn your needs!</h1>
+
+<p>Participate in this survey and contribute to the development of
+AppRecommender, a recommender system for GNU/Linux applications.</p>
+<br />
+<p>Please provide the list of packages installed in a real running system,
+by uploading a popcon submission or the file generated with the command:
+"dpkg -l > packages_list".</p>
+
+<p>Evaluate at least 10 suggested applications and identify yourself if you
+wish to. Upon the completion of this survey there will be a thank you page
+listing all identified participants.</p>
+
+<p>Your help is very much appreciated!</p>
+
+</div>
+
+</div><!-- class="innertube" -->
+</div><!-- id="maincontent" -->
+
+
...	...	@@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list):
41	41	return matches
42	42
43	43	def axi_search_pkg_tags(axi,pkg):
44		- query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg)
45	44	enquire = xapian.Enquire(axi)
46		- enquire.set_query(query)
	45	+ enquire.set_query(xapian.Query("XP"+pkg))
47	46	matches = enquire.get_mset(0,1)
	47	+ if not matches:
	48	+ logging.debug("Package %s not found in items repository" % pkg)
	49	+ return []
48	50	for m in matches:
49	51	tags = [term.term for term in axi.get_document(m.docid).termlist() if
50	52	term.term.startswith("XT")]
51		- return tags
	53	+ return tags
52	54
53	55	def print_index(index):
54	56	output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n"
...	...	@@ -59,6 +61,32 @@ def print_index(index):
59	61	output += "\n---"
60	62	return output
61	63
	64	+class AppAptXapianIndex(xapian.WritableDatabase):
	65	+ """
	66	+ Data source for application packages information: holds only the AptXapianIndex documents tagged "role::program".
	67	+ """
	68	+ def __init__(self,axi_path,path):
	69	+ xapian.WritableDatabase.__init__(self,path,
	70	+ xapian.DB_CREATE_OR_OVERWRITE)
	71	+ axi = xapian.Database(axi_path)
	72	+ logging.info("AptXapianIndex size: %d" % axi.get_doccount())
	73	+ for docid in range(1,axi.get_lastdocid()+1):
	74	+ try:
	75	+ doc = axi.get_document(docid)
	76	+ allterms = [term.term for term in doc.termlist()]
	77	+ if "XTrole::program" in allterms:
	78	+ self.add_document(doc)
	79	+ logging.info("Added doc %d." % docid)
	80	+ else:
	81	+ logging.info("Discarded doc %d." % docid)
	82	+ except:
	83	+ logging.info("Doc %d not found in axi." % docid)
	84	+ logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." %
	85	+ self.get_doccount(), self.get_lastdocid())
	86	+
	87	+ def __str__(self):
	88	+ return print_index(self)
	89	+
62	90	class SampleAptXapianIndex(xapian.WritableDatabase):
63	91	"""
64	92	Sample data source for packages information, mainly useful for tests.
...	...	@@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase):
129	157	"""
130	158	self.axi = xapian.Database(cfg.axi)
131	159	self.path = os.path.expanduser(cfg.popcon_index)
	160	+ self.source_dir = os.path.expanduser(cfg.popcon_dir)
132	161	if not cfg.index_mode == "old" or not self.load_index():
133	162	if not os.path.exists(cfg.popcon_dir):
134	163	os.makedirs(cfg.popcon_dir)
...	...	@@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase):
205	234	submission.user_id)
206	235	for pkg, freq in submission.packages.items():
207	236	doc.add_term("XP"+pkg,freq)
208		- for tag in axi_search_pkg_tags(self.axi,pkg):
209		- doc.add_term(tag,freq)
	237	+ if axi_search_pkg_tags(self.axi,pkg):
	238	+ for tag in axi_search_pkg_tags(self.axi,pkg):
	239	+ doc.add_term(tag,freq)
210	240	doc_id = self.add_document(doc)
211	241	logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
212	242	# python garbage collector
...	...
...	...	@@ -140,7 +140,7 @@ class F1(Metric):
140	140	p = Precision().run(evaluation)
141	141	r = Recall().run(evaluation)
142	142	if (p+r)>0:
143		- return float((2*p*r))/(p+r)
	143	+ return float(2*((p*r)/(p+r)))
144	144	else:
145	145	return 0
146	146
...	...	@@ -289,7 +289,7 @@ class CrossValidation:
289	289	result_size = int(self.recommender.items_repository.get_doccount()*
290	290	self.result_proportion)
291	291	predicted_result = self.recommender.get_recommendation(round_user,result_size)
292		- print len(round_partition)
	292	+ #print len(round_partition)
293	293	real_result = RecommendationResult(round_partition)
294	294	#logging.debug("Predicted result: %s",predicted_result)
295	295	evaluation = Evaluation(predicted_result,real_result,
...	...
...	...	@@ -40,16 +40,20 @@ if __name__ == '__main__':
40	40	try:
41	41	cfg = Config()
42	42	rec = Recommender(cfg)
	43	+ print "\nRecommender strategy: ",rec.strategy.description
43	44	user = LocalSystem()
44		- user.maximal_pkg_profile()
45		-
	45	+ #user.app_pkg_profile(rec.items_repository)
	46	+ user.no_auto_pkg_profile()
46	47	begin_time = datetime.datetime.now()
47	48	logging.debug("Cross-validation started at %s" % begin_time)
48	49
49	50	metrics = []
50	51	metrics.append(Precision())
51	52	metrics.append(Recall())
52		- validation = CrossValidation(0.3,10,rec,metrics)
	53	+ metrics.append(F1())
	54	+ metrics.append(Accuracy())
	55	+ metrics.append(SimpleAccuracy())
	56	+ validation = CrossValidation(0.3,10,rec,metrics,0.005)
53	57	validation.run(user)
54	58	print validation
55	59
...	...
...	...	@@ -0,0 +1,2 @@
	1	+Experiments handled by expsuite:
	2	+https://github.com/rueckstiess/expsuite
...	...
...	...	@@ -0,0 +1,26 @@
	1	+[DEFAULT]
	2	+repetitions = 1
	3	+iterations = 10
	4	+path = 'results'
	5	+experiment = 'grid'
	6	+weight = ['bm25', 'trad']
	7	+;profile_size = range(10,100,10)
	8	+sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
	9	+
	10	+[content]
	11	+strategy = ['cb','cbt','cbd']
	12	+
	13	+[clustering]
	14	+experiment = 'single'
	15	+;iterations = 4
	16	+;medoids = range(2,6)
	17	+iterations = 6
	18	+medoids = [100,500,1000,5000,10000,50000]
	19	+;disabled for this experiment
	20	+weight = 0
	21	+profile_size = 0
	22	+sample = 0
	23	+
	24	+[colaborative]
	25	+users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
	26	+neighbors = range(10,1010,50)
...	...
...	...	@@ -0,0 +1,173 @@
	1	+#!/usr/bin/env python
	2	+"""
	3	+ recommender suite - recommender experiments suite
	4	+"""
	5	+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
	6	+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
	7	+__license__ = """
	8	+ This program is free software: you can redistribute it and/or modify
	9	+ it under the terms of the GNU General Public License as published by
	10	+ the Free Software Foundation, either version 3 of the License, or
	11	+ (at your option) any later version.
	12	+
	13	+ This program is distributed in the hope that it will be useful,
	14	+ but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	+ GNU General Public License for more details.
	17	+
	18	+ You should have received a copy of the GNU General Public License
	19	+ along with this program. If not, see <http://www.gnu.org/licenses/>.
	20	+"""
	21	+
	22	+import expsuite
	23	+import sys
	24	+sys.path.insert(0,'../')
	25	+from config import Config
	26	+from data import PopconXapianIndex, PopconSubmission
	27	+from recommender import Recommender
	28	+from user import LocalSystem, User
	29	+from evaluation import *
	30	+import logging
	31	+import random
	32	+import Gnuplot
	33	+
	34	+class ClusteringSuite(expsuite.PyExperimentSuite):
	35	+ def reset(self, params, rep):
	36	+ self.cfg = Config()
	37	+ self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
	38	+ self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
	39	+ self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
	40	+
	41	+ if params['name'] == "clustering":
	42	+ logging.info("Starting 'clustering' experiments suite...")
	43	+ self.cfg.index_mode = "recluster"
	44	+
	45	+ def iterate(self, params, rep, n):
	46	+ if params['name'] == "clustering":
	47	+ logging.info("Running iteration %d" % params['medoids'][n])
	48	+ self.cfg.k_medoids = params['medoids'][n]
	49	+ pxi = PopconXapianIndex(self.cfg)
	50	+ result = {'k_medoids': params['medoids'][n],
	51	+ 'dispersion': pxi.cluster_dispersion}
	52	+ else:
	53	+ result = {}
	54	+ return result
	55	+
	56	+class ContentBasedSuite(expsuite.PyExperimentSuite):
	57	+ def reset(self, params, rep):
	58	+ if params['name'].startswith("content"):
	59	+ cfg = Config()
	60	+ #if the index was not built yet
	61	+ #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
	62	+ cfg.axi = "data/AppAxi"
	63	+ cfg.index_mode = "old"
	64	+ cfg.weight = params['weight']
	65	+ self.rec = Recommender(cfg)
	66	+ self.rec.set_strategy(params['strategy'])
	67	+ self.repo_size = self.rec.items_repository.get_doccount()
	68	+ self.user = LocalSystem()
	69	+ self.user.app_pkg_profile(self.rec.items_repository)
	70	+ self.user.no_auto_pkg_profile()
	71	+ self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
	72	+ # iteration should be set to 10 in config file
	73	+ #self.profile_size = range(10,101,10)
	74	+
	75	+ def iterate(self, params, rep, n):
	76	+ if params['name'].startswith("content"):
	77	+ # Get full recommendation
	78	+ item_score = dict.fromkeys(self.user.pkg_profile,1)
	79	+ sample = {}
	80	+ for i in range(self.sample_size):
	81	+ item, score = item_score.popitem()
	82	+ sample[item] = score
	83	+ user = User(item_score)
	84	+ recommendation = self.rec.get_recommendation(user,self.repo_size)
	85	+ # Write recall log
	86	+ recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
	87	+ (params['strategy'],params['weight'],params['sample'],n)
	88	+ output = open(recall_file,'w')
	89	+ output.write("# weight=%s\n" % params['weight'])
	90	+ output.write("# strategy=%s\n" % params['strategy'])
	91	+ output.write("# sample=%f\n" % params['sample'])
	92	+ output.write("\n%d %d %d\n" % \
	93	+ (self.repo_size,len(item_score),self.sample_size))
	94	+ notfound = []
	95	+ ranks = []
	96	+ for pkg in sample.keys():
	97	+ if pkg in recommendation.ranking:
	98	+ ranks.append(recommendation.ranking.index(pkg))
	99	+ else:
	100	+ notfound.append(pkg)
	101	+ for r in sorted(ranks):
	102	+ output.write(str(r)+"\n")
	103	+ if notfound:
	104	+ output.write("Out of recommendation:\n")
	105	+ for pkg in notfound:
	106	+ output.write(pkg+"\n")
	107	+ output.close()
	108	+ # Plot metrics summary
	109	+ g = Gnuplot.Gnuplot()
	110	+ g('set style data lines')
	111	+ g.xlabel('Recommendation size')
	112	+ accuracy = []
	113	+ precision = []
	114	+ recall = []
	115	+ f1 = []
	116	+ for size in range(1,len(recommendation.ranking)+1,100):
	117	+ predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
	118	+ real = RecommendationResult(sample)
	119	+ evaluation = Evaluation(predicted,real,self.repo_size)
	120	+ accuracy.append([size,evaluation.run(Accuracy())])
	121	+ precision.append([size,evaluation.run(Precision())])
	122	+ recall.append([size,evaluation.run(Recall())])
	123	+ f1.append([size,evaluation.run(F1())])
	124	+ #print "accuracy", len(accuracy)
	125	+ #print "precision", len(precision)
	126	+ #print "recall", len(recall)
	127	+ #print "f1", len(f1)
	128	+ g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
	129	+ Gnuplot.Data(precision,title="Precision"),
	130	+ Gnuplot.Data(recall,title="Recall"),
	131	+ Gnuplot.Data(f1,title="F1"))
	132	+ g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
	133	+ result = {}
	134	+ result = {'weight': params['weight'],
	135	+ 'strategy': params['strategy'],
	136	+ 'accuracy': accuracy[20],
	137	+ 'precision': precision[20],
	138	+ 'recall:': recall[20],
	139	+ 'f1': f1[20]}
	140	+ return result
	141	+
	142	+#class CollaborativeSuite(expsuite.PyExperimentSuite):
	143	+# def reset(self, params, rep):
	144	+# if params['name'].startswith("collaborative"):
	145	+#
	146	+# def iterate(self, params, rep, n):
	147	+# if params['name'].startswith("collaborative"):
	148	+# for root, dirs, files in os.walk(self.source_dir):
	149	+# for popcon_file in files:
	150	+# submission = PopconSubmission(os.path.join(root,popcon_file))
	151	+# user = User(submission.packages)
	152	+# user.maximal_pkg_profile()
	153	+# rec.get_recommendation(user)
	154	+# precision = 0
	155	+# result = {'weight': params['weight'],
	156	+# 'strategy': params['strategy'],
	157	+# 'profile_size': self.profile_size[n],
	158	+# 'accuracy': accuracy,
	159	+# 'precision': precision,
	160	+# 'recall:': recall,
	161	+# 'f1': }
	162	+# else:
	163	+# result = {}
	164	+# return result
	165	+
	166	+if __name__ == '__main__':
	167	+
	168	+ if "clustering" in sys.argv or len(sys.argv)<3:
	169	+ ClusteringSuite().start()
	170	+ if "content" in sys.argv or len(sys.argv)<3:
	171	+ ContentBasedSuite().start()
	172	+ #if "collaborative" in sys.argv or len(sys.argv)<3:
	173	+ #CollaborativeSuite().start()
...	...
...	...	@@ -28,12 +28,14 @@ class RecommendationResult:
28	28	"""
29	29	Class designed to describe a recommendation result: items and scores.
30	30	"""
31		- def __init__(self,item_score):
	31	+ def __init__(self,item_score,ranking=0):
32	32	"""
33	33	Set initial parameters.
34	34	"""
35	35	self.item_score = item_score
36	36	self.size = len(item_score)
	37	+ if ranking:
	38	+ self.ranking = ranking
37	39
38	40	def __str__(self):
39	41	"""
...	...	@@ -64,13 +66,13 @@ class Recommender:
64	66	"""
65	67	Set initial parameters.
66	68	"""
	69	+ self.cfg = cfg
67	70	self.items_repository = xapian.Database(cfg.axi)
68	71	self.set_strategy(cfg.strategy)
69	72	if cfg.weight == "bm25":
70	73	self.weight = xapian.BM25Weight()
71	74	else:
72	75	self.weight = xapian.TradWeight()
73		- self.cfg = cfg
74	76
75	77	def set_strategy(self,strategy_str):
76	78	"""
...	...	@@ -83,10 +85,10 @@ class Recommender:
83	85	if strategy_str == "cbd":
84	86	self.strategy = strategy.ContentBasedStrategy("desc")
85	87	if strategy_str == "col":
86		- self.strategy = strategy.CollaborativeStrategy(20)
87	88	self.users_repository = data.PopconXapianIndex(self.cfg)
	89	+ self.strategy = strategy.CollaborativeStrategy(20)
88	90
89		- def get_recommendation(self,user,result_size=20):
	91	+ def get_recommendation(self,user,result_size=100):
90	92	"""
91	93	Produces recommendation using previously loaded strategy.
92	94	"""
...	...
...	...	@@ -42,6 +42,26 @@ class PkgMatchDecider(xapian.MatchDecider):
42	42	"""
43	43	return doc.get_data() not in self.pkgs_list
44	44
	45	+class AppMatchDecider(xapian.MatchDecider):
	46	+ """
	47	+ Extend xapian.MatchDecider to consider only application packages.
	48	+ """
	49	+ def __init__(self, pkgs_list, axi):
	50	+ """
	51	+ Set initial parameters.
	52	+ """
	53	+ xapian.MatchDecider.__init__(self)
	54	+ self.pkgs_list = pkgs_list
	55	+ self.axi = axi
	56	+
	57	+ def __call__(self, doc):
	58	+ """
	59	+ True if the package is an application and is not already installed.
	60	+ """
	61	+ tags = axi_search_pkg_tags(self.axi,doc.get_data())
	62	+ return (("XTrole::program" in tags) and
	63	+ (doc.get_data() not in self.pkgs_list))
	64	+
45	65	class UserMatchDecider(xapian.MatchDecider):
46	66	"""
47	67	Extend xapian.MatchDecider to match similar profiles.
...	...	@@ -73,7 +93,32 @@ class PkgExpandDecider(xapian.ExpandDecider):
73	93	True if the term is a package.
74	94	"""
75	95	# [FIXME] return term.startswith("XP")
76		- return not term.startswith("XT")
	96	+ #return not term.startswith("XT")
	97	+ return term.startswith("XP")
	98	+
	99	+class AppExpandDecider(xapian.ExpandDecider):
	100	+ """
	101	+ Extend xapian.ExpandDecider to consider applications only.
	102	+ """
	103	+ def __init__(self,axi):
	104	+ xapian.ExpandDecider.__init__(self)
	105	+ self.axi = axi
	106	+
	107	+ def __call__(self, term):
	108	+ """
	109	+ True if the term is an application package.
	110	+ """
	111	+ if not term.startswith("XT"):
	112	+ package = term.lstrip("XP")
	113	+ print package
	114	+ tags = axi_search_pkg_tags(self.axi,package)
	115	+ if "XTrole::program" in tags:
	116	+ print tags
	117	+ return True
	118	+ else:
	119	+ return False
	120	+ else:
	121	+ return False
77	122
78	123	class TagExpandDecider(xapian.ExpandDecider):
79	124	"""
...	...	@@ -100,7 +145,7 @@ class ContentBasedStrategy(RecommendationStrategy):
100	145	self.content = content
101	146	self.profile_size = profile_size
102	147
103		- def run(self,rec,user,limit):
	148	+ def run(self,rec,user,recommendation_size):
104	149	"""
105	150	Perform recommendation strategy.
106	151	"""
...	...	@@ -113,35 +158,40 @@ class ContentBasedStrategy(RecommendationStrategy):
113	158	enquire.set_query(query)
114	159	try:
115	160	# retrieve matching packages
116		- mset = enquire.get_mset(0, limit, None, PkgMatchDecider(user.items()))
	161	+ mset = enquire.get_mset(0, recommendation_size, None,
	162	+ PkgMatchDecider(user.items()))
	163	+ #AppMatchDecider(user.items(),
	164	+ # rec.items_repository))
117	165	except xapian.DatabaseError as error:
118	166	logging.critical("Content-based strategy: "+error.get_msg())
119	167	# compose result dictionary
120	168	item_score = {}
	169	+ ranking = []
121	170	for m in mset:
	171	+ #[FIXME] set this constraint somehow
	172	+ #tags = axi_search_pkg_tags(rec.items_repository,m.document.get_data())
	173	+ #if "XTrole::program" in tags:
122	174	item_score[m.document.get_data()] = m.weight
123		- return recommender.RecommendationResult(item_score)
	175	+ ranking.append(m.document.get_data())
	176	+
	177	+ return recommender.RecommendationResult(item_score,ranking)
124	178
125	179	class CollaborativeStrategy(RecommendationStrategy):
126	180	"""
127	181	Collaborative recommendation strategy.
128	182	"""
129		- def __init__(self,k,clustering=1):
	183	+ def __init__(self,k):
130	184	self.description = "Collaborative"
131		- self.clustering = clustering
132	185	self.neighbours = k
133	186
134		- def run(self,rec,user,result_size):
	187	+ def run(self,rec,user,recommendation_size):
135	188	"""
136	189	Perform recommendation strategy.
137	190	"""
138		- profile = user.pkg_profile
	191	+ profile = ["XP"+package for package in user.pkg_profile]
139	192	# prepare index for querying user profile
140	193	query = xapian.Query(xapian.Query.OP_OR,profile)
141		- if self.clustering:
142		- enquire = xapian.Enquire(rec.clustered_users_repository)
143		- else:
144		- enquire = xapian.Enquire(rec.users_repository)
	194	+ enquire = xapian.Enquire(rec.users_repository)
145	195	enquire.set_weighting_scheme(rec.weight)
146	196	enquire.set_query(query)
147	197	try:
...	...	@@ -155,27 +205,39 @@ class CollaborativeStrategy(RecommendationStrategy):
155	205	rset.add_document(m.document.get_docid())
156	206	logging.debug(m.document.get_data())
157	207	# retrieve most relevant packages
158		- eset = enquire.get_eset(result_size,rset,PkgExpandDecider())
	208	+ #eset = enquire.get_eset(recommendation_size,rset,
	209	+ # AppExpandDecider(rec.items_repository))
	210	+ eset = enquire.get_eset(recommendation_size,rset,PkgExpandDecider())
159	211	# compose result dictionary
160	212	item_score = {}
161		- for package in eset:
162		- item_score[package.term.lstrip("XP")] = package.weight
	213	+ for e in eset:
	214	+ package = e.term.lstrip("XP")
	215	+ tags = axi_search_pkg_tags(rec.items_repository,package)
	216	+ #[FIXME] set this constraint somehow
	217	+ #if "XTrole::program" in tags:
	218	+ item_score[package] = e.weight
163	219	return recommender.RecommendationResult(item_score)
164	220
165	221	class DemographicStrategy(RecommendationStrategy):
166	222	"""
167	223	Recommendation strategy based on demographic data.
168	224	"""
	225	+ #def __init__(self, result):
	226	+ #self.result = result
169	227	def __init__(self):
170	228	self.description = "Demographic"
171	229	logging.debug("Demographic recommendation not yet implemented.")
172	230	raise Error
173	231
174		- def run(self,user,items_repository):
	232	+ def run(self,rec,user,recommendation_size):
175	233	"""
176	234	Perform recommendation strategy.
177	235	"""
178		- pass
	236	+ ordered_result = self.result.get_prediction()
	237	+
	238	+ for item,weight in ordered_result:
	239	+ pass
	240	+
179	241
180	242	class KnowledgeBasedStrategy(RecommendationStrategy):
181	243	"""
...	...
...	...	@@ -22,14 +22,29 @@ __license__ = """
22	22	import unittest2
23	23	import shutil
24	24	import os
	25	+import xapian
25	26	import sys
26	27	sys.path.insert(0,'../')
27		-from data import PopconSubmission, PopconXapianIndex
	28	+from data import PopconSubmission, PopconXapianIndex, axi_search_pkg_tags
28	29	from config import Config
29	30
30	31	def suite():
31	32	return unittest2.TestLoader().loadTestsFromTestCase(PopconSubmissionTests)
32	33
	34	+class AxiSearchTests(unittest2.TestCase):
	35	+ @classmethod
	36	+ def setUpClass(self):
	37	+ cfg = Config()
	38	+ self.axi = xapian.Database(cfg.axi)
	39	+
	40	+ def test_search_pkg_tags(self):
	41	+ tags = axi_search_pkg_tags(self.axi,'apticron')
	42	+ self.assertEqual(set(tags),set(['XTadmin::package-management',
	43	+ 'XTinterface::daemon',
	44	+ 'XTnetwork::server', 'XTrole::program',
	45	+ 'XTsuite::debian', 'XTuse::monitor',
	46	+ 'XTworks-with::mail']))
	47	+
33	48	class PopconSubmissionTests(unittest2.TestCase):
34	49	@classmethod
35	50	def setUpClass(self):
...	...
...	...	@@ -0,0 +1,90 @@
	1	+#!/usr/bin/env python
	2	+"""
	3	+ evaluationTests - Evaluation metrics and cross-validation test case
	4	+"""
	5	+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
	6	+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
	7	+__license__ = """
	8	+ This program is free software: you can redistribute it and/or modify
	9	+ it under the terms of the GNU General Public License as published by
	10	+ the Free Software Foundation, either version 3 of the License, or
	11	+ (at your option) any later version.
	12	+
	13	+ This program is distributed in the hope that it will be useful,
	14	+ but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	+ GNU General Public License for more details.
	17	+
	18	+ You should have received a copy of the GNU General Public License
	19	+ along with this program. If not, see <http://www.gnu.org/licenses/>.
	20	+"""
	21	+
	22	+import xapian
	23	+import unittest2
	24	+import sys
	25	+sys.path.insert(0,'../')
	26	+from evaluation import (Accuracy, Precision, Recall, F1, Coverage,
	27	+ Evaluation, CrossValidation)
	28	+from recommender import RecommendationResult
	29	+from config import Config
	30	+from recommender import Recommender
	31	+from user import User
	32	+from data import SampleAptXapianIndex
	33	+
	34	+class MetricsTests(unittest2.TestCase):
	35	+ @classmethod
	36	+ def setUpClass(self):
	37	+ repository = ['apple','grape','pineaple','melon','watermelon','orange']
	38	+ real = RecommendationResult(dict.fromkeys(['apple','grape','pineaple','melon'],1))
	39	+ predicted = RecommendationResult(dict.fromkeys(['apple','grape','orange'],1))
	40	+ self.evaluation = Evaluation(predicted,real,len(repository))
	41	+
	42	+ def test_class_accuracy(self):
	43	+ accuracy = Accuracy().run(self.evaluation)
	44	+ self.assertEqual(accuracy,0.5)
	45	+
	46	+ def test_precision(self):
	47	+ precision = Precision().run(self.evaluation)
	48	+ self.assertEqual("%.2f" % precision,"0.67")
	49	+
	50	+ def test_recall(self):
	51	+ recall = Recall().run(self.evaluation)
	52	+ self.assertEqual(recall,0.5)
	53	+
	54	+ def test_f1(self):
	55	+ f1 = F1().run(self.evaluation)
	56	+ self.assertEqual("%.2f" % f1,"0.57")
	57	+
	58	+ def test_coverage(self):
	59	+ evaluations_set = set()
	60	+ evaluations_set.add(self.evaluation)
	61	+ coverage = Coverage().run(evaluations_set)
	62	+ self.assertEqual(coverage,0.5)
	63	+
	64	+ def test_evaluation(self):
	65	+ self.assertEqual(self.evaluation.true_positive, ['apple','grape'])
	66	+ self.assertEqual(self.evaluation.false_positive, ['orange'])
	67	+ self.assertEqual(self.evaluation.false_negative, ['pineaple','melon'])
	68	+
	69	+ def test_cross_validation(self):
	70	+ cfg = Config()
	71	+ axi = xapian.Database(cfg.axi)
	72	+ packages = ["gimp","aaphoto","eog","emacs","dia","ferret",
	73	+ "festival","file","inkscape","xpdf"]
	74	+ path = "test_data/.sample_axi"
	75	+ sample_axi = SampleAptXapianIndex(packages,axi,path)
	76	+ rec = Recommender(cfg)
	77	+ rec.items_repository = sample_axi
	78	+ user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1})
	79	+
	80	+ metrics = []
	81	+ metrics.append(Precision())
	82	+ metrics.append(Recall())
	83	+ metrics.append(F1())
	84	+
	85	+ validation = CrossValidation(0.3,5,rec,metrics,0.5)
	86	+ validation.run(user)
	87	+ print validation
	88	+
	89	+if __name__ == '__main__':
	90	+ unittest2.main()
...	...