Merge branch 'master' of github.com:tassia/AppRecommender

Tiago Bortoletto Vaz
2 parents 2f49eb5c c4327ec0
Showing 15 changed files with 516 additions and 55 deletions Show diff stats
src/data.py
src/evaluation.py
src/examples/cross_validation.py
src/experiments/README
src/experiments/experiments.cfg
src/experiments/runner.py
src/recommender.py
src/strategy.py
src/tests/data_tests.py
src/tests/evaluation_tests.py
src/web/templates/about.html
src/web/templates/apprec.html
src/web/templates/index.html
src/web/templates/layout.html
src/web/templates/survey_index.html
@@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list):
     return matches
 def axi_search_pkg_tags(axi,pkg):
-    query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg)
     enquire = xapian.Enquire(axi)
-    enquire.set_query(query)
+    enquire.set_query(xapian.Query("XP"+pkg))
     matches = enquire.get_mset(0,1)
+    if not matches:
+        logging.debug("Package %s not found in items repository" % pkg)
+        return []
     for m in matches:
         tags = [term.term for term in axi.get_document(m.docid).termlist() if
                 term.term.startswith("XT")]
-    return tags
+        return tags
 def print_index(index):
     output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n"
@@ -59,6 +61,32 @@ def print_index(index):
         output += "\n---"
     return output
+class AppAptXapianIndex(xapian.WritableDatabase):
+    """
+    Sample data source for packages information, mainly useful for tests.
+    """
+    def __init__(self,axi_path,path):
+        xapian.WritableDatabase.__init__(self,path,
+                                         xapian.DB_CREATE_OR_OVERWRITE)
+        axi = xapian.Database(axi_path)
+        logging.info("AptXapianIndex size: %d" % axi.get_doccount())
+        for docid in range(1,axi.get_lastdocid()+1):
+            try:
+                doc = axi.get_document(docid)
+                allterms = [term.term for term in doc.termlist()]
+                if "XTrole::program" in allterms:
+                    self.add_document(doc)
+                    logging.info("Added doc %d." % docid)
+                else:
+                    logging.info("Discarded doc %d." % docid)
+            except:
+                logging.info("Doc %d not found in axi." % docid)
+        logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." %
+                     self.get_doccount(), self.get_lastdocid())
+
+    def __str__(self):
+        return print_index(self)
+
 class SampleAptXapianIndex(xapian.WritableDatabase):
     """
     Sample data source for packages information, mainly useful for tests.
@@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase):
         """
         self.axi = xapian.Database(cfg.axi)
         self.path = os.path.expanduser(cfg.popcon_index)
+        self.source_dir = os.path.expanduser(cfg.popcon_dir)
         if not cfg.index_mode == "old" or not self.load_index():
             if not os.path.exists(cfg.popcon_dir):
                 os.makedirs(cfg.popcon_dir)
@@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase):
                               submission.user_id)
                 for pkg, freq in submission.packages.items():
                     doc.add_term("XP"+pkg,freq)
-                    for tag in axi_search_pkg_tags(self.axi,pkg):
-                        doc.add_term(tag,freq)
+                    if axi_search_pkg_tags(self.axi,pkg):
+                        for tag in axi_search_pkg_tags(self.axi,pkg):
+                            doc.add_term(tag,freq)
                 doc_id = self.add_document(doc)
                 logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
             # python garbage collector
@@ -140,7 +140,7 @@ class F1(Metric):
         p = Precision().run(evaluation)
         r = Recall().run(evaluation)
         if (p+r)>0:
-            return float((2*p*r))/(p+r)
+            return float(2*((p*r)/(p+r)))
         else:
             return 0
@@ -289,7 +289,7 @@ class CrossValidation:
             result_size = int(self.recommender.items_repository.get_doccount()*
                               self.result_proportion)
             predicted_result = self.recommender.get_recommendation(round_user,result_size)
-            print len(round_partition)
+            #print len(round_partition)
             real_result = RecommendationResult(round_partition)
             #logging.debug("Predicted result: %s",predicted_result)
             evaluation = Evaluation(predicted_result,real_result,
@@ -40,16 +40,20 @@ if __name__ == &#39;__main__&#39;:
     try:
         cfg = Config()
         rec = Recommender(cfg)
+        print "\nRecommender strategy: ",rec.strategy.description
         user = LocalSystem()
-        user.maximal_pkg_profile()
-
+        #user.app_pkg_profile(rec.items_repository)
+        user.no_auto_pkg_profile()
         begin_time = datetime.datetime.now()
         logging.debug("Cross-validation started at %s" % begin_time)
         metrics = []
         metrics.append(Precision())
         metrics.append(Recall())
-        validation = CrossValidation(0.3,10,rec,metrics)
+        metrics.append(F1())
+        metrics.append(Accuracy())
+        metrics.append(SimpleAccuracy())
+        validation = CrossValidation(0.3,10,rec,metrics,0.005)
         validation.run(user)
         print validation
@@ -0,0 +1,2 @@
+Experiments handled by expsuite:
+https://github.com/rueckstiess/expsuite
@@ -0,0 +1,26 @@
+[DEFAULT]
+repetitions = 1
+iterations = 10
+path = 'results'
+experiment = 'grid'
+weight = ['bm25', 'trad']
+;profile_size = range(10,100,10)
+sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+
+[content]
+strategy = ['cb','cbt','cbd']
+
+[clustering]
+experiment = 'single'
+;iterations = 4
+;medoids = range(2,6)
+iterations = 6
+medoids = [100,500,1000,5000,10000,50000]
+;disabled for this experiment
+weight = 0
+profile_size = 0
+sample = 0
+
+[colaborative]
+users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
+neighbors = range(10,1010,50)
@@ -0,0 +1,173 @@
+#!/usr/bin/env python
+"""
+    recommender suite - recommender experiments suite 
+"""
+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
+__license__ = """
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import expsuite
+import sys
+sys.path.insert(0,'../')
+from config import Config
+from data import PopconXapianIndex, PopconSubmission
+from recommender import Recommender
+from user import LocalSystem, User
+from evaluation import *
+import logging
+import random
+import Gnuplot
+
+class ClusteringSuite(expsuite.PyExperimentSuite):
+    """
+    Experiments suite that instantiates PopconXapianIndex with different
+    numbers of medoids and records the resulting cluster dispersion.
+    """
+    def reset(self, params, rep):
+        """
+        Create a fresh Config pointing at the test data directories and,
+        for the "clustering" experiment, set the index mode to "recluster".
+        """
+        self.cfg = Config()
+        self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
+        self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
+        self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
+
+        if params['name'] == "clustering":
+            logging.info("Starting 'clustering' experiments suite...")
+            self.cfg.index_mode = "recluster"
+
+    def iterate(self, params, rep, n):
+        """
+        Build a PopconXapianIndex using the n-th value of the 'medoids'
+        parameter and return that value together with the index
+        cluster_dispersion. Any other experiment gets an empty result.
+        """
+        if params['name'] == "clustering":
+            logging.info("Running iteration %d" % params['medoids'][n])
+            self.cfg.k_medoids = params['medoids'][n]
+            pxi = PopconXapianIndex(self.cfg)
+            result = {'k_medoids': params['medoids'][n],
+                   'dispersion': pxi.cluster_dispersion}
+        else:
+            result = {}
+        return result
+
+class ContentBasedSuite(expsuite.PyExperimentSuite):
+    def reset(self, params, rep):
+        if params['name'].startswith("content"):
+            cfg = Config()
+            #if the index was not built yet
+            #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
+            cfg.axi = "data/AppAxi"
+            cfg.index_mode = "old"
+            cfg.weight = params['weight']
+            self.rec = Recommender(cfg)
+            self.rec.set_strategy(params['strategy'])
+            self.repo_size = self.rec.items_repository.get_doccount()
+            self.user = LocalSystem()
+            self.user.app_pkg_profile(self.rec.items_repository)
+            self.user.no_auto_pkg_profile()
+            self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
+            # iteration should be set to 10 in config file
+            #self.profile_size = range(10,101,10)
+
+    def iterate(self, params, rep, n):
+        if params['name'].startswith("content"):
+            # Get full recommendation
+            item_score = dict.fromkeys(self.user.pkg_profile,1)
+            sample = {}
+            for i in range(self.sample_size):
+                 item, score = item_score.popitem()
+                 sample[item] = score
+            user = User(item_score)
+            recommendation = self.rec.get_recommendation(user,self.repo_size)
+            # Write recall log
+            recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
+                          (params['strategy'],params['weight'],params['sample'],n)
+            output = open(recall_file,'w')
+            output.write("# weight=%s\n" % params['weight'])
+            output.write("# strategy=%s\n" % params['strategy'])
+            output.write("# sample=%f\n" % params['sample'])
+            output.write("\n%d %d %d\n" % \
+                         (self.repo_size,len(item_score),self.sample_size))
+            notfound = []
+            ranks = []
+            for pkg in sample.keys():
+                if pkg in recommendation.ranking:
+                    ranks.append(recommendation.ranking.index(pkg))
+                else:
+                    notfound.append(pkg)
+            for r in sorted(ranks):
+                output.write(str(r)+"\n")
+            if notfound:
+                output.write("Out of recommendation:\n")
+                for pkg in notfound:
+                    output.write(pkg+"\n")
+            output.close()
+            # Plot metrics summary
+            g = Gnuplot.Gnuplot()
+            g('set style data lines')
+            g.xlabel('Recommendation size')
+            accuracy = []
+            precision = []
+            recall = []
+            f1 = []
+            for size in range(1,len(recommendation.ranking)+1,100):
+                predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
+                real = RecommendationResult(sample)
+                evaluation = Evaluation(predicted,real,self.repo_size)
+                accuracy.append([size,evaluation.run(Accuracy())])
+                precision.append([size,evaluation.run(Precision())])
+                recall.append([size,evaluation.run(Recall())])
+                f1.append([size,evaluation.run(F1())])
+            #print "accuracy", len(accuracy)
+            #print "precision", len(precision)
+            #print "recall", len(recall)
+            #print "f1", len(f1)
+            g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
+                   Gnuplot.Data(precision,title="Precision"),
+                   Gnuplot.Data(recall,title="Recall"),
+                   Gnuplot.Data(f1,title="F1"))
+            g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
+            result = {}
+            result = {'weight': params['weight'],
+                      'strategy': params['strategy'],
+                      'accuracy': accuracy[20],
+                      'precision': precision[20],
+                      'recall:': recall[20],
+                      'f1': f1[20]}
+            return result
+
+#class CollaborativeSuite(expsuite.PyExperimentSuite):
+#    def reset(self, params, rep):
+#        if params['name'].startswith("collaborative"):
+#
+#    def iterate(self, params, rep, n):
+#        if params['name'].startswith("collaborative"):
+#            for root, dirs, files in os.walk(self.source_dir):
+#                for popcon_file in files:
+#                    submission = PopconSubmission(os.path.join(root,popcon_file))
+#                    user = User(submission.packages)
+#                    user.maximal_pkg_profile()
+#                    rec.get_recommendation(user)
+#                    precision = 0
+#                    result = {'weight': params['weight'],
+#                              'strategy': params['strategy'],
+#                              'profile_size': self.profile_size[n],
+#                              'accuracy': accuracy,
+#                              'precision': precision,
+#                              'recall:': recall,
+#                              'f1': }
+#        else:
+#            result = {}
+#        return result
+
+# Run the suites named on the command line.
+# NOTE(review): len(sys.argv)<3 is also true when a single suite name is
+# given, so e.g. "runner.py content" runs both suites - confirm whether
+# this is intended (expsuite may consume further options of its own).
+if __name__ == '__main__':
+
+    if "clustering" in sys.argv or len(sys.argv)<3:
+        ClusteringSuite().start()
+    if "content" in sys.argv or len(sys.argv)<3:
+        ContentBasedSuite().start()
+    #if "collaborative" in sys.argv or len(sys.argv)<3:
+    #CollaborativeSuite().start()
@@ -28,12 +28,14 @@ class RecommendationResult:
     """
     Class designed to describe a recommendation result: items and scores.
     """
-    def __init__(self,item_score):
+    def __init__(self,item_score,ranking=0):
         """
         Set initial parameters.
         """
         self.item_score = item_score
         self.size = len(item_score)
+        if ranking:
+            self.ranking = ranking
     def __str__(self):
         """
@@ -64,13 +66,13 @@ class Recommender:
         """
         Set initial parameters.
         """
+        self.cfg = cfg
         self.items_repository = xapian.Database(cfg.axi)
         self.set_strategy(cfg.strategy)
         if cfg.weight == "bm25":
             self.weight = xapian.BM25Weight()
         else:
             self.weight = xapian.TradWeight()
-        self.cfg = cfg
     def set_strategy(self,strategy_str):
         """
@@ -83,10 +85,10 @@ class Recommender:
         if strategy_str == "cbd":
             self.strategy = strategy.ContentBasedStrategy("desc")
         if strategy_str == "col":
-            self.strategy = strategy.CollaborativeStrategy(20)
             self.users_repository = data.PopconXapianIndex(self.cfg)
+            self.strategy = strategy.CollaborativeStrategy(20)
-    def get_recommendation(self,user,result_size=20):
+    def get_recommendation(self,user,result_size=100):
         """
         Produces recommendation using previously loaded strategy.
         """
@@ -42,6 +42,26 @@ class PkgMatchDecider(xapian.MatchDecider):
         """
         return doc.get_data() not in self.pkgs_list
+class AppMatchDecider(xapian.MatchDecider):
+    """
+    Extend xapian.MatchDecider to consider only application packages
+    that are not in a given list of packages.
+    """
+    def __init__(self, pkgs_list, axi):
+        """
+        Set initial parameters: the packages to be excluded and the index
+        where package tags are looked up.
+        """
+        xapian.MatchDecider.__init__(self)
+        self.pkgs_list = pkgs_list
+        self.axi = axi
+
+    def __call__(self, doc):
+        """
+        True if the package is tagged as "XTrole::program" in axi and is
+        not in pkgs_list.
+        """
+        tags = axi_search_pkg_tags(self.axi,doc.get_data())
+        return (("XTrole::program" in tags) and
+                (doc.get_data() not in self.pkgs_list))
+
 class UserMatchDecider(xapian.MatchDecider):
     """
     Extend xapian.MatchDecider to match similar profiles.
@@ -73,7 +93,32 @@ class PkgExpandDecider(xapian.ExpandDecider):
         True if the term is a package.
         """
         # [FIXME] return term.startswith("XP")
-        return not term.startswith("XT")
+        #return not term.startswith("XT")
+        return term.startswith("XP")
+
+class AppExpandDecider(xapian.ExpandDecider):
+    """
+    Extend xapian.ExpandDecider to consider applications only.
+    """
+    def __init__(self,axi):
+        xapian.ExpandDecider.__init__(self)
+        self.axi = axi
+
+    def __call__(self, term):
+        """
+        True if the term is a package.
+        """
+        if not term.startswith("XT"):
+            package = term.lstrip("XP")
+            print package
+            tags = axi_search_pkg_tags(self.axi,package)
+            if "XTrole::program" in tags:
+                print tags
+                return True
+            else:
+                return False
+        else:
+            return False
 class TagExpandDecider(xapian.ExpandDecider):
     """
@@ -100,7 +145,7 @@ class ContentBasedStrategy(RecommendationStrategy):
         self.content = content
         self.profile_size = profile_size
-    def run(self,rec,user,limit):
+    def run(self,rec,user,recommendation_size):
         """
         Perform recommendation strategy.
         """
@@ -113,35 +158,40 @@ class ContentBasedStrategy(RecommendationStrategy):
         enquire.set_query(query)
         try:
             # retrieve matching packages
-            mset = enquire.get_mset(0, limit, None, PkgMatchDecider(user.items()))
+            mset = enquire.get_mset(0, recommendation_size, None,
+                                    PkgMatchDecider(user.items()))
+                                    #AppMatchDecider(user.items(),
+                                    #                rec.items_repository))
         except xapian.DatabaseError as error:
             logging.critical("Content-based strategy: "+error.get_msg())
         # compose result dictionary
         item_score = {}
+        ranking = []
         for m in mset:
+            #[FIXME] set this constraint somehow
+            #tags = axi_search_pkg_tags(rec.items_repository,m.document.get_data())
+            #if "XTrole::program" in tags:
             item_score[m.document.get_data()] = m.weight
-        return recommender.RecommendationResult(item_score)
+            ranking.append(m.document.get_data())
+
+        return recommender.RecommendationResult(item_score,ranking)
 class CollaborativeStrategy(RecommendationStrategy):
     """
     Colaborative recommendation strategy.
     """
-    def __init__(self,k,clustering=1):
+    def __init__(self,k):
         self.description = "Collaborative"
-        self.clustering = clustering
         self.neighbours = k
-    def run(self,rec,user,result_size):
+    def run(self,rec,user,recommendation_size):
         """
         Perform recommendation strategy.
         """
-        profile = user.pkg_profile
+        profile = ["XP"+package for package in user.pkg_profile]
         # prepair index for querying user profile
         query = xapian.Query(xapian.Query.OP_OR,profile)
-        if self.clustering:
-            enquire = xapian.Enquire(rec.clustered_users_repository)
-        else:
-            enquire = xapian.Enquire(rec.users_repository)
+        enquire = xapian.Enquire(rec.users_repository)
         enquire.set_weighting_scheme(rec.weight)
         enquire.set_query(query)
         try:
@@ -155,27 +205,39 @@ class CollaborativeStrategy(RecommendationStrategy):
             rset.add_document(m.document.get_docid())
             logging.debug(m.document.get_data())
         # retrieve most relevant packages
-        eset = enquire.get_eset(result_size,rset,PkgExpandDecider())
+        #eset = enquire.get_eset(recommendation_size,rset,
+        #                        AppExpandDecider(rec.items_repository))
+        eset = enquire.get_eset(recommendation_size,rset,PkgExpandDecider())
         # compose result dictionary
         item_score = {}
-        for package in eset:
-            item_score[package.term.lstrip("XP")] = package.weight
+        for e in eset:
+            package = e.term.lstrip("XP")
+            tags = axi_search_pkg_tags(rec.items_repository,package)
+            #[FIXME] set this constraint somehow
+            #if "XTrole::program" in tags:
+            item_score[package] = e.weight
         return recommender.RecommendationResult(item_score)
 class DemographicStrategy(RecommendationStrategy):
     """
     Recommendation strategy based on demographic data.
     """
+    #def __init__(self, result):
+        #self.result = result
     def __init__(self):
         self.description = "Demographic"
         logging.debug("Demographic recommendation not yet implemented.")
         raise Error
-    def run(self,user,items_repository):
+    def run(self,rec,user,recommendation_size):
         """
         Perform recommendation strategy.
         """
-        pass
+        ordered_result = self.result.get_prediction()
+
+        for item,weight in ordered_result:
+            pass
+
 class KnowledgeBasedStrategy(RecommendationStrategy):
     """
@@ -22,14 +22,29 @@ __license__ = &quot;&quot;&quot;
 import unittest2
 import shutil
 import os
+import xapian
 import sys
 sys.path.insert(0,'../')
-from data import PopconSubmission, PopconXapianIndex
+from data import PopconSubmission, PopconXapianIndex, axi_search_pkg_tags
 from config import Config
 def suite():
     return unittest2.TestLoader().loadTestsFromTestCase(PopconSubmissionTests)
+class AxiSearchTests(unittest2.TestCase):
+    """
+    Test case for the search helpers that query the apt-xapian-index.
+    """
+    @classmethod
+    def setUpClass(self):
+        # open the index configured in Config().axi once for all tests
+        cfg = Config()
+        self.axi = xapian.Database(cfg.axi)
+
+    def test_search_pkg_tags(self):
+        # the expected tags are fixed here, so the result depends on the
+        # contents of the index the test runs against
+        tags = axi_search_pkg_tags(self.axi,'apticron')
+        self.assertEqual(set(tags),set(['XTadmin::package-management',
+                                        'XTinterface::daemon',
+                                        'XTnetwork::server', 'XTrole::program',
+                                        'XTsuite::debian', 'XTuse::monitor',
+                                        'XTworks-with::mail']))
+
 class PopconSubmissionTests(unittest2.TestCase):
     @classmethod
     def setUpClass(self):
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+"""
+    evaluationTests - Evaluation metrics test case
+"""
+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
+__license__ = """
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import xapian
+import unittest2
+import sys
+sys.path.insert(0,'../')
+from evaluation import (Accuracy, Precision, Recall, F1, Coverage,
+                        Evaluation, CrossValidation)
+from recommender import RecommendationResult
+from config import Config
+from recommender import Recommender
+from user import User
+from data import SampleAptXapianIndex
+
+class MetricsTests(unittest2.TestCase):
+    """
+    Test case for the evaluation metrics, based on a small hand-made
+    example: 6 items in the repository, 4 real items, 3 predicted items.
+    """
+    @classmethod
+    def setUpClass(self):
+        # predicted and real results share 'apple' and 'grape'; 'orange' is
+        # predicted only; 'pineaple' and 'melon' are real only
+        repository = ['apple','grape','pineaple','melon','watermelon','orange']
+        real = RecommendationResult(dict.fromkeys(['apple','grape','pineaple','melon'],1))
+        predicted = RecommendationResult(dict.fromkeys(['apple','grape','orange'],1))
+        self.evaluation = Evaluation(predicted,real,len(repository))
+
+    def test_class_accuracy(self):
+        accuracy = Accuracy().run(self.evaluation)
+        self.assertEqual(accuracy,0.5)
+
+    def test_precision(self):
+        # compared as a 2-digit string to avoid float equality (2/3)
+        precision = Precision().run(self.evaluation)
+        self.assertEqual("%.2f" % precision,"0.67")
+
+    def test_recall(self):
+        recall = Recall().run(self.evaluation)
+        self.assertEqual(recall,0.5)
+
+    def test_f1(self):
+        # compared as a 2-digit string to avoid float equality
+        f1 = F1().run(self.evaluation)
+        self.assertEqual("%.2f" % f1,"0.57")
+
+    def test_coverage(self):
+        # Coverage runs on a set of evaluations instead of a single one
+        evaluations_set = set()
+        evaluations_set.add(self.evaluation)
+        coverage = Coverage().run(evaluations_set)
+        self.assertEqual(coverage,0.5)
+
+    def test_evaluation(self):
+        # NOTE(review): lists are compared in order - presumably Evaluation
+        # keeps a deterministic item order; verify against its implementation
+        self.assertEqual(self.evaluation.true_positive, ['apple','grape'])
+        self.assertEqual(self.evaluation.false_positive, ['orange'])
+        self.assertEqual(self.evaluation.false_negative, ['pineaple','melon'])
+
+    def test_cross_validation(self):
+        # smoke test: runs a cross-validation over a sample index and prints
+        # the result; nothing is asserted
+        cfg = Config()
+        axi = xapian.Database(cfg.axi)
+        packages = ["gimp","aaphoto","eog","emacs","dia","ferret",
+                    "festival","file","inkscape","xpdf"]
+        path = "test_data/.sample_axi"
+        sample_axi = SampleAptXapianIndex(packages,axi,path)
+        rec = Recommender(cfg)
+        rec.items_repository = sample_axi
+        user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1})
+
+        metrics = []
+        metrics.append(Precision())
+        metrics.append(Recall())
+        metrics.append(F1())
+
+        validation = CrossValidation(0.3,5,rec,metrics,0.5)
+        validation.run(user)
+        print validation
+
+# Run every test case of this module when executed as a script.
+if __name__ == '__main__':
+        unittest2.main()
@@ -3,23 +3,23 @@ $var mod = &#39;about&#39;;
 $var cssfiles:
 $var jsfiles:
-
 <div id="maincontent">
 <div class="innertube">
 <img style="float: right;" alt="AppRecommender logo" src="/static/images/logo.png" width="150px" />
-<h1>What is this?</h1>
-
-<p>
-AppRecommender is a project in development that aims to provide solutions
-for application recommendation at the GNU/Linux world. It was initially thought
-as a Debian package recommender, but considering the multi-distro effort in
-providing platform independent solutions, it should also follow this
-principle.
-</p>
+<h1>About</h1>
+<p>This experiment aims to compare and validate automated application
+recommendations produced by various strategies and algorithm tunings. Asking
+real users about the relevance of the recommendations is the closest we can get
+to the real accuracy of the recommender system.</p>
+<p>The engine being tested is a free software project called <a
+href="http://github.com/tassia/AppRecommender">AppRecommender</a>. It was
+initially developed using the Debian Project infrastructure, but the solution
+is essentially distro-independent and could even be adapted to non-GNU/Linux
+systems, provided that suitable data is available.</p>
 </div><!-- id="innertube" -->
 </div><!-- id="maincontent" -->
@@ -41,7 +41,6 @@ $$(document).ready(function() {
 });
 </script>
-
 <div id="sidebar">
 <div class="innertube">
@@ -670,18 +670,17 @@ function showtags(tagid) {
 <h1>You might also like...</h1>
 <p>Provide a list of packages or upload a popcon submission file and you'll get
-a list of suggested packages automatically computed by AppRecommender. You can
-customize the recommender setup or let it randomly choose one.</p>
+a list of suggested packages automatically computed by AppRecommender.<!-- You can
+customize the recommender setup or let it randomly choose one.--></p> 
-<p>Please fill the form that follows the recommendation results. Your
+<p>Given the recommendation result, please evaluate each application and
+choose if you want to continue with another round of suggestions.
+<!--fill the form that follows the recommendation results.--> </p><p>Your
 feedback is very much appreciated!</p>
-
-<p>Enjoy it :)</p>
 </div>
 </div><!-- class="innertube" -->
 </div><!-- id="maincontent" -->
-
@@ -59,7 +59,7 @@ if (x==null || x==&quot;Write your list App here or send a file list this icon:&quot;)
       <fieldset>
 	<div id="submit-box">
 	<input type="submit" value="RECOMMENDER" id="submit-button"><br />
-        <a id="advanced-button">advanced query?</a>
+        <!--<a id="advanced-button">advanced query?</a>-->
 	</div>
 	<div id="input-box">
 	  <a href="#attachfile" rel="facebox" id="upfile"><span style="display: none;">Upload a file.</span></a>
@@ -165,16 +165,15 @@ $:content
  <div id="footer">
   <div id="navbar">
-     <ul> 
+     <ul>
 	<li><a href="$url_base">Home</a></li>
 	<li><a href="$url_base/about">About</a></li>
-	<li><a href="$url_base/support">Support</a></li>
 	<li><a href="http://github.com/tassia/AppRecommender">Development</a></li>
     </ul>
    </div><!-- id="navbar" -->
-  <p id="copyright">
-   Copyright © 2011 AppRecommender.  Debian is a registered trademark of Software in the Public Interest, Inc.
-  </p>
+<!--  <p id="copyright">
+   Copyright © 2011 AppRecommender team.
+  </p> -->
  </div><!-- id="footer" -->
@@ -0,0 +1,60 @@
+$var title: Survey
+$var mod = 'index';
+$var cssfiles: static/css/tabs.css static/css/debtags.css static/css/facebox.css 
+$var jsfiles: static/js/facebox.js
+
+
+<!-- Dynamic form -->
+<script type="application/x-javascript">
+window.onload = function() {
+    setupDependencies('weboptions'); //name of form(s). Separate each with a comma (e.g. 'weboptions', 'myotherform' )
+  };
+</script>
+
+<script type="application/x-javascript">
+$$(document).ready(function() {
+ $$('a[rel*=facebox]').facebox({
+ loadingImage : '/static/images/loading.gif',
+ closeImage   : '/static/images/closelabel.png'
+ });
+ $$("#tags-box").click(function () {
+ $$("#tags-box").hide(1000);
+ });
+
+});
+</script>
+
+
+<div id="sidebar">
+<div class="innertube">
+
+
+<br style="clear: both" />
+</div><!-- class="innertube" -->
+</div><!-- id="sidebar" -->
+
+<div id="maincontent">
+<div class="innertube">
+
+<div class="textbox">
+<h1>Help us learn your needs!</h1>
+
+<p>Participate in this survey and contribute to the development of
+AppRecommender, a recommender system for GNU/Linux applications.</p>
+<br />
+<p>Please provide the list of packages installed in a real running system,
+by uploading a popcon submission or the file generated with the command:
+"dpkg -l > packages_list".</p>
+
+<p>Evaluate at least 10 suggested applications and identify yourself if you
+wish to. Upon completion of this survey there will be a thank-you page
+listing all identified participants.</p>
+
+<p>Your help is very much appreciated!</p>
+
+</div>
+
+</div><!-- class="innertube" -->
+</div><!-- id="maincontent" -->
+
+
	@@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list):		@@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list):
41	return matches	41	return matches
42		42
43	def axi_search_pkg_tags(axi,pkg):	43	def axi_search_pkg_tags(axi,pkg):
44	- query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg)
45	enquire = xapian.Enquire(axi)	44	enquire = xapian.Enquire(axi)
46	- enquire.set_query(query)	45	+ enquire.set_query(xapian.Query("XP"+pkg))
47	matches = enquire.get_mset(0,1)	46	matches = enquire.get_mset(0,1)
		47	+ if not matches:
		48	+ logging.debug("Package %s not found in items repository" % pkg)
		49	+ return []
48	for m in matches:	50	for m in matches:
49	tags = [term.term for term in axi.get_document(m.docid).termlist() if	51	tags = [term.term for term in axi.get_document(m.docid).termlist() if
50	term.term.startswith("XT")]	52	term.term.startswith("XT")]
51	- return tags	53	+ return tags
52		54
53	def print_index(index):	55	def print_index(index):
54	output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n"	56	output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n"
	@@ -59,6 +61,32 @@ def print_index(index):		@@ -59,6 +61,32 @@ def print_index(index):
59	output += "\n---"	61	output += "\n---"
60	return output	62	return output
61		63
		64	+class AppAptXapianIndex(xapian.WritableDatabase):
		65	+ """
		66	+ Sample data source for packages information, mainly useful for tests.
		67	+ """
		68	+ def __init__(self,axi_path,path):
		69	+ xapian.WritableDatabase.__init__(self,path,
		70	+ xapian.DB_CREATE_OR_OVERWRITE)
		71	+ axi = xapian.Database(axi_path)
		72	+ logging.info("AptXapianIndex size: %d" % axi.get_doccount())
		73	+ for docid in range(1,axi.get_lastdocid()+1):
		74	+ try:
		75	+ doc = axi.get_document(docid)
		76	+ allterms = [term.term for term in doc.termlist()]
		77	+ if "XTrole::program" in allterms:
		78	+ self.add_document(doc)
		79	+ logging.info("Added doc %d." % docid)
		80	+ else:
		81	+ logging.info("Discarded doc %d." % docid)
		82	+ except:
		83	+ logging.info("Doc %d not found in axi." % docid)
		84	+ logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." %
		85	+ self.get_doccount(), self.get_lastdocid())
		86	+
		87	+ def __str__(self):
		88	+ return print_index(self)
		89	+
62	class SampleAptXapianIndex(xapian.WritableDatabase):	90	class SampleAptXapianIndex(xapian.WritableDatabase):
63	"""	91	"""
64	Sample data source for packages information, mainly useful for tests.	92	Sample data source for packages information, mainly useful for tests.
	@@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase):		@@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase):
129	"""	157	"""
130	self.axi = xapian.Database(cfg.axi)	158	self.axi = xapian.Database(cfg.axi)
131	self.path = os.path.expanduser(cfg.popcon_index)	159	self.path = os.path.expanduser(cfg.popcon_index)
		160	+ self.source_dir = os.path.expanduser(cfg.popcon_dir)
132	if not cfg.index_mode == "old" or not self.load_index():	161	if not cfg.index_mode == "old" or not self.load_index():
133	if not os.path.exists(cfg.popcon_dir):	162	if not os.path.exists(cfg.popcon_dir):
134	os.makedirs(cfg.popcon_dir)	163	os.makedirs(cfg.popcon_dir)
	@@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase):		@@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase):
205	submission.user_id)	234	submission.user_id)
206	for pkg, freq in submission.packages.items():	235	for pkg, freq in submission.packages.items():
207	doc.add_term("XP"+pkg,freq)	236	doc.add_term("XP"+pkg,freq)
208	- for tag in axi_search_pkg_tags(self.axi,pkg):
209	- doc.add_term(tag,freq)	237	+ if axi_search_pkg_tags(self.axi,pkg):
		238	+ for tag in axi_search_pkg_tags(self.axi,pkg):
		239	+ doc.add_term(tag,freq)
210	doc_id = self.add_document(doc)	240	doc_id = self.add_document(doc)
211	logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)	241	logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
212	# python garbage collector	242	# python garbage collector
	@@ -140,7 +140,7 @@ class F1(Metric):		@@ -140,7 +140,7 @@ class F1(Metric):
140	p = Precision().run(evaluation)	140	p = Precision().run(evaluation)
141	r = Recall().run(evaluation)	141	r = Recall().run(evaluation)
142	if (p+r)>0:	142	if (p+r)>0:
143	- return float((2*p*r))/(p+r)	143	+ return float(2*((p*r)/(p+r)))
144	else:	144	else:
145	return 0	145	return 0
146		146
	@@ -289,7 +289,7 @@ class CrossValidation:		@@ -289,7 +289,7 @@ class CrossValidation:
289	result_size = int(self.recommender.items_repository.get_doccount()*	289	result_size = int(self.recommender.items_repository.get_doccount()*
290	self.result_proportion)	290	self.result_proportion)
291	predicted_result = self.recommender.get_recommendation(round_user,result_size)	291	predicted_result = self.recommender.get_recommendation(round_user,result_size)
292	- print len(round_partition)	292	+ #print len(round_partition)
293	real_result = RecommendationResult(round_partition)	293	real_result = RecommendationResult(round_partition)
294	#logging.debug("Predicted result: %s",predicted_result)	294	#logging.debug("Predicted result: %s",predicted_result)
295	evaluation = Evaluation(predicted_result,real_result,	295	evaluation = Evaluation(predicted_result,real_result,
@@ -0,0 +1,2 @@		@@ -0,0 +1,2 @@
	1	+Experiments handled by expsuite:
	2	+https://github.com/rueckstiess/expsuite
@@ -0,0 +1,26 @@		@@ -0,0 +1,26 @@
	1	+[DEFAULT]
	2	+repetitions = 1
	3	+iterations = 10
	4	+path = 'results'
	5	+experiment = 'grid'
	6	+weight = ['bm25', 'trad']
	7	+;profile_size = range(10,100,10)
	8	+sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
	9	+
	10	+[content]
	11	+strategy = ['cb','cbt','cbd']
	12	+
	13	+[clustering]
	14	+experiment = 'single'
	15	+;iterations = 4
	16	+;medoids = range(2,6)
	17	+iterations = 6
	18	+medoids = [100,500,1000,5000,10000,50000]
	19	+;disabled for this experiment
	20	+weight = 0
	21	+profile_size = 0
	22	+sample = 0
	23	+
	24	+[colaborative]
	25	+users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
	26	+neighbors = range(10,1010,50)
@@ -0,0 +1,173 @@		@@ -0,0 +1,173 @@
	1	+#!/usr/bin/env python
	2	+"""
	3	+ recommender suite - recommender experiments suite
	4	+"""
	5	+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
	6	+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
	7	+__license__ = """
	8	+ This program is free software: you can redistribute it and/or modify
	9	+ it under the terms of the GNU General Public License as published by
	10	+ the Free Software Foundation, either version 3 of the License, or
	11	+ (at your option) any later version.
	12	+
	13	+ This program is distributed in the hope that it will be useful,
	14	+ but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	+ GNU General Public License for more details.
	17	+
	18	+ You should have received a copy of the GNU General Public License
	19	+ along with this program. If not, see <http://www.gnu.org/licenses/>.
	20	+"""
	21	+
	22	+import expsuite
	23	+import sys
	24	+sys.path.insert(0,'../')
	25	+from config import Config
	26	+from data import PopconXapianIndex, PopconSubmission
	27	+from recommender import Recommender
	28	+from user import LocalSystem, User
	29	+from evaluation import *
	30	+import logging
	31	+import random
	32	+import Gnuplot
	33	+
	34	+class ClusteringSuite(expsuite.PyExperimentSuite):
	35	+ def reset(self, params, rep):
	36	+ self.cfg = Config()
	37	+ self.cfg.popcon_index = "../tests/test_data/.sample_pxi"
	38	+ self.cfg.popcon_dir = "../tests/test_data/popcon_dir"
	39	+ self.cfg.clusters_dir = "../tests/test_data/clusters_dir"
	40	+
	41	+ if params['name'] == "clustering":
	42	+ logging.info("Starting 'clustering' experiments suite...")
	43	+ self.cfg.index_mode = "recluster"
	44	+
	45	+ def iterate(self, params, rep, n):
	46	+ if params['name'] == "clustering":
	47	+ logging.info("Running iteration %d" % params['medoids'][n])
	48	+ self.cfg.k_medoids = params['medoids'][n]
	49	+ pxi = PopconXapianIndex(self.cfg)
	50	+ result = {'k_medoids': params['medoids'][n],
	51	+ 'dispersion': pxi.cluster_dispersion}
	52	+ else:
	53	+ result = {}
	54	+ return result
	55	+
	56	+class ContentBasedSuite(expsuite.PyExperimentSuite):
	57	+ def reset(self, params, rep):
	58	+ if params['name'].startswith("content"):
	59	+ cfg = Config()
	60	+ #if the index was not built yet
	61	+ #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
	62	+ cfg.axi = "data/AppAxi"
	63	+ cfg.index_mode = "old"
	64	+ cfg.weight = params['weight']
	65	+ self.rec = Recommender(cfg)
	66	+ self.rec.set_strategy(params['strategy'])
	67	+ self.repo_size = self.rec.items_repository.get_doccount()
	68	+ self.user = LocalSystem()
	69	+ self.user.app_pkg_profile(self.rec.items_repository)
	70	+ self.user.no_auto_pkg_profile()
	71	+ self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
	72	+ # iteration should be set to 10 in config file
	73	+ #self.profile_size = range(10,101,10)
	74	+
	75	+ def iterate(self, params, rep, n):
	76	+ if params['name'].startswith("content"):
	77	+ # Get full recommendation
	78	+ item_score = dict.fromkeys(self.user.pkg_profile,1)
	79	+ sample = {}
	80	+ for i in range(self.sample_size):
	81	+ item, score = item_score.popitem()
	82	+ sample[item] = score
	83	+ user = User(item_score)
	84	+ recommendation = self.rec.get_recommendation(user,self.repo_size)
	85	+ # Write recall log
	86	+ recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
	87	+ (params['strategy'],params['weight'],params['sample'],n)
	88	+ output = open(recall_file,'w')
	89	+ output.write("# weight=%s\n" % params['weight'])
	90	+ output.write("# strategy=%s\n" % params['strategy'])
	91	+ output.write("# sample=%f\n" % params['sample'])
	92	+ output.write("\n%d %d %d\n" % \
	93	+ (self.repo_size,len(item_score),self.sample_size))
	94	+ notfound = []
	95	+ ranks = []
	96	+ for pkg in sample.keys():
	97	+ if pkg in recommendation.ranking:
	98	+ ranks.append(recommendation.ranking.index(pkg))
	99	+ else:
	100	+ notfound.append(pkg)
	101	+ for r in sorted(ranks):
	102	+ output.write(str(r)+"\n")
	103	+ if notfound:
	104	+ output.write("Out of recommendation:\n")
	105	+ for pkg in notfound:
	106	+ output.write(pkg+"\n")
	107	+ output.close()
	108	+ # Plot metrics summary
	109	+ g = Gnuplot.Gnuplot()
	110	+ g('set style data lines')
	111	+ g.xlabel('Recommendation size')
	112	+ accuracy = []
	113	+ precision = []
	114	+ recall = []
	115	+ f1 = []
	116	+ for size in range(1,len(recommendation.ranking)+1,100):
	117	+ predicted = RecommendationResult(dict.fromkeys(recommendation.ranking[:size],1))
	118	+ real = RecommendationResult(sample)
	119	+ evaluation = Evaluation(predicted,real,self.repo_size)
	120	+ accuracy.append([size,evaluation.run(Accuracy())])
	121	+ precision.append([size,evaluation.run(Precision())])
	122	+ recall.append([size,evaluation.run(Recall())])
	123	+ f1.append([size,evaluation.run(F1())])
	124	+ #print "accuracy", len(accuracy)
	125	+ #print "precision", len(precision)
	126	+ #print "recall", len(recall)
	127	+ #print "f1", len(f1)
	128	+ g.plot(Gnuplot.Data(accuracy,title="Accuracy"),
	129	+ Gnuplot.Data(precision,title="Precision"),
	130	+ Gnuplot.Data(recall,title="Recall"),
	131	+ Gnuplot.Data(f1,title="F1"))
	132	+ g.hardcopy(recall_file+"-plot.ps", enhanced=1, color=1)
	133	+ result = {}
	134	+ result = {'weight': params['weight'],
	135	+ 'strategy': params['strategy'],
	136	+ 'accuracy': accuracy[20],
	137	+ 'precision': precision[20],
	138	+ 'recall:': recall[20],
	139	+ 'f1': f1[20]}
	140	+ return result
	141	+
	142	+#class CollaborativeSuite(expsuite.PyExperimentSuite):
	143	+# def reset(self, params, rep):
	144	+# if params['name'].startswith("collaborative"):
	145	+#
	146	+# def iterate(self, params, rep, n):
	147	+# if params['name'].startswith("collaborative"):
	148	+# for root, dirs, files in os.walk(self.source_dir):
	149	+# for popcon_file in files:
	150	+# submission = PopconSubmission(os.path.join(root,popcon_file))
	151	+# user = User(submission.packages)
	152	+# user.maximal_pkg_profile()
	153	+# rec.get_recommendation(user)
	154	+# precision = 0
	155	+# result = {'weight': params['weight'],
	156	+# 'strategy': params['strategy'],
	157	+# 'profile_size': self.profile_size[n],
	158	+# 'accuracy': accuracy,
	159	+# 'precision': precision,
	160	+# 'recall:': recall,
	161	+# 'f1': }
	162	+# else:
	163	+# result = {}
	164	+# return result
	165	+
	166	+if __name__ == '__main__':
	167	+
	168	+ if "clustering" in sys.argv or len(sys.argv)<3:
	169	+ ClusteringSuite().start()
	170	+ if "content" in sys.argv or len(sys.argv)<3:
	171	+ ContentBasedSuite().start()
	172	+ #if "collaborative" in sys.argv or len(sys.argv)<3:
	173	+ #CollaborativeSuite().start()
	@@ -22,14 +22,29 @@ __license__ = """		@@ -22,14 +22,29 @@ __license__ = """
22	import unittest2	22	import unittest2
23	import shutil	23	import shutil
24	import os	24	import os
		25	+import xapian
25	import sys	26	import sys
26	sys.path.insert(0,'../')	27	sys.path.insert(0,'../')
27	-from data import PopconSubmission, PopconXapianIndex	28	+from data import PopconSubmission, PopconXapianIndex, axi_search_pkg_tags
28	from config import Config	29	from config import Config
29		30
30	def suite():	31	def suite():
31	return unittest2.TestLoader().loadTestsFromTestCase(PopconSubmissionTests)	32	return unittest2.TestLoader().loadTestsFromTestCase(PopconSubmissionTests)
32		33
		34	+class AxiSearchTests(unittest2.TestCase):
		35	+ @classmethod
		36	+ def setUpClass(self):
		37	+ cfg = Config()
		38	+ self.axi = xapian.Database(cfg.axi)
		39	+
		40	+ def test_search_pkg_tags(self):
		41	+ tags = axi_search_pkg_tags(self.axi,'apticron')
		42	+ self.assertEqual(set(tags),set(['XTadmin::package-management',
		43	+ 'XTinterface::daemon',
		44	+ 'XTnetwork::server', 'XTrole::program',
		45	+ 'XTsuite::debian', 'XTuse::monitor',
		46	+ 'XTworks-with::mail']))
		47	+
33	class PopconSubmissionTests(unittest2.TestCase):	48	class PopconSubmissionTests(unittest2.TestCase):
34	@classmethod	49	@classmethod
35	def setUpClass(self):	50	def setUpClass(self):
@@ -0,0 +1,90 @@		@@ -0,0 +1,90 @@
	1	+#!/usr/bin/env python
	2	+"""
	3	+ evaluationTests - Evaluation metrics and cross-validation test case
	4	+"""
	5	+__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
	6	+__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
	7	+__license__ = """
	8	+ This program is free software: you can redistribute it and/or modify
	9	+ it under the terms of the GNU General Public License as published by
	10	+ the Free Software Foundation, either version 3 of the License, or
	11	+ (at your option) any later version.
	12	+
	13	+ This program is distributed in the hope that it will be useful,
	14	+ but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	+ GNU General Public License for more details.
	17	+
	18	+ You should have received a copy of the GNU General Public License
	19	+ along with this program. If not, see <http://www.gnu.org/licenses/>.
	20	+"""
	21	+
	22	+import xapian
	23	+import unittest2
	24	+import sys
	25	+sys.path.insert(0,'../')
	26	+from evaluation import (Accuracy, Precision, Recall, F1, Coverage,
	27	+ Evaluation, CrossValidation)
	28	+from recommender import RecommendationResult
	29	+from config import Config
	30	+from recommender import Recommender
	31	+from user import User
	32	+from data import SampleAptXapianIndex
	33	+
	34	+class MetricsTests(unittest2.TestCase):
	35	+ @classmethod
	36	+ def setUpClass(self):
	37	+ repository = ['apple','grape','pineaple','melon','watermelon','orange']
	38	+ real = RecommendationResult(dict.fromkeys(['apple','grape','pineaple','melon'],1))
	39	+ predicted = RecommendationResult(dict.fromkeys(['apple','grape','orange'],1))
	40	+ self.evaluation = Evaluation(predicted,real,len(repository))
	41	+
	42	+ def test_class_accuracy(self):
	43	+ accuracy = Accuracy().run(self.evaluation)
	44	+ self.assertEqual(accuracy,0.5)
	45	+
	46	+ def test_precision(self):
	47	+ precision = Precision().run(self.evaluation)
	48	+ self.assertEqual("%.2f" % precision,"0.67")
	49	+
	50	+ def test_recall(self):
	51	+ recall = Recall().run(self.evaluation)
	52	+ self.assertEqual(recall,0.5)
	53	+
	54	+ def test_f1(self):
	55	+ f1 = F1().run(self.evaluation)
	56	+ self.assertEqual("%.2f" % f1,"0.57")
	57	+
	58	+ def test_coverage(self):
	59	+ evaluations_set = set()
	60	+ evaluations_set.add(self.evaluation)
	61	+ coverage = Coverage().run(evaluations_set)
	62	+ self.assertEqual(coverage,0.5)
	63	+
	64	+ def test_evaluation(self):
	65	+ self.assertEqual(self.evaluation.true_positive, ['apple','grape'])
	66	+ self.assertEqual(self.evaluation.false_positive, ['orange'])
	67	+ self.assertEqual(self.evaluation.false_negative, ['pineaple','melon'])
	68	+
	69	+ def test_cross_validation(self):
	70	+ cfg = Config()
	71	+ axi = xapian.Database(cfg.axi)
	72	+ packages = ["gimp","aaphoto","eog","emacs","dia","ferret",
	73	+ "festival","file","inkscape","xpdf"]
	74	+ path = "test_data/.sample_axi"
	75	+ sample_axi = SampleAptXapianIndex(packages,axi,path)
	76	+ rec = Recommender(cfg)
	77	+ rec.items_repository = sample_axi
	78	+ user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1})
	79	+
	80	+ metrics = []
	81	+ metrics.append(Precision())
	82	+ metrics.append(Recall())
	83	+ metrics.append(F1())
	84	+
	85	+ validation = CrossValidation(0.3,5,rec,metrics,0.5)
	86	+ validation.run(user)
	87	+ print validation
	88	+
	89	+if __name__ == '__main__':
	90	+ unittest2.main()
	@@ -41,7 +41,6 @@ $$(document).ready(function() {		@@ -41,7 +41,6 @@ $$(document).ready(function() {
41	});	41	});
42	</script>	42	</script>
43		43
44	-
45	<div id="sidebar">	44	<div id="sidebar">
46	<div class="innertube">	45	<div class="innertube">
47		46
@@ -0,0 +1,60 @@		@@ -0,0 +1,60 @@
	1	+$var title: Survey
	2	+$var mod = 'index';
	3	+$var cssfiles: static/css/tabs.css static/css/debtags.css static/css/facebox.css
	4	+$var jsfiles: static/js/facebox.js
	5	+
	6	+
	7	+<!-- Dynamic form -->
	8	+<script type="application/x-javascript">
	9	+window.onload = function() {
	10	+ setupDependencies('weboptions'); //name of form(s). Separate each with a comma (ie: 'weboptions', 'myotherform' )
	11	+ };
	12	+</script>
	13	+
	14	+<script type="application/x-javascript">
	15	+$$(document).ready(function() {
	16	+ $$('a[rel*=facebox]').facebox({
	17	+ loadingImage : '/static/images/loading.gif',
	18	+ closeImage : '/static/images/closelabel.png'
	19	+ });
	20	+ $$("#tags-box").click(function () {
	21	+ $$("#tags-box").hide(1000);
	22	+ });
	23	+
	24	+});
	25	+</script>
	26	+
	27	+
	28	+<div id="sidebar">
	29	+<div class="innertube">
	30	+
	31	+
	32	+<br style="clear: both" />
	33	+</div><!-- class="innertube" -->
	34	+</div><!-- id="sidebar" -->
	35	+
	36	+<div id="maincontent">
	37	+<div class="innertube">
	38	+
	39	+<div class="textbox">
	40	+<h1>Help us learn your needs!</h1>
	41	+
	42	+<p>Participate in this survey and contribute to the development of
	43	+AppRecommender, a recommender system for GNU/Linux applications.</p>
	44	+<br />
	45	+<p>Please provide the list of packages installed in a real running system,
	46	+by uploading a popcon submission or the file generated with the command:
	47	+"dpkg -l > packages_list".</p>
	48	+
	49	+<p>Evaluate at least 10 suggested applications and identify yourself if you
	50	+wish to. Upon the completion of this survey there will be a thank you page
	51	+listing all identified participants.</p>
	52	+
	53	+<p>Your help is very much appreciated!</p>
	54	+
	55	+</div>
	56	+
	57	+</div><!-- class="innertube" -->
	58	+</div><!-- id="maincontent" -->
	59	+
	60	+