Commit 2fce1682e62e824dcbf897ea5aa2e2a30464a63a

Authored by Tiago Bortoletto Vaz
2 parents 2f49eb5c c4327ec0
Exists in master and in 1 other branch add_vagrant

Merge branch 'master' of github.com:tassia/AppRecommender

src/data.py
... ... @@ -41,14 +41,16 @@ def axi_search_pkgs(axi,pkgs_list):
41 41 return matches
42 42  
43 43 def axi_search_pkg_tags(axi,pkg):
44   - query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg)
45 44 enquire = xapian.Enquire(axi)
46   - enquire.set_query(query)
  45 + enquire.set_query(xapian.Query("XP"+pkg))
47 46 matches = enquire.get_mset(0,1)
  47 + if not matches:
  48 + logging.debug("Package %s not found in items repository" % pkg)
  49 + return []
48 50 for m in matches:
49 51 tags = [term.term for term in axi.get_document(m.docid).termlist() if
50 52 term.term.startswith("XT")]
51   - return tags
  53 + return tags
52 54  
53 55 def print_index(index):
54 56 output = "\n---\n" + xapian.Database.__repr__(index) + "\n---\n"
... ... @@ -59,6 +61,32 @@ def print_index(index):
59 61 output += "\n---"
60 62 return output
61 63  
class AppAptXapianIndex(xapian.WritableDatabase):
    """
    Data source for packages information restricted to applications.

    The index is a copy of an apt-xapian-index (axi) from which every
    document lacking the "XTrole::program" term was discarded.
    """
    def __init__(self,axi_path,path):
        """
        Create (or overwrite) the index at 'path', filling it with the
        documents of the axi found at 'axi_path' that are tagged with
        "XTrole::program".
        """
        xapian.WritableDatabase.__init__(self,path,
                                         xapian.DB_CREATE_OR_OVERWRITE)
        axi = xapian.Database(axi_path)
        logging.info("AptXapianIndex size: %d" % axi.get_doccount())
        for docid in range(1,axi.get_lastdocid()+1):
            try:
                doc = axi.get_document(docid)
            except xapian.DocNotFoundError:
                # Every id up to lastdocid is probed, and some of them may
                # not hold a document. Only that case is tolerated; any
                # other failure is a real error and must not be reported
                # as a missing document.
                logging.info("Doc %d not found in axi." % docid)
                continue
            allterms = [term.term for term in doc.termlist()]
            if "XTrole::program" in allterms:
                self.add_document(doc)
                logging.info("Added doc %d." % docid)
            else:
                logging.info("Discarded doc %d." % docid)
        # Both values must be passed to '%' as a single tuple; otherwise the
        # second '%d' has no argument and a TypeError is raised.
        logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." %
                     (self.get_doccount(), self.get_lastdocid()))

    def __str__(self):
        """
        Return the textual dump produced by print_index().
        """
        return print_index(self)
  89 +
62 90 class SampleAptXapianIndex(xapian.WritableDatabase):
63 91 """
64 92 Sample data source for packages information, mainly useful for tests.
... ... @@ -129,6 +157,7 @@ class PopconXapianIndex(xapian.WritableDatabase):
129 157 """
130 158 self.axi = xapian.Database(cfg.axi)
131 159 self.path = os.path.expanduser(cfg.popcon_index)
  160 + self.source_dir = os.path.expanduser(cfg.popcon_dir)
132 161 if not cfg.index_mode == "old" or not self.load_index():
133 162 if not os.path.exists(cfg.popcon_dir):
134 163 os.makedirs(cfg.popcon_dir)
... ... @@ -205,8 +234,9 @@ class PopconXapianIndex(xapian.WritableDatabase):
205 234 submission.user_id)
206 235 for pkg, freq in submission.packages.items():
207 236 doc.add_term("XP"+pkg,freq)
208   - for tag in axi_search_pkg_tags(self.axi,pkg):
209   - doc.add_term(tag,freq)
  237 + if axi_search_pkg_tags(self.axi,pkg):
  238 + for tag in axi_search_pkg_tags(self.axi,pkg):
  239 + doc.add_term(tag,freq)
210 240 doc_id = self.add_document(doc)
211 241 logging.debug("Popcon Xapian: Indexing doc %d" % doc_id)
212 242 # python garbage collector
... ...
src/evaluation.py
... ... @@ -140,7 +140,7 @@ class F1(Metric):
140 140 p = Precision().run(evaluation)
141 141 r = Recall().run(evaluation)
142 142 if (p+r)>0:
143   - return float((2*p*r))/(p+r)
  143 + return float(2*((p*r)/(p+r)))
144 144 else:
145 145 return 0
146 146  
... ... @@ -289,7 +289,7 @@ class CrossValidation:
289 289 result_size = int(self.recommender.items_repository.get_doccount()*
290 290 self.result_proportion)
291 291 predicted_result = self.recommender.get_recommendation(round_user,result_size)
292   - print len(round_partition)
  292 + #print len(round_partition)
293 293 real_result = RecommendationResult(round_partition)
294 294 #logging.debug("Predicted result: %s",predicted_result)
295 295 evaluation = Evaluation(predicted_result,real_result,
... ...
src/examples/cross_validation.py
... ... @@ -40,16 +40,20 @@ if __name__ == '__main__':
40 40 try:
41 41 cfg = Config()
42 42 rec = Recommender(cfg)
  43 + print "\nRecommender strategy: ",rec.strategy.description
43 44 user = LocalSystem()
44   - user.maximal_pkg_profile()
45   -
  45 + #user.app_pkg_profile(rec.items_repository)
  46 + user.no_auto_pkg_profile()
46 47 begin_time = datetime.datetime.now()
47 48 logging.debug("Cross-validation started at %s" % begin_time)
48 49  
49 50 metrics = []
50 51 metrics.append(Precision())
51 52 metrics.append(Recall())
52   - validation = CrossValidation(0.3,10,rec,metrics)
  53 + metrics.append(F1())
  54 + metrics.append(Accuracy())
  55 + metrics.append(SimpleAccuracy())
  56 + validation = CrossValidation(0.3,10,rec,metrics,0.005)
53 57 validation.run(user)
54 58 print validation
55 59  
... ...
src/experiments/README 0 → 100644
... ... @@ -0,0 +1,2 @@
  1 +Experiments handled by expsuite:
  2 +https://github.com/rueckstiess/expsuite
... ...
src/experiments/experiments.cfg 0 → 100644
... ... @@ -0,0 +1,26 @@
  1 +[DEFAULT]
  2 +repetitions = 1
  3 +iterations = 10
  4 +path = 'results'
  5 +experiment = 'grid'
  6 +weight = ['bm25', 'trad']
  7 +;profile_size = range(10,100,10)
  8 +sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
  9 +
  10 +[content]
  11 +strategy = ['cb','cbt','cbd']
  12 +
  13 +[clustering]
  14 +experiment = 'single'
  15 +;iterations = 4
  16 +;medoids = range(2,6)
  17 +iterations = 6
  18 +medoids = [100,500,1000,5000,10000,50000]
  19 +;disabled for this experiment
  20 +weight = 0
  21 +profile_size = 0
  22 +sample = 0
  23 +
  24 +[colaborative]
  25 +users_repository=["data/popcon","data/popcon-100","data/popcon-500","data/popcon-1000","data/popcon-5000","data/popcon-10000","data/popcon-50000"]
  26 +neighbors = range(10,1010,50)
... ...
src/experiments/runner.py 0 → 100755
... ... @@ -0,0 +1,173 @@
  1 +#!/usr/bin/env python
  2 +"""
  3 + recommender suite - recommender experiments suite
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import expsuite
  23 +import sys
  24 +sys.path.insert(0,'../')
  25 +from config import Config
  26 +from data import PopconXapianIndex, PopconSubmission
  27 +from recommender import Recommender
  28 +from user import LocalSystem, User
  29 +from evaluation import *
  30 +import logging
  31 +import random
  32 +import Gnuplot
  33 +
class ClusteringSuite(expsuite.PyExperimentSuite):
    """
    Experiments suite that rebuilds the popcon index with different numbers
    of medoids and records the resulting cluster dispersion.
    """
    def reset(self, params, rep):
        """
        Create a fresh configuration pointing at the test data directories.
        The index mode is switched to "recluster" for 'clustering' runs.
        """
        cfg = Config()
        cfg.popcon_index = "../tests/test_data/.sample_pxi"
        cfg.popcon_dir = "../tests/test_data/popcon_dir"
        cfg.clusters_dir = "../tests/test_data/clusters_dir"
        self.cfg = cfg

        if params['name'] == "clustering":
            logging.info("Starting 'clustering' experiments suite...")
            self.cfg.index_mode = "recluster"

    def iterate(self, params, rep, n):
        """
        Build the index using the n-th value of params['medoids'] and return
        the number of medoids together with the cluster dispersion. Any
        experiment other than 'clustering' yields an empty result.
        """
        if params['name'] != "clustering":
            return {}
        medoids = params['medoids']
        logging.info("Running iteration %d" % medoids[n])
        self.cfg.k_medoids = medoids[n]
        pxi = PopconXapianIndex(self.cfg)
        return {'k_medoids': medoids[n],
                'dispersion': pxi.cluster_dispersion}
  55 +
class ContentBasedSuite(expsuite.PyExperimentSuite):
    """
    Experiments suite for the content-based strategies.

    In each iteration part of the local user profile is hidden, a
    recommendation is produced from the remaining packages, and the suite
    records where the hidden packages appear in the recommendation ranking.
    """
    # Metrics are computed for recommendation sizes 1, 1+SIZE_STEP, ...
    SIZE_STEP = 100
    # Position, within each metric series, of the point reported as the
    # result of the iteration (clamped to the last point of short series).
    SUMMARY_INDEX = 20

    def reset(self, params, rep):
        """
        Set up the recommender and the user profile for 'content*'
        experiments, and compute how many profile items will be hidden.
        """
        if params['name'].startswith("content"):
            cfg = Config()
            #if the index was not built yet
            #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
            cfg.axi = "data/AppAxi"
            cfg.index_mode = "old"
            cfg.weight = params['weight']
            self.rec = Recommender(cfg)
            self.rec.set_strategy(params['strategy'])
            self.repo_size = self.rec.items_repository.get_doccount()
            self.user = LocalSystem()
            self.user.app_pkg_profile(self.rec.items_repository)
            self.user.no_auto_pkg_profile()
            self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
            # iteration should be set to 10 in config file

    def iterate(self, params, rep, n):
        """
        Run one experiment round. Writes the recall log and the metrics plot
        under results/content/recall/ and returns the summary dictionary
        (empty for experiments other than 'content*').
        """
        if not params['name'].startswith("content"):
            return {}
        # Hide 'sample_size' arbitrary items of the profile; the remaining
        # ones are the input of the recommender.
        item_score = dict.fromkeys(self.user.pkg_profile,1)
        sample = {}
        for i in range(self.sample_size):
            item, score = item_score.popitem()
            sample[item] = score
        user = User(item_score)
        # Get full recommendation
        recommendation = self.rec.get_recommendation(user,self.repo_size)
        # RecommendationResult only sets 'ranking' when it is not empty
        ranking = getattr(recommendation,'ranking',[])
        recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
                      (params['strategy'],params['weight'],params['sample'],n)
        self._write_recall_log(recall_file,params,ranking,sample,
                               len(item_score))
        series = self._compute_metrics(ranking,sample)
        self._plot_metrics(series,recall_file+"-plot.ps")
        result = {'weight': params['weight'],
                  'strategy': params['strategy']}
        for metric, points in series.items():
            # Short rankings yield fewer than SUMMARY_INDEX+1 points, and an
            # empty ranking yields none; never index past the end.
            if points:
                result[metric] = points[min(self.SUMMARY_INDEX,
                                            len(points)-1)]
        return result

    def _write_recall_log(self,recall_file,params,ranking,sample,profile_size):
        """
        Write the ranking position of each hidden package to 'recall_file',
        followed by the hidden packages missing from the recommendation.
        """
        # First position of each package, computed in a single pass
        position = {}
        for index, pkg in enumerate(ranking):
            position.setdefault(pkg,index)
        ranks = sorted(position[pkg] for pkg in sample if pkg in position)
        notfound = [pkg for pkg in sample if pkg not in position]
        # 'with' guarantees the file is closed even if a write fails
        with open(recall_file,'w') as output:
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" % \
                         (self.repo_size,profile_size,self.sample_size))
            for r in ranks:
                output.write(str(r)+"\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg+"\n")

    def _compute_metrics(self,ranking,sample):
        """
        Evaluate growing prefixes of 'ranking' against the hidden 'sample'.
        Returns a dictionary mapping the metric name to a list of
        [recommendation size, metric value] pairs.
        """
        series = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
        for size in range(1,len(ranking)+1,self.SIZE_STEP):
            predicted = RecommendationResult(dict.fromkeys(ranking[:size],1))
            real = RecommendationResult(sample)
            evaluation = Evaluation(predicted,real,self.repo_size)
            series['accuracy'].append([size,evaluation.run(Accuracy())])
            series['precision'].append([size,evaluation.run(Precision())])
            series['recall'].append([size,evaluation.run(Recall())])
            series['f1'].append([size,evaluation.run(F1())])
        return series

    def _plot_metrics(self,series,plot_file):
        """
        Plot the metrics summary to the postscript file 'plot_file'.
        """
        if not series['accuracy']:
            # Nothing was recommended, so there is nothing to plot
            return
        g = Gnuplot.Gnuplot()
        g('set style data lines')
        g.xlabel('Recommendation size')
        g.plot(Gnuplot.Data(series['accuracy'],title="Accuracy"),
               Gnuplot.Data(series['precision'],title="Precision"),
               Gnuplot.Data(series['recall'],title="Recall"),
               Gnuplot.Data(series['f1'],title="F1"))
        g.hardcopy(plot_file, enhanced=1, color=1)
  141 +
  142 +#class CollaborativeSuite(expsuite.PyExperimentSuite):
  143 +# def reset(self, params, rep):
  144 +# if params['name'].startswith("collaborative"):
  145 +#
  146 +# def iterate(self, params, rep, n):
  147 +# if params['name'].startswith("collaborative"):
  148 +# for root, dirs, files in os.walk(self.source_dir):
  149 +# for popcon_file in files:
  150 +# submission = PopconSubmission(os.path.join(root,popcon_file))
  151 +# user = User(submission.packages)
  152 +# user.maximal_pkg_profile()
  153 +# rec.get_recommendation(user)
  154 +# precision = 0
  155 +# result = {'weight': params['weight'],
  156 +# 'strategy': params['strategy'],
  157 +# 'profile_size': self.profile_size[n],
  158 +# 'accuracy': accuracy,
  159 +# 'precision': precision,
  160 +# 'recall:': recall,
  161 +# 'f1': }
  162 +# else:
  163 +# result = {}
  164 +# return result
  165 +
if __name__ == '__main__':

    # With fewer than three command line arguments every suite is started;
    # otherwise only the suites named in the arguments are.
    run_all = len(sys.argv)<3
    if run_all or "clustering" in sys.argv:
        ClusteringSuite().start()
    if run_all or "content" in sys.argv:
        ContentBasedSuite().start()
    #if "collaborative" in sys.argv or len(sys.argv)<3:
        #CollaborativeSuite().start()
... ...
src/recommender.py
... ... @@ -28,12 +28,14 @@ class RecommendationResult:
28 28 """
29 29 Class designed to describe a recommendation result: items and scores.
30 30 """
31   - def __init__(self,item_score):
  31 + def __init__(self,item_score,ranking=0):
32 32 """
33 33 Set initial parameters.
34 34 """
35 35 self.item_score = item_score
36 36 self.size = len(item_score)
  37 + if ranking:
  38 + self.ranking = ranking
37 39  
38 40 def __str__(self):
39 41 """
... ... @@ -64,13 +66,13 @@ class Recommender:
64 66 """
65 67 Set initial parameters.
66 68 """
  69 + self.cfg = cfg
67 70 self.items_repository = xapian.Database(cfg.axi)
68 71 self.set_strategy(cfg.strategy)
69 72 if cfg.weight == "bm25":
70 73 self.weight = xapian.BM25Weight()
71 74 else:
72 75 self.weight = xapian.TradWeight()
73   - self.cfg = cfg
74 76  
75 77 def set_strategy(self,strategy_str):
76 78 """
... ... @@ -83,10 +85,10 @@ class Recommender:
83 85 if strategy_str == "cbd":
84 86 self.strategy = strategy.ContentBasedStrategy("desc")
85 87 if strategy_str == "col":
86   - self.strategy = strategy.CollaborativeStrategy(20)
87 88 self.users_repository = data.PopconXapianIndex(self.cfg)
  89 + self.strategy = strategy.CollaborativeStrategy(20)
88 90  
89   - def get_recommendation(self,user,result_size=20):
  91 + def get_recommendation(self,user,result_size=100):
90 92 """
91 93 Produces recommendation using previously loaded strategy.
92 94 """
... ...
src/strategy.py
... ... @@ -42,6 +42,26 @@ class PkgMatchDecider(xapian.MatchDecider):
42 42 """
43 43 return doc.get_data() not in self.pkgs_list
44 44  
class AppMatchDecider(xapian.MatchDecider):
    """
    Extend xapian.MatchDecider to accept only application packages that are
    not in a given list of packages.
    """
    def __init__(self, pkgs_list, axi):
        """
        Keep the list of packages to be rejected and the index (axi) used to
        look up package tags.
        """
        xapian.MatchDecider.__init__(self)
        self.axi = axi
        self.pkgs_list = pkgs_list

    def __call__(self, doc):
        """
        True if the package is tagged with "XTrole::program" and is not in
        the packages list.
        """
        pkg = doc.get_data()
        tags = axi_search_pkg_tags(self.axi,pkg)
        if "XTrole::program" not in tags:
            return False
        return pkg not in self.pkgs_list
  64 +
45 65 class UserMatchDecider(xapian.MatchDecider):
46 66 """
47 67 Extend xapian.MatchDecider to match similar profiles.
... ... @@ -73,7 +93,32 @@ class PkgExpandDecider(xapian.ExpandDecider):
73 93 True if the term is a package.
74 94 """
75 95 # [FIXME] return term.startswith("XP")
76   - return not term.startswith("XT")
  96 + #return not term.startswith("XT")
  97 + return term.startswith("XP")
  98 +
class AppExpandDecider(xapian.ExpandDecider):
    """
    Extend xapian.ExpandDecider to consider applications only.
    """
    def __init__(self,axi):
        """
        Keep the index (axi) used to look up package tags.
        """
        xapian.ExpandDecider.__init__(self)
        self.axi = axi

    def __call__(self, term):
        """
        True if the term is a package term ("XP" prefix) and the package is
        tagged with "XTrole::program".
        """
        if not term.startswith("XP"):
            return False
        # Slice the prefix off. str.lstrip("XP") would strip any leading run
        # of 'X' and 'P' characters, damaging names that start with them.
        package = term[len("XP"):]
        tags = axi_search_pkg_tags(self.axi,package)
        is_application = "XTrole::program" in tags
        # This runs once per candidate term, so report through the logger
        # rather than printing to stdout.
        logging.debug("AppExpandDecider: %s (application: %s)",
                      package, is_application)
        return is_application
77 122  
78 123 class TagExpandDecider(xapian.ExpandDecider):
79 124 """
... ... @@ -100,7 +145,7 @@ class ContentBasedStrategy(RecommendationStrategy):
100 145 self.content = content
101 146 self.profile_size = profile_size
102 147  
103   - def run(self,rec,user,limit):
  148 + def run(self,rec,user,recommendation_size):
104 149 """
105 150 Perform recommendation strategy.
106 151 """
... ... @@ -113,35 +158,40 @@ class ContentBasedStrategy(RecommendationStrategy):
113 158 enquire.set_query(query)
114 159 try:
115 160 # retrieve matching packages
116   - mset = enquire.get_mset(0, limit, None, PkgMatchDecider(user.items()))
  161 + mset = enquire.get_mset(0, recommendation_size, None,
  162 + PkgMatchDecider(user.items()))
  163 + #AppMatchDecider(user.items(),
  164 + # rec.items_repository))
117 165 except xapian.DatabaseError as error:
118 166 logging.critical("Content-based strategy: "+error.get_msg())
119 167 # compose result dictionary
120 168 item_score = {}
  169 + ranking = []
121 170 for m in mset:
  171 + #[FIXME] set this constraint somehow
  172 + #tags = axi_search_pkg_tags(rec.items_repository,m.document.get_data())
  173 + #if "XTrole::program" in tags:
122 174 item_score[m.document.get_data()] = m.weight
123   - return recommender.RecommendationResult(item_score)
  175 + ranking.append(m.document.get_data())
  176 +
  177 + return recommender.RecommendationResult(item_score,ranking)
124 178  
125 179 class CollaborativeStrategy(RecommendationStrategy):
126 180 """
127 181 Collaborative recommendation strategy.
128 182 """
129   - def __init__(self,k,clustering=1):
  183 + def __init__(self,k):
130 184 self.description = "Collaborative"
131   - self.clustering = clustering
132 185 self.neighbours = k
133 186  
134   - def run(self,rec,user,result_size):
  187 + def run(self,rec,user,recommendation_size):
135 188 """
136 189 Perform recommendation strategy.
137 190 """
138   - profile = user.pkg_profile
  191 + profile = ["XP"+package for package in user.pkg_profile]
139 192 # prepare index for querying user profile
140 193 query = xapian.Query(xapian.Query.OP_OR,profile)
141   - if self.clustering:
142   - enquire = xapian.Enquire(rec.clustered_users_repository)
143   - else:
144   - enquire = xapian.Enquire(rec.users_repository)
  194 + enquire = xapian.Enquire(rec.users_repository)
145 195 enquire.set_weighting_scheme(rec.weight)
146 196 enquire.set_query(query)
147 197 try:
... ... @@ -155,27 +205,39 @@ class CollaborativeStrategy(RecommendationStrategy):
155 205 rset.add_document(m.document.get_docid())
156 206 logging.debug(m.document.get_data())
157 207 # retrieve most relevant packages
158   - eset = enquire.get_eset(result_size,rset,PkgExpandDecider())
  208 + #eset = enquire.get_eset(recommendation_size,rset,
  209 + # AppExpandDecider(rec.items_repository))
  210 + eset = enquire.get_eset(recommendation_size,rset,PkgExpandDecider())
159 211 # compose result dictionary
160 212 item_score = {}
161   - for package in eset:
162   - item_score[package.term.lstrip("XP")] = package.weight
  213 + for e in eset:
  214 + package = e.term.lstrip("XP")
  215 + tags = axi_search_pkg_tags(rec.items_repository,package)
  216 + #[FIXME] set this constraint somehow
  217 + #if "XTrole::program" in tags:
  218 + item_score[package] = e.weight
163 219 return recommender.RecommendationResult(item_score)
164 220  
165 221 class DemographicStrategy(RecommendationStrategy):
166 222 """
167 223 Recommendation strategy based on demographic data.
168 224 """
  225 + #def __init__(self, result):
  226 + #self.result = result
169 227 def __init__(self):
170 228 self.description = "Demographic"
171 229 logging.debug("Demographic recommendation not yet implemented.")
172 230 raise Error
173 231  
174   - def run(self,user,items_repository):
  232 + def run(self,rec,user,recommendation_size):
175 233 """
176 234 Perform recommendation strategy.
177 235 """
178   - pass
  236 + ordered_result = self.result.get_prediction()
  237 +
  238 + for item,weight in ordered_result:
  239 + pass
  240 +
179 241  
180 242 class KnowledgeBasedStrategy(RecommendationStrategy):
181 243 """
... ...
src/tests/data_tests.py
... ... @@ -22,14 +22,29 @@ __license__ = &quot;&quot;&quot;
22 22 import unittest2
23 23 import shutil
24 24 import os
  25 +import xapian
25 26 import sys
26 27 sys.path.insert(0,'../')
27   -from data import PopconSubmission, PopconXapianIndex
  28 +from data import PopconSubmission, PopconXapianIndex, axi_search_pkg_tags
28 29 from config import Config
29 30  
30 31 def suite():
31 32 return unittest2.TestLoader().loadTestsFromTestCase(PopconSubmissionTests)
32 33  
class AxiSearchTests(unittest2.TestCase):
    """
    Tests for the search helpers that query the apt-xapian-index.
    """
    @classmethod
    def setUpClass(cls):
        # The index configured for the application is opened once and
        # shared by all tests of this class.
        cls.axi = xapian.Database(Config().axi)

    def test_search_pkg_tags(self):
        """
        The tags found for 'apticron' must be exactly the expected ones,
        whatever their order.
        """
        expected = set(['XTadmin::package-management',
                        'XTinterface::daemon',
                        'XTnetwork::server', 'XTrole::program',
                        'XTsuite::debian', 'XTuse::monitor',
                        'XTworks-with::mail'])
        found = axi_search_pkg_tags(self.axi,'apticron')
        self.assertEqual(set(found),expected)
  47 +
33 48 class PopconSubmissionTests(unittest2.TestCase):
34 49 @classmethod
35 50 def setUpClass(self):
... ...
src/tests/evaluation_tests.py 0 → 100755
... ... @@ -0,0 +1,90 @@
  1 +#!/usr/bin/env python
  2 +"""
  3 + evaluationTests - Evaluation metrics test case
  4 +"""
  5 +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
  6 +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
  7 +__license__ = """
  8 + This program is free software: you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation, either version 3 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program. If not, see <http://www.gnu.org/licenses/>.
  20 +"""
  21 +
  22 +import xapian
  23 +import unittest2
  24 +import sys
  25 +sys.path.insert(0,'../')
  26 +from evaluation import (Accuracy, Precision, Recall, F1, Coverage,
  27 + Evaluation, CrossValidation)
  28 +from recommender import RecommendationResult
  29 +from config import Config
  30 +from recommender import Recommender
  31 +from user import User
  32 +from data import SampleAptXapianIndex
  33 +
class MetricsTests(unittest2.TestCase):
    """
    Check the evaluation metrics against a small hand-made example: six
    items in the repository, four of them relevant and three predicted.
    """
    @classmethod
    def setUpClass(cls):
        repository = ['apple','grape','pineaple','melon','watermelon','orange']
        relevant = ['apple','grape','pineaple','melon']
        suggested = ['apple','grape','orange']
        real = RecommendationResult(dict.fromkeys(relevant,1))
        predicted = RecommendationResult(dict.fromkeys(suggested,1))
        cls.evaluation = Evaluation(predicted,real,len(repository))

    def test_class_accuracy(self):
        self.assertEqual(Accuracy().run(self.evaluation),0.5)

    def test_precision(self):
        value = Precision().run(self.evaluation)
        self.assertEqual("%.2f" % value,"0.67")

    def test_recall(self):
        self.assertEqual(Recall().run(self.evaluation),0.5)

    def test_f1(self):
        value = F1().run(self.evaluation)
        self.assertEqual("%.2f" % value,"0.57")

    def test_coverage(self):
        evaluations_set = set([self.evaluation])
        self.assertEqual(Coverage().run(evaluations_set),0.5)

    def test_evaluation(self):
        result = self.evaluation
        self.assertEqual(result.true_positive, ['apple','grape'])
        self.assertEqual(result.false_positive, ['orange'])
        self.assertEqual(result.false_negative, ['pineaple','melon'])

    def test_cross_validation(self):
        """
        Run a cross-validation over a sample index built from ten packages
        and print its report; no assertion is made on the values.
        """
        cfg = Config()
        packages = ["gimp","aaphoto","eog","emacs","dia","ferret",
                    "festival","file","inkscape","xpdf"]
        sample_axi = SampleAptXapianIndex(packages,xapian.Database(cfg.axi),
                                          "test_data/.sample_axi")
        rec = Recommender(cfg)
        rec.items_repository = sample_axi
        user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1})

        metrics = [Precision(), Recall(), F1()]

        validation = CrossValidation(0.3,5,rec,metrics,0.5)
        validation.run(user)
        print(validation)
  88 +
  89 +if __name__ == '__main__':
  90 + unittest2.main()
... ...
src/web/templates/about.html
... ... @@ -3,23 +3,23 @@ $var mod = &#39;about&#39;;
3 3 $var cssfiles:
4 4 $var jsfiles:
5 5  
6   -
7 6 <div id="maincontent">
8 7 <div class="innertube">
9 8  
10 9 <img style="float: right;" alt="AppRecommender logo" src="/static/images/logo.png" width="150px" />
11 10  
12   -<h1>What is this?</h1>
13   -
14   -<p>
15   -AppRecommender is a project in development that aims to provide solutions
16   -for application recommendation at the GNU/Linux world. It was initially thought
17   -as a Debian package recommender, but considering the multi-distro effort in
18   -providing platform independent solutions, it should also follow this
19   -principle.
20   -</p>
  11 +<h1>About</h1>
21 12  
  13 +<p>This experiment aims to compare and validate automated application
  14 +recommendations produced by various strategies and algorithm tunings. Asking
  15 +real users about the relevance of the recommendation is the closest we can get
  16 +to the real accuracy of the recommender system.</p>
22 17  
  18 +<p>The engine that is being tested is free software called <a
  19 +href="http://github.com/tassia/AppRecommender">AppRecommender</a>. It was
  20 +initially developed using the Debian Project infrastructure, but the solution
  21 +is essentially distro-independent and could even be adapted to non-GNU/Linux
  22 +systems, provided that data is available for them.</p>
23 23  
24 24 </div><!-- id="innertube" -->
25 25 </div><!-- id="maincontent" -->
... ...
src/web/templates/apprec.html
... ... @@ -41,7 +41,6 @@ $$(document).ready(function() {
41 41 });
42 42 </script>
43 43  
44   -
45 44 <div id="sidebar">
46 45 <div class="innertube">
47 46  
... ...
src/web/templates/index.html
... ... @@ -670,18 +670,17 @@ function showtags(tagid) {
670 670 <h1>You might also like...</h1>
671 671  
672 672 <p>Provide a list of packages or upload a popcon submission file and you'll get
673   -a list of suggested packages automatically computed by AppRecommender. You can
674   -customize the recommender setup or let it randomly choose one.</p>
  673 +a list of suggested packages automatically computed by AppRecommender.<!-- You can
  674 +customize the recommender setup or let it randomly choose one.--></p>
675 675  
676   -<p>Please fill the form that follows the recommendation results. Your
  676 +<p>Given the recommendation result, please evaluate each application and
  677 +choose if you want to continue with another round of suggestions.
  678 +<!--fill the form that follows the recommendation results.--> </p><p>Your
677 679 feedback is very much appreciated!</p>
678 680  
679   -
680   -<p>Enjoy it :)</p>
681 681 </div>
682 682  
683 683 </div><!-- class="innertube" -->
684 684 </div><!-- id="maincontent" -->
685 685  
686 686  
687   -
... ...
src/web/templates/layout.html
... ... @@ -59,7 +59,7 @@ if (x==null || x==&quot;Write your list App here or send a file list this icon:&quot;)
59 59 <fieldset>
60 60 <div id="submit-box">
61 61 <input type="submit" value="RECOMMENDER" id="submit-button"><br />
62   - <a id="advanced-button">advanced query?</a>
  62 + <!--<a id="advanced-button">advanced query?</a>-->
63 63 </div>
64 64 <div id="input-box">
65 65 <a href="#attachfile" rel="facebox" id="upfile"><span style="display: none;">Upload a file.</span></a>
... ... @@ -165,16 +165,15 @@ $:content
165 165 <div id="footer">
166 166  
167 167 <div id="navbar">
168   - <ul>
  168 + <ul>
169 169 <li><a href="$url_base">Home</a></li>
170 170 <li><a href="$url_base/about">About</a></li>
171   - <li><a href="$url_base/support">Support</a></li>
172 171 <li><a href="http://github.com/tassia/AppRecommender">Development</a></li>
173 172 </ul>
174 173 </div><!-- id="navbar" -->
175   - <p id="copyright">
176   - Copyright © 2011 AppRecommender. Debian is a registered trademark of Software in the Public Interest, Inc.
177   - </p>
  174 +<!-- <p id="copyright">
  175 + Copyright © 2011 AppRecommender team.
  176 + </p> -->
178 177 </div><!-- id="footer" -->
179 178  
180 179  
... ...
src/web/templates/survey_index.html 0 → 100644
... ... @@ -0,0 +1,60 @@
  1 +$var title: Survey
  2 +$var mod = 'index';
  3 +$var cssfiles: static/css/tabs.css static/css/debtags.css static/css/facebox.css
  4 +$var jsfiles: static/js/facebox.js
  5 +
  6 +
  7 +<!-- Dynamic form -->
  8 +<script type="application/x-javascript">
  9 +window.onload = function() {
  10 + setupDependencies('weboptions'); //name of form(s). Separate each with a comma (i.e.: 'weboptions', 'myotherform' )
  11 + };
  12 +</script>
  13 +
  14 +<script type="application/x-javascript">
  15 +$$(document).ready(function() {
  16 + $$('a[rel*=facebox]').facebox({
  17 + loadingImage : '/static/images/loading.gif',
  18 + closeImage : '/static/images/closelabel.png'
  19 + });
  20 + $$("#tags-box").click(function () {
  21 + $$("#tags-box").hide(1000);
  22 + });
  23 +
  24 +});
  25 +</script>
  26 +
  27 +
  28 +<div id="sidebar">
  29 +<div class="innertube">
  30 +
  31 +
  32 +<br style="clear: both" />
  33 +</div><!-- class="innertube" -->
  34 +</div><!-- id="sidebar" -->
  35 +
  36 +<div id="maincontent">
  37 +<div class="innertube">
  38 +
  39 +<div class="textbox">
  40 +<h1>Help us learn your needs!</h1>
  41 +
  42 +<p>Participate in this survey and contribute to the development of
  43 +AppRecommender, a recommender system for GNU/Linux applications.</p>
  44 +<br />
  45 +<p>Please provide the list of packages installed in a real running system,
  46 +by uploading a popcon submission or the file generated with the command:
  47 +"dpkg -l > packages_list".</p>
  48 +
  49 +<p>Evaluate at least 10 suggested applications and identify yourself if you
  50 +wish to. Upon the completion of this survey there will be a thank you page
  51 +listing all identified participants.</p>
  52 +
  53 +<p>Your help is very much appreciated!</p>
  54 +
  55 +</div>
  56 +
  57 +</div><!-- class="innertube" -->
  58 +</div><!-- id="maincontent" -->
  59 +
  60 +
... ...