Commit 11fc8731a1246b3f06a729395cdfefcc9bc0322c

Authored by Tássia Camões Araújo
2 parents 5075d5c0 1e970ec5
Exists in master and in 1 other branch add_vagrant

Merging content of survey index.

@@ -46,6 +46,7 @@ class Config(): @@ -46,6 +46,7 @@ class Config():
46 self.popcon_dir = os.path.expanduser("~/.app-recommender/popcon_dir") 46 self.popcon_dir = os.path.expanduser("~/.app-recommender/popcon_dir")
47 self.clusters_dir = os.path.expanduser("~/.app-recommender/clusters_dir") 47 self.clusters_dir = os.path.expanduser("~/.app-recommender/clusters_dir")
48 self.k_medoids = 100 48 self.k_medoids = 100
  49 + self.max_popcon = 1000
49 self.index_mode = "old" 50 self.index_mode = "old"
50 self.strategy = "cb" 51 self.strategy = "cb"
51 self.weight = "bm25" 52 self.weight = "bm25"
@@ -71,6 +72,7 @@ class Config(): @@ -71,6 +72,7 @@ class Config():
71 print " -u, --indexmode= 'old'|'reindex'|'cluster'|'recluster'" 72 print " -u, --indexmode= 'old'|'reindex'|'cluster'|'recluster'"
72 print " -l, --clustersdir=PATH Path to popcon clusters dir" 73 print " -l, --clustersdir=PATH Path to popcon clusters dir"
73 print " -c, --medoids=k Number of medoids for clustering" 74 print " -c, --medoids=k Number of medoids for clustering"
  75 + print " -x, --maxpopcon=k Number of submissions to be considered"
74 print "" 76 print ""
75 print " [ recommender ]" 77 print " [ recommender ]"
76 print " -w, --weight=OPTION Search weighting scheme" 78 print " -w, --weight=OPTION Search weighting scheme"
@@ -112,8 +114,8 @@ class Config(): @@ -112,8 +114,8 @@ class Config():
112 logging.error("Error in config file syntax: %s", str(err)) 114 logging.error("Error in config file syntax: %s", str(err))
113 os.abort() 115 os.abort()
114 116
115 - self.debug = self.read_option('general', 'debug')  
116 - self.debug = self.read_option('general', 'verbose') 117 + self.debug = int(self.read_option('general', 'debug'))
  118 + self.debug = int(self.read_option('general', 'verbose'))
117 self.output_filename = self.read_option('general', 'output') 119 self.output_filename = self.read_option('general', 'output')
118 self.survey_mode = self.read_option('general', 'survey_mode') 120 self.survey_mode = self.read_option('general', 'survey_mode')
119 121
@@ -123,16 +125,18 @@ class Config(): @@ -123,16 +125,18 @@ class Config():
123 self.popcon_dir = os.path.expanduser(self.read_option('data_sources', 'popcon_dir')) 125 self.popcon_dir = os.path.expanduser(self.read_option('data_sources', 'popcon_dir'))
124 self.index_mode = self.read_option('data_sources', 'index_mode') 126 self.index_mode = self.read_option('data_sources', 'index_mode')
125 self.clusters_dir = os.path.expanduser(self.read_option('data_sources', 'clusters_dir')) 127 self.clusters_dir = os.path.expanduser(self.read_option('data_sources', 'clusters_dir'))
126 - self.k_medoids = self.read_option('data_sources', 'k_medoids') 128 + self.k_medoids = int(self.read_option('data_sources', 'k_medoids'))
  129 + self.max_popcon = int(self.read_option('data_sources', 'max_popcon'))
127 130
128 self.weight = self.read_option('recommender', 'weight') 131 self.weight = self.read_option('recommender', 'weight')
129 self.strategy = self.read_option('recommender', 'strategy') 132 self.strategy = self.read_option('recommender', 'strategy')
130 - self.profile_size = self.read_option('recommender', 'profile_size') 133 + self.profile_size = int(self.read_option('recommender',
  134 + 'profile_size'))
131 135
132 - short_options = "hdvo:a:e:p:m:ul:c:w:s:z:" 136 + short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:"
133 long_options = ["help", "debug", "verbose", "output=", 137 long_options = ["help", "debug", "verbose", "output=",
134 "axi=", "dde=", "popconindex=", "popcondir=", "indexmode=", 138 "axi=", "dde=", "popconindex=", "popcondir=", "indexmode=",
135 - "clustersdir=", "kmedoids=", "weight=", "strategy=", 139 + "clustersdir=", "kmedoids=", "max_popcon=", "weight=", "strategy=",
136 "profile_size="] 140 "profile_size="]
137 try: 141 try:
138 opts, args = getopt.getopt(sys.argv[1:], short_options, 142 opts, args = getopt.getopt(sys.argv[1:], short_options,
@@ -166,13 +170,15 @@ class Config(): @@ -166,13 +170,15 @@ class Config():
166 elif o in ("-l", "--clustersdir"): 170 elif o in ("-l", "--clustersdir"):
167 self.clusters_dir = p 171 self.clusters_dir = p
168 elif o in ("-c", "--kmedoids"): 172 elif o in ("-c", "--kmedoids"):
169 - self.k_medoids = p 173 + self.k_medoids = int(p)
  174 + elif o in ("-x", "--max_popcon"):
  175 + self.max_popcon = int(p)
170 elif o in ("-w", "--weight"): 176 elif o in ("-w", "--weight"):
171 self.weight = p 177 self.weight = p
172 elif o in ("-s", "--strategy"): 178 elif o in ("-s", "--strategy"):
173 self.strategy = p 179 self.strategy = p
174 elif o in ("-z", "--profile_size"): 180 elif o in ("-z", "--profile_size"):
175 - self.strategy = p 181 + self.strategy = int(p)
176 else: 182 else:
177 assert False, "unhandled option" 183 assert False, "unhandled option"
178 184
@@ -82,7 +82,7 @@ class AppAptXapianIndex(xapian.WritableDatabase): @@ -82,7 +82,7 @@ class AppAptXapianIndex(xapian.WritableDatabase):
82 except: 82 except:
83 logging.info("Doc %d not found in axi." % docid) 83 logging.info("Doc %d not found in axi." % docid)
84 logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." % 84 logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." %
85 - self.get_doccount(), self.get_lastdocid()) 85 + (self.get_doccount(), self.get_lastdocid()))
86 86
87 def __str__(self): 87 def __str__(self):
88 return print_index(self) 88 return print_index(self)
@@ -166,6 +166,7 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -166,6 +166,7 @@ class PopconXapianIndex(xapian.WritableDatabase):
166 raise Error 166 raise Error
167 if cfg.index_mode == "reindex": 167 if cfg.index_mode == "reindex":
168 self.source_dir = os.path.expanduser(cfg.popcon_dir) 168 self.source_dir = os.path.expanduser(cfg.popcon_dir)
  169 + logging.debug(self.source_dir)
169 else: 170 else:
170 self.source_dir = os.path.expanduser(cfg.clusters_dir) 171 self.source_dir = os.path.expanduser(cfg.clusters_dir)
171 if not os.path.exists(cfg.clusters_dir): 172 if not os.path.exists(cfg.clusters_dir):
@@ -180,10 +181,12 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -180,10 +181,12 @@ class PopconXapianIndex(xapian.WritableDatabase):
180 % cfg.clusters_dir) 181 % cfg.clusters_dir)
181 distance = JaccardDistance() 182 distance = JaccardDistance()
182 data = self.get_submissions(cfg.popcon_dir) 183 data = self.get_submissions(cfg.popcon_dir)
  184 + logging.debug(type(data))
183 self.cluster_dispersion = \ 185 self.cluster_dispersion = \
184 self.kmedoids_clustering(data, cfg.clusters_dir, 186 self.kmedoids_clustering(data, cfg.clusters_dir,
185 - distance, cfg.k_medoids)  
186 - logging.info("Clusters dispersion: %f.2", 187 + distance, cfg.k_medoids,
  188 + cfg.max_popcon)
  189 + logging.info("Clusters dispersion: %.2f",
187 self.cluster_dispersion) 190 self.cluster_dispersion)
188 else: 191 else:
189 logging.info("Using clusters from \'%s\'" % 192 logging.info("Using clusters from \'%s\'" %
@@ -221,8 +224,9 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -221,8 +224,9 @@ class PopconXapianIndex(xapian.WritableDatabase):
221 self.path) 224 self.path)
222 xapian.WritableDatabase.__init__(self,self.path, 225 xapian.WritableDatabase.__init__(self,self.path,
223 xapian.DB_CREATE_OR_OVERWRITE) 226 xapian.DB_CREATE_OR_OVERWRITE)
224 - except xapian.DatabaseError: 227 + except xapian.DatabaseError as e:
225 logging.critical("Could not create popcon xapian index.") 228 logging.critical("Could not create popcon xapian index.")
  229 + logging.critical(str(e))
226 raise Error 230 raise Error
227 231
228 for root, dirs, files in os.walk(self.source_dir): 232 for root, dirs, files in os.walk(self.source_dir):
@@ -254,29 +258,32 @@ class PopconXapianIndex(xapian.WritableDatabase): @@ -254,29 +258,32 @@ class PopconXapianIndex(xapian.WritableDatabase):
254 submissions = [] 258 submissions = []
255 for root, dirs, files in os.walk(submissions_dir): 259 for root, dirs, files in os.walk(submissions_dir):
256 for popcon_file in files: 260 for popcon_file in files:
  261 + logging.debug("Parsing submission %s" % popcon_file)
257 submission = PopconSubmission(os.path.join(root, popcon_file)) 262 submission = PopconSubmission(os.path.join(root, popcon_file))
258 submissions.append(submission) 263 submissions.append(submission)
259 return submissions 264 return submissions
260 265
261 - def kmedoids_clustering(self,data,clusters_dir,distance,k_medoids): 266 + def kmedoids_clustering(self,data,clusters_dir,distance,k_medoids,max_popcon):
262 clusters = KMedoidsClustering(data,lambda x,y: 267 clusters = KMedoidsClustering(data,lambda x,y:
263 distance(x.packages.keys(), 268 distance(x.packages.keys(),
264 - y.packages.keys())) 269 + y.packages.keys()),max_popcon)
265 medoids,dispersion = clusters.getMedoids(k_medoids) 270 medoids,dispersion = clusters.getMedoids(k_medoids)
266 for submission in medoids: 271 for submission in medoids:
  272 + logging.debug("Copying submission %s" % submission.user_id)
267 shutil.copyfile(submission.path,os.path.join(clusters_dir, 273 shutil.copyfile(submission.path,os.path.join(clusters_dir,
268 submission.user_id)) 274 submission.user_id))
269 return dispersion 275 return dispersion
270 276
271 class KMedoidsClustering(cluster.KMeansClustering): 277 class KMedoidsClustering(cluster.KMeansClustering):
272 278
273 - def __init__(self,data,distance,max_data=100):  
274 - # if len(data)<max_data:  
275 - # data_sample = data  
276 - # else:  
277 - # data_sample = random.sample(data,max_data)  
278 - # cluster.KMeansClustering.__init__(self, data_sample, distance)  
279 - cluster.KMeansClustering.__init__(self, data, distance) 279 + def __init__(self,data,distance,max_data):
  280 + if len(data)<max_data:
  281 + data_sample = data
  282 + else:
  283 + data_sample = random.sample(data,max_data)
  284 + print data_sample
  285 + cluster.KMeansClustering.__init__(self, data_sample, distance)
  286 + # cluster.KMeansClustering.__init__(self, data, distance)
280 self.distanceMatrix = {} 287 self.distanceMatrix = {}
281 for submission in self._KMeansClustering__data: 288 for submission in self._KMeansClustering__data:
282 self.distanceMatrix[submission.user_id] = {} 289 self.distanceMatrix[submission.user_id] = {}
@@ -335,6 +342,8 @@ class KMedoidsClustering(cluster.KMeansClustering): @@ -335,6 +342,8 @@ class KMedoidsClustering(cluster.KMeansClustering):
335 """ 342 """
336 #medoids_distances = [self.getMedoid(cluster) for cluster in self.getclusters(n)] 343 #medoids_distances = [self.getMedoid(cluster) for cluster in self.getclusters(n)]
337 medoids_distances = [] 344 medoids_distances = []
  345 + logging.debug("initial length %s" % self._KMeansClustering__initial_length)
  346 + logging.debug("n %d" % n)
338 for cluster in self.getclusters(n): 347 for cluster in self.getclusters(n):
339 type(cluster) 348 type(cluster)
340 print cluster 349 print cluster
src/examples/cross_validation.py
@@ -53,7 +53,7 @@ if __name__ == '__main__': @@ -53,7 +53,7 @@ if __name__ == '__main__':
53 metrics.append(F1()) 53 metrics.append(F1())
54 metrics.append(Accuracy()) 54 metrics.append(Accuracy())
55 metrics.append(SimpleAccuracy()) 55 metrics.append(SimpleAccuracy())
56 - validation = CrossValidation(0.3,10,rec,metrics,0.005) 56 + validation = CrossValidation(0.9,10,rec,metrics,0.1)
57 validation.run(user) 57 validation.run(user)
58 print validation 58 print validation
59 59
src/experiments/experiments.cfg
@@ -5,7 +5,8 @@ path = 'results' @@ -5,7 +5,8 @@ path = 'results'
5 experiment = 'grid' 5 experiment = 'grid'
6 weight = ['bm25', 'trad'] 6 weight = ['bm25', 'trad']
7 ;profile_size = range(10,100,10) 7 ;profile_size = range(10,100,10)
8 -sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] 8 +;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
  9 +sample = [0.6, 0.7, 0.8, 0.9]
9 10
10 [content] 11 [content]
11 strategy = ['cb','cbt','cbd'] 12 strategy = ['cb','cbt','cbd']
src/web/templates/about.html
@@ -15,20 +15,32 @@ $var jsfiles: static/js/facebox.js @@ -15,20 +15,32 @@ $var jsfiles: static/js/facebox.js
15 <div id="maincontent"> 15 <div id="maincontent">
16 <div class="innertube"> 16 <div class="innertube">
17 17
18 -<a rel="facebox" href="/static/images/diaappr.png" title="AppRecommender Diagram"><img style="float: right; margin: 10px;" alt="AppRecommender Diagram" src="/static/images/diaappr.png" width="230px" /></a> 18 +<h1>AppRecommender Survey</h1>
  19 +<h2>About</h2>
19 20
20 -<h1>About</h1>  
21 -  
22 -<p>This experiment aims to compare and validate automated application  
23 -recommendations produced by various strategies and algorithms tunnings. We 21 +<p align="justify">This experiment aims to compare and validate automated application
  22 +recommendations produced by various strategies and algorithms tuning. We
24 believe that real users evaluation regarding the relevance of recommendations is 23 believe that real users evaluation regarding the relevance of recommendations is
25 the most accurate data source for computing recommender system effectiveness.</p> 24 the most accurate data source for computing recommender system effectiveness.</p>
26 -<br />  
27 -<p>The engine that is being tested is a free software called <a 25 +<br/>
  26 +
  27 +<a rel="facebox" href="/static/images/diaappr.png" title="AppRecommender Diagram">
  28 +<img style="float: right; margin: 10px;" alt="AppRecommender dataflow"
  29 +src="/static/images/diaappr.png" width="230px" /></a>
  30 +
  31 +<p align="justify">The engine that is being tested is a free software called <a
28 href="http://github.com/tassia/AppRecommender">AppRecommender</a>. It was 32 href="http://github.com/tassia/AppRecommender">AppRecommender</a>. It was
29 initially developed using the Debian Project infrastructure, but the solution 33 initially developed using the Debian Project infrastructure, but the solution
30 -is essentially distro-independent and could even be adapted to non GNU/Linux  
31 -systems given that there was available data for that.</p> 34 +is essentially distro-independent and can even be adapted to non GNU/Linux
  35 +systems given that there is available data for that.</p>
  36 +<br />
  37 +
  38 +<p align="justify">The picture on the right gives an idea of the data workflow
  39 +for AppRecommender. The user provides a set of applications installed in his
  40 +system and the recommender suggests a set of applications that he might also
  41 +be interested in. Different strategies can be used to compose the recommendation,
  42 +based on this user and other similar users profiles, using Apt-xapian-index,
  43 +Popcon and UDD as data sources.</p>
32 44
33 </div><!-- id="innertube" --> 45 </div><!-- id="innertube" -->
34 </div><!-- id="maincontent" --> 46 </div><!-- id="maincontent" -->
src/web/templates/layout.html
1 $def with (content) 1 $def with (content)
2 -$ url_base = "http://localhost:8080" 2 +$ url_base = "/"
3 <!--Force IE6 into quirks mode with this comment tag--> 3 <!--Force IE6 into quirks mode with this comment tag-->
4 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 4 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 5 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@@ -162,7 +162,7 @@ $:content @@ -162,7 +162,7 @@ $:content
162 <div id="navbar"> 162 <div id="navbar">
163 <ul> 163 <ul>
164 <li><a href="$url_base">Home</a></li> 164 <li><a href="$url_base">Home</a></li>
165 - <li><a href="$url_base/about">About</a></li> 165 + <li><a href="$(url_base)about">About</a></li>
166 <li><a href="http://github.com/tassia/AppRecommender">Development</a></li> 166 <li><a href="http://github.com/tassia/AppRecommender">Development</a></li>
167 </ul> 167 </ul>
168 </div><!-- id="navbar" --> 168 </div><!-- id="navbar" -->
src/web/templates/survey.html
@@ -28,7 +28,11 @@ $var jsfiles: static/coda-slider-2.0/javascripts/jquery-1.3.2.min.js static/coda @@ -28,7 +28,11 @@ $var jsfiles: static/coda-slider-2.0/javascripts/jquery-1.3.2.min.js static/coda
28 <div class="show-end" style="display: none;"> 28 <div class="show-end" style="display: none;">
29 29
30 <p> 30 <p>
31 -A class <b>show-end</b> ou <b>hide-end</b> mostra um elemento ou esconde um elemento ao final do último Next. 31 +<!--A class <b>show-end</b> ou <b>hide-end</b> mostra um elemento ou esconde um
  32 +elemento ao final do último Next.-->
  33 +You have completed this round of evaluations. If you have time to do some more,
  34 +please do so. Otherwise, please conclude your participation clicking in the
  35 +button below.
32 </p> 36 </p>
33 37
34 </div> 38 </div>
src/web/templates/survey_index.html
@@ -28,8 +28,8 @@ field of each line. For instance, you can run the following command and upload t @@ -28,8 +28,8 @@ field of each line. For instance, you can run the following command and upload t
28 generated 'packages.list' file.</p> 28 generated 'packages.list' file.</p>
29 <p><code> # dpkg-query --show > packages.list </code></p> 29 <p><code> # dpkg-query --show > packages.list </code></p>
30 <p>Given the produced recommendations you will be asked to evaluate the list of 30 <p>Given the produced recommendations you will be asked to evaluate the list of
31 -applications suggested. You need to analyse at least 10 suggestions to be considered  
32 -in the survey, though we appreciate if you do as many as you can.</p> 31 +applications suggested. You need to analyse at least 10 suggestions to be
  32 +considered in the survey, though we appreciate if you do as many as you can.</p>
33 <br /> 33 <br />
34 <p>Your help is very much appreciated!</p> 34 <p>Your help is very much appreciated!</p>
35 </div> 35 </div>