Commit 11fc8731a1246b3f06a729395cdfefcc9bc0322c

Authored by Tássia Camões Araújo
2 parents 5075d5c0 1e970ec5
Exists in master and in 1 other branch add_vagrant

Merging content of survey index.

src/config.py
... ... @@ -46,6 +46,7 @@ class Config():
46 46 self.popcon_dir = os.path.expanduser("~/.app-recommender/popcon_dir")
47 47 self.clusters_dir = os.path.expanduser("~/.app-recommender/clusters_dir")
48 48 self.k_medoids = 100
  49 + self.max_popcon = 1000
49 50 self.index_mode = "old"
50 51 self.strategy = "cb"
51 52 self.weight = "bm25"
... ... @@ -71,6 +72,7 @@ class Config():
71 72 print " -u, --indexmode= 'old'|'reindex'|'cluster'|'recluster'"
72 73 print " -l, --clustersdir=PATH Path to popcon clusters dir"
73 74 print " -c, --medoids=k Number of medoids for clustering"
  75 + print " -x, --maxpopcon=k Number of submissions to be considered"
74 76 print ""
75 77 print " [ recommender ]"
76 78 print " -w, --weight=OPTION Search weighting scheme"
... ... @@ -112,8 +114,8 @@ class Config():
112 114 logging.error("Error in config file syntax: %s", str(err))
113 115 os.abort()
114 116  
115   - self.debug = self.read_option('general', 'debug')
116   - self.debug = self.read_option('general', 'verbose')
  117 + self.debug = int(self.read_option('general', 'debug'))
  118 + self.debug = int(self.read_option('general', 'verbose'))
117 119 self.output_filename = self.read_option('general', 'output')
118 120 self.survey_mode = self.read_option('general', 'survey_mode')
119 121  
... ... @@ -123,16 +125,18 @@ class Config():
123 125 self.popcon_dir = os.path.expanduser(self.read_option('data_sources', 'popcon_dir'))
124 126 self.index_mode = self.read_option('data_sources', 'index_mode')
125 127 self.clusters_dir = os.path.expanduser(self.read_option('data_sources', 'clusters_dir'))
126   - self.k_medoids = self.read_option('data_sources', 'k_medoids')
  128 + self.k_medoids = int(self.read_option('data_sources', 'k_medoids'))
  129 + self.max_popcon = int(self.read_option('data_sources', 'max_popcon'))
127 130  
128 131 self.weight = self.read_option('recommender', 'weight')
129 132 self.strategy = self.read_option('recommender', 'strategy')
130   - self.profile_size = self.read_option('recommender', 'profile_size')
  133 + self.profile_size = int(self.read_option('recommender',
  134 + 'profile_size'))
131 135  
132   - short_options = "hdvo:a:e:p:m:ul:c:w:s:z:"
  136 + short_options = "hdvo:a:e:p:m:ul:c:x:w:s:z:"
133 137 long_options = ["help", "debug", "verbose", "output=",
134 138 "axi=", "dde=", "popconindex=", "popcondir=", "indexmode=",
135   - "clustersdir=", "kmedoids=", "weight=", "strategy=",
  139 + "clustersdir=", "kmedoids=", "max_popcon=", "weight=", "strategy=",
136 140 "profile_size="]
137 141 try:
138 142 opts, args = getopt.getopt(sys.argv[1:], short_options,
... ... @@ -166,13 +170,15 @@ class Config():
166 170 elif o in ("-l", "--clustersdir"):
167 171 self.clusters_dir = p
168 172 elif o in ("-c", "--kmedoids"):
169   - self.k_medoids = p
  173 + self.k_medoids = int(p)
  174 + elif o in ("-x", "--max_popcon"):
  175 + self.max_popcon = int(p)
170 176 elif o in ("-w", "--weight"):
171 177 self.weight = p
172 178 elif o in ("-s", "--strategy"):
173 179 self.strategy = p
174 180 elif o in ("-z", "--profile_size"):
175   - self.strategy = p
  181 + self.strategy = int(p)
176 182 else:
177 183 assert False, "unhandled option"
178 184  
... ...
src/data.py
... ... @@ -82,7 +82,7 @@ class AppAptXapianIndex(xapian.WritableDatabase):
82 82 except:
83 83 logging.info("Doc %d not found in axi." % docid)
84 84 logging.info("AppAptXapianIndex size: %d (lastdocid: %d)." %
85   - self.get_doccount(), self.get_lastdocid())
  85 + (self.get_doccount(), self.get_lastdocid()))
86 86  
87 87 def __str__(self):
88 88 return print_index(self)
... ... @@ -166,6 +166,7 @@ class PopconXapianIndex(xapian.WritableDatabase):
166 166 raise Error
167 167 if cfg.index_mode == "reindex":
168 168 self.source_dir = os.path.expanduser(cfg.popcon_dir)
  169 + logging.debug(self.source_dir)
169 170 else:
170 171 self.source_dir = os.path.expanduser(cfg.clusters_dir)
171 172 if not os.path.exists(cfg.clusters_dir):
... ... @@ -180,10 +181,12 @@ class PopconXapianIndex(xapian.WritableDatabase):
180 181 % cfg.clusters_dir)
181 182 distance = JaccardDistance()
182 183 data = self.get_submissions(cfg.popcon_dir)
  184 + logging.debug(type(data))
183 185 self.cluster_dispersion = \
184 186 self.kmedoids_clustering(data, cfg.clusters_dir,
185   - distance, cfg.k_medoids)
186   - logging.info("Clusters dispersion: %f.2",
  187 + distance, cfg.k_medoids,
  188 + cfg.max_popcon)
  189 + logging.info("Clusters dispersion: %.2f",
187 190 self.cluster_dispersion)
188 191 else:
189 192 logging.info("Using clusters from \'%s\'" %
... ... @@ -221,8 +224,9 @@ class PopconXapianIndex(xapian.WritableDatabase):
221 224 self.path)
222 225 xapian.WritableDatabase.__init__(self,self.path,
223 226 xapian.DB_CREATE_OR_OVERWRITE)
224   - except xapian.DatabaseError:
  227 + except xapian.DatabaseError as e:
225 228 logging.critical("Could not create popcon xapian index.")
  229 + logging.critical(str(e))
226 230 raise Error
227 231  
228 232 for root, dirs, files in os.walk(self.source_dir):
... ... @@ -254,29 +258,32 @@ class PopconXapianIndex(xapian.WritableDatabase):
254 258 submissions = []
255 259 for root, dirs, files in os.walk(submissions_dir):
256 260 for popcon_file in files:
  261 + logging.debug("Parsing submission %s" % popcon_file)
257 262 submission = PopconSubmission(os.path.join(root, popcon_file))
258 263 submissions.append(submission)
259 264 return submissions
260 265  
261   - def kmedoids_clustering(self,data,clusters_dir,distance,k_medoids):
  266 + def kmedoids_clustering(self,data,clusters_dir,distance,k_medoids,max_popcon):
262 267 clusters = KMedoidsClustering(data,lambda x,y:
263 268 distance(x.packages.keys(),
264   - y.packages.keys()))
  269 + y.packages.keys()),max_popcon)
265 270 medoids,dispersion = clusters.getMedoids(k_medoids)
266 271 for submission in medoids:
  272 + logging.debug("Copying submission %s" % submission.user_id)
267 273 shutil.copyfile(submission.path,os.path.join(clusters_dir,
268 274 submission.user_id))
269 275 return dispersion
270 276  
271 277 class KMedoidsClustering(cluster.KMeansClustering):
272 278  
273   - def __init__(self,data,distance,max_data=100):
274   - # if len(data)<max_data:
275   - # data_sample = data
276   - # else:
277   - # data_sample = random.sample(data,max_data)
278   - # cluster.KMeansClustering.__init__(self, data_sample, distance)
279   - cluster.KMeansClustering.__init__(self, data, distance)
  279 + def __init__(self,data,distance,max_data):
  280 + if len(data)<max_data:
  281 + data_sample = data
  282 + else:
  283 + data_sample = random.sample(data,max_data)
  284 + print data_sample
  285 + cluster.KMeansClustering.__init__(self, data_sample, distance)
  286 + # cluster.KMeansClustering.__init__(self, data, distance)
280 287 self.distanceMatrix = {}
281 288 for submission in self._KMeansClustering__data:
282 289 self.distanceMatrix[submission.user_id] = {}
... ... @@ -335,6 +342,8 @@ class KMedoidsClustering(cluster.KMeansClustering):
335 342 """
336 343 #medoids_distances = [self.getMedoid(cluster) for cluster in self.getclusters(n)]
337 344 medoids_distances = []
  345 + logging.debug("initial length %s" % self._KMeansClustering__initial_length)
  346 + logging.debug("n %d" % n)
338 347 for cluster in self.getclusters(n):
339 348 type(cluster)
340 349 print cluster
... ...
src/examples/cross_validation.py
... ... @@ -53,7 +53,7 @@ if __name__ == '__main__':
53 53 metrics.append(F1())
54 54 metrics.append(Accuracy())
55 55 metrics.append(SimpleAccuracy())
56   - validation = CrossValidation(0.3,10,rec,metrics,0.005)
  56 + validation = CrossValidation(0.9,10,rec,metrics,0.1)
57 57 validation.run(user)
58 58 print validation
59 59  
... ...
src/experiments/experiments.cfg
... ... @@ -5,7 +5,8 @@ path = 'results'
5 5 experiment = 'grid'
6 6 weight = ['bm25', 'trad']
7 7 ;profile_size = range(10,100,10)
8   -sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
  8 +;sample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
  9 +sample = [0.6, 0.7, 0.8, 0.9]
9 10  
10 11 [content]
11 12 strategy = ['cb','cbt','cbd']
... ...
src/web/templates/about.html
... ... @@ -15,20 +15,32 @@ $var jsfiles: static/js/facebox.js
15 15 <div id="maincontent">
16 16 <div class="innertube">
17 17  
18   -<a rel="facebox" href="/static/images/diaappr.png" title="AppRecommender Diagram"><img style="float: right; margin: 10px;" alt="AppRecommender Diagram" src="/static/images/diaappr.png" width="230px" /></a>
  18 +<h1>AppRecommender Survey</h1>
  19 +<h2>About</h2>
19 20  
20   -<h1>About</h1>
21   -
22   -<p>This experiment aims to compare and validate automated application
23   -recommendations produced by various strategies and algorithms tunnings. We
  21 +<p align="justify">This experiment aims to compare and validate automated application
  22 +recommendations produced by various strategies and algorithms tuning. We
24 23 believe that real users' evaluation regarding the relevance of recommendations is
25 24 the most accurate data source for computing recommender system effectiveness.</p>
26   -<br />
27   -<p>The engine that is being tested is a free software called <a
  25 +<br/>
  26 +
  27 +<a rel="facebox" href="/static/images/diaappr.png" title="AppRecommender Diagram">
  28 +<img style="float: right; margin: 10px;" alt="AppRecommender dataflow"
  29 +src="/static/images/diaappr.png" width="230px" /></a>
  30 +
  31 +<p align="justify">The engine that is being tested is a free software called <a
28 32 href="http://github.com/tassia/AppRecommender">AppRecommender</a>. It was
29 33 initially developed using the Debian Project infrastructure, but the solution
30   -is essentially distro-independent and could even be adapted to non GNU/Linux
31   -systems given that there was available data for that.</p>
  34 +is essentially distro-independent and can even be adapted to non GNU/Linux
  35 +systems given that there is available data for that.</p>
  36 +<br />
  37 +
  38 +<p align="justify">The picture on the right gives an idea of the data workflow
  39 +for AppRecommender. The user provides a set of applications installed in his
  40 + system and the recommender suggests a set of applications that he might also
  41 +be interested in. Different strategies can be used to compose the recommendation,
  42 + based on this user and other similar users' profiles, using Apt-xapian-index,
  43 +Popcon and UDD as data sources.</p>
32 44  
33 45 </div><!-- id="innertube" -->
34 46 </div><!-- id="maincontent" -->
... ...
src/web/templates/layout.html
1 1 $def with (content)
2   -$ url_base = "http://localhost:8080"
  2 +$ url_base = "/"
3 3 <!--Force IE6 into quirks mode with this comment tag-->
4 4 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5 5 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
... ... @@ -162,7 +162,7 @@ $:content
162 162 <div id="navbar">
163 163 <ul>
164 164 <li><a href="$url_base">Home</a></li>
165   - <li><a href="$url_base/about">About</a></li>
  165 + <li><a href="$(url_base)about">About</a></li>
166 166 <li><a href="http://github.com/tassia/AppRecommender">Development</a></li>
167 167 </ul>
168 168 </div><!-- id="navbar" -->
... ...
src/web/templates/survey.html
... ... @@ -28,7 +28,11 @@ $var jsfiles: static/coda-slider-2.0/javascripts/jquery-1.3.2.min.js static/coda
28 28 <div class="show-end" style="display: none;">
29 29  
30 30 <p>
31   -A class <b>show-end</b> ou <b>hide-end</b> mostra um elemento ou esconde um elemento ao final do último Next.
  31 +<!--A class <b>show-end</b> ou <b>hide-end</b> mostra um elemento ou esconde um
  32 +elemento ao final do último Next.-->
  33 +You have completed this round of evaluations. If you have time to do some more,
  34 + please do so. Otherwise, please conclude your participation by clicking the
  35 + button below.
32 36 </p>
33 37  
34 38 </div>
... ...
src/web/templates/survey_index.html
... ... @@ -28,8 +28,8 @@ field of each line. For instance, you can run the following command and upload t
28 28 generated 'packages.list' file.</p>
29 29 <p><code> # dpkg-query --show > packages.list </code></p>
30 30 <p>Given the produced recommendations you will be asked to evaluate the list of
31   -applications suggested. You need to analyse at least 10 sugestions to be considered
32   -in the survey, though we appreciate if you do as many as you can.</p>
  31 + applications suggested. You need to analyse at least 10 suggestions to be
  32 +considered in the survey, though we appreciate if you do as many as you can.</p>
33 33 <br />
34 34 <p>Your help is very much appreciated!</p>
35 35 </div>
... ...