test_api.rake
16.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
require 'fastercsv'
namespace :test_api do
# Aggregate task: depends on the vote-consistency check and the density
# generation task, so invoking test_api:all triggers both of them.
task :all => [:question_vote_consistency,:generate_density_information]
# Records a batch of random test votes ("test_vote") for the first User
# against prompts chosen by a single question's catchup algorithm.
#
# Optional environment overrides (defaults are the values that used to be
# hard-coded, so a plain invocation behaves as before):
#   QUESTION_ID - id of the question to vote on (default: 120)
#   VOTES       - number of votes to record     (default: 3000)
#
# In production this only prints a warning; it does NOT abort.
desc "Don't run unless you know what you are doing"
task(:generate_lots_of_votes => :environment) do
  if Rails.env.production?
    print "You probably don't want to run this in production as it will falsify a bunch of random votes"
  end

  # Integer() raises ArgumentError on a malformed override instead of
  # silently turning it into 0.
  question_id = Integer(ENV['QUESTION_ID'] || 120)
  vote_count = Integer(ENV['VOTES'] || 3000)

  current_user = User.first
  vote_count.times do
    # The question is re-fetched on every iteration, exactly as before, so the
    # prompt selection always works from a freshly loaded record.
    question = Question.find(question_id)
    # A local variable is used instead of @p so no instance variable leaks
    # onto the top-level Rake object.
    prompt = Prompt.find(question.catchup_choose_prompt_id)
    # rand(2) yields 0 or 1; presumably this selects the left/right choice -
    # TODO confirm against User#record_vote.
    current_user.record_vote("test_vote", prompt, rand(2))
  end
end
# One-off backfill: for every Vote without an associated Appearance, creates
# an Appearance that copies the vote's voter, site, prompt, question and
# timestamps, links it to the vote and saves the vote. Prints a "." per
# repaired vote and finally the number of votes repaired.
desc "Generate appearances for any votes that have no current appearance, should only need to be run once"
task(:generate_appearances_for_existing_votes => :environment) do
  repaired = 0
  # find_each walks the table in batches instead of loading every vote into
  # memory at once, as Vote.all did.
  Vote.find_each do |vote|
    next unless vote.appearance.nil?

    print "."
    vote.appearance = Appearance.create(
      :voter_id    => vote.voter_id,
      :site_id     => vote.site_id,
      :prompt_id   => vote.prompt_id,
      :question_id => vote.question_id,
      :created_at  => vote.created_at,
      :updated_at  => vote.updated_at
    )
    vote.save
    repaired += 1
  end
  print repaired
end
# One-off backfill of historical Density rows.
#
# For every day from the date of the earliest vote that has a loser recorded
# up to today, and for every question created before that day, this counts
# the appearances (with a prompt) created before that day, bucketed by
# whether the prompt's left/right choices are seed (user_created == false) or
# non-seed (user_created == true). Each bucket count is divided by the number
# of possible ordered prompt pairs of that kind, and one Density row per
# bucket is saved with created_at set to that day. A 0/0 ratio is stored as
# nil.
desc "Generate past density information"
task(:generate_past_densities => :environment) do
  #this is not elegant, but should only be run once, so quick and dirty wins
  first_vote = Vote.find(:all, :conditions => 'loser_choice_id IS NOT NULL', :order => :created_at, :limit => 1).first
  if first_vote.nil?
    # Without such a vote there is no start date; previously this crashed
    # with NoMethodError on nil.
    puts "No votes with a loser_choice_id found, nothing to generate"
    next
  end
  start_date = first_vote.created_at.to_date

  start_date.upto(Date.today) do |the_date|
    questions = Question.find(:all, :conditions => ['created_at < ?', the_date])
    print the_date.to_s
    questions.each do |q|
      puts q.id
      relevant_choices = q.choices.find(:all, :conditions => ['created_at < ?', the_date])
      # This question had no choices yet on this date. The old test compared
      # the Array with 0, which is never true, so the skip never happened.
      next if relevant_choices.empty?

      # Anything not flagged as user created counts as a seed choice.
      seed_choices = relevant_choices.select { |c| !c.user_created }.size
      nonseed_choices = relevant_choices.size - seed_choices

      # Number of possible ordered (left, right) pairs for each bucket.
      seed_seed_total = seed_choices **2 - seed_choices
      nonseed_nonseed_total = nonseed_choices **2 - nonseed_choices
      seed_nonseed_total = seed_choices * nonseed_choices
      nonseed_seed_total = seed_choices * nonseed_choices

      seed_seed_sum = 0
      seed_nonseed_sum = 0
      nonseed_seed_sum = 0
      nonseed_nonseed_sum = 0
      q.appearances.find_each(:conditions => ['prompt_id IS NOT NULL AND created_at < ?', the_date]) do |a|
        prompt = a.prompt
        left_user_created = prompt.left_choice.user_created
        right_user_created = prompt.right_choice.user_created
        # Explicit true/false comparisons are kept on purpose: a nil flag
        # falls into none of the four buckets.
        if left_user_created == false && right_user_created == false
          seed_seed_sum += 1
        elsif left_user_created == false && right_user_created == true
          seed_nonseed_sum += 1
        elsif left_user_created == true && right_user_created == false
          nonseed_seed_sum += 1
        elsif left_user_created == true && right_user_created == true
          nonseed_nonseed_sum += 1
        end
      end

      densities = {}
      densities[:seed_seed] = seed_seed_sum.to_f / seed_seed_total.to_f
      densities[:seed_nonseed] = seed_nonseed_sum.to_f / seed_nonseed_total.to_f
      densities[:nonseed_seed] = nonseed_seed_sum.to_f / nonseed_seed_total.to_f
      densities[:nonseed_nonseed] = nonseed_nonseed_sum.to_f / nonseed_nonseed_total.to_f

      densities.each do |type, average|
        d = Density.new
        d.created_at = the_date
        d.question_id = q.id
        d.prompt_type = type.to_s
        # 0.0 / 0.0 is NaN (no possible pairs of this kind); store nil instead.
        d.value = average.nan? ? nil : average
        d.save!
      end

      puts "Seed_seed sum: #{seed_seed_sum}, seed_seed total num: #{seed_seed_total}"
      puts "Seed_nonseed sum: #{seed_nonseed_sum}, seed_nonseed total num: #{seed_nonseed_total}"
      puts "Nonseed_seed sum: #{nonseed_seed_sum}, nonseed_seed total num: #{nonseed_seed_total}"
      puts "Nonseed_nonseed sum: #{nonseed_nonseed_sum}, nonseed_nonseed total num: #{nonseed_nonseed_total}"
    end
  end
end
# For every question, makes sure a prompt row exists for each ordered
# (left, right) pair of distinct choices. Missing pairs are added with a
# single multi-row INSERT per question, and the question's prompts_count
# counter is bumped by the number of rows added. Questions that already have
# exactly the expected number of prompts (n^2 - n), or more than that, are
# reported and skipped.
desc "Should only need to be run once"
task(:generate_all_possible_prompts => :environment) do
  Question.find(:all).each do |question|
    choices = question.choices
    expected_total = choices.size**2 - choices.size
    current_total = question.prompts.size

    if current_total > expected_total
      print "ERROR: #{question.id}\n"
      next
    end
    if current_total == expected_total
      print "#{question.id} has enough prompts, skipping...\n"
      next
    end
    print "#{question.id} should add #{expected_total - current_total}\n"

    created_stamp = question.created_at.to_s(:db)
    updated_stamp = Time.now.to_s(:db) #isn't rails awesome?

    # Index the existing prompts by "left,right" so each pair lookup below is
    # a hash hit rather than a scan over all prompts.
    known_pairs = {}
    Prompt.find(:all, :select => 'id, left_choice_id, right_choice_id', :conditions => {:question_id => question.id}).each do |existing|
      known_pairs["#{existing.left_choice_id},#{existing.right_choice_id}"] = 1
    end

    value_rows = []
    choices.each do |left|
      choices.each do |right|
        # A choice is never paired with itself.
        next if left.id == right.id
        next if known_pairs.has_key?("#{left.id},#{right.id}")
        value_rows << "(NULL, #{question.id}, NULL, #{left.id}, '#{created_stamp}', '#{updated_stamp}', NULL, 0, #{right.id}, NULL, NULL)"
      end
    end

    added = value_rows.size
    print "Added #{added} to #{question.id}\n"
    unless value_rows.empty?
      ActiveRecord::Base.connection.execute("INSERT INTO `prompts` (`algorithm_id`, `question_id`, `voter_id`, `left_choice_id`, `created_at`, `updated_at`, `tracking`, `votes_count`, `right_choice_id`, `active`, `randomkey`) VALUES #{value_rows.join(', ')}")
    end
    Question.update_counters(question.id, :prompts_count => added)
  end
end
# Prints a comma-separated matrix of vote counts for question 120: one output
# line per left choice id, one cell per right choice id, with a 0 on the
# diagonal. Rows and cells are ordered by sorting the id halves of each
# prompt key; those halves come from String#split, so the ordering is a
# string comparison, not a numeric one.
desc "Dump votes of a question by left vs right id"
task(:make_csv => :environment) do
  question = Question.find(120)

  # votes_by_left[left_id][right_id] => number of votes on that prompt
  votes_by_left = {}
  question.prompts_hash_by_choice_ids.each do |pair_key, prompt|
    left_id, right_id = pair_key.split(", ")
    # First time a left id is seen: start its row with the diagonal cell.
    votes_by_left[left_id] ||= { left_id => 0 }
    votes_by_left[left_id][right_id] = prompt.votes.size
  end

  votes_by_left.sort.each do |_left_id, row|
    cells = row.sort.map { |_right_id, vote_total| vote_total }
    puts cells.join(", ")
  end
end
# Loads every Question and calls save_densities! on each one in turn.
desc "Generate density information for each question - should be run nightly"
task(:generate_density_information => :environment) do
  all_questions = Question.find(:all)
  all_questions.each { |question| question.save_densities! }
end
# Consistency check over the stored voting data. Collects every problem found
# into one message and mails it via CronMailer (or mails a success summary
# when nothing is wrong).
#
# Per question it verifies that:
#   * 2 x total wins == total votes (wins + losses), and total votes is even
#   * total votes == 2 x question.votes_count
#   * prompts generated on the left == prompts generated on the right
#   * each choice's cached score matches compute_score (within 0.001) and is
#     not exactly 0 or 100
#   * the scores are not all on one side of 50
#   * no choice's appearance count is more than 6 standard deviations from
#     the question's mean
#   * the size (cached) and count values agree for prompts, votes and choices
#   * the prompt count equals num_choices^2 - num_choices
#   * (catchup questions) yesterday's prompt cache hits + misses equal
#     yesterday's appearances and the miss rate is at most 10%
# Globally it verifies that every Vote and Skip has an appearance, and that
# each vote's client-reported response time is plausible compared with the
# server-side time between appearance and vote.
desc "Description here"
task(:question_vote_consistency => :environment) do
  questions = Question.find(:all)
  error_msg = ""
  bad_choices = []
  bad_votes = []

  questions.each do |question|
    total_wins = 0
    total_votes = 0
    total_generated_prompts_on_left = 0
    total_generated_prompts_on_right = 0
    total_scores_gte_fifty = 0
    total_scores_lte_fifty = 0
    # Set by the structural checks; when true a summary line for this
    # question is appended to the error message at the end of the iteration.
    error_bool = false

    question.choices.each do |choice|
      if choice.wins
        total_wins += choice.wins
        total_votes += choice.wins
      end
      if choice.losses
        total_votes += choice.losses
      end
      total_generated_prompts_on_left += choice.prompts_on_the_left.size
      total_generated_prompts_on_right += choice.prompts_on_the_right.size

      cached_score = choice.score.to_f
      generated_score = choice.compute_score.to_f
      delta = 0.001
      if (cached_score - generated_score).abs >= delta
        error_msg += "Error! The cached_score is not equal to the calculated score for choice #{choice.id}"
        print "This score is wrong! #{choice.id} , Question ID: #{question.id}, #{cached_score}, #{generated_score}, updated: #{choice.updated_at}\n"
        bad_choices << choice.id
      end

      # cached_score comes from to_f, so it can never be nil (a nil score
      # becomes 0.0 and is caught here); the former nil? test was dead code.
      if cached_score == 0.0 || cached_score == 100.0
        error_msg += "Error! The cached_score for choice #{choice.id} is exactly 0 or 100, the value: #{cached_score}"
        print "Either 0 or 100 This score is wrong! #{choice.id} , Question ID: #{question.id}, #{cached_score}, #{generated_score}, updated: #{choice.updated_at}\n"
        bad_choices << choice.id
      end

      if cached_score >= 50
        total_scores_gte_fifty += 1
      end
      if cached_score <= 50
        total_scores_lte_fifty += 1
      end
    end

    if 2 * total_wins != total_votes
      error_msg += "Error 1: 2 x Total Wins != Total votes"
      error_bool = true
    end
    if total_votes % 2 != 0
      error_msg += "Error 2: Total votes is not Even!"
      error_bool = true
    end
    if total_votes != 2 * question.votes_count
      error_msg += "Error 3: Total votes != 2 x # vote objects"
      error_bool = true
    end
    # This used to compare the right-hand total with itself, so the check
    # could never fail.
    if total_generated_prompts_on_left != total_generated_prompts_on_right
      error_msg += "Error 4: Total generated prompts on left != Total generated prompts on right"
      error_bool = true
    end
    if (total_scores_lte_fifty == question.choices.size || total_scores_gte_fifty == question.choices.size) && (total_scores_lte_fifty != total_scores_gte_fifty)
      error_msg += "Error: The scores of all choices are either all above 50, or all below 50. This is probably wrong"
      error_bool = true
      puts "Error score fifty: #{question.id}"
    end

    wins_by_choice_id = question.votes.active.count(:group => :choice_id)
    losses_by_choice_id = question.votes.active.count(:conditions => "loser_choice_id IS NOT NULL", :group => :loser_choice_id)
    #Rails returns an ordered hash, which doesn't allow for blocks to change merging logic.
    #A little hack to create a normal hash
    wins_hash = {}
    wins_hash.merge!(wins_by_choice_id)
    losses_hash = {}
    losses_hash.merge!(losses_by_choice_id)
    # Appearances per choice = wins + losses for that choice.
    appearances_by_choice_id = wins_hash.merge(losses_hash) { |_key, wins, losses| wins + losses }

    total_appearances = appearances_by_choice_id.values.inject(0) { |acc, n| acc + n }
    # `if sum > 0:` (trailing colon) only parses on Ruby 1.8; the plain form
    # is valid everywhere. A positive total also guarantees a non-empty hash,
    # so the divisions below are safe.
    if total_appearances > 0
      mean = total_appearances.to_f / appearances_by_choice_id.size.to_f
      squared_deviation = appearances_by_choice_id.values.inject(0) { |acc, n| acc + (n - mean) ** 2 }
      stddev = Math.sqrt(squared_deviation / appearances_by_choice_id.size.to_f)
      appearances_by_choice_id.each do |choice_id, n_i|
        if (n_i < (mean - 6 * stddev)) || (n_i > mean + 6 * stddev)
          error_msg += "Choice #{choice_id} in Question ##{question.id} has an irregular number of appearances: #{n_i}, as compared to the mean: #{mean} and stddev #{stddev} for this question"
          error_bool = true
        end
      end
    end

    # Checks that counter_cache is working as expected
    cached_prompts_size = question.prompts.size
    actual_prompts_size = question.prompts.count
    if cached_prompts_size != actual_prompts_size
      error_msg += "Error! Question #{question.id} has an inconsistent # of prompts! cached#: #{cached_prompts_size}, actual#: #{actual_prompts_size}\n"
    end

    cached_votes_size = question.votes.size
    actual_votes_size = question.votes.count
    if cached_votes_size != actual_votes_size
      error_msg += "Error! Question #{question.id} has an inconsistent # of votes! cached#: #{cached_votes_size}, actual#: #{actual_votes_size}\n"
    end

    cached_choices_size = question.choices.size
    actual_choices_size = question.choices.count
    if cached_choices_size != actual_choices_size
      error_msg += "Error! Question #{question.id} has an inconsistent # of choices! cached#: #{cached_choices_size}, actual#: #{actual_choices_size}\n"
    end

    # One prompt per ordered pair of distinct choices.
    expected_prompts_size = cached_choices_size ** 2 - cached_choices_size
    if cached_prompts_size != expected_prompts_size
      error_msg += "Error! Question #{question.id} has an incorrect number of prompts! Expected #{expected_prompts_size}, Actual: #{cached_prompts_size}\n"
    end

    #catchup specific
    if question.uses_catchup?
      misses = question.get_prompt_cache_misses(Date.yesterday).to_i
      hits = question.get_prompt_cache_hits(Date.yesterday).to_i
      question.expire_prompt_cache_tracking_keys(Date.yesterday)

      yesterday_votes = question.appearances.count(:conditions => ['date(created_at) = ?', Date.yesterday])
      if misses + hits != yesterday_votes
        error_msg += "Error! Question #{question.id} isn't tracking prompt cache hits and misses accurately! Expected #{yesterday_votes}, Actual: #{misses+hits}\n"
      end

      # Float division: with no appearances yesterday this is NaN (0 misses,
      # comparison below is false) or Infinity (misses > 0, reported).
      miss_rate = misses.to_f / yesterday_votes.to_f
      if miss_rate > 0.1
        error_msg += "Error! Question #{question.id} has less than 90% of appearances taken from a pre-generated cache! Expected <#{0.1}, Actual: #{miss_rate}\n"
      end
    end

    if error_bool
      error_msg += "Question #{question.id}: 2*wins = #{2*total_wins}, total votes = #{total_votes}, vote_count = #{question.votes_count}\n"
    end
  end

  votes_without_appearances = Vote.count(:conditions => {:appearance_id => nil})
  if votes_without_appearances > 0
    error_msg += "Error! There are #{votes_without_appearances} votes without associated appearance objects."
  end

  skips_without_appearances = Skip.count(:conditions => {:appearance_id => nil})
  if skips_without_appearances > 0
    error_msg += "Error! There are #{skips_without_appearances} skips without associated appearance objects."
  end

  # Creation time of the earliest vote that carries a client-side response
  # time; nil when no vote has one (previously this raised on nil).
  first_timed_vote = Vote.find(:all, :conditions => 'time_viewed IS NOT NULL', :order => 'created_at', :limit => 1).first
  recording_client_time_start_date = first_timed_vote && first_timed_vote.created_at

  Vote.find_each(:batch_size => 1000, :include => :appearance) do |v|
    # Votes lacking an appearance were already reported in aggregate above;
    # skip them here instead of crashing on nil.
    next if v.appearance.nil?

    # Both timestamps are converted with to_f, so this difference is in seconds.
    server_response_time = v.created_at.to_f - v.appearance.created_at.to_f
    if server_response_time < 0
      the_error_msg = "Error! Vote #{v.id} was created before the appearance associated with it: Appearance id: #{v.appearance.id}, Vote creation time: #{v.created_at.to_s}, Appearance creation time: #{v.appearance.created_at.to_s}\n"
      error_msg += the_error_msg
      print the_error_msg
      print "Error!"
    end

    # NOTE(review): time_viewed is presumably in milliseconds, hence the
    # division by 1000 before comparing with seconds - confirm.
    if v.time_viewed && v.time_viewed/1000 > server_response_time
      the_error_msg = "Error! Vote #{v.id} with Appearance #{v.appearance.id}, has a longer client response time than is possible. Vote creation time: #{v.created_at.to_s}, Appearance creation time: #{v.appearance.created_at.to_s}, Client side response time: #{v.time_viewed}\n"
      error_msg += the_error_msg
      print the_error_msg
      bad_votes << v.id
    elsif v.time_viewed.nil?
      # Only votes cast after client times started being recorded are
      # expected to have one.
      if recording_client_time_start_date && v.created_at > recording_client_time_start_date && v.missing_response_time_exp != 'invalid'
        the_error_msg = "Error! Vote #{v.id} with Appearance #{v.appearance.id}, does not have a client response, even though it should! Vote creation time: #{v.created_at.to_s}, Appearance creation time: #{v.appearance.created_at.to_s}, Client side response time: #{v.time_viewed}\n"
        error_msg += the_error_msg
        print the_error_msg
      end
    end
  end

  if error_msg.blank?
    # Every line, including the last, is joined with "+"; a missing "+" used
    # to drop the final (catchup) line from the message.
    success_msg = "Conducted the following tests on API data and found no inconsistencies.\n" +
      "For each of the #{questions.length} questions in the database: \n" +
      " 2 x Total Wins = Total Votes\n" +
      " Total Votes (wins + losses) is Even\n" +
      " Total Votes (wins + losses) = 2 x the number of vote objects that belong to the question\n" +
      " Total generated prompts on left = Total generated prompts on right\n" +
      " Each choice has appeared n times, where n falls within 6 stddevs of the mean number of appearances for a question\n" +
      " Note: this applies only to seed choices (not user submitted) and choices currently marked active\n" +
      " The cached score value matches the calculated score value for each choice\n" +
      " The cached vote count matches the actual number of votes for each question\n" +
      " The cached choices count matches the actual number of choices for each question\n" +
      " The cached prompt count matches the actual number of prompts for each question\n" +
      " The prompt count matches the expected number of prompts ( num_choices ^2 - num choices) for each question\n" +
      " All Vote objects have an associated appearance object\n" +
      " All Vote objects have an client response time < calculated server roundtrip time\n" +
      " More than 90% of prompts on catchup algorithm questions were served from cache\n"
    print success_msg
    CronMailer.deliver_info_message(CRON_EMAIL, "Test of API Vote Consistency passed", success_msg)
  else
    CronMailer.deliver_info_message("#{CRON_EMAIL},#{ERRORS_EMAIL}", "Error! Failure of API Vote Consistency " , error_msg)
    puts "There were errors: "
    puts error_msg
    unless bad_choices.blank?
      puts "Here's a list of choice ids that you may want to modify: #{bad_choices.uniq.inspect}"
    end
    unless bad_votes.blank?
      puts "Here's a list of vote ids that you may want to modify: #{bad_votes.uniq.inspect}"
    end
    print error_msg
  end
end
end