Commit 344dc7328a6b5e0017239e38cf9d75996d44c266

Authored by Luke Baker
1 parent c390551c

move timezone tasks to separate rake file

Showing 2 changed files with 176 additions and 173 deletions   Show diff stats
lib/tasks/prune_db.rake
1 namespace :prune_db do 1 namespace :prune_db do
2 2
3 - desc "Finds ambiguous times due to daylight savings time"  
4 - task :find_ambiguous_times => :environment do  
5 - datetime_fields = {  
6 - :appearances => ['created_at', 'updated_at'],  
7 - :choices => ['created_at', 'updated_at'],  
8 - :clicks => ['created_at', 'updated_at'],  
9 - :densities => ['created_at', 'updated_at'],  
10 - :flags => ['created_at', 'updated_at'],  
11 - :prompts => ['created_at', 'updated_at'],  
12 - :skips => ['created_at', 'updated_at'],  
13 - :votes => ['created_at', 'updated_at'],  
14 - :visitors => ['created_at', 'updated_at'],  
15 - :users => ['created_at', 'updated_at'],  
16 - :questions => ['created_at', 'updated_at'],  
17 - :question_versions => ['created_at', 'updated_at'],  
18 - :delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'],  
19 - }  
20 - datetime_fields.each do |table, columns|  
21 - where = columns.map{|c| "((#{c} > '2010-11-07 00:59:59' AND #{c} < '2010-11-07 02:00:00') OR (#{c} > '2011-11-06 00:59:59' AND #{c} < '2011-11-06 02:00:00'))"}.join(" OR ")  
22 - rows = ActiveRecord::Base.connection.select_all(  
23 - "SELECT id, #{columns.join(", ")} FROM #{table} WHERE #{where}"  
24 - )  
25 - puts rows.inspect if rows.length > 0  
26 - end  
27 - end  
28 -  
29 - # There is a very similar task in the AOI code base as well.  
30 - # Any core changes to this task should probably be reflected there.  
31 - desc "Converts all dates from PT to UTC"  
32 - task :convert_dates_to_utc, [:workerid, :workers] => [:environment] do|t,args|  
33 - args.with_defaults(:workerid => "0", :workers => "1")  
34 - raise "workerid can not be greater than workers" if args[:workerid] > args[:workers]  
35 - time_spans = [  
36 - { :gt => "2009-11-01 01:59:59", :lt => "2010-03-14 02:00:00", :h => 8},  
37 - { :gt => "2010-03-14 01:59:59", :lt => "2010-11-07 01:00:00", :h => 7},  
38 - { :gt => "2010-11-07 00:59:59", :lt => "2010-11-07 02:00:00", :h => nil},  
39 - { :gt => "2010-11-07 01:59:59", :lt => "2011-03-13 02:00:00", :h => 8},  
40 - { :gt => "2011-03-13 01:59:59", :lt => "2011-11-06 01:00:00", :h => 7},  
41 - { :gt => "2011-11-06 00:59:59", :lt => "2011-11-06 02:00:00", :h => nil},  
42 - { :gt => "2011-11-06 01:59:59", :lt => "2012-03-11 02:00:00", :h => 8},  
43 - { :gt => "2012-03-11 01:59:59", :lt => "2012-11-04 01:00:00", :h => 7}  
44 - ]  
45 - unambiguator = {  
46 - :appearances => [  
47 - { :range => 454229..454229, :h => 7},  
48 - { :range => 454426..454501, :h => 7}, # 454501 updated_at needs additional hour  
49 - { :range => 454502..454745, :h => 8},  
50 - { :range => 4005307..4005522, :h => 7 },  
51 - { :range => 4005523..4005556, :h => 8 }  
52 - ],  
53 - :choices => [  
54 - { :range => 181957..181957, :h => 7} # based on appearance id 8392753  
55 - ],  
56 - :prompts => [  
57 - { :range => 5191157..5191225, :h => 7},  
58 - { :range => 5191226..5191876, :h => 8},  
59 - { :range => 8392753..8392758, :h => 7}, # based on appearance id 4005361  
60 - ],  
61 - :question_versions => [  
62 - { :range => 7126..7128, :h => 7} # based on choice 181957  
63 - ],  
64 - :questions => [  
65 - { :range => 1855..1855, :h => 7} # based on question_versions 7128  
66 - ],  
67 - :skips => [  
68 - { :range => 30948..30952, :h => 8}, # based on vote 326681  
69 - { :range => 365240..365276, :h => 7},  
70 - { :range => 365277..365281, :h => 8},  
71 - ],  
72 - :visitors => [  
73 - { :range => 594751..594777, :h => 7},  
74 - { :range => 594778..594795, :h => 8},  
75 - { :range => 91350..91358, :h => 7},  
76 - { :range => 91359..91366, :h => 8}  
77 - ],  
78 - :votes => [  
79 - { :range => 3145774..3145926, :h => 7},  
80 - { :range => 3145927..3145935, :h => 8},  
81 - { :range => 326504..326571, :h => 7},  
82 - { :range => 326572..326803, :h => 8},  
83 - ],  
84 - }  
85 - # UTC because Rails will be thinking DB is in UTC when we run this  
86 - #time_spans.map! do |t|  
87 - # { :gt => Time.parse("#{t[:gt]} UTC"),  
88 - # :lt => Time.parse("#{t[:lt]} UTC"),  
89 - # :h => t[:h] }  
90 - #end  
91 - datetime_fields = {  
92 - #:appearances => ['created_at', 'updated_at'],  
93 - #:choices => ['created_at', 'updated_at'],  
94 - #:clicks => ['created_at', 'updated_at'],  
95 - #:densities => ['created_at', 'updated_at'],  
96 - #:flags => ['created_at', 'updated_at'],  
97 - #:prompts => ['created_at', 'updated_at'],  
98 - :skips => ['created_at', 'updated_at'],  
99 - #:votes => ['created_at', 'updated_at'],  
100 - #:visitors => ['created_at', 'updated_at'],  
101 - #:users => ['created_at', 'updated_at'],  
102 - #:questions => ['created_at', 'updated_at'],  
103 - #:question_versions => ['created_at', 'updated_at'],  
104 - #:delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'],  
105 - }  
106 -  
107 - STDOUT.sync = true  
108 - logger = Rails.logger  
109 - datetime_fields.each do |table, columns|  
110 - print "#{table}"  
111 - batch_size = 10000  
112 - i = 0  
113 - where = ''  
114 - # This is how we split the rows of a table between the various workers  
115 - # so that they don't attempt to work on the same row as another worker.  
116 - # The workerid is any number 0 through workers - 1.  
117 - if args[:workers] > "1"  
118 - where = "WHERE MOD(id, #{args[:workers]}) = #{args[:workerid]}"  
119 - end  
120 - while true do  
121 - rows = ActiveRecord::Base.connection.select_all(  
122 - "SELECT id, #{columns.join(", ")} FROM #{table} #{where} ORDER BY id LIMIT #{i*batch_size}, #{batch_size}"  
123 - )  
124 - print "."  
125 -  
126 - rows.each do |row|  
127 - updated_values = {}  
128 - # delete any value where the value is blank (just for delayed_jobs)  
129 - row.delete_if {|key, value| value.blank? }  
130 - row.each do |column, value|  
131 - next if column == "id"  
132 - time_spans.each do |span|  
133 - if value < span[:lt] && value > span[:gt]  
134 - # if blank then ambiguous and we don't know how to translate  
135 - if span[:h].blank?  
136 - updated_values[column] = nil  
137 - if unambiguator[table] && unambiguator[table].length > 0  
138 - unambiguator[table].each do |ids|  
139 - updated_values[column] = ids[:h] if ids[:range].include? row["id"].to_i  
140 - end  
141 - end  
142 -  
143 - logger.info "AMBIGUOUS: #{table} #{row["id"]} #{column}: #{value}" if updated_values[column].blank?  
144 - else  
145 - updated_values[column] = span[:h]  
146 - end  
147 - break  
148 - end  
149 - end  
150 - end  
151 - # Check if some columns did not match any spans  
152 - key_diff = row.keys - updated_values.keys - ["id"]  
153 - if key_diff.length > 0  
154 - logger.info "MISSING SPAN: #{table} #{row["id"]} #{key_diff.inspect} #{row.inspect}"  
155 - end  
156 - # remove ambiguous columns (we set them to nil above)  
157 - updated_values.delete_if {|key, value| value.blank? }  
158 - if updated_values.length > 0  
159 - update = "UPDATE #{table} SET #{updated_values.map{|k,v| "#{k} = DATE_ADD(#{k}, INTERVAL #{v} HOUR)"}.join(", ")} WHERE id = #{row["id"]}"  
160 - num = ActiveRecord::Base.connection.update_sql(update)  
161 - if num == 1  
162 - logger.info "UPDATE: #{table} #{row.inspect} #{updated_values.inspect}"  
163 - else  
164 - logger.info "UPDATE FAILED: #{table} #{row.inspect} #{updated_values.inspect} #{num.inspect}"  
165 - end  
166 - end  
167 - end  
168 -  
169 - i+= 1  
170 - break if rows.length < batchsize  
171 - end  
172 - print "\n"  
173 - end  
174 - end  
175 -  
176 desc "Fixes a mis-match between a vote's prompt_id and its appearance's prompt_id. Sets the appearance prompt_id to match the vote's prompt_id" 3 desc "Fixes a mis-match between a vote's prompt_id and its appearance's prompt_id. Sets the appearance prompt_id to match the vote's prompt_id"
177 task :fix_promptid_mismatch => :environment do 4 task :fix_promptid_mismatch => :environment do
178 bad_records = Vote.connection.select_all " 5 bad_records = Vote.connection.select_all "
lib/tasks/timezone.rake 0 → 100644
@@ -0,0 +1,176 @@ @@ -0,0 +1,176 @@
  1 +namespace :timezone do
  2 +
  3 + # There is a very similar task in the AOI code base as well.
  4 + # Any core changes to this task should probably be reflected there.
  5 + desc "Converts all dates from PT to UTC"
  6 + task :convert_dates_to_utc, [:workerid, :workers] => [:environment] do|t,args|
  7 + args.with_defaults(:workerid => "0", :workers => "1")
  8 + raise "workerid can not be greater than workers" if args[:workerid] > args[:workers]
  9 + time_spans = [
  10 + { :gt => "2009-11-01 01:59:59", :lt => "2010-03-14 02:00:00", :h => 8},
  11 + { :gt => "2010-03-14 01:59:59", :lt => "2010-11-07 01:00:00", :h => 7},
  12 + { :gt => "2010-11-07 00:59:59", :lt => "2010-11-07 02:00:00", :h => nil},
  13 + { :gt => "2010-11-07 01:59:59", :lt => "2011-03-13 02:00:00", :h => 8},
  14 + { :gt => "2011-03-13 01:59:59", :lt => "2011-11-06 01:00:00", :h => 7},
  15 + { :gt => "2011-11-06 00:59:59", :lt => "2011-11-06 02:00:00", :h => nil},
  16 + { :gt => "2011-11-06 01:59:59", :lt => "2012-03-11 02:00:00", :h => 8},
  17 + { :gt => "2012-03-11 01:59:59", :lt => "2012-11-04 01:00:00", :h => 7}
  18 + ]
  19 + unambiguator = {
  20 + :appearances => [
  21 + { :range => 454229..454229, :h => 7},
  22 + { :range => 454426..454501, :h => 7}, # 454501 updated_at needs additional hour
  23 + { :range => 454502..454745, :h => 8},
  24 + { :range => 4005307..4005522, :h => 7 },
  25 + { :range => 4005523..4005556, :h => 8 }
  26 + ],
  27 + :choices => [
  28 + { :range => 181957..181957, :h => 7} # based on appearance id 8392753
  29 + ],
  30 + :prompts => [
  31 + { :range => 5191157..5191225, :h => 7},
  32 + { :range => 5191226..5191876, :h => 8},
  33 + { :range => 8392753..8392758, :h => 7}, # based on appearance id 4005361
  34 + ],
  35 + :question_versions => [
  36 + { :range => 7126..7128, :h => 7} # based on choice 181957
  37 + ],
  38 + :questions => [
  39 + { :range => 1855..1855, :h => 7} # based on question_versions 7128
  40 + ],
  41 + :skips => [
  42 + { :range => 30948..30952, :h => 8}, # based on vote 326681
  43 + { :range => 365240..365276, :h => 7},
  44 + { :range => 365277..365281, :h => 8},
  45 + ],
  46 + :visitors => [
  47 + { :range => 594751..594777, :h => 7},
  48 + { :range => 594778..594795, :h => 8},
  49 + { :range => 91350..91358, :h => 7},
  50 + { :range => 91359..91366, :h => 8}
  51 + ],
  52 + :votes => [
  53 + { :range => 3145774..3145926, :h => 7},
  54 + { :range => 3145927..3145935, :h => 8},
  55 + { :range => 326504..326571, :h => 7},
  56 + { :range => 326572..326803, :h => 8},
  57 + ],
  58 + }
  59 + # UTC because Rails will be thinking DB is in UTC when we run this
  60 + #time_spans.map! do |t|
  61 + # { :gt => Time.parse("#{t[:gt]} UTC"),
  62 + # :lt => Time.parse("#{t[:lt]} UTC"),
  63 + # :h => t[:h] }
  64 + #end
  65 + datetime_fields = {
  66 + #:appearances => ['created_at', 'updated_at'],
  67 + #:choices => ['created_at', 'updated_at'],
  68 + #:clicks => ['created_at', 'updated_at'],
  69 + #:densities => ['created_at', 'updated_at'],
  70 + #:flags => ['created_at', 'updated_at'],
  71 + #:prompts => ['created_at', 'updated_at'],
  72 + :skips => ['created_at', 'updated_at'],
  73 + #:votes => ['created_at', 'updated_at'],
  74 + #:visitors => ['created_at', 'updated_at'],
  75 + #:users => ['created_at', 'updated_at'],
  76 + #:questions => ['created_at', 'updated_at'],
  77 + #:question_versions => ['created_at', 'updated_at'],
  78 + #:delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'],
  79 + }
  80 +
  81 + STDOUT.sync = true
  82 + logger = Rails.logger
  83 + datetime_fields.each do |table, columns|
  84 + print "#{table}"
  85 + batch_size = 10000
  86 + i = 0
  87 + where = ''
  88 + # This is how we split the rows of a table between the various workers
  89 + # so that they don't attempt to work on the same row as another worker.
  90 + # The workerid is any number 0 through workers - 1.
  91 + if args[:workers] > "1"
  92 + where = "WHERE MOD(id, #{args[:workers]}) = #{args[:workerid]}"
  93 + end
  94 + while true do
  95 + rows = ActiveRecord::Base.connection.select_all(
  96 + "SELECT id, #{columns.join(", ")} FROM #{table} #{where} ORDER BY id LIMIT #{i*batch_size}, #{batch_size}"
  97 + )
  98 + print "."
  99 +
  100 + rows.each do |row|
  101 + updated_values = {}
  102 + # delete any value where the value is blank (just for delayed_jobs)
  103 + row.delete_if {|key, value| value.blank? }
  104 + row.each do |column, value|
  105 + next if column == "id"
  106 + time_spans.each do |span|
  107 + if value < span[:lt] && value > span[:gt]
  108 + # if blank then ambiguous and we don't know how to translate
  109 + if span[:h].blank?
  110 + updated_values[column] = nil
  111 + if unambiguator[table] && unambiguator[table].length > 0
  112 + unambiguator[table].each do |ids|
  113 + updated_values[column] = ids[:h] if ids[:range].include? row["id"].to_i
  114 + end
  115 + end
  116 +
  117 + logger.info "AMBIGUOUS: #{table} #{row["id"]} #{column}: #{value}" if updated_values[column].blank?
  118 + else
  119 + updated_values[column] = span[:h]
  120 + end
  121 + break
  122 + end
  123 + end
  124 + end
  125 + # Check if some columns did not match any spans
  126 + key_diff = row.keys - updated_values.keys - ["id"]
  127 + if key_diff.length > 0
  128 + logger.info "MISSING SPAN: #{table} #{row["id"]} #{key_diff.inspect} #{row.inspect}"
  129 + end
  130 + # remove ambiguous columns (we set them to nil above)
  131 + updated_values.delete_if {|key, value| value.blank? }
  132 + if updated_values.length > 0
  133 + update = "UPDATE #{table} SET #{updated_values.map{|k,v| "#{k} = DATE_ADD(#{k}, INTERVAL #{v} HOUR)"}.join(", ")} WHERE id = #{row["id"]}"
  134 + num = ActiveRecord::Base.connection.update_sql(update)
  135 + if num == 1
  136 + logger.info "UPDATE: #{table} #{row.inspect} #{updated_values.inspect}"
  137 + else
  138 + logger.info "UPDATE FAILED: #{table} #{row.inspect} #{updated_values.inspect} #{num.inspect}"
  139 + end
  140 + end
  141 + end
  142 +
  143 + i+= 1
  144 + break if rows.length < batchsize
  145 + end
  146 + print "\n"
  147 + end
  148 + end
  149 +
  150 + desc "Finds ambiguous times due to daylight savings time"
  151 + task :find_ambiguous_times => :environment do
  152 + datetime_fields = {
  153 + :appearances => ['created_at', 'updated_at'],
  154 + :choices => ['created_at', 'updated_at'],
  155 + :clicks => ['created_at', 'updated_at'],
  156 + :densities => ['created_at', 'updated_at'],
  157 + :flags => ['created_at', 'updated_at'],
  158 + :prompts => ['created_at', 'updated_at'],
  159 + :skips => ['created_at', 'updated_at'],
  160 + :votes => ['created_at', 'updated_at'],
  161 + :visitors => ['created_at', 'updated_at'],
  162 + :users => ['created_at', 'updated_at'],
  163 + :questions => ['created_at', 'updated_at'],
  164 + :question_versions => ['created_at', 'updated_at'],
  165 + :delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'],
  166 + }
  167 + datetime_fields.each do |table, columns|
  168 + where = columns.map{|c| "((#{c} > '2010-11-07 00:59:59' AND #{c} < '2010-11-07 02:00:00') OR (#{c} > '2011-11-06 00:59:59' AND #{c} < '2011-11-06 02:00:00'))"}.join(" OR ")
  169 + rows = ActiveRecord::Base.connection.select_all(
  170 + "SELECT id, #{columns.join(", ")} FROM #{table} WHERE #{where}"
  171 + )
  172 + puts rows.inspect if rows.length > 0
  173 + end
  174 + end
  175 +
  176 +end