Commit 344dc7328a6b5e0017239e38cf9d75996d44c266
1 parent
c390551c
Exists in
master
and in
1 other branch
move timezone tasks to separate rake file
Showing
2 changed files
with
176 additions
and
173 deletions
Show diff stats
lib/tasks/prune_db.rake
1 | 1 | namespace :prune_db do |
2 | 2 | |
3 | - desc "Finds ambiguous times due to daylight savings time" | |
4 | - task :find_ambiguous_times => :environment do | |
5 | - datetime_fields = { | |
6 | - :appearances => ['created_at', 'updated_at'], | |
7 | - :choices => ['created_at', 'updated_at'], | |
8 | - :clicks => ['created_at', 'updated_at'], | |
9 | - :densities => ['created_at', 'updated_at'], | |
10 | - :flags => ['created_at', 'updated_at'], | |
11 | - :prompts => ['created_at', 'updated_at'], | |
12 | - :skips => ['created_at', 'updated_at'], | |
13 | - :votes => ['created_at', 'updated_at'], | |
14 | - :visitors => ['created_at', 'updated_at'], | |
15 | - :users => ['created_at', 'updated_at'], | |
16 | - :questions => ['created_at', 'updated_at'], | |
17 | - :question_versions => ['created_at', 'updated_at'], | |
18 | - :delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'], | |
19 | - } | |
20 | - datetime_fields.each do |table, columns| | |
21 | - where = columns.map{|c| "((#{c} > '2010-11-07 00:59:59' AND #{c} < '2010-11-07 02:00:00') OR (#{c} > '2011-11-06 00:59:59' AND #{c} < '2011-11-06 02:00:00'))"}.join(" OR ") | |
22 | - rows = ActiveRecord::Base.connection.select_all( | |
23 | - "SELECT id, #{columns.join(", ")} FROM #{table} WHERE #{where}" | |
24 | - ) | |
25 | - puts rows.inspect if rows.length > 0 | |
26 | - end | |
27 | - end | |
28 | - | |
29 | - # There is a very similar task in the AOI code base as well. | |
30 | - # Any core changes to this task should probably be reflected there. | |
31 | - desc "Converts all dates from PT to UTC" | |
32 | - task :convert_dates_to_utc, [:workerid, :workers] => [:environment] do|t,args| | |
33 | - args.with_defaults(:workerid => "0", :workers => "1") | |
34 | - raise "workerid can not be greater than workers" if args[:workerid] > args[:workers] | |
35 | - time_spans = [ | |
36 | - { :gt => "2009-11-01 01:59:59", :lt => "2010-03-14 02:00:00", :h => 8}, | |
37 | - { :gt => "2010-03-14 01:59:59", :lt => "2010-11-07 01:00:00", :h => 7}, | |
38 | - { :gt => "2010-11-07 00:59:59", :lt => "2010-11-07 02:00:00", :h => nil}, | |
39 | - { :gt => "2010-11-07 01:59:59", :lt => "2011-03-13 02:00:00", :h => 8}, | |
40 | - { :gt => "2011-03-13 01:59:59", :lt => "2011-11-06 01:00:00", :h => 7}, | |
41 | - { :gt => "2011-11-06 00:59:59", :lt => "2011-11-06 02:00:00", :h => nil}, | |
42 | - { :gt => "2011-11-06 01:59:59", :lt => "2012-03-11 02:00:00", :h => 8}, | |
43 | - { :gt => "2012-03-11 01:59:59", :lt => "2012-11-04 01:00:00", :h => 7} | |
44 | - ] | |
45 | - unambiguator = { | |
46 | - :appearances => [ | |
47 | - { :range => 454229..454229, :h => 7}, | |
48 | - { :range => 454426..454501, :h => 7}, # 454501 updated_at needs additional hour | |
49 | - { :range => 454502..454745, :h => 8}, | |
50 | - { :range => 4005307..4005522, :h => 7 }, | |
51 | - { :range => 4005523..4005556, :h => 8 } | |
52 | - ], | |
53 | - :choices => [ | |
54 | - { :range => 181957..181957, :h => 7} # based on appearance id 8392753 | |
55 | - ], | |
56 | - :prompts => [ | |
57 | - { :range => 5191157..5191225, :h => 7}, | |
58 | - { :range => 5191226..5191876, :h => 8}, | |
59 | - { :range => 8392753..8392758, :h => 7}, # based on appearance id 4005361 | |
60 | - ], | |
61 | - :question_versions => [ | |
62 | - { :range => 7126..7128, :h => 7} # based on choice 181957 | |
63 | - ], | |
64 | - :questions => [ | |
65 | - { :range => 1855..1855, :h => 7} # based on question_versions 7128 | |
66 | - ], | |
67 | - :skips => [ | |
68 | - { :range => 30948..30952, :h => 8}, # based on vote 326681 | |
69 | - { :range => 365240..365276, :h => 7}, | |
70 | - { :range => 365277..365281, :h => 8}, | |
71 | - ], | |
72 | - :visitors => [ | |
73 | - { :range => 594751..594777, :h => 7}, | |
74 | - { :range => 594778..594795, :h => 8}, | |
75 | - { :range => 91350..91358, :h => 7}, | |
76 | - { :range => 91359..91366, :h => 8} | |
77 | - ], | |
78 | - :votes => [ | |
79 | - { :range => 3145774..3145926, :h => 7}, | |
80 | - { :range => 3145927..3145935, :h => 8}, | |
81 | - { :range => 326504..326571, :h => 7}, | |
82 | - { :range => 326572..326803, :h => 8}, | |
83 | - ], | |
84 | - } | |
85 | - # UTC because Rails will be thinking DB is in UTC when we run this | |
86 | - #time_spans.map! do |t| | |
87 | - # { :gt => Time.parse("#{t[:gt]} UTC"), | |
88 | - # :lt => Time.parse("#{t[:lt]} UTC"), | |
89 | - # :h => t[:h] } | |
90 | - #end | |
91 | - datetime_fields = { | |
92 | - #:appearances => ['created_at', 'updated_at'], | |
93 | - #:choices => ['created_at', 'updated_at'], | |
94 | - #:clicks => ['created_at', 'updated_at'], | |
95 | - #:densities => ['created_at', 'updated_at'], | |
96 | - #:flags => ['created_at', 'updated_at'], | |
97 | - #:prompts => ['created_at', 'updated_at'], | |
98 | - :skips => ['created_at', 'updated_at'], | |
99 | - #:votes => ['created_at', 'updated_at'], | |
100 | - #:visitors => ['created_at', 'updated_at'], | |
101 | - #:users => ['created_at', 'updated_at'], | |
102 | - #:questions => ['created_at', 'updated_at'], | |
103 | - #:question_versions => ['created_at', 'updated_at'], | |
104 | - #:delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'], | |
105 | - } | |
106 | - | |
107 | - STDOUT.sync = true | |
108 | - logger = Rails.logger | |
109 | - datetime_fields.each do |table, columns| | |
110 | - print "#{table}" | |
111 | - batch_size = 10000 | |
112 | - i = 0 | |
113 | - where = '' | |
114 | - # This is how we split the rows of a table between the various workers | |
115 | - # so that they don't attempt to work on the same row as another worker. | |
116 | - # The workerid is any number 0 through workers - 1. | |
117 | - if args[:workers] > "1" | |
118 | - where = "WHERE MOD(id, #{args[:workers]}) = #{args[:workerid]}" | |
119 | - end | |
120 | - while true do | |
121 | - rows = ActiveRecord::Base.connection.select_all( | |
122 | - "SELECT id, #{columns.join(", ")} FROM #{table} #{where} ORDER BY id LIMIT #{i*batch_size}, #{batch_size}" | |
123 | - ) | |
124 | - print "." | |
125 | - | |
126 | - rows.each do |row| | |
127 | - updated_values = {} | |
128 | - # delete any value where the value is blank (just for delayed_jobs) | |
129 | - row.delete_if {|key, value| value.blank? } | |
130 | - row.each do |column, value| | |
131 | - next if column == "id" | |
132 | - time_spans.each do |span| | |
133 | - if value < span[:lt] && value > span[:gt] | |
134 | - # if blank then ambiguous and we don't know how to translate | |
135 | - if span[:h].blank? | |
136 | - updated_values[column] = nil | |
137 | - if unambiguator[table] && unambiguator[table].length > 0 | |
138 | - unambiguator[table].each do |ids| | |
139 | - updated_values[column] = ids[:h] if ids[:range].include? row["id"].to_i | |
140 | - end | |
141 | - end | |
142 | - | |
143 | - logger.info "AMBIGUOUS: #{table} #{row["id"]} #{column}: #{value}" if updated_values[column].blank? | |
144 | - else | |
145 | - updated_values[column] = span[:h] | |
146 | - end | |
147 | - break | |
148 | - end | |
149 | - end | |
150 | - end | |
151 | - # Check if some columns did not match any spans | |
152 | - key_diff = row.keys - updated_values.keys - ["id"] | |
153 | - if key_diff.length > 0 | |
154 | - logger.info "MISSING SPAN: #{table} #{row["id"]} #{key_diff.inspect} #{row.inspect}" | |
155 | - end | |
156 | - # remove ambiguous columns (we set them to nil above) | |
157 | - updated_values.delete_if {|key, value| value.blank? } | |
158 | - if updated_values.length > 0 | |
159 | - update = "UPDATE #{table} SET #{updated_values.map{|k,v| "#{k} = DATE_ADD(#{k}, INTERVAL #{v} HOUR)"}.join(", ")} WHERE id = #{row["id"]}" | |
160 | - num = ActiveRecord::Base.connection.update_sql(update) | |
161 | - if num == 1 | |
162 | - logger.info "UPDATE: #{table} #{row.inspect} #{updated_values.inspect}" | |
163 | - else | |
164 | - logger.info "UPDATE FAILED: #{table} #{row.inspect} #{updated_values.inspect} #{num.inspect}" | |
165 | - end | |
166 | - end | |
167 | - end | |
168 | - | |
169 | - i+= 1 | |
170 | - break if rows.length < batchsize | |
171 | - end | |
172 | - print "\n" | |
173 | - end | |
174 | - end | |
175 | - | |
176 | 3 | desc "Fixes a mis-match between a vote's prompt_id and its appearance's prompt_id. Sets the appearance prompt_id to match the vote's prompt_id" |
177 | 4 | task :fix_promptid_mismatch => :environment do |
178 | 5 | bad_records = Vote.connection.select_all " | ... | ... |
... | ... | @@ -0,0 +1,176 @@ |
namespace :timezone do

  # There is a very similar task in the AOI code base as well.
  # Any core changes to this task should probably be reflected there.
  #
  # Walks every table listed in +datetime_fields+ in batches of 10000 rows and
  # shifts each listed datetime column forward by 7 or 8 hours, depending on
  # which entry of +time_spans+ the stored value falls into. Values inside the
  # repeated daylight-savings hour (spans with :h => nil) are only shifted when
  # the row id is covered by the hand-curated +unambiguator+ ranges; otherwise
  # they are logged as AMBIGUOUS and left untouched. Columns that match no span
  # at all are logged as MISSING SPAN.
  #
  # Arguments (rake passes them as strings):
  #   workerid - index of this worker, 0 through workers - 1 (default "0")
  #   workers  - total number of workers sharing the work      (default "1")
  #
  # Raises RuntimeError when the arguments are not non-negative integers or
  # when workerid is not below workers.
  #
  # WARNING: the task is not idempotent. Each UPDATE adds hours to whatever is
  # currently stored, so running it twice over the same rows shifts them twice.
  desc "Converts all dates from PT to UTC"
  task :convert_dates_to_utc, [:workerid, :workers] => [:environment] do |t, args|
    args.with_defaults(:workerid => "0", :workers => "1")

    # Task arguments arrive as strings. Validate and convert them so that the
    # range check is numeric ("9" vs "10") and so that only digits can ever be
    # interpolated into the SQL below.
    worker_args = [args[:workerid], args[:workers]].map { |arg| arg.to_s }
    unless worker_args.all? { |arg| arg =~ /\A\d+\z/ }
      raise "workerid and workers must be non-negative integers"
    end
    workerid, workers = worker_args.map { |arg| arg.to_i }
    raise "workerid must be between 0 and workers - 1" unless workerid < workers

    # :gt / :lt are exclusive bounds; :h is the number of hours to add.
    # :h => nil marks the repeated hour at the end of daylight savings time,
    # where the offset cannot be derived from the stored value alone.
    time_spans = [
      { :gt => "2009-11-01 01:59:59", :lt => "2010-03-14 02:00:00", :h => 8},
      { :gt => "2010-03-14 01:59:59", :lt => "2010-11-07 01:00:00", :h => 7},
      { :gt => "2010-11-07 00:59:59", :lt => "2010-11-07 02:00:00", :h => nil},
      { :gt => "2010-11-07 01:59:59", :lt => "2011-03-13 02:00:00", :h => 8},
      { :gt => "2011-03-13 01:59:59", :lt => "2011-11-06 01:00:00", :h => 7},
      { :gt => "2011-11-06 00:59:59", :lt => "2011-11-06 02:00:00", :h => nil},
      { :gt => "2011-11-06 01:59:59", :lt => "2012-03-11 02:00:00", :h => 8},
      { :gt => "2012-03-11 01:59:59", :lt => "2012-11-04 01:00:00", :h => 7}
    ]
    # Per-table id ranges that resolve the ambiguous hour to a concrete offset.
    unambiguator = {
      :appearances => [
        { :range => 454229..454229, :h => 7},
        { :range => 454426..454501, :h => 7}, # 454501 updated_at needs additional hour
        { :range => 454502..454745, :h => 8},
        { :range => 4005307..4005522, :h => 7 },
        { :range => 4005523..4005556, :h => 8 }
      ],
      :choices => [
        { :range => 181957..181957, :h => 7} # based on appearance id 8392753
      ],
      :prompts => [
        { :range => 5191157..5191225, :h => 7},
        { :range => 5191226..5191876, :h => 8},
        { :range => 8392753..8392758, :h => 7}, # based on appearance id 4005361
      ],
      :question_versions => [
        { :range => 7126..7128, :h => 7} # based on choice 181957
      ],
      :questions => [
        { :range => 1855..1855, :h => 7} # based on question_versions 7128
      ],
      :skips => [
        { :range => 30948..30952, :h => 8}, # based on vote 326681
        { :range => 365240..365276, :h => 7},
        { :range => 365277..365281, :h => 8},
      ],
      :visitors => [
        { :range => 594751..594777, :h => 7},
        { :range => 594778..594795, :h => 8},
        { :range => 91350..91358, :h => 7},
        { :range => 91359..91366, :h => 8}
      ],
      :votes => [
        { :range => 3145774..3145926, :h => 7},
        { :range => 3145927..3145935, :h => 8},
        { :range => 326504..326571, :h => 7},
        { :range => 326572..326803, :h => 8},
      ],
    }
    # UTC because Rails will be thinking DB is in UTC when we run this
    #time_spans.map! do |t|
    #  { :gt => Time.parse("#{t[:gt]} UTC"),
    #    :lt => Time.parse("#{t[:lt]} UTC"),
    #    :h => t[:h] }
    #end
    datetime_fields = {
      #:appearances => ['created_at', 'updated_at'],
      #:choices => ['created_at', 'updated_at'],
      #:clicks => ['created_at', 'updated_at'],
      #:densities => ['created_at', 'updated_at'],
      #:flags => ['created_at', 'updated_at'],
      #:prompts => ['created_at', 'updated_at'],
      :skips => ['created_at', 'updated_at'],
      #:votes => ['created_at', 'updated_at'],
      #:visitors => ['created_at', 'updated_at'],
      #:users => ['created_at', 'updated_at'],
      #:questions => ['created_at', 'updated_at'],
      #:question_versions => ['created_at', 'updated_at'],
      #:delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'],
    }

    STDOUT.sync = true
    logger = Rails.logger
    batch_size = 10000
    # This is how we split the rows of a table between the various workers
    # so that they don't attempt to work on the same row as another worker.
    # The workerid is any number 0 through workers - 1.
    where = workers > 1 ? "WHERE MOD(id, #{workers}) = #{workerid}" : ''

    datetime_fields.each do |table, columns|
      print "#{table}"
      i = 0
      loop do
        # MySQL-style "LIMIT offset, count" paging over a stable id ordering
        rows = ActiveRecord::Base.connection.select_all(
          "SELECT id, #{columns.join(", ")} FROM #{table} #{where} ORDER BY id LIMIT #{i*batch_size}, #{batch_size}"
        )
        print "."

        rows.each do |row|
          # ignore any column whose value is blank (just for delayed_jobs)
          present = row.reject { |_column, value| value.blank? }
          updated_values = {}
          present.each do |column, value|
            next if column == "id"
            # NOTE(review): assumes the adapter hands datetimes back as
            # "YYYY-MM-DD HH:MM:SS" strings, so plain string comparison orders
            # them chronologically - confirm for the adapter in use.
            span = time_spans.find { |s| value < s[:lt] && value > s[:gt] }
            next unless span

            hours = span[:h]
            # if blank then ambiguous and we don't know how to translate,
            # unless the row id is listed in the unambiguator (last match wins)
            if hours.blank?
              (unambiguator[table] || []).each do |ids|
                hours = ids[:h] if ids[:range].include? row["id"].to_i
              end
              logger.info "AMBIGUOUS: #{table} #{row["id"]} #{column}: #{value}" if hours.blank?
            end
            # still-ambiguous columns are recorded as nil so they do not count
            # as "missing span" below
            updated_values[column] = hours
          end
          # Check if some columns did not match any spans
          key_diff = present.keys - updated_values.keys - ["id"]
          if key_diff.length > 0
            logger.info "MISSING SPAN: #{table} #{row["id"]} #{key_diff.inspect} #{present.inspect}"
          end
          # remove ambiguous columns (we set them to nil above)
          updated_values.delete_if { |_column, hours| hours.blank? }
          next if updated_values.empty?

          assignments = updated_values.map { |k, v| "#{k} = DATE_ADD(#{k}, INTERVAL #{v} HOUR)" }
          update = "UPDATE #{table} SET #{assignments.join(", ")} WHERE id = #{row["id"]}"
          num = ActiveRecord::Base.connection.update_sql(update)
          if num == 1
            logger.info "UPDATE: #{table} #{present.inspect} #{updated_values.inspect}"
          else
            logger.info "UPDATE FAILED: #{table} #{present.inspect} #{updated_values.inspect} #{num.inspect}"
          end
        end

        i += 1
        # a short batch means the table is exhausted
        break if rows.length < batch_size
      end
      print "\n"
    end
  end

  # Read-only report: for every table/column pair below, selects the rows whose
  # value lies strictly inside one of the two repeated daylight-savings hours
  # and prints them (via inspect) when any are found.
  desc "Finds ambiguous times due to daylight savings time"
  task :find_ambiguous_times => :environment do
    datetime_fields = {
      :appearances => ['created_at', 'updated_at'],
      :choices => ['created_at', 'updated_at'],
      :clicks => ['created_at', 'updated_at'],
      :densities => ['created_at', 'updated_at'],
      :flags => ['created_at', 'updated_at'],
      :prompts => ['created_at', 'updated_at'],
      :skips => ['created_at', 'updated_at'],
      :votes => ['created_at', 'updated_at'],
      :visitors => ['created_at', 'updated_at'],
      :users => ['created_at', 'updated_at'],
      :questions => ['created_at', 'updated_at'],
      :question_versions => ['created_at', 'updated_at'],
      :delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'],
    }
    # exclusive [lower, upper] bounds of the ambiguous hours
    ambiguous_windows = [
      ['2010-11-07 00:59:59', '2010-11-07 02:00:00'],
      ['2011-11-06 00:59:59', '2011-11-06 02:00:00']
    ]
    datetime_fields.each do |table, columns|
      column_conditions = columns.map do |c|
        windows = ambiguous_windows.map { |gt, lt| "(#{c} > '#{gt}' AND #{c} < '#{lt}')" }
        "(#{windows.join(" OR ")})"
      end
      where = column_conditions.join(" OR ")
      rows = ActiveRecord::Base.connection.select_all(
        "SELECT id, #{columns.join(", ")} FROM #{table} WHERE #{where}"
      )
      puts rows.inspect if rows.length > 0
    end
  end

end