Commit 344dc7328a6b5e0017239e38cf9d75996d44c266
1 parent: c390551c
Exists in master and in 1 other branch
move timezone tasks to separate rake file
Showing 2 changed files with 176 additions and 173 deletions
Show diff stats
lib/tasks/prune_db.rake
| 1 | namespace :prune_db do | 1 | namespace :prune_db do |
| 2 | 2 | ||
| 3 | - desc "Finds ambiguous times due to daylight savings time" | ||
| 4 | - task :find_ambiguous_times => :environment do | ||
| 5 | - datetime_fields = { | ||
| 6 | - :appearances => ['created_at', 'updated_at'], | ||
| 7 | - :choices => ['created_at', 'updated_at'], | ||
| 8 | - :clicks => ['created_at', 'updated_at'], | ||
| 9 | - :densities => ['created_at', 'updated_at'], | ||
| 10 | - :flags => ['created_at', 'updated_at'], | ||
| 11 | - :prompts => ['created_at', 'updated_at'], | ||
| 12 | - :skips => ['created_at', 'updated_at'], | ||
| 13 | - :votes => ['created_at', 'updated_at'], | ||
| 14 | - :visitors => ['created_at', 'updated_at'], | ||
| 15 | - :users => ['created_at', 'updated_at'], | ||
| 16 | - :questions => ['created_at', 'updated_at'], | ||
| 17 | - :question_versions => ['created_at', 'updated_at'], | ||
| 18 | - :delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'], | ||
| 19 | - } | ||
| 20 | - datetime_fields.each do |table, columns| | ||
| 21 | - where = columns.map{|c| "((#{c} > '2010-11-07 00:59:59' AND #{c} < '2010-11-07 02:00:00') OR (#{c} > '2011-11-06 00:59:59' AND #{c} < '2011-11-06 02:00:00'))"}.join(" OR ") | ||
| 22 | - rows = ActiveRecord::Base.connection.select_all( | ||
| 23 | - "SELECT id, #{columns.join(", ")} FROM #{table} WHERE #{where}" | ||
| 24 | - ) | ||
| 25 | - puts rows.inspect if rows.length > 0 | ||
| 26 | - end | ||
| 27 | - end | ||
| 28 | - | ||
| 29 | - # There is a very similar task in the AOI code base as well. | ||
| 30 | - # Any core changes to this task should probably be reflected there. | ||
| 31 | - desc "Converts all dates from PT to UTC" | ||
| 32 | - task :convert_dates_to_utc, [:workerid, :workers] => [:environment] do|t,args| | ||
| 33 | - args.with_defaults(:workerid => "0", :workers => "1") | ||
| 34 | - raise "workerid can not be greater than workers" if args[:workerid] > args[:workers] | ||
| 35 | - time_spans = [ | ||
| 36 | - { :gt => "2009-11-01 01:59:59", :lt => "2010-03-14 02:00:00", :h => 8}, | ||
| 37 | - { :gt => "2010-03-14 01:59:59", :lt => "2010-11-07 01:00:00", :h => 7}, | ||
| 38 | - { :gt => "2010-11-07 00:59:59", :lt => "2010-11-07 02:00:00", :h => nil}, | ||
| 39 | - { :gt => "2010-11-07 01:59:59", :lt => "2011-03-13 02:00:00", :h => 8}, | ||
| 40 | - { :gt => "2011-03-13 01:59:59", :lt => "2011-11-06 01:00:00", :h => 7}, | ||
| 41 | - { :gt => "2011-11-06 00:59:59", :lt => "2011-11-06 02:00:00", :h => nil}, | ||
| 42 | - { :gt => "2011-11-06 01:59:59", :lt => "2012-03-11 02:00:00", :h => 8}, | ||
| 43 | - { :gt => "2012-03-11 01:59:59", :lt => "2012-11-04 01:00:00", :h => 7} | ||
| 44 | - ] | ||
| 45 | - unambiguator = { | ||
| 46 | - :appearances => [ | ||
| 47 | - { :range => 454229..454229, :h => 7}, | ||
| 48 | - { :range => 454426..454501, :h => 7}, # 454501 updated_at needs additional hour | ||
| 49 | - { :range => 454502..454745, :h => 8}, | ||
| 50 | - { :range => 4005307..4005522, :h => 7 }, | ||
| 51 | - { :range => 4005523..4005556, :h => 8 } | ||
| 52 | - ], | ||
| 53 | - :choices => [ | ||
| 54 | - { :range => 181957..181957, :h => 7} # based on appearance id 8392753 | ||
| 55 | - ], | ||
| 56 | - :prompts => [ | ||
| 57 | - { :range => 5191157..5191225, :h => 7}, | ||
| 58 | - { :range => 5191226..5191876, :h => 8}, | ||
| 59 | - { :range => 8392753..8392758, :h => 7}, # based on appearance id 4005361 | ||
| 60 | - ], | ||
| 61 | - :question_versions => [ | ||
| 62 | - { :range => 7126..7128, :h => 7} # based on choice 181957 | ||
| 63 | - ], | ||
| 64 | - :questions => [ | ||
| 65 | - { :range => 1855..1855, :h => 7} # based on question_versions 7128 | ||
| 66 | - ], | ||
| 67 | - :skips => [ | ||
| 68 | - { :range => 30948..30952, :h => 8}, # based on vote 326681 | ||
| 69 | - { :range => 365240..365276, :h => 7}, | ||
| 70 | - { :range => 365277..365281, :h => 8}, | ||
| 71 | - ], | ||
| 72 | - :visitors => [ | ||
| 73 | - { :range => 594751..594777, :h => 7}, | ||
| 74 | - { :range => 594778..594795, :h => 8}, | ||
| 75 | - { :range => 91350..91358, :h => 7}, | ||
| 76 | - { :range => 91359..91366, :h => 8} | ||
| 77 | - ], | ||
| 78 | - :votes => [ | ||
| 79 | - { :range => 3145774..3145926, :h => 7}, | ||
| 80 | - { :range => 3145927..3145935, :h => 8}, | ||
| 81 | - { :range => 326504..326571, :h => 7}, | ||
| 82 | - { :range => 326572..326803, :h => 8}, | ||
| 83 | - ], | ||
| 84 | - } | ||
| 85 | - # UTC because Rails will be thinking DB is in UTC when we run this | ||
| 86 | - #time_spans.map! do |t| | ||
| 87 | - # { :gt => Time.parse("#{t[:gt]} UTC"), | ||
| 88 | - # :lt => Time.parse("#{t[:lt]} UTC"), | ||
| 89 | - # :h => t[:h] } | ||
| 90 | - #end | ||
| 91 | - datetime_fields = { | ||
| 92 | - #:appearances => ['created_at', 'updated_at'], | ||
| 93 | - #:choices => ['created_at', 'updated_at'], | ||
| 94 | - #:clicks => ['created_at', 'updated_at'], | ||
| 95 | - #:densities => ['created_at', 'updated_at'], | ||
| 96 | - #:flags => ['created_at', 'updated_at'], | ||
| 97 | - #:prompts => ['created_at', 'updated_at'], | ||
| 98 | - :skips => ['created_at', 'updated_at'], | ||
| 99 | - #:votes => ['created_at', 'updated_at'], | ||
| 100 | - #:visitors => ['created_at', 'updated_at'], | ||
| 101 | - #:users => ['created_at', 'updated_at'], | ||
| 102 | - #:questions => ['created_at', 'updated_at'], | ||
| 103 | - #:question_versions => ['created_at', 'updated_at'], | ||
| 104 | - #:delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'], | ||
| 105 | - } | ||
| 106 | - | ||
| 107 | - STDOUT.sync = true | ||
| 108 | - logger = Rails.logger | ||
| 109 | - datetime_fields.each do |table, columns| | ||
| 110 | - print "#{table}" | ||
| 111 | - batch_size = 10000 | ||
| 112 | - i = 0 | ||
| 113 | - where = '' | ||
| 114 | - # This is how we split the rows of a table between the various workers | ||
| 115 | - # so that they don't attempt to work on the same row as another worker. | ||
| 116 | - # The workerid is any number 0 through workers - 1. | ||
| 117 | - if args[:workers] > "1" | ||
| 118 | - where = "WHERE MOD(id, #{args[:workers]}) = #{args[:workerid]}" | ||
| 119 | - end | ||
| 120 | - while true do | ||
| 121 | - rows = ActiveRecord::Base.connection.select_all( | ||
| 122 | - "SELECT id, #{columns.join(", ")} FROM #{table} #{where} ORDER BY id LIMIT #{i*batch_size}, #{batch_size}" | ||
| 123 | - ) | ||
| 124 | - print "." | ||
| 125 | - | ||
| 126 | - rows.each do |row| | ||
| 127 | - updated_values = {} | ||
| 128 | - # delete any value where the value is blank (just for delayed_jobs) | ||
| 129 | - row.delete_if {|key, value| value.blank? } | ||
| 130 | - row.each do |column, value| | ||
| 131 | - next if column == "id" | ||
| 132 | - time_spans.each do |span| | ||
| 133 | - if value < span[:lt] && value > span[:gt] | ||
| 134 | - # if blank then ambiguous and we don't know how to translate | ||
| 135 | - if span[:h].blank? | ||
| 136 | - updated_values[column] = nil | ||
| 137 | - if unambiguator[table] && unambiguator[table].length > 0 | ||
| 138 | - unambiguator[table].each do |ids| | ||
| 139 | - updated_values[column] = ids[:h] if ids[:range].include? row["id"].to_i | ||
| 140 | - end | ||
| 141 | - end | ||
| 142 | - | ||
| 143 | - logger.info "AMBIGUOUS: #{table} #{row["id"]} #{column}: #{value}" if updated_values[column].blank? | ||
| 144 | - else | ||
| 145 | - updated_values[column] = span[:h] | ||
| 146 | - end | ||
| 147 | - break | ||
| 148 | - end | ||
| 149 | - end | ||
| 150 | - end | ||
| 151 | - # Check if some columns did not match any spans | ||
| 152 | - key_diff = row.keys - updated_values.keys - ["id"] | ||
| 153 | - if key_diff.length > 0 | ||
| 154 | - logger.info "MISSING SPAN: #{table} #{row["id"]} #{key_diff.inspect} #{row.inspect}" | ||
| 155 | - end | ||
| 156 | - # remove ambiguous columns (we set them to nil above) | ||
| 157 | - updated_values.delete_if {|key, value| value.blank? } | ||
| 158 | - if updated_values.length > 0 | ||
| 159 | - update = "UPDATE #{table} SET #{updated_values.map{|k,v| "#{k} = DATE_ADD(#{k}, INTERVAL #{v} HOUR)"}.join(", ")} WHERE id = #{row["id"]}" | ||
| 160 | - num = ActiveRecord::Base.connection.update_sql(update) | ||
| 161 | - if num == 1 | ||
| 162 | - logger.info "UPDATE: #{table} #{row.inspect} #{updated_values.inspect}" | ||
| 163 | - else | ||
| 164 | - logger.info "UPDATE FAILED: #{table} #{row.inspect} #{updated_values.inspect} #{num.inspect}" | ||
| 165 | - end | ||
| 166 | - end | ||
| 167 | - end | ||
| 168 | - | ||
| 169 | - i+= 1 | ||
| 170 | - break if rows.length < batchsize | ||
| 171 | - end | ||
| 172 | - print "\n" | ||
| 173 | - end | ||
| 174 | - end | ||
| 175 | - | ||
| 176 | desc "Fixes a mis-match between a vote's prompt_id and its appearance's prompt_id. Sets the appearance prompt_id to match the vote's prompt_id" | 3 | desc "Fixes a mis-match between a vote's prompt_id and its appearance's prompt_id. Sets the appearance prompt_id to match the vote's prompt_id" |
| 177 | task :fix_promptid_mismatch => :environment do | 4 | task :fix_promptid_mismatch => :environment do |
| 178 | bad_records = Vote.connection.select_all " | 5 | bad_records = Vote.connection.select_all " |
| @@ -0,0 +1,176 @@ | @@ -0,0 +1,176 @@ | ||
| 1 | +namespace :timezone do | ||
| 2 | + | ||
| 3 | + # There is a very similar task in the AOI code base as well. | ||
| 4 | + # Any core changes to this task should probably be reflected there. | ||
| 5 | + desc "Converts all dates from PT to UTC" | ||
| 6 | + task :convert_dates_to_utc, [:workerid, :workers] => [:environment] do|t,args| | ||
| 7 | + args.with_defaults(:workerid => "0", :workers => "1") | ||
| 8 | + raise "workerid can not be greater than workers" if args[:workerid] > args[:workers] | ||
| 9 | + time_spans = [ | ||
| 10 | + { :gt => "2009-11-01 01:59:59", :lt => "2010-03-14 02:00:00", :h => 8}, | ||
| 11 | + { :gt => "2010-03-14 01:59:59", :lt => "2010-11-07 01:00:00", :h => 7}, | ||
| 12 | + { :gt => "2010-11-07 00:59:59", :lt => "2010-11-07 02:00:00", :h => nil}, | ||
| 13 | + { :gt => "2010-11-07 01:59:59", :lt => "2011-03-13 02:00:00", :h => 8}, | ||
| 14 | + { :gt => "2011-03-13 01:59:59", :lt => "2011-11-06 01:00:00", :h => 7}, | ||
| 15 | + { :gt => "2011-11-06 00:59:59", :lt => "2011-11-06 02:00:00", :h => nil}, | ||
| 16 | + { :gt => "2011-11-06 01:59:59", :lt => "2012-03-11 02:00:00", :h => 8}, | ||
| 17 | + { :gt => "2012-03-11 01:59:59", :lt => "2012-11-04 01:00:00", :h => 7} | ||
| 18 | + ] | ||
| 19 | + unambiguator = { | ||
| 20 | + :appearances => [ | ||
| 21 | + { :range => 454229..454229, :h => 7}, | ||
| 22 | + { :range => 454426..454501, :h => 7}, # 454501 updated_at needs additional hour | ||
| 23 | + { :range => 454502..454745, :h => 8}, | ||
| 24 | + { :range => 4005307..4005522, :h => 7 }, | ||
| 25 | + { :range => 4005523..4005556, :h => 8 } | ||
| 26 | + ], | ||
| 27 | + :choices => [ | ||
| 28 | + { :range => 181957..181957, :h => 7} # based on appearance id 8392753 | ||
| 29 | + ], | ||
| 30 | + :prompts => [ | ||
| 31 | + { :range => 5191157..5191225, :h => 7}, | ||
| 32 | + { :range => 5191226..5191876, :h => 8}, | ||
| 33 | + { :range => 8392753..8392758, :h => 7}, # based on appearance id 4005361 | ||
| 34 | + ], | ||
| 35 | + :question_versions => [ | ||
| 36 | + { :range => 7126..7128, :h => 7} # based on choice 181957 | ||
| 37 | + ], | ||
| 38 | + :questions => [ | ||
| 39 | + { :range => 1855..1855, :h => 7} # based on question_versions 7128 | ||
| 40 | + ], | ||
| 41 | + :skips => [ | ||
| 42 | + { :range => 30948..30952, :h => 8}, # based on vote 326681 | ||
| 43 | + { :range => 365240..365276, :h => 7}, | ||
| 44 | + { :range => 365277..365281, :h => 8}, | ||
| 45 | + ], | ||
| 46 | + :visitors => [ | ||
| 47 | + { :range => 594751..594777, :h => 7}, | ||
| 48 | + { :range => 594778..594795, :h => 8}, | ||
| 49 | + { :range => 91350..91358, :h => 7}, | ||
| 50 | + { :range => 91359..91366, :h => 8} | ||
| 51 | + ], | ||
| 52 | + :votes => [ | ||
| 53 | + { :range => 3145774..3145926, :h => 7}, | ||
| 54 | + { :range => 3145927..3145935, :h => 8}, | ||
| 55 | + { :range => 326504..326571, :h => 7}, | ||
| 56 | + { :range => 326572..326803, :h => 8}, | ||
| 57 | + ], | ||
| 58 | + } | ||
| 59 | + # UTC because Rails will be thinking DB is in UTC when we run this | ||
| 60 | + #time_spans.map! do |t| | ||
| 61 | + # { :gt => Time.parse("#{t[:gt]} UTC"), | ||
| 62 | + # :lt => Time.parse("#{t[:lt]} UTC"), | ||
| 63 | + # :h => t[:h] } | ||
| 64 | + #end | ||
| 65 | + datetime_fields = { | ||
| 66 | + #:appearances => ['created_at', 'updated_at'], | ||
| 67 | + #:choices => ['created_at', 'updated_at'], | ||
| 68 | + #:clicks => ['created_at', 'updated_at'], | ||
| 69 | + #:densities => ['created_at', 'updated_at'], | ||
| 70 | + #:flags => ['created_at', 'updated_at'], | ||
| 71 | + #:prompts => ['created_at', 'updated_at'], | ||
| 72 | + :skips => ['created_at', 'updated_at'], | ||
| 73 | + #:votes => ['created_at', 'updated_at'], | ||
| 74 | + #:visitors => ['created_at', 'updated_at'], | ||
| 75 | + #:users => ['created_at', 'updated_at'], | ||
| 76 | + #:questions => ['created_at', 'updated_at'], | ||
| 77 | + #:question_versions => ['created_at', 'updated_at'], | ||
| 78 | + #:delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'], | ||
| 79 | + } | ||
| 80 | + | ||
| 81 | + STDOUT.sync = true | ||
| 82 | + logger = Rails.logger | ||
| 83 | + datetime_fields.each do |table, columns| | ||
| 84 | + print "#{table}" | ||
| 85 | + batch_size = 10000 | ||
| 86 | + i = 0 | ||
| 87 | + where = '' | ||
| 88 | + # This is how we split the rows of a table between the various workers | ||
| 89 | + # so that they don't attempt to work on the same row as another worker. | ||
| 90 | + # The workerid is any number 0 through workers - 1. | ||
| 91 | + if args[:workers] > "1" | ||
| 92 | + where = "WHERE MOD(id, #{args[:workers]}) = #{args[:workerid]}" | ||
| 93 | + end | ||
| 94 | + while true do | ||
| 95 | + rows = ActiveRecord::Base.connection.select_all( | ||
| 96 | + "SELECT id, #{columns.join(", ")} FROM #{table} #{where} ORDER BY id LIMIT #{i*batch_size}, #{batch_size}" | ||
| 97 | + ) | ||
| 98 | + print "." | ||
| 99 | + | ||
| 100 | + rows.each do |row| | ||
| 101 | + updated_values = {} | ||
| 102 | + # delete any value where the value is blank (just for delayed_jobs) | ||
| 103 | + row.delete_if {|key, value| value.blank? } | ||
| 104 | + row.each do |column, value| | ||
| 105 | + next if column == "id" | ||
| 106 | + time_spans.each do |span| | ||
| 107 | + if value < span[:lt] && value > span[:gt] | ||
| 108 | + # if blank then ambiguous and we don't know how to translate | ||
| 109 | + if span[:h].blank? | ||
| 110 | + updated_values[column] = nil | ||
| 111 | + if unambiguator[table] && unambiguator[table].length > 0 | ||
| 112 | + unambiguator[table].each do |ids| | ||
| 113 | + updated_values[column] = ids[:h] if ids[:range].include? row["id"].to_i | ||
| 114 | + end | ||
| 115 | + end | ||
| 116 | + | ||
| 117 | + logger.info "AMBIGUOUS: #{table} #{row["id"]} #{column}: #{value}" if updated_values[column].blank? | ||
| 118 | + else | ||
| 119 | + updated_values[column] = span[:h] | ||
| 120 | + end | ||
| 121 | + break | ||
| 122 | + end | ||
| 123 | + end | ||
| 124 | + end | ||
| 125 | + # Check if some columns did not match any spans | ||
| 126 | + key_diff = row.keys - updated_values.keys - ["id"] | ||
| 127 | + if key_diff.length > 0 | ||
| 128 | + logger.info "MISSING SPAN: #{table} #{row["id"]} #{key_diff.inspect} #{row.inspect}" | ||
| 129 | + end | ||
| 130 | + # remove ambiguous columns (we set them to nil above) | ||
| 131 | + updated_values.delete_if {|key, value| value.blank? } | ||
| 132 | + if updated_values.length > 0 | ||
| 133 | + update = "UPDATE #{table} SET #{updated_values.map{|k,v| "#{k} = DATE_ADD(#{k}, INTERVAL #{v} HOUR)"}.join(", ")} WHERE id = #{row["id"]}" | ||
| 134 | + num = ActiveRecord::Base.connection.update_sql(update) | ||
| 135 | + if num == 1 | ||
| 136 | + logger.info "UPDATE: #{table} #{row.inspect} #{updated_values.inspect}" | ||
| 137 | + else | ||
| 138 | + logger.info "UPDATE FAILED: #{table} #{row.inspect} #{updated_values.inspect} #{num.inspect}" | ||
| 139 | + end | ||
| 140 | + end | ||
| 141 | + end | ||
| 142 | + | ||
| 143 | + i+= 1 | ||
| 144 | + break if rows.length < batchsize | ||
| 145 | + end | ||
| 146 | + print "\n" | ||
| 147 | + end | ||
| 148 | + end | ||
| 149 | + | ||
| 150 | + desc "Finds ambiguous times due to daylight savings time" | ||
| 151 | + task :find_ambiguous_times => :environment do | ||
| 152 | + datetime_fields = { | ||
| 153 | + :appearances => ['created_at', 'updated_at'], | ||
| 154 | + :choices => ['created_at', 'updated_at'], | ||
| 155 | + :clicks => ['created_at', 'updated_at'], | ||
| 156 | + :densities => ['created_at', 'updated_at'], | ||
| 157 | + :flags => ['created_at', 'updated_at'], | ||
| 158 | + :prompts => ['created_at', 'updated_at'], | ||
| 159 | + :skips => ['created_at', 'updated_at'], | ||
| 160 | + :votes => ['created_at', 'updated_at'], | ||
| 161 | + :visitors => ['created_at', 'updated_at'], | ||
| 162 | + :users => ['created_at', 'updated_at'], | ||
| 163 | + :questions => ['created_at', 'updated_at'], | ||
| 164 | + :question_versions => ['created_at', 'updated_at'], | ||
| 165 | + :delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'], | ||
| 166 | + } | ||
| 167 | + datetime_fields.each do |table, columns| | ||
| 168 | + where = columns.map{|c| "((#{c} > '2010-11-07 00:59:59' AND #{c} < '2010-11-07 02:00:00') OR (#{c} > '2011-11-06 00:59:59' AND #{c} < '2011-11-06 02:00:00'))"}.join(" OR ") | ||
| 169 | + rows = ActiveRecord::Base.connection.select_all( | ||
| 170 | + "SELECT id, #{columns.join(", ")} FROM #{table} WHERE #{where}" | ||
| 171 | + ) | ||
| 172 | + puts rows.inspect if rows.length > 0 | ||
| 173 | + end | ||
| 174 | + end | ||
| 175 | + | ||
| 176 | +end |