Commit 344dc7328a6b5e0017239e38cf9d75996d44c266
1 parent
c390551c
Exists in
master
and in
1 other branch
move timezone tasks to separate rake file
Showing
2 changed files
with
176 additions
and
173 deletions
Show diff stats
lib/tasks/prune_db.rake
| 1 | 1 | namespace :prune_db do |
| 2 | 2 | |
| 3 | - desc "Finds ambiguous times due to daylight savings time" | |
| 4 | - task :find_ambiguous_times => :environment do | |
| 5 | - datetime_fields = { | |
| 6 | - :appearances => ['created_at', 'updated_at'], | |
| 7 | - :choices => ['created_at', 'updated_at'], | |
| 8 | - :clicks => ['created_at', 'updated_at'], | |
| 9 | - :densities => ['created_at', 'updated_at'], | |
| 10 | - :flags => ['created_at', 'updated_at'], | |
| 11 | - :prompts => ['created_at', 'updated_at'], | |
| 12 | - :skips => ['created_at', 'updated_at'], | |
| 13 | - :votes => ['created_at', 'updated_at'], | |
| 14 | - :visitors => ['created_at', 'updated_at'], | |
| 15 | - :users => ['created_at', 'updated_at'], | |
| 16 | - :questions => ['created_at', 'updated_at'], | |
| 17 | - :question_versions => ['created_at', 'updated_at'], | |
| 18 | - :delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'], | |
| 19 | - } | |
| 20 | - datetime_fields.each do |table, columns| | |
| 21 | - where = columns.map{|c| "((#{c} > '2010-11-07 00:59:59' AND #{c} < '2010-11-07 02:00:00') OR (#{c} > '2011-11-06 00:59:59' AND #{c} < '2011-11-06 02:00:00'))"}.join(" OR ") | |
| 22 | - rows = ActiveRecord::Base.connection.select_all( | |
| 23 | - "SELECT id, #{columns.join(", ")} FROM #{table} WHERE #{where}" | |
| 24 | - ) | |
| 25 | - puts rows.inspect if rows.length > 0 | |
| 26 | - end | |
| 27 | - end | |
| 28 | - | |
| 29 | - # There is a very similar task in the AOI code base as well. | |
| 30 | - # Any core changes to this task should probably be reflected there. | |
| 31 | - desc "Converts all dates from PT to UTC" | |
| 32 | - task :convert_dates_to_utc, [:workerid, :workers] => [:environment] do|t,args| | |
| 33 | - args.with_defaults(:workerid => "0", :workers => "1") | |
| 34 | - raise "workerid can not be greater than workers" if args[:workerid] > args[:workers] | |
| 35 | - time_spans = [ | |
| 36 | - { :gt => "2009-11-01 01:59:59", :lt => "2010-03-14 02:00:00", :h => 8}, | |
| 37 | - { :gt => "2010-03-14 01:59:59", :lt => "2010-11-07 01:00:00", :h => 7}, | |
| 38 | - { :gt => "2010-11-07 00:59:59", :lt => "2010-11-07 02:00:00", :h => nil}, | |
| 39 | - { :gt => "2010-11-07 01:59:59", :lt => "2011-03-13 02:00:00", :h => 8}, | |
| 40 | - { :gt => "2011-03-13 01:59:59", :lt => "2011-11-06 01:00:00", :h => 7}, | |
| 41 | - { :gt => "2011-11-06 00:59:59", :lt => "2011-11-06 02:00:00", :h => nil}, | |
| 42 | - { :gt => "2011-11-06 01:59:59", :lt => "2012-03-11 02:00:00", :h => 8}, | |
| 43 | - { :gt => "2012-03-11 01:59:59", :lt => "2012-11-04 01:00:00", :h => 7} | |
| 44 | - ] | |
| 45 | - unambiguator = { | |
| 46 | - :appearances => [ | |
| 47 | - { :range => 454229..454229, :h => 7}, | |
| 48 | - { :range => 454426..454501, :h => 7}, # 454501 updated_at needs additional hour | |
| 49 | - { :range => 454502..454745, :h => 8}, | |
| 50 | - { :range => 4005307..4005522, :h => 7 }, | |
| 51 | - { :range => 4005523..4005556, :h => 8 } | |
| 52 | - ], | |
| 53 | - :choices => [ | |
| 54 | - { :range => 181957..181957, :h => 7} # based on appearance id 8392753 | |
| 55 | - ], | |
| 56 | - :prompts => [ | |
| 57 | - { :range => 5191157..5191225, :h => 7}, | |
| 58 | - { :range => 5191226..5191876, :h => 8}, | |
| 59 | - { :range => 8392753..8392758, :h => 7}, # based on appearance id 4005361 | |
| 60 | - ], | |
| 61 | - :question_versions => [ | |
| 62 | - { :range => 7126..7128, :h => 7} # based on choice 181957 | |
| 63 | - ], | |
| 64 | - :questions => [ | |
| 65 | - { :range => 1855..1855, :h => 7} # based on question_versions 7128 | |
| 66 | - ], | |
| 67 | - :skips => [ | |
| 68 | - { :range => 30948..30952, :h => 8}, # based on vote 326681 | |
| 69 | - { :range => 365240..365276, :h => 7}, | |
| 70 | - { :range => 365277..365281, :h => 8}, | |
| 71 | - ], | |
| 72 | - :visitors => [ | |
| 73 | - { :range => 594751..594777, :h => 7}, | |
| 74 | - { :range => 594778..594795, :h => 8}, | |
| 75 | - { :range => 91350..91358, :h => 7}, | |
| 76 | - { :range => 91359..91366, :h => 8} | |
| 77 | - ], | |
| 78 | - :votes => [ | |
| 79 | - { :range => 3145774..3145926, :h => 7}, | |
| 80 | - { :range => 3145927..3145935, :h => 8}, | |
| 81 | - { :range => 326504..326571, :h => 7}, | |
| 82 | - { :range => 326572..326803, :h => 8}, | |
| 83 | - ], | |
| 84 | - } | |
| 85 | - # UTC because Rails will be thinking DB is in UTC when we run this | |
| 86 | - #time_spans.map! do |t| | |
| 87 | - # { :gt => Time.parse("#{t[:gt]} UTC"), | |
| 88 | - # :lt => Time.parse("#{t[:lt]} UTC"), | |
| 89 | - # :h => t[:h] } | |
| 90 | - #end | |
| 91 | - datetime_fields = { | |
| 92 | - #:appearances => ['created_at', 'updated_at'], | |
| 93 | - #:choices => ['created_at', 'updated_at'], | |
| 94 | - #:clicks => ['created_at', 'updated_at'], | |
| 95 | - #:densities => ['created_at', 'updated_at'], | |
| 96 | - #:flags => ['created_at', 'updated_at'], | |
| 97 | - #:prompts => ['created_at', 'updated_at'], | |
| 98 | - :skips => ['created_at', 'updated_at'], | |
| 99 | - #:votes => ['created_at', 'updated_at'], | |
| 100 | - #:visitors => ['created_at', 'updated_at'], | |
| 101 | - #:users => ['created_at', 'updated_at'], | |
| 102 | - #:questions => ['created_at', 'updated_at'], | |
| 103 | - #:question_versions => ['created_at', 'updated_at'], | |
| 104 | - #:delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'], | |
| 105 | - } | |
| 106 | - | |
| 107 | - STDOUT.sync = true | |
| 108 | - logger = Rails.logger | |
| 109 | - datetime_fields.each do |table, columns| | |
| 110 | - print "#{table}" | |
| 111 | - batch_size = 10000 | |
| 112 | - i = 0 | |
| 113 | - where = '' | |
| 114 | - # This is how we split the rows of a table between the various workers | |
| 115 | - # so that they don't attempt to work on the same row as another worker. | |
| 116 | - # The workerid is any number 0 through workers - 1. | |
| 117 | - if args[:workers] > "1" | |
| 118 | - where = "WHERE MOD(id, #{args[:workers]}) = #{args[:workerid]}" | |
| 119 | - end | |
| 120 | - while true do | |
| 121 | - rows = ActiveRecord::Base.connection.select_all( | |
| 122 | - "SELECT id, #{columns.join(", ")} FROM #{table} #{where} ORDER BY id LIMIT #{i*batch_size}, #{batch_size}" | |
| 123 | - ) | |
| 124 | - print "." | |
| 125 | - | |
| 126 | - rows.each do |row| | |
| 127 | - updated_values = {} | |
| 128 | - # delete any value where the value is blank (just for delayed_jobs) | |
| 129 | - row.delete_if {|key, value| value.blank? } | |
| 130 | - row.each do |column, value| | |
| 131 | - next if column == "id" | |
| 132 | - time_spans.each do |span| | |
| 133 | - if value < span[:lt] && value > span[:gt] | |
| 134 | - # if blank then ambiguous and we don't know how to translate | |
| 135 | - if span[:h].blank? | |
| 136 | - updated_values[column] = nil | |
| 137 | - if unambiguator[table] && unambiguator[table].length > 0 | |
| 138 | - unambiguator[table].each do |ids| | |
| 139 | - updated_values[column] = ids[:h] if ids[:range].include? row["id"].to_i | |
| 140 | - end | |
| 141 | - end | |
| 142 | - | |
| 143 | - logger.info "AMBIGUOUS: #{table} #{row["id"]} #{column}: #{value}" if updated_values[column].blank? | |
| 144 | - else | |
| 145 | - updated_values[column] = span[:h] | |
| 146 | - end | |
| 147 | - break | |
| 148 | - end | |
| 149 | - end | |
| 150 | - end | |
| 151 | - # Check if some columns did not match any spans | |
| 152 | - key_diff = row.keys - updated_values.keys - ["id"] | |
| 153 | - if key_diff.length > 0 | |
| 154 | - logger.info "MISSING SPAN: #{table} #{row["id"]} #{key_diff.inspect} #{row.inspect}" | |
| 155 | - end | |
| 156 | - # remove ambiguous columns (we set them to nil above) | |
| 157 | - updated_values.delete_if {|key, value| value.blank? } | |
| 158 | - if updated_values.length > 0 | |
| 159 | - update = "UPDATE #{table} SET #{updated_values.map{|k,v| "#{k} = DATE_ADD(#{k}, INTERVAL #{v} HOUR)"}.join(", ")} WHERE id = #{row["id"]}" | |
| 160 | - num = ActiveRecord::Base.connection.update_sql(update) | |
| 161 | - if num == 1 | |
| 162 | - logger.info "UPDATE: #{table} #{row.inspect} #{updated_values.inspect}" | |
| 163 | - else | |
| 164 | - logger.info "UPDATE FAILED: #{table} #{row.inspect} #{updated_values.inspect} #{num.inspect}" | |
| 165 | - end | |
| 166 | - end | |
| 167 | - end | |
| 168 | - | |
| 169 | - i+= 1 | |
| 170 | - break if rows.length < batchsize | |
| 171 | - end | |
| 172 | - print "\n" | |
| 173 | - end | |
| 174 | - end | |
| 175 | - | |
| 176 | 3 | desc "Fixes a mis-match between a vote's prompt_id and its appearance's prompt_id. Sets the appearance prompt_id to match the vote's prompt_id" |
| 177 | 4 | task :fix_promptid_mismatch => :environment do |
| 178 | 5 | bad_records = Vote.connection.select_all " | ... | ... |
| ... | ... | @@ -0,0 +1,176 @@ |
| 1 | +namespace :timezone do | |
| 2 | + | |
| 3 | + # There is a very similar task in the AOI code base as well. | |
| 4 | + # Any core changes to this task should probably be reflected there. | |
| 5 | + desc "Converts all dates from PT to UTC" | |
| 6 | + task :convert_dates_to_utc, [:workerid, :workers] => [:environment] do|t,args| | |
| 7 | + args.with_defaults(:workerid => "0", :workers => "1") | |
| 8 | + raise "workerid can not be greater than workers" if args[:workerid] > args[:workers] | |
| 9 | + time_spans = [ | |
| 10 | + { :gt => "2009-11-01 01:59:59", :lt => "2010-03-14 02:00:00", :h => 8}, | |
| 11 | + { :gt => "2010-03-14 01:59:59", :lt => "2010-11-07 01:00:00", :h => 7}, | |
| 12 | + { :gt => "2010-11-07 00:59:59", :lt => "2010-11-07 02:00:00", :h => nil}, | |
| 13 | + { :gt => "2010-11-07 01:59:59", :lt => "2011-03-13 02:00:00", :h => 8}, | |
| 14 | + { :gt => "2011-03-13 01:59:59", :lt => "2011-11-06 01:00:00", :h => 7}, | |
| 15 | + { :gt => "2011-11-06 00:59:59", :lt => "2011-11-06 02:00:00", :h => nil}, | |
| 16 | + { :gt => "2011-11-06 01:59:59", :lt => "2012-03-11 02:00:00", :h => 8}, | |
| 17 | + { :gt => "2012-03-11 01:59:59", :lt => "2012-11-04 01:00:00", :h => 7} | |
| 18 | + ] | |
| 19 | + unambiguator = { | |
| 20 | + :appearances => [ | |
| 21 | + { :range => 454229..454229, :h => 7}, | |
| 22 | + { :range => 454426..454501, :h => 7}, # 454501 updated_at needs additional hour | |
| 23 | + { :range => 454502..454745, :h => 8}, | |
| 24 | + { :range => 4005307..4005522, :h => 7 }, | |
| 25 | + { :range => 4005523..4005556, :h => 8 } | |
| 26 | + ], | |
| 27 | + :choices => [ | |
| 28 | + { :range => 181957..181957, :h => 7} # based on appearance id 8392753 | |
| 29 | + ], | |
| 30 | + :prompts => [ | |
| 31 | + { :range => 5191157..5191225, :h => 7}, | |
| 32 | + { :range => 5191226..5191876, :h => 8}, | |
| 33 | + { :range => 8392753..8392758, :h => 7}, # based on appearance id 4005361 | |
| 34 | + ], | |
| 35 | + :question_versions => [ | |
| 36 | + { :range => 7126..7128, :h => 7} # based on choice 181957 | |
| 37 | + ], | |
| 38 | + :questions => [ | |
| 39 | + { :range => 1855..1855, :h => 7} # based on question_versions 7128 | |
| 40 | + ], | |
| 41 | + :skips => [ | |
| 42 | + { :range => 30948..30952, :h => 8}, # based on vote 326681 | |
| 43 | + { :range => 365240..365276, :h => 7}, | |
| 44 | + { :range => 365277..365281, :h => 8}, | |
| 45 | + ], | |
| 46 | + :visitors => [ | |
| 47 | + { :range => 594751..594777, :h => 7}, | |
| 48 | + { :range => 594778..594795, :h => 8}, | |
| 49 | + { :range => 91350..91358, :h => 7}, | |
| 50 | + { :range => 91359..91366, :h => 8} | |
| 51 | + ], | |
| 52 | + :votes => [ | |
| 53 | + { :range => 3145774..3145926, :h => 7}, | |
| 54 | + { :range => 3145927..3145935, :h => 8}, | |
| 55 | + { :range => 326504..326571, :h => 7}, | |
| 56 | + { :range => 326572..326803, :h => 8}, | |
| 57 | + ], | |
| 58 | + } | |
| 59 | + # UTC because Rails will be thinking DB is in UTC when we run this | |
| 60 | + #time_spans.map! do |t| | |
| 61 | + # { :gt => Time.parse("#{t[:gt]} UTC"), | |
| 62 | + # :lt => Time.parse("#{t[:lt]} UTC"), | |
| 63 | + # :h => t[:h] } | |
| 64 | + #end | |
| 65 | + datetime_fields = { | |
| 66 | + #:appearances => ['created_at', 'updated_at'], | |
| 67 | + #:choices => ['created_at', 'updated_at'], | |
| 68 | + #:clicks => ['created_at', 'updated_at'], | |
| 69 | + #:densities => ['created_at', 'updated_at'], | |
| 70 | + #:flags => ['created_at', 'updated_at'], | |
| 71 | + #:prompts => ['created_at', 'updated_at'], | |
| 72 | + :skips => ['created_at', 'updated_at'], | |
| 73 | + #:votes => ['created_at', 'updated_at'], | |
| 74 | + #:visitors => ['created_at', 'updated_at'], | |
| 75 | + #:users => ['created_at', 'updated_at'], | |
| 76 | + #:questions => ['created_at', 'updated_at'], | |
| 77 | + #:question_versions => ['created_at', 'updated_at'], | |
| 78 | + #:delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'], | |
| 79 | + } | |
| 80 | + | |
| 81 | + STDOUT.sync = true | |
| 82 | + logger = Rails.logger | |
| 83 | + datetime_fields.each do |table, columns| | |
| 84 | + print "#{table}" | |
| 85 | + batch_size = 10000 | |
| 86 | + i = 0 | |
| 87 | + where = '' | |
| 88 | + # This is how we split the rows of a table between the various workers | |
| 89 | + # so that they don't attempt to work on the same row as another worker. | |
| 90 | + # The workerid is any number 0 through workers - 1. | |
| 91 | + if args[:workers] > "1" | |
| 92 | + where = "WHERE MOD(id, #{args[:workers]}) = #{args[:workerid]}" | |
| 93 | + end | |
| 94 | + while true do | |
| 95 | + rows = ActiveRecord::Base.connection.select_all( | |
| 96 | + "SELECT id, #{columns.join(", ")} FROM #{table} #{where} ORDER BY id LIMIT #{i*batch_size}, #{batch_size}" | |
| 97 | + ) | |
| 98 | + print "." | |
| 99 | + | |
| 100 | + rows.each do |row| | |
| 101 | + updated_values = {} | |
| 102 | + # delete any value where the value is blank (just for delayed_jobs) | |
| 103 | + row.delete_if {|key, value| value.blank? } | |
| 104 | + row.each do |column, value| | |
| 105 | + next if column == "id" | |
| 106 | + time_spans.each do |span| | |
| 107 | + if value < span[:lt] && value > span[:gt] | |
| 108 | + # if blank then ambiguous and we don't know how to translate | |
| 109 | + if span[:h].blank? | |
| 110 | + updated_values[column] = nil | |
| 111 | + if unambiguator[table] && unambiguator[table].length > 0 | |
| 112 | + unambiguator[table].each do |ids| | |
| 113 | + updated_values[column] = ids[:h] if ids[:range].include? row["id"].to_i | |
| 114 | + end | |
| 115 | + end | |
| 116 | + | |
| 117 | + logger.info "AMBIGUOUS: #{table} #{row["id"]} #{column}: #{value}" if updated_values[column].blank? | |
| 118 | + else | |
| 119 | + updated_values[column] = span[:h] | |
| 120 | + end | |
| 121 | + break | |
| 122 | + end | |
| 123 | + end | |
| 124 | + end | |
| 125 | + # Check if some columns did not match any spans | |
| 126 | + key_diff = row.keys - updated_values.keys - ["id"] | |
| 127 | + if key_diff.length > 0 | |
| 128 | + logger.info "MISSING SPAN: #{table} #{row["id"]} #{key_diff.inspect} #{row.inspect}" | |
| 129 | + end | |
| 130 | + # remove ambiguous columns (we set them to nil above) | |
| 131 | + updated_values.delete_if {|key, value| value.blank? } | |
| 132 | + if updated_values.length > 0 | |
| 133 | + update = "UPDATE #{table} SET #{updated_values.map{|k,v| "#{k} = DATE_ADD(#{k}, INTERVAL #{v} HOUR)"}.join(", ")} WHERE id = #{row["id"]}" | |
| 134 | + num = ActiveRecord::Base.connection.update_sql(update) | |
| 135 | + if num == 1 | |
| 136 | + logger.info "UPDATE: #{table} #{row.inspect} #{updated_values.inspect}" | |
| 137 | + else | |
| 138 | + logger.info "UPDATE FAILED: #{table} #{row.inspect} #{updated_values.inspect} #{num.inspect}" | |
| 139 | + end | |
| 140 | + end | |
| 141 | + end | |
| 142 | + | |
| 143 | + i+= 1 | |
| 144 | + break if rows.length < batchsize | |
| 145 | + end | |
| 146 | + print "\n" | |
| 147 | + end | |
| 148 | + end | |
| 149 | + | |
| 150 | + desc "Finds ambiguous times due to daylight savings time" | |
| 151 | + task :find_ambiguous_times => :environment do | |
| 152 | + datetime_fields = { | |
| 153 | + :appearances => ['created_at', 'updated_at'], | |
| 154 | + :choices => ['created_at', 'updated_at'], | |
| 155 | + :clicks => ['created_at', 'updated_at'], | |
| 156 | + :densities => ['created_at', 'updated_at'], | |
| 157 | + :flags => ['created_at', 'updated_at'], | |
| 158 | + :prompts => ['created_at', 'updated_at'], | |
| 159 | + :skips => ['created_at', 'updated_at'], | |
| 160 | + :votes => ['created_at', 'updated_at'], | |
| 161 | + :visitors => ['created_at', 'updated_at'], | |
| 162 | + :users => ['created_at', 'updated_at'], | |
| 163 | + :questions => ['created_at', 'updated_at'], | |
| 164 | + :question_versions => ['created_at', 'updated_at'], | |
| 165 | + :delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'], | |
| 166 | + } | |
| 167 | + datetime_fields.each do |table, columns| | |
| 168 | + where = columns.map{|c| "((#{c} > '2010-11-07 00:59:59' AND #{c} < '2010-11-07 02:00:00') OR (#{c} > '2011-11-06 00:59:59' AND #{c} < '2011-11-06 02:00:00'))"}.join(" OR ") | |
| 169 | + rows = ActiveRecord::Base.connection.select_all( | |
| 170 | + "SELECT id, #{columns.join(", ")} FROM #{table} WHERE #{where}" | |
| 171 | + ) | |
| 172 | + puts rows.inspect if rows.length > 0 | |
| 173 | + end | |
| 174 | + end | |
| 175 | + | |
| 176 | +end | ... | ... |