Commit 344dc7328a6b5e0017239e38cf9d75996d44c266

Authored by Luke Baker
1 parent c390551c

move timezone tasks to separate rake file

Showing 2 changed files with 176 additions and 173 deletions   Show diff stats
lib/tasks/prune_db.rake
1 1 namespace :prune_db do
2 2  
# Prints every row whose datetime columns fall strictly inside one of the two
# hard-coded windows below (2010-11-07 and 2011-11-06, 00:59:59 - 02:00:00).
# One query is issued per table; a table with no matching rows prints nothing.
desc "Finds ambiguous times due to daylight savings time"
task :find_ambiguous_times => :environment do
  # Exclusive [lower, upper] bounds of the windows that are searched.
  windows = [
    ['2010-11-07 00:59:59', '2010-11-07 02:00:00'],
    ['2011-11-06 00:59:59', '2011-11-06 02:00:00']
  ]

  timestamps = ['created_at', 'updated_at']
  targets = [
    :appearances, :choices, :clicks, :densities, :flags, :prompts, :skips,
    :votes, :visitors, :users, :questions, :question_versions
  ].map { |name| [name, timestamps] }
  # delayed_jobs carries extra datetime columns beyond the usual pair.
  targets << [:delayed_jobs, timestamps + ['run_at', 'locked_at', 'failed_at']]

  targets.each do |table, columns|
    # One parenthesised group per column, each OR-ing both windows.
    conditions = columns.map do |column|
      ranges = windows.map do |lower, upper|
        "(#{column} > '#{lower}' AND #{column} < '#{upper}')"
      end
      "(#{ranges.join(" OR ")})"
    end

    sql = "SELECT id, #{columns.join(", ")} FROM #{table} WHERE #{conditions.join(" OR ")}"
    rows = ActiveRecord::Base.connection.select_all(sql)
    puts rows.inspect if rows.length > 0
  end
end
28   -
# There is a very similar task in the AOI code base as well.
# Any core changes to this task should probably be reflected there.
#
# Shifts the datetime columns of every table listed in +datetime_fields+
# forward by the number of hours (+:h+) of the +time_spans+ entry the stored
# value falls in. Spans whose +:h+ is nil are ambiguous; for those the hour
# offset is looked up by row id in +unambiguator+, and rows that still cannot
# be resolved are logged as AMBIGUOUS and left untouched.
#
# Task arguments (rake supplies them as strings):
#   workerid - index of this worker, 0 through workers - 1 (default "0")
#   workers  - number of workers sharing the rows via MOD(id, workers)
#              (default "1")
#
# Raises ArgumentError when an argument is not an integer and RuntimeError
# when workers < 1 or workerid is outside 0...workers.
#
# NOTE(review): the update is a relative DATE_ADD, so running the task twice
# over the same rows shifts them twice.
desc "Converts all dates from PT to UTC"
task :convert_dates_to_utc, [:workerid, :workers] => [:environment] do |_t, args|
  args.with_defaults(:workerid => "0", :workers => "1")
  # Compare the arguments as integers: as strings "9" > "10" is true, which
  # rejected valid worker ids and accepted invalid ones. Converting also keeps
  # arbitrary text out of the SQL built below.
  worker_id = Integer(args[:workerid])
  worker_count = Integer(args[:workers])
  raise "workers must be at least 1" if worker_count < 1
  unless (0...worker_count).include?(worker_id)
    raise "workerid must be between 0 and workers - 1"
  end

  # Bounds are exclusive; :h is the number of hours to add, nil = ambiguous.
  time_spans = [
    { :gt => "2009-11-01 01:59:59", :lt => "2010-03-14 02:00:00", :h => 8 },
    { :gt => "2010-03-14 01:59:59", :lt => "2010-11-07 01:00:00", :h => 7 },
    { :gt => "2010-11-07 00:59:59", :lt => "2010-11-07 02:00:00", :h => nil },
    { :gt => "2010-11-07 01:59:59", :lt => "2011-03-13 02:00:00", :h => 8 },
    { :gt => "2011-03-13 01:59:59", :lt => "2011-11-06 01:00:00", :h => 7 },
    { :gt => "2011-11-06 00:59:59", :lt => "2011-11-06 02:00:00", :h => nil },
    { :gt => "2011-11-06 01:59:59", :lt => "2012-03-11 02:00:00", :h => 8 },
    { :gt => "2012-03-11 01:59:59", :lt => "2012-11-04 01:00:00", :h => 7 }
  ]
  # Hand-curated id ranges that resolve values inside the ambiguous spans.
  unambiguator = {
    :appearances => [
      { :range => 454229..454229, :h => 7 },
      { :range => 454426..454501, :h => 7 }, # 454501 updated_at needs additional hour
      { :range => 454502..454745, :h => 8 },
      { :range => 4005307..4005522, :h => 7 },
      { :range => 4005523..4005556, :h => 8 }
    ],
    :choices => [
      { :range => 181957..181957, :h => 7 } # based on appearance id 8392753
    ],
    :prompts => [
      { :range => 5191157..5191225, :h => 7 },
      { :range => 5191226..5191876, :h => 8 },
      { :range => 8392753..8392758, :h => 7 } # based on appearance id 4005361
    ],
    :question_versions => [
      { :range => 7126..7128, :h => 7 } # based on choice 181957
    ],
    :questions => [
      { :range => 1855..1855, :h => 7 } # based on question_versions 7128
    ],
    :skips => [
      { :range => 30948..30952, :h => 8 }, # based on vote 326681
      { :range => 365240..365276, :h => 7 },
      { :range => 365277..365281, :h => 8 }
    ],
    :visitors => [
      { :range => 594751..594777, :h => 7 },
      { :range => 594778..594795, :h => 8 },
      { :range => 91350..91358, :h => 7 },
      { :range => 91359..91366, :h => 8 }
    ],
    :votes => [
      { :range => 3145774..3145926, :h => 7 },
      { :range => 3145927..3145935, :h => 8 },
      { :range => 326504..326571, :h => 7 },
      { :range => 326572..326803, :h => 8 }
    ]
  }
  # UTC because Rails will be thinking DB is in UTC when we run this
  #time_spans.map! do |t|
  #  { :gt => Time.parse("#{t[:gt]} UTC"),
  #    :lt => Time.parse("#{t[:lt]} UTC"),
  #    :h => t[:h] }
  #end
  # Only the uncommented tables are processed by a run.
  datetime_fields = {
    #:appearances => ['created_at', 'updated_at'],
    #:choices => ['created_at', 'updated_at'],
    #:clicks => ['created_at', 'updated_at'],
    #:densities => ['created_at', 'updated_at'],
    #:flags => ['created_at', 'updated_at'],
    #:prompts => ['created_at', 'updated_at'],
    :skips => ['created_at', 'updated_at'],
    #:votes => ['created_at', 'updated_at'],
    #:visitors => ['created_at', 'updated_at'],
    #:users => ['created_at', 'updated_at'],
    #:questions => ['created_at', 'updated_at'],
    #:question_versions => ['created_at', 'updated_at'],
    #:delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'],
  }

  STDOUT.sync = true
  logger = Rails.logger
  datetime_fields.each do |table, columns|
    print "#{table}"
    batch_size = 10000
    # This is how we split the rows of a table between the various workers
    # so that they don't attempt to work on the same row as another worker.
    # The workerid is any number 0 through workers - 1.
    where = ''
    if worker_count > 1
      where = "WHERE MOD(id, #{worker_count}) = #{worker_id}"
    end

    batch = 0
    loop do
      rows = ActiveRecord::Base.connection.select_all(
        "SELECT id, #{columns.join(", ")} FROM #{table} #{where} ORDER BY id LIMIT #{batch * batch_size}, #{batch_size}"
      )
      print "."

      rows.each do |row|
        updated_values = {}
        # delete any value where the value is blank (just for delayed_jobs)
        row.delete_if { |_key, value| value.blank? }
        row.each do |column, value|
          next if column == "id"
          # Values are compared lexically against the span bounds.
          # NOTE(review): assumes select_all yields 'YYYY-MM-DD HH:MM:SS'
          # strings for datetime columns - confirm for the adapter in use.
          span = time_spans.find { |s| value < s[:lt] && value > s[:gt] }
          next unless span # reported as MISSING SPAN below

          hours = span[:h]
          # if blank then ambiguous and we don't know how to translate,
          # unless the row id is covered by the unambiguator table
          if hours.blank?
            known = (unambiguator[table] || []).find do |ids|
              ids[:range].include?(row["id"].to_i)
            end
            hours = known && known[:h]
            logger.info "AMBIGUOUS: #{table} #{row["id"]} #{column}: #{value}" if hours.blank?
          end
          # The key is recorded even when hours is nil so an ambiguous column
          # is not also reported as a missing span.
          updated_values[column] = hours
        end

        # Check if some columns did not match any spans
        key_diff = row.keys - updated_values.keys - ["id"]
        if key_diff.length > 0
          logger.info "MISSING SPAN: #{table} #{row["id"]} #{key_diff.inspect} #{row.inspect}"
        end

        # remove ambiguous columns (we set them to nil above)
        updated_values.delete_if { |_key, value| value.blank? }
        next if updated_values.empty?

        assignments = updated_values.map { |k, v| "#{k} = DATE_ADD(#{k}, INTERVAL #{v} HOUR)" }.join(", ")
        update = "UPDATE #{table} SET #{assignments} WHERE id = #{row["id"]}"
        num = ActiveRecord::Base.connection.update_sql(update)
        if num == 1
          logger.info "UPDATE: #{table} #{row.inspect} #{updated_values.inspect}"
        else
          logger.info "UPDATE FAILED: #{table} #{row.inspect} #{updated_values.inspect} #{num.inspect}"
        end
      end

      batch += 1
      # A short batch means the table is exhausted. (This used to reference
      # an undefined `batchsize` and raised NameError after the first batch.)
      break if rows.length < batch_size
    end
    print "\n"
  end
end
175   -
176 3 desc "Fixes a mis-match between a vote's prompt_id and its appearance's prompt_id. Sets the appearance prompt_id to match the vote's prompt_id"
177 4 task :fix_promptid_mismatch => :environment do
178 5 bad_records = Vote.connection.select_all "
... ...
lib/tasks/timezone.rake 0 → 100644
... ... @@ -0,0 +1,176 @@
# Rake tasks for the one-off migration of stored datetimes from PT to UTC.
namespace :timezone do

  # There is a very similar task in the AOI code base as well.
  # Any core changes to this task should probably be reflected there.
  #
  # Shifts the datetime columns of every table listed in +datetime_fields+
  # forward by the number of hours (+:h+) of the +time_spans+ entry the stored
  # value falls in. Spans whose +:h+ is nil are ambiguous; for those the hour
  # offset is looked up by row id in +unambiguator+, and rows that still
  # cannot be resolved are logged as AMBIGUOUS and left untouched.
  #
  # Task arguments (rake supplies them as strings):
  #   workerid - index of this worker, 0 through workers - 1 (default "0")
  #   workers  - number of workers sharing the rows via MOD(id, workers)
  #              (default "1")
  #
  # Raises ArgumentError when an argument is not an integer and RuntimeError
  # when workers < 1 or workerid is outside 0...workers.
  #
  # NOTE(review): the update is a relative DATE_ADD, so running the task
  # twice over the same rows shifts them twice.
  desc "Converts all dates from PT to UTC"
  task :convert_dates_to_utc, [:workerid, :workers] => [:environment] do |_t, args|
    args.with_defaults(:workerid => "0", :workers => "1")
    # Compare the arguments as integers: as strings "9" > "10" is true, which
    # rejected valid worker ids and accepted invalid ones. Converting also
    # keeps arbitrary text out of the SQL built below.
    worker_id = Integer(args[:workerid])
    worker_count = Integer(args[:workers])
    raise "workers must be at least 1" if worker_count < 1
    unless (0...worker_count).include?(worker_id)
      raise "workerid must be between 0 and workers - 1"
    end

    # Bounds are exclusive; :h is the number of hours to add, nil = ambiguous.
    time_spans = [
      { :gt => "2009-11-01 01:59:59", :lt => "2010-03-14 02:00:00", :h => 8 },
      { :gt => "2010-03-14 01:59:59", :lt => "2010-11-07 01:00:00", :h => 7 },
      { :gt => "2010-11-07 00:59:59", :lt => "2010-11-07 02:00:00", :h => nil },
      { :gt => "2010-11-07 01:59:59", :lt => "2011-03-13 02:00:00", :h => 8 },
      { :gt => "2011-03-13 01:59:59", :lt => "2011-11-06 01:00:00", :h => 7 },
      { :gt => "2011-11-06 00:59:59", :lt => "2011-11-06 02:00:00", :h => nil },
      { :gt => "2011-11-06 01:59:59", :lt => "2012-03-11 02:00:00", :h => 8 },
      { :gt => "2012-03-11 01:59:59", :lt => "2012-11-04 01:00:00", :h => 7 }
    ]
    # Hand-curated id ranges that resolve values inside the ambiguous spans.
    unambiguator = {
      :appearances => [
        { :range => 454229..454229, :h => 7 },
        { :range => 454426..454501, :h => 7 }, # 454501 updated_at needs additional hour
        { :range => 454502..454745, :h => 8 },
        { :range => 4005307..4005522, :h => 7 },
        { :range => 4005523..4005556, :h => 8 }
      ],
      :choices => [
        { :range => 181957..181957, :h => 7 } # based on appearance id 8392753
      ],
      :prompts => [
        { :range => 5191157..5191225, :h => 7 },
        { :range => 5191226..5191876, :h => 8 },
        { :range => 8392753..8392758, :h => 7 } # based on appearance id 4005361
      ],
      :question_versions => [
        { :range => 7126..7128, :h => 7 } # based on choice 181957
      ],
      :questions => [
        { :range => 1855..1855, :h => 7 } # based on question_versions 7128
      ],
      :skips => [
        { :range => 30948..30952, :h => 8 }, # based on vote 326681
        { :range => 365240..365276, :h => 7 },
        { :range => 365277..365281, :h => 8 }
      ],
      :visitors => [
        { :range => 594751..594777, :h => 7 },
        { :range => 594778..594795, :h => 8 },
        { :range => 91350..91358, :h => 7 },
        { :range => 91359..91366, :h => 8 }
      ],
      :votes => [
        { :range => 3145774..3145926, :h => 7 },
        { :range => 3145927..3145935, :h => 8 },
        { :range => 326504..326571, :h => 7 },
        { :range => 326572..326803, :h => 8 }
      ]
    }
    # UTC because Rails will be thinking DB is in UTC when we run this
    #time_spans.map! do |t|
    #  { :gt => Time.parse("#{t[:gt]} UTC"),
    #    :lt => Time.parse("#{t[:lt]} UTC"),
    #    :h => t[:h] }
    #end
    # Only the uncommented tables are processed by a run.
    datetime_fields = {
      #:appearances => ['created_at', 'updated_at'],
      #:choices => ['created_at', 'updated_at'],
      #:clicks => ['created_at', 'updated_at'],
      #:densities => ['created_at', 'updated_at'],
      #:flags => ['created_at', 'updated_at'],
      #:prompts => ['created_at', 'updated_at'],
      :skips => ['created_at', 'updated_at'],
      #:votes => ['created_at', 'updated_at'],
      #:visitors => ['created_at', 'updated_at'],
      #:users => ['created_at', 'updated_at'],
      #:questions => ['created_at', 'updated_at'],
      #:question_versions => ['created_at', 'updated_at'],
      #:delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'],
    }

    STDOUT.sync = true
    logger = Rails.logger
    datetime_fields.each do |table, columns|
      print "#{table}"
      batch_size = 10000
      # This is how we split the rows of a table between the various workers
      # so that they don't attempt to work on the same row as another worker.
      # The workerid is any number 0 through workers - 1.
      where = ''
      if worker_count > 1
        where = "WHERE MOD(id, #{worker_count}) = #{worker_id}"
      end

      batch = 0
      loop do
        rows = ActiveRecord::Base.connection.select_all(
          "SELECT id, #{columns.join(", ")} FROM #{table} #{where} ORDER BY id LIMIT #{batch * batch_size}, #{batch_size}"
        )
        print "."

        rows.each do |row|
          updated_values = {}
          # delete any value where the value is blank (just for delayed_jobs)
          row.delete_if { |_key, value| value.blank? }
          row.each do |column, value|
            next if column == "id"
            # Values are compared lexically against the span bounds.
            # NOTE(review): assumes select_all yields 'YYYY-MM-DD HH:MM:SS'
            # strings for datetime columns - confirm for the adapter in use.
            span = time_spans.find { |s| value < s[:lt] && value > s[:gt] }
            next unless span # reported as MISSING SPAN below

            hours = span[:h]
            # if blank then ambiguous and we don't know how to translate,
            # unless the row id is covered by the unambiguator table
            if hours.blank?
              known = (unambiguator[table] || []).find do |ids|
                ids[:range].include?(row["id"].to_i)
              end
              hours = known && known[:h]
              logger.info "AMBIGUOUS: #{table} #{row["id"]} #{column}: #{value}" if hours.blank?
            end
            # The key is recorded even when hours is nil so an ambiguous
            # column is not also reported as a missing span.
            updated_values[column] = hours
          end

          # Check if some columns did not match any spans
          key_diff = row.keys - updated_values.keys - ["id"]
          if key_diff.length > 0
            logger.info "MISSING SPAN: #{table} #{row["id"]} #{key_diff.inspect} #{row.inspect}"
          end

          # remove ambiguous columns (we set them to nil above)
          updated_values.delete_if { |_key, value| value.blank? }
          next if updated_values.empty?

          assignments = updated_values.map { |k, v| "#{k} = DATE_ADD(#{k}, INTERVAL #{v} HOUR)" }.join(", ")
          update = "UPDATE #{table} SET #{assignments} WHERE id = #{row["id"]}"
          num = ActiveRecord::Base.connection.update_sql(update)
          if num == 1
            logger.info "UPDATE: #{table} #{row.inspect} #{updated_values.inspect}"
          else
            logger.info "UPDATE FAILED: #{table} #{row.inspect} #{updated_values.inspect} #{num.inspect}"
          end
        end

        batch += 1
        # A short batch means the table is exhausted. (This used to reference
        # an undefined `batchsize` and raised NameError after the first batch.)
        break if rows.length < batch_size
      end
      print "\n"
    end
  end

  # Prints every row whose datetime columns fall strictly inside one of the
  # two hard-coded windows below (2010-11-07 and 2011-11-06, 00:59:59 -
  # 02:00:00), i.e. the spans convert_dates_to_utc treats as ambiguous.
  # One query is issued per table; a table with no matches prints nothing.
  desc "Finds ambiguous times due to daylight savings time"
  task :find_ambiguous_times => :environment do
    # Exclusive [lower, upper] bounds of the windows that are searched.
    windows = [
      ['2010-11-07 00:59:59', '2010-11-07 02:00:00'],
      ['2011-11-06 00:59:59', '2011-11-06 02:00:00']
    ]

    timestamps = ['created_at', 'updated_at']
    targets = [
      :appearances, :choices, :clicks, :densities, :flags, :prompts, :skips,
      :votes, :visitors, :users, :questions, :question_versions
    ].map { |name| [name, timestamps] }
    # delayed_jobs carries extra datetime columns beyond the usual pair.
    targets << [:delayed_jobs, timestamps + ['run_at', 'locked_at', 'failed_at']]

    targets.each do |table, columns|
      # One parenthesised group per column, each OR-ing both windows.
      conditions = columns.map do |column|
        ranges = windows.map do |lower, upper|
          "(#{column} > '#{lower}' AND #{column} < '#{upper}')"
        end
        "(#{ranges.join(" OR ")})"
      end

      sql = "SELECT id, #{columns.join(", ")} FROM #{table} WHERE #{conditions.join(" OR ")}"
      rows = ActiveRecord::Base.connection.select_all(sql)
      puts rows.inspect if rows.length > 0
    end
  end

end
... ...