timezone.rake 7.87 KB
namespace :timezone do

  # There is a very similar task in the AOI code base as well.
  # Any core changes to this task should probably be reflected there.
  desc "Converts all dates from PT to UTC"
  task :convert_dates_to_utc, [:workerid, :workers] => [:environment] do|t,args|
    args.with_defaults(:workerid => "0", :workers => "1")
    raise "workerid can not be greater than workers" if args[:workerid] > args[:workers]
    time_spans = [
      { :gt => "2009-11-01 01:59:59", :lt => "2010-03-14 02:00:00", :h => 8},
      { :gt => "2010-03-14 01:59:59", :lt => "2010-11-07 01:00:00", :h => 7},
      { :gt => "2010-11-07 00:59:59", :lt => "2010-11-07 02:00:00", :h => nil},
      { :gt => "2010-11-07 01:59:59", :lt => "2011-03-13 02:00:00", :h => 8},
      { :gt => "2011-03-13 01:59:59", :lt => "2011-11-06 01:00:00", :h => 7},
      { :gt => "2011-11-06 00:59:59", :lt => "2011-11-06 02:00:00", :h => nil},
      { :gt => "2011-11-06 01:59:59", :lt => "2012-03-11 02:00:00", :h => 8},
      { :gt => "2012-03-11 01:59:59", :lt => "2012-11-04 01:00:00", :h => 7}
    ]
    unambiguator = {
      :appearances => [
        { :range => 454229..454229, :h => 7},
        { :range => 454426..454501, :h => 7}, # 454501 updated_at needs additional hour
        { :range => 454502..454745, :h => 8},
	      { :range => 4005307..4005522, :h => 7 },
	      { :range => 4005523..4005556, :h => 8 }
      ],
      :choices => [
        { :range => 181957..181957, :h => 7} # based on appearance id 8392753
      ],
      :prompts => [
        { :range => 5191157..5191225, :h => 7},
        { :range => 5191226..5191876, :h => 8},
        { :range => 8392753..8392758, :h => 7}, # based on appearance id 4005361
      ],
      :question_versions => [
        { :range => 7126..7128, :h => 7} # based on choice 181957
      ],
      :questions => [
        { :range => 1855..1855, :h => 7} # based on question_versions 7128
      ],
      :skips => [
        { :range => 30948..30952, :h => 8}, # based on vote 326681
        { :range => 365240..365276, :h => 7},
        { :range => 365277..365281, :h => 8},
      ],
      :visitors => [
        { :range => 594751..594777, :h => 7},
        { :range => 594778..594795, :h => 8},
        { :range => 91350..91358, :h => 7},
        { :range => 91359..91366, :h => 8}
      ],
      :votes => [
        { :range => 3145774..3145926, :h => 7},
        { :range => 3145927..3145935, :h => 8},
        { :range => 326504..326571, :h => 7},
        { :range => 326572..326803, :h => 8},
      ],
    }
    # UTC because Rails will be thinking DB is in UTC when we run this
    #time_spans.map! do |t|
    #  { :gt => Time.parse("#{t[:gt]} UTC"),
    #    :lt => Time.parse("#{t[:lt]} UTC"),
    #    :h  => t[:h] }
    #end
    datetime_fields = {
      :appearances  => ['created_at', 'updated_at'],
      :choices      => ['created_at', 'updated_at'],
      :clicks       => ['created_at', 'updated_at'],
      :densities    => ['created_at', 'updated_at'],
      :flags        => ['created_at', 'updated_at'],
      :prompts      => ['created_at', 'updated_at'],
      :skips        => ['created_at', 'updated_at'],
      :votes        => ['created_at', 'updated_at'],
      :visitors     => ['created_at', 'updated_at'],
      :users        => ['created_at', 'updated_at'],
      :questions    => ['created_at', 'updated_at'],
      :question_versions => ['created_at', 'updated_at'],
      :delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'],
    }
    max_ids = {
      :appearances  => 5065164,
      :choices      => 199505,
      :clicks       => 9874,
      :densities    => 2417129,
      :flags        => 1528,
      :prompts      => 10130927,
      :skips        => 435948,
      :votes        => 3660738,
      :visitors     => 1095417,
      :users        => 9,
      :questions    => 2253,
      :question_versions => 15063,
      :delayed_jobs => 4623375,
    }

    STDOUT.sync = true
    logger = Rails.logger
    datetime_fields.each do |table, columns|
      print "#{table}"
      batch_size = 10000
      i = 0
      where = "WHERE id < #{max_ids[table]}"
      # This is how we split the rows of a table between the various workers
      # so that they don't attempt to work on the same row as another worker.
      # The workerid is any number 0 through workers - 1.
      if args[:workers] > "1"
        where += " AND MOD(id, #{args[:workers]}) = #{args[:workerid]}"
      end
      while true do
        rows = ActiveRecord::Base.connection.select_all(
          "SELECT id, #{columns.join(", ")} FROM #{table} #{where} ORDER BY id LIMIT #{i*batch_size}, #{batch_size}"
        )
        print "."

        rows.each do |row|
          updated_values = {}
          # delete any value where the value is blank (just for delayed_jobs)
          row.delete_if {|key, value| value.blank? }
          row.each do |column, value|
            next if column == "id"
            time_spans.each do |span|
              if value < span[:lt] && value > span[:gt]
                # if blank then ambiguous and we don't know how to translate
                if span[:h].blank?
                  updated_values[column] = nil
                  if unambiguator[table] && unambiguator[table].length > 0
                    unambiguator[table].each do |ids|
                      updated_values[column] = ids[:h] if ids[:range].include? row["id"].to_i
                    end
                  end

                  logger.info "AMBIGUOUS: #{table} #{row["id"]} #{column}: #{value}" if updated_values[column].blank?
                else
                  updated_values[column] = span[:h]
                end
                break
              end
            end
          end
          # Check if some columns did not match any spans
          key_diff = row.keys - updated_values.keys - ["id"]
          if key_diff.length > 0
            logger.info "MISSING SPAN: #{table} #{row["id"]} #{key_diff.inspect} #{row.inspect}"
          end
          # remove ambiguous columns (we set them to nil above)
          updated_values.delete_if {|key, value| value.blank? }
          if updated_values.length > 0
            update = "UPDATE #{table} SET #{updated_values.map{|k,v| "#{k} = DATE_ADD(#{k}, INTERVAL #{v} HOUR)"}.join(", ")} WHERE id = #{row["id"]}"
	    num = ActiveRecord::Base.connection.update_sql(update)
	    if num == 1
              logger.info "UPDATE: #{table} #{row.inspect} #{updated_values.inspect}"
	    else
              logger.info "UPDATE FAILED: #{table} #{row.inspect} #{updated_values.inspect} #{num.inspect}"
	    end
          end
        end

        i+= 1
        break if rows.length < batch_size
      end
      print "\n"
    end
  end

  desc "Finds ambiguous times due to daylight savings time"
  task :find_ambiguous_times => :environment do
    datetime_fields = {
      :appearances  => ['created_at', 'updated_at'],
      :choices      => ['created_at', 'updated_at'],
      :clicks       => ['created_at', 'updated_at'],
      :densities    => ['created_at', 'updated_at'],
      :flags        => ['created_at', 'updated_at'],
      :prompts      => ['created_at', 'updated_at'],
      :skips        => ['created_at', 'updated_at'],
      :votes        => ['created_at', 'updated_at'],
      :visitors     => ['created_at', 'updated_at'],
      :users        => ['created_at', 'updated_at'],
      :questions    => ['created_at', 'updated_at'],
      :question_versions => ['created_at', 'updated_at'],
      :delayed_jobs => ['created_at', 'updated_at', 'run_at', 'locked_at', 'failed_at'],
    }
    datetime_fields.each do |table, columns|
      where = columns.map{|c| "((#{c} > '2010-11-07 00:59:59' AND #{c} < '2010-11-07 02:00:00') OR (#{c} > '2011-11-06 00:59:59' AND #{c} < '2011-11-06 02:00:00'))"}.join(" OR ")
      rows = ActiveRecord::Base.connection.select_all(
        "SELECT id, #{columns.join(", ")} FROM #{table} WHERE #{where}"
      )
      puts rows.inspect if rows.length > 0
    end
  end

end