Commit cdc8ec4982e99b1dac8a9d6e8417871a176c527d

Authored by Dhruv Kapadia
1 parent c01226e1

Bradley terry values can be calculated, mapped to score

app/models/choice.rb
... ... @@ -82,6 +82,26 @@ class Choice < ActiveRecord::Base
82 82 self.item.creator_id != self.question.creator_id
83 83 end
84 84  
  85 + def compute_bt_score(btprobs = nil)
  86 + if btprobs.nil?
  87 + btprobs = self.question.bradley_terry_probs
  88 + end
  89 +
  90 + p_i = btprobs[self.id]
  91 +
  92 + total = 0
  93 + btprobs.each do |id, p_j|
  94 + if id == self.id
  95 + next
  96 + end
  97 +
  98 + total += (p_i / (p_i + p_j))
  99 + end
  100 +
  101 + total / (btprobs.size-1)
  102 +
  103 + end
  104 +
85 105  
86 106 protected
87 107  
... ...
app/models/question.rb
... ... @@ -93,6 +93,73 @@ class Question < ActiveRecord::Base
93 93 weighted.each_with_index {|item, i| weighted[i] = item/sum}
94 94 end
95 95 end
  96 +
  97 + def bradley_terry_probs
  98 + probs = []
  99 + prev_probs = []
  100 +
  101 + # What ordering key we use is unimportant, just need a consistent way to link index of prob to id
  102 + the_choices = self.choices.sort{|x,y| x.id<=>y.id}
  103 +
  104 + # This hash is keyed by pairs of choices - 'LC.id, RC.id'
  105 + the_prompts = prompts_hash_by_choice_ids
  106 +
  107 + # Initial probabilities chosen at random
  108 + the_choices.size.times do
  109 + probs << rand
  110 + prev_probs << rand
  111 + end
  112 +
  113 + t=0
  114 + probs_size = probs.size
  115 +
  116 + # probably want to add a fuzz here to account for floating rounding
  117 + until probs == prev_probs do
  118 + s = t % probs_size
  119 + prev_probs = probs.dup
  120 + choice = the_choices[s]
  121 +
  122 + numerator = choice.wins.to_f
  123 +
  124 + denominator = 0.0
  125 + the_choices.each_with_index do |c, index|
  126 + if(index == s)
  127 + next
  128 + end
  129 +
  130 + wins_and_losses = the_prompts["#{choice.id}, #{c.id}"].votes.size + the_prompts["#{c.id}, #{choice.id}"].votes.size
  131 + denominator+= (wins_and_losses).to_f / (prev_probs[s] + prev_probs[index])
  132 + end
  133 + probs[s] = numerator / denominator
  134 + normalize!(probs)
  135 + t+=1
  136 + end
  137 +
  138 + probs_hash = {}
  139 + probs.each_with_index do |item, index|
  140 + probs_hash[the_choices[index].id] = item
  141 + end
  142 + probs_hash
  143 + end
  144 +
  145 +
  146 + def all_bt_scores
  147 + btprobs = bradley_terry_probs
  148 + btprobs.each do |key, value|
  149 + c = Choice.find(key)
  150 + puts "#{c.id}: #{c.votes.size} #{c.compute_bt_score(btprobs)}"
  151 + end
  152 +
  153 + end
  154 +
  155 + def prompts_hash_by_choice_ids
  156 + the_prompts = {}
  157 + self.prompts.each do |p|
  158 + the_prompts["#{p.left_choice_id}, #{p.right_choice_id}"] = p
  159 + end
  160 + the_prompts
  161 + end
  162 +
96 163  
97 164 def distinct_array_of_choice_ids(rank = 2, only_active = true)
98 165 @choice_ids = choice_ids
... ...
lib/tasks/import_bt_test_data.rb 0 → 100644
... ... @@ -0,0 +1,62 @@
  1 +require 'faster_csv'
  2 +
  3 +BASEDIR = "/home/dhruv/CITP/bt_test/"
  4 +
  5 +q = Question.new(:name => "test for bt", :creator_id => 1, :site_id => 1)
  6 +q.save()
  7 +
  8 +choice_offset = Choice.last.id
  9 +
  10 +inserts = []
  11 +
  12 +timestring = Time.now.to_s(:db) #isn't rails awesome?
  13 +totalchoices=0
  14 +FasterCSV.foreach(BASEDIR + "choices_7000.txt", {:headers => :first_row, :return_headers => false}) do |choice|
  15 +# for each choice, create an insert with unique id
  16 + id = choice[0].to_i + choice_offset
  17 + wins = choice[1].to_i
  18 + inserts.push("(#{q.id}, #{id}, #{wins}, '#{timestring}', '#{timestring}')")
  19 + totalchoices+=1
  20 +end
  21 +
  22 +sql = "INSERT INTO `choices` (`question_id`, `item_id`, `votes_count`, `created_at`, `updated_at`) VALUES #{inserts.join(', ')}"
  23 +
  24 +ActiveRecord::Base.connection.execute(sql)
  25 +
  26 +inserts = []
  27 +prompt_offset = Prompt.last.id
  28 +totalprompts = 0
  29 +FasterCSV.foreach(BASEDIR + "prompts_7000.txt", {:headers => :first_row, :return_headers => false}) do |prompt|
  30 + id = prompt[0].to_i + prompt_offset
  31 + left_choice_id = prompt[1].to_i + choice_offset
  32 + right_choice_id = prompt[2].to_i + choice_offset
  33 + votes_count = prompt[3].to_i
  34 +
  35 + inserts.push("(NULL, #{q.id}, NULL, #{left_choice_id}, '#{timestring}', '#{timestring}', NULL, #{votes_count}, #{right_choice_id}, NULL, NULL)")
  36 + totalprompts +=1
  37 +end
  38 +
  39 +sql = "INSERT INTO `prompts` (`algorithm_id`, `question_id`, `voter_id`, `left_choice_id`, `created_at`, `updated_at`, `tracking`, `votes_count`, `right_choice_id`, `active`, `randomkey`) VALUES #{inserts.join(', ')}"
  40 +
  41 +ActiveRecord::Base.connection.execute(sql)
  42 +
  43 +inserts = []
  44 +vote_offset = Vote.last.id
  45 +totalvotes=0
  46 +FasterCSV.foreach(BASEDIR + "votes_7000.txt", {:headers => :first_row, :return_headers => false}) do |vote|
  47 + id = vote[0].to_i + vote_offset
  48 + prompt_id = vote[1].to_i + prompt_offset
  49 + choice_id = vote[2].to_i + choice_offset
  50 + loser_choice_id = vote[3].to_i
  51 +
  52 + inserts.push("(#{prompt_id}, #{q.id}, #{choice_id}, #{loser_choice_id}, '#{timestring}', '#{timestring}')")
  53 + totalvotes +=1
  54 +end
  55 +
  56 +sql = "INSERT INTO `votes` (`prompt_id`, `question_id`, `choice_id`, `loser_choice_id`, `created_at`, `updated_at`) VALUES #{inserts.join(', ')}"
  57 +
  58 +ActiveRecord::Base.connection.execute(sql)
  59 +
  60 +sql = "UPDATE questions SET votes_count=#{totalvotes}, prompts_count=#{totalprompts}, choices_count=#{totalchoices} WHERE id=#{q.id}"
  61 +
  62 +ActiveRecord::Base.connection.execute(sql)
... ...
lib/tasks/makecsvfromhash.rb 0 → 100644
... ... @@ -0,0 +1,25 @@
  1 +require 'fastercsv'
  2 +
  3 +q = Question.find(109)
  4 +
  5 +the_prompts = q.prompts_hash_by_choice_ids
  6 +
  7 +#hash_of_choice_ids_from_left_to_right_to_votes
  8 +the_hash = {}
  9 +the_prompts.each do |key, p|
  10 + left_id, right_id = key.split(", ")
  11 + if not the_hash.has_key?(left_id)
  12 + the_hash[left_id] = {}
  13 + the_hash[left_id][left_id] = 0
  14 + end
  15 +
  16 + the_hash[left_id][right_id] = p.votes.size
  17 +end
  18 +
  19 +the_hash.sort.each do |xval, row|
  20 + rowarray = []
  21 + row.sort.each do |yval, cell|
  22 + rowarray << cell
  23 + end
  24 + puts rowarray.join(", ")
  25 +end
... ...