Commit cdc8ec4982e99b1dac8a9d6e8417871a176c527d
1 parent
c01226e1
Exists in
master
and in
1 other branch
Bradley terry values can be calculated, mapped to score
Showing
4 changed files
with
174 additions
and
0 deletions
Show diff stats
app/models/choice.rb
| @@ -82,6 +82,26 @@ class Choice < ActiveRecord::Base | @@ -82,6 +82,26 @@ class Choice < ActiveRecord::Base | ||
| 82 | self.item.creator_id != self.question.creator_id | 82 | self.item.creator_id != self.question.creator_id |
| 83 | end | 83 | end |
| 84 | 84 | ||
# Maps this choice's Bradley-Terry strength to a score: the average, over every
# other choice j in btprobs, of p_i / (p_i + p_j), i.e. the modelled probability
# that this choice beats j.
#
# btprobs - Hash of choice id => Bradley-Terry strength. When nil it is
#           computed via self.question.bradley_terry_probs. It is expected to
#           contain an entry for this choice (self.id).
#
# Returns a Float. Returns 0.5 ("even odds") when there is no other choice to
# compare against, instead of dividing by zero.
def compute_bt_score(btprobs = nil)
  btprobs ||= self.question.bradley_terry_probs

  # Averaging over zero opponents is undefined; report even odds.
  return 0.5 if btprobs.size < 2

  p_i = btprobs[self.id]

  total = 0.0
  btprobs.each do |id, p_j|
    next if id == self.id

    combined = p_i + p_j
    # Two choices that both have zero strength are indistinguishable; treat the
    # pairing as a coin flip rather than letting 0.0 / 0.0 turn the score into NaN.
    total += combined.zero? ? 0.5 : (p_i / combined)
  end

  total / (btprobs.size - 1)
end
| 104 | + | ||
| 85 | 105 | ||
| 86 | protected | 106 | protected |
| 87 | 107 |
app/models/question.rb
| @@ -93,6 +93,73 @@ class Question < ActiveRecord::Base | @@ -93,6 +93,73 @@ class Question < ActiveRecord::Base | ||
| 93 | weighted.each_with_index {|item, i| weighted[i] = item/sum} | 93 | weighted.each_with_index {|item, i| weighted[i] = item/sum} |
| 94 | end | 94 | end |
| 95 | end | 95 | end |
| 96 | + | ||
# Estimates a Bradley-Terry strength for every choice of this question.
#
# Each choice's strength is repeatedly replaced by
#   wins / sum_over_opponents( comparisons / (p_self + p_opponent) )
# and the vector is renormalized after every single update. Sweeps over all
# choices continue until no strength moved by more than `tolerance` during a
# full sweep, or until `max_sweeps` sweeps have run.
#
# tolerance  - largest per-choice change still considered "converged".
# max_sweeps - upper bound on full passes, so that data for which the
#              estimate never settles cannot hang the caller.
#
# Relies on normalize!(array) rescaling the array in place (its return value is
# ignored) and on prompts_hash_by_choice_ids; both are defined elsewhere in
# this class.
#
# Returns a Hash of choice id => strength; empty when the question has no choices.
def bradley_terry_probs(tolerance = 1.0e-9, max_sweeps = 1000)
  # What ordering key we use is unimportant, just need a consistent way to link index of prob to id
  the_choices = self.choices.sort { |x, y| x.id <=> y.id }
  return {} if the_choices.empty?

  index_of = {}
  the_choices.each_with_index { |c, index| index_of[c.id] = index }

  # opponents[i] maps opponent index => number of votes on prompts pairing the
  # two choices (both left/right orientations added together). Counting once up
  # front keeps vote lookups out of the iteration, and pairs that have no prompt
  # or no votes simply contribute nothing to the denominator.
  opponents = Array.new(the_choices.size) { Hash.new(0) }
  prompts_hash_by_choice_ids.each_value do |prompt|
    left = index_of[prompt.left_choice_id]
    right = index_of[prompt.right_choice_id]
    next if left.nil? || right.nil? || left == right

    contests = prompt.votes.size
    next if contests.zero?

    opponents[left][right] += contests
    opponents[right][left] += contests
  end

  wins = the_choices.map { |c| c.wins.to_f }

  # A uniform starting point keeps the result reproducible between calls.
  probs = Array.new(the_choices.size, 1.0 / the_choices.size)

  max_sweeps.times do
    prev_probs = probs.dup

    the_choices.each_index do |s|
      denominator = 0.0
      opponents[s].each do |index, contests|
        pair_strength = probs[s] + probs[index]
        # Both strengths already collapsed to zero: nothing meaningful to add.
        next if pair_strength.zero?

        denominator += contests / pair_strength
      end

      # A choice without any usable comparison cannot be estimated; keep its
      # current value instead of producing NaN/Infinity.
      next if denominator.zero?

      probs[s] = wins[s] / denominator
      normalize!(probs)
    end

    # Judge convergence on a whole sweep, with a tolerance: exact float equality
    # after a single update can stop too early or never be reached at all.
    largest_change = probs.zip(prev_probs).map { |now, before| (now - before).abs }.max
    break if largest_change < tolerance
  end

  probs_hash = {}
  the_choices.each_with_index { |c, index| probs_hash[c.id] = probs[index] }
  probs_hash
end
| 144 | + | ||
| 145 | + | ||
# Debug helper: computes the Bradley-Terry strengths once, then prints one line
# per choice in the form "<id>: <vote count> <bt score>".
# Returns the strengths hash it iterated over.
def all_bt_scores
  strengths = bradley_terry_probs
  strengths.each_key do |choice_id|
    choice = Choice.find(choice_id)
    puts "#{choice.id}: #{choice.votes.size} #{choice.compute_bt_score(strengths)}"
  end
end
| 154 | + | ||
# Indexes this question's prompts by their ordered choice pair.
# Keys are strings of the form "<left_choice_id>, <right_choice_id>"; when
# several prompts share a pair, the last one iterated wins.
def prompts_hash_by_choice_ids
  self.prompts.inject({}) do |by_pair, prompt|
    pair_key = "#{prompt.left_choice_id}, #{prompt.right_choice_id}"
    by_pair[pair_key] = prompt
    by_pair
  end
end
| 162 | + | ||
| 96 | 163 | ||
| 97 | def distinct_array_of_choice_ids(rank = 2, only_active = true) | 164 | def distinct_array_of_choice_ids(rank = 2, only_active = true) |
| 98 | @choice_ids = choice_ids | 165 | @choice_ids = choice_ids |
| @@ -0,0 +1,62 @@ | @@ -0,0 +1,62 @@ | ||
| 1 | +require 'faster_csv' | ||
| 2 | + | ||
# Loads a Bradley-Terry test fixture (choices, prompts and votes stored as CSV
# files under BASEDIR) into a newly created question, using one bulk INSERT per
# table. The application's models (Question, Choice, Prompt) must already be
# loaded when this runs.
#
# Ids inside the CSV files are relative. Rows are inserted without an explicit
# primary key, so the script relies on the database handing out consecutive
# ids right after the current last row: a CSV id n is assumed to end up as
# (offset + n).
BASEDIR = "/home/dhruv/CITP/bt_test/"

q = Question.new(:name => "test for bt", :creator_id => 1, :site_id => 1)
q.save()

choice_offset = Choice.last.id

inserts = []

timestring = Time.now.to_s(:db) #isn't rails awesome?
totalchoices=0
FasterCSV.foreach(BASEDIR + "choices_7000.txt", {:headers => :first_row, :return_headers => false}) do |choice|
  # for each choice, create an insert with unique id
  id = choice[0].to_i + choice_offset
  wins = choice[1].to_i
  inserts.push("(#{q.id}, #{id}, #{wins}, '#{timestring}', '#{timestring}')")
  totalchoices+=1
end

sql = "INSERT INTO `choices` (`question_id`, `item_id`, `votes_count`, `created_at`, `updated_at`) VALUES #{inserts.join(', ')}"

ActiveRecord::Base.connection.execute(sql)

inserts = []
prompt_offset = Prompt.last.id
totalprompts = 0
FasterCSV.foreach(BASEDIR + "prompts_7000.txt", {:headers => :first_row, :return_headers => false}) do |prompt|
  # prompt[0] (the CSV's own prompt id) is not inserted; see the header note.
  left_choice_id = prompt[1].to_i + choice_offset
  right_choice_id = prompt[2].to_i + choice_offset
  votes_count = prompt[3].to_i

  inserts.push("(NULL, #{q.id}, NULL, #{left_choice_id}, '#{timestring}', '#{timestring}', NULL, #{votes_count}, #{right_choice_id}, NULL, NULL)")
  totalprompts +=1
end

sql = "INSERT INTO `prompts` (`algorithm_id`, `question_id`, `voter_id`, `left_choice_id`, `created_at`, `updated_at`, `tracking`, `votes_count`, `right_choice_id`, `active`, `randomkey`) VALUES #{inserts.join(', ')}"

ActiveRecord::Base.connection.execute(sql)

inserts = []
totalvotes=0
FasterCSV.foreach(BASEDIR + "votes_7000.txt", {:headers => :first_row, :return_headers => false}) do |vote|
  prompt_id = vote[1].to_i + prompt_offset
  choice_id = vote[2].to_i + choice_offset
  # The losing choice is shifted by the same offset as the winning one, so both
  # columns point at the choices inserted above.
  loser_choice_id = vote[3].to_i + choice_offset

  inserts.push("(#{prompt_id}, #{q.id}, #{choice_id}, #{loser_choice_id}, '#{timestring}', '#{timestring}')")
  totalvotes +=1
end

sql = "INSERT INTO `votes` (`prompt_id`, `question_id`, `choice_id`, `loser_choice_id`, `created_at`, `updated_at`) VALUES #{inserts.join(', ')}"

ActiveRecord::Base.connection.execute(sql)

# The bulk inserts bypass ActiveRecord, so the question's counters are set by hand.
sql = "UPDATE questions SET votes_count=#{totalvotes}, prompts_count=#{totalprompts}, choices_count=#{totalchoices} WHERE id=#{q.id}"

ActiveRecord::Base.connection.execute(sql)
| @@ -0,0 +1,25 @@ | @@ -0,0 +1,25 @@ | ||
| 1 | +require 'fastercsv' | ||
| 2 | + | ||
# Prints the pairwise vote matrix of question 109 as comma-separated rows.
# Row = left choice, column = right choice, cell = number of votes recorded on
# the prompt for that ordered pair (0 when there is no such prompt, which
# includes the diagonal). Rows and columns are both ordered by numeric choice id.
q = Question.find(109)

the_prompts = q.prompts_hash_by_choice_ids

# votes_by_pair[[left_id, right_id]] => vote count of that prompt
votes_by_pair = {}
choice_ids = []
the_prompts.each do |key, p|
  # Keys look like "<left id>, <right id>"; convert to integers so that sorting
  # is numeric (as strings, "10" would come before "9").
  left_id, right_id = key.split(", ").map { |part| part.to_i }
  votes_by_pair[[left_id, right_id]] = p.votes.size
  choice_ids << left_id << right_id
end
choice_ids = choice_ids.uniq.sort

# Every id gets a full row, so the output stays square even when some pairs
# have no prompt or a choice only ever appears on the right-hand side.
choice_ids.each do |left_id|
  rowarray = choice_ids.map { |right_id| votes_by_pair[[left_id, right_id]] || 0 }
  puts rowarray.join(", ")
end