Commit cdc8ec4982e99b1dac8a9d6e8417871a176c527d
1 parent
c01226e1
Exists in
master
and in
1 other branch
Bradley-Terry values can be calculated and mapped to a score
Showing
4 changed files
with
174 additions
and
0 deletions
Show diff stats
app/models/choice.rb
@@ -82,6 +82,26 @@ class Choice < ActiveRecord::Base | @@ -82,6 +82,26 @@ class Choice < ActiveRecord::Base | ||
82 | self.item.creator_id != self.question.creator_id | 82 | self.item.creator_id != self.question.creator_id |
83 | end | 83 | end |
84 | 84 | ||
# Maps this choice's Bradley-Terry strength to a score: the mean, over every
# other choice j present in btprobs, of p_i / (p_i + p_j).
#
# btprobs - Hash of choice id => strength. When nil it is fetched from
#           question.bradley_terry_probs.
#
# Returns a Float, or nil when btprobs contains no other choice to compare
# against (that case used to evaluate 0 / 0 and raise ZeroDivisionError).
def compute_bt_score(btprobs = nil)
  btprobs ||= self.question.bradley_terry_probs

  own_id = self.id
  p_i = btprobs[own_id]

  rivals = 0
  total = 0.0
  btprobs.each do |choice_id, p_j|
    next if choice_id == own_id

    pair_strength = (p_i + p_j).to_f
    # Two zero strengths would give 0.0 / 0.0 = NaN and poison the whole
    # average; equal strengths are scored as an even match instead.
    total += pair_strength.zero? ? 0.5 : p_i / pair_strength
    rivals += 1
  end

  return nil if rivals.zero?

  total / rivals
end
104 | + | ||
85 | 105 | ||
86 | protected | 106 | protected |
87 | 107 |
app/models/question.rb
@@ -93,6 +93,73 @@ class Question < ActiveRecord::Base | @@ -93,6 +93,73 @@ class Question < ActiveRecord::Base | ||
93 | weighted.each_with_index {|item, i| weighted[i] = item/sum} | 93 | weighted.each_with_index {|item, i| weighted[i] = item/sum} |
94 | end | 94 | end |
95 | end | 95 | end |
96 | + | ||
# Estimates a Bradley-Terry strength for every choice of this question.
#
# Each choice i is updated in turn with
#   p_i = wins_i / sum over j != i of ( n_ij / (p_i + p_j) )
# where n_ij is the number of votes on the prompts (i, j) and (j, i), and the
# strengths are renormalized with normalize! after every update.
#
# tolerance  - iteration stops after a full pass over the choices in which no
#              strength moved by more than this amount.
# max_sweeps - hard limit on the number of passes, so data that never settles
#              (e.g. values that became NaN) cannot loop forever.
#
# Returns a Hash of choice id => strength (empty when there are no choices).
def bradley_terry_probs(tolerance = 1.0e-9, max_sweeps = 1000)
  # What ordering key we use is unimportant, we just need a consistent way
  # to link the index of a strength to a choice id.
  the_choices = choices.sort_by { |choice| choice.id }
  return {} if the_choices.empty?

  # Vote counts per ordered pair, keyed 'left_id, right_id'. They do not
  # change while iterating, so they are read once instead of on every update.
  pair_votes = {}
  prompts_hash_by_choice_ids.each do |key, prompt|
    pair_votes[key] = prompt.votes.size
  end

  wins = the_choices.map { |choice| choice.wins.to_f }

  # A uniform start keeps the result reproducible from run to run.
  probs = Array.new(the_choices.size, 1.0 / the_choices.size)

  max_sweeps.times do
    before_sweep = probs.dup

    the_choices.each_with_index do |choice, s|
      denominator = 0.0
      the_choices.each_with_index do |other, index|
        next if index == s

        # A pair with no prompt in either direction counts as zero votes.
        comparisons = pair_votes.fetch("#{choice.id}, #{other.id}", 0) +
                      pair_votes.fetch("#{other.id}, #{choice.id}", 0)
        next if comparisons.zero?

        denominator += comparisons.to_f / (probs[s] + probs[index])
      end

      # A choice that was never compared carries no information; keep its
      # current strength rather than dividing by zero.
      next if denominator.zero?

      probs[s] = wins[s] / denominator
      # normalize! is defined elsewhere in this model; it is expected to
      # rescale the array in place, as the original code relied on.
      normalize!(probs)
    end

    settled = probs.zip(before_sweep).all? { |now, was| (now - was).abs <= tolerance }
    break if settled
  end

  probs_hash = {}
  the_choices.each_with_index do |choice, index|
    probs_hash[choice.id] = probs[index]
  end
  probs_hash
end
144 | + | ||
145 | + | ||
# Prints one line per choice to stdout in the form
# "<choice id>: <number of votes> <Bradley-Terry score>".
#
# Returns the Hash produced by bradley_terry_probs.
def all_bt_scores
  btprobs = bradley_terry_probs

  # The ids in btprobs come from this question's own choices, so index that
  # collection once instead of issuing a separate Choice.find for every id.
  choices_by_id = {}
  choices.each { |choice| choices_by_id[choice.id] = choice }

  btprobs.each_key do |choice_id|
    choice = choices_by_id[choice_id]
    puts "#{choice.id}: #{choice.votes.size} #{choice.compute_bt_score(btprobs)}"
  end
end
154 | + | ||
# Indexes this question's prompts by their pair of choice ids.
#
# Returns a Hash whose keys are Strings of the form
# "<left_choice_id>, <right_choice_id>" and whose values are the prompts.
# When several prompts share a pair, the last one iterated is kept.
def prompts_hash_by_choice_ids
  prompts.inject({}) do |by_pair, prompt|
    by_pair["#{prompt.left_choice_id}, #{prompt.right_choice_id}"] = prompt
    by_pair
  end
end
162 | + | ||
96 | 163 | ||
97 | def distinct_array_of_choice_ids(rank = 2, only_active = true) | 164 | def distinct_array_of_choice_ids(rank = 2, only_active = true) |
98 | @choice_ids = choice_ids | 165 | @choice_ids = choice_ids |
@@ -0,0 +1,62 @@ | @@ -0,0 +1,62 @@ | ||
1 | +require 'faster_csv' | ||
2 | + | ||
# Seed script: creates a scratch question and bulk-loads choices, prompts and
# votes for it from the CSV files under BASEDIR.
BASEDIR = "/home/dhruv/CITP/bt_test/"

csv_options = {:headers => :first_row, :return_headers => false}

q = Question.new(:name => "test for bt", :creator_id => 1, :site_id => 1)
# save! stops the script on a failed save; continuing would interpolate a nil
# question id into every statement below.
q.save!

# The INSERTs below do not set the id column, so ids found in the CSV files
# are shifted by the highest id currently in each table.
# NOTE(review): this assumes the database hands out ids directly after that
# highest id and that the CSV ids run 1..n in file order - confirm.
last_choice = Choice.last
choice_offset = last_choice ? last_choice.id : 0
last_prompt = Prompt.last
prompt_offset = last_prompt ? last_prompt.id : 0

timestring = Time.now.to_s(:db)

# --- choices: CSV columns are (id, wins) ---
choice_rows = []
FasterCSV.foreach(File.join(BASEDIR, "choices_7000.txt"), csv_options) do |choice|
  item_id = choice[0].to_i + choice_offset
  wins = choice[1].to_i
  choice_rows.push("(#{q.id}, #{item_id}, #{wins}, '#{timestring}', '#{timestring}')")
end
totalchoices = choice_rows.size

# An empty file would leave "VALUES" with nothing after it, which is not valid SQL.
unless choice_rows.empty?
  sql = "INSERT INTO `choices` (`question_id`, `item_id`, `votes_count`, `created_at`, `updated_at`) VALUES #{choice_rows.join(', ')}"
  ActiveRecord::Base.connection.execute(sql)
end

# --- prompts: CSV columns are (id, left choice id, right choice id, votes count) ---
prompt_rows = []
FasterCSV.foreach(File.join(BASEDIR, "prompts_7000.txt"), csv_options) do |prompt|
  left_choice_id = prompt[1].to_i + choice_offset
  right_choice_id = prompt[2].to_i + choice_offset
  votes_count = prompt[3].to_i

  prompt_rows.push("(NULL, #{q.id}, NULL, #{left_choice_id}, '#{timestring}', '#{timestring}', NULL, #{votes_count}, #{right_choice_id}, NULL, NULL)")
end
totalprompts = prompt_rows.size

unless prompt_rows.empty?
  sql = "INSERT INTO `prompts` (`algorithm_id`, `question_id`, `voter_id`, `left_choice_id`, `created_at`, `updated_at`, `tracking`, `votes_count`, `right_choice_id`, `active`, `randomkey`) VALUES #{prompt_rows.join(', ')}"
  ActiveRecord::Base.connection.execute(sql)
end

# --- votes: CSV columns are (id, prompt id, winning choice id, losing choice id) ---
vote_rows = []
FasterCSV.foreach(File.join(BASEDIR, "votes_7000.txt"), csv_options) do |vote|
  prompt_id = vote[1].to_i + prompt_offset
  choice_id = vote[2].to_i + choice_offset
  # The loser is a choice id from the same file as the winner, so it needs the
  # same shift; it was previously stored unshifted.
  loser_choice_id = vote[3].to_i + choice_offset

  vote_rows.push("(#{prompt_id}, #{q.id}, #{choice_id}, #{loser_choice_id}, '#{timestring}', '#{timestring}')")
end
totalvotes = vote_rows.size

unless vote_rows.empty?
  sql = "INSERT INTO `votes` (`prompt_id`, `question_id`, `choice_id`, `loser_choice_id`, `created_at`, `updated_at`) VALUES #{vote_rows.join(', ')}"
  ActiveRecord::Base.connection.execute(sql)
end

# The bulk INSERTs bypass ActiveRecord, so the question's counters are set by hand.
sql = "UPDATE questions SET votes_count=#{totalvotes}, prompts_count=#{totalprompts}, choices_count=#{totalchoices} WHERE id=#{q.id}"

ActiveRecord::Base.connection.execute(sql)
@@ -0,0 +1,25 @@ | @@ -0,0 +1,25 @@ | ||
1 | +require 'fastercsv' | ||
2 | + | ||
# Prints, for question 109, a comma-separated matrix of vote counts: one
# output line per left choice id, with the cells ordered by right choice id.
q = Question.find(109)

the_prompts = q.prompts_hash_by_choice_ids

# left choice id => { right choice id => number of votes on that prompt }
the_hash = {}
the_prompts.each do |key, prompt|
  # Keys look like "12, 34". Convert the ids to integers so the sorts below
  # are numeric; as strings, "10" would be ordered before "2".
  left_id, right_id = key.split(", ").map { |part| part.to_i }

  # The first time a left id is seen, seed its row with a zero on the diagonal.
  the_hash[left_id] ||= { left_id => 0 }
  the_hash[left_id][right_id] = prompt.votes.size
end

the_hash.sort.each do |_left_id, row|
  cells = row.sort.map { |_right_id, cell| cell }
  puts cells.join(", ")
end