Commit cdc8ec4982e99b1dac8a9d6e8417871a176c527d
1 parent
c01226e1
Exists in
master
and in
1 other branch
Bradley terry values can be calculated, mapped to score
Showing
4 changed files
with
174 additions
and
0 deletions
Show diff stats
app/models/choice.rb
... | ... | @@ -82,6 +82,26 @@ class Choice < ActiveRecord::Base |
82 | 82 | self.item.creator_id != self.question.creator_id |
83 | 83 | end |
84 | 84 | |
# Maps this choice's Bradley-Terry strength to a score: the average, over
# every other choice j, of p_i / (p_i + p_j), i.e. the modelled chance of
# this choice beating j.
#
# btprobs - Hash of choice id => strength. When nil, it is computed from
#           self.question.bradley_terry_probs.
#
# Returns a Float. Returns 0.0 when btprobs holds no other choice, since
# there is nothing to average over (previously a ZeroDivisionError).
def compute_bt_score(btprobs = nil)
  btprobs = self.question.bradley_terry_probs if btprobs.nil?

  own_id = self.id
  p_i = btprobs[own_id]

  opponents = 0
  total = 0.0
  btprobs.each do |id, p_j|
    next if id == own_id

    opponents += 1
    pair_total = p_i + p_j
    # Two zero strengths are indistinguishable, so count the pairing as even
    # instead of letting 0.0 / 0.0 turn the whole average into NaN.
    # to_f keeps Integer strengths from truncating to 0 or 1.
    total += (pair_total == 0 ? 0.5 : p_i.to_f / pair_total)
  end

  return 0.0 if opponents == 0

  total / opponents
end
104 | + | |
85 | 105 | |
86 | 106 | protected |
87 | 107 | ... | ... |
app/models/question.rb
... | ... | @@ -93,6 +93,73 @@ class Question < ActiveRecord::Base |
93 | 93 | weighted.each_with_index {|item, i| weighted[i] = item/sum} |
94 | 94 | end |
95 | 95 | end |
96 | + | |
# Estimates a Bradley-Terry strength for every choice of this question by
# fixed-point iteration: each choice's strength is repeatedly replaced by
#   wins_i / sum_j( comparisons_ij / (p_i + p_j) )
# and the vector is renormalised with normalize! after every replacement.
#
# tolerance  - iteration stops once no strength moved by more than this
#              amount over one full pass across all choices.
# max_sweeps - upper bound on the number of full passes, so the method
#              always terminates.
#
# Returns a Hash of choice id => strength ({} when there are no choices).
def bradley_terry_probs(tolerance = 1.0e-9, max_sweeps = 1000)
  # What ordering key we use is unimportant, we just need a consistent way
  # to link the index of a strength to a choice id.
  the_choices = self.choices.sort_by { |c| c.id }
  count = the_choices.size
  return {} if count == 0

  # This hash is keyed by pairs of choices - 'LC.id, RC.id'
  the_prompts = prompts_hash_by_choice_ids

  # comparisons[i][j] is the number of votes cast between choices i and j in
  # either on-screen order. It is built once up front; a pair that was never
  # shown as a prompt simply contributes 0.
  comparisons = Array.new(count) { Array.new(count, 0) }
  the_choices.each_with_index do |left, i|
    the_choices.each_with_index do |right, j|
      next if i == j

      prompt = the_prompts["#{left.id}, #{right.id}"]
      next if prompt.nil?

      votes = prompt.votes.size
      comparisons[i][j] += votes
      comparisons[j][i] += votes
    end
  end

  wins = the_choices.map { |c| c.wins.to_f }

  # A uniform start keeps the result reproducible from run to run.
  probs = Array.new(count, 1.0 / count)

  max_sweeps.times do
    before_sweep = probs.dup

    count.times do |s|
      denominator = 0.0
      count.times do |j|
        next if j == s || comparisons[s][j] == 0

        denominator += comparisons[s][j] / (probs[s] + probs[j])
      end

      # A choice that was never compared carries no information; keep its
      # current strength instead of dividing by zero.
      next if denominator == 0.0

      probs[s] = wins[s] / denominator
      normalize!(probs)
    end

    # Compare whole passes with a tolerance: exact float equality after a
    # single update can stop too early or never be reached.
    largest_change = probs.zip(before_sweep).map { |now, was| (now - was).abs }.max
    break if largest_change <= tolerance
  end

  probs_hash = {}
  the_choices.each_with_index do |c, index|
    probs_hash[c.id] = probs[index]
  end
  probs_hash
end
144 | + | |
145 | + | |
# Debug helper: computes the Bradley-Terry strengths once, then prints one
# line per choice in the form "<choice id>: <number of votes> <bt score>".
# Returns the strengths hash that was iterated.
def all_bt_scores
  strengths = bradley_terry_probs
  strengths.each_key do |choice_id|
    choice = Choice.find(choice_id)
    vote_count = choice.votes.size
    score = choice.compute_bt_score(strengths)
    puts "#{choice.id}: #{vote_count} #{score}"
  end
end
154 | + | |
# Indexes this question's prompts by their pair of choices.
# Returns a Hash whose keys are strings of the form
# "<left_choice_id>, <right_choice_id>" and whose values are the prompts.
# If two prompts share a pair, the later one wins.
def prompts_hash_by_choice_ids
  self.prompts.inject({}) do |by_pair, prompt|
    by_pair["#{prompt.left_choice_id}, #{prompt.right_choice_id}"] = prompt
    by_pair
  end
end
162 | + | |
96 | 163 | |
97 | 164 | def distinct_array_of_choice_ids(rank = 2, only_active = true) |
98 | 165 | @choice_ids = choice_ids | ... | ... |
... | ... | @@ -0,0 +1,62 @@ |
require 'faster_csv'

# Loads a Bradley-Terry test fixture (choices, prompts and votes CSV files)
# into a freshly created question using multi-row INSERT statements.
#
# The CSV files use their own small ids. Every choice/prompt id read from a
# file is shifted by the highest id currently in the matching table, on the
# assumption that the rows inserted here receive consecutive ids right after
# it.

# Directory holding the fixture files; override with BT_TEST_DIR.
BASEDIR = ENV['BT_TEST_DIR'] || "/home/dhruv/CITP/bt_test/"

# Yields every data row (header row skipped) of a fixture file in BASEDIR.
def each_fixture_row(filename, &block)
  FasterCSV.foreach(File.join(BASEDIR, filename), {:headers => :first_row, :return_headers => false}, &block)
end

# Highest id currently stored for the model, or 0 when its table is empty.
def last_id_or_zero(model)
  newest = model.last
  newest ? newest.id : 0
end

# Runs one multi-row INSERT. Skipped when there are no rows, because
# "VALUES" followed by nothing is not valid SQL.
def bulk_insert(table, columns, rows)
  return if rows.empty?

  column_list = columns.map { |name| "`#{name}`" }.join(', ')
  ActiveRecord::Base.connection.execute("INSERT INTO `#{table}` (#{column_list}) VALUES #{rows.join(', ')}")
end

q = Question.new(:name => "test for bt", :creator_id => 1, :site_id => 1)
# save! so a validation failure stops the import instead of writing rows
# with an empty question id.
q.save!

choice_offset = last_id_or_zero(Choice)
timestring = Time.now.to_s(:db)

# Choices: one row per CSV line, item_id set to the shifted CSV id.
inserts = []
each_fixture_row("choices_7000.txt") do |choice|
  id = choice[0].to_i + choice_offset
  wins = choice[1].to_i
  inserts.push("(#{q.id}, #{id}, #{wins}, '#{timestring}', '#{timestring}')")
end
totalchoices = inserts.size
bulk_insert('choices', %w(question_id item_id votes_count created_at updated_at), inserts)

# Prompts: both choice references are shifted by choice_offset.
inserts = []
prompt_offset = last_id_or_zero(Prompt)
each_fixture_row("prompts_7000.txt") do |prompt|
  left_choice_id = prompt[1].to_i + choice_offset
  right_choice_id = prompt[2].to_i + choice_offset
  votes_count = prompt[3].to_i

  inserts.push("(NULL, #{q.id}, NULL, #{left_choice_id}, '#{timestring}', '#{timestring}', NULL, #{votes_count}, #{right_choice_id}, NULL, NULL)")
end
totalprompts = inserts.size
bulk_insert('prompts', %w(algorithm_id question_id voter_id left_choice_id created_at updated_at tracking votes_count right_choice_id active randomkey), inserts)

# Votes: prompt and choice references are shifted by their offsets.
inserts = []
each_fixture_row("votes_7000.txt") do |vote|
  prompt_id = vote[1].to_i + prompt_offset
  choice_id = vote[2].to_i + choice_offset
  # The losing choice is a choice reference too, so it needs the same shift
  # as choice_id; it was previously written with the raw CSV id.
  loser_choice_id = vote[3].to_i + choice_offset

  inserts.push("(#{prompt_id}, #{q.id}, #{choice_id}, #{loser_choice_id}, '#{timestring}', '#{timestring}')")
end
totalvotes = inserts.size
bulk_insert('votes', %w(prompt_id question_id choice_id loser_choice_id created_at updated_at), inserts)

# The rows were inserted behind ActiveRecord's back, so the counter columns
# on the question are set by hand.
sql = "UPDATE questions SET votes_count=#{totalvotes}, prompts_count=#{totalprompts}, choices_count=#{totalchoices} WHERE id=#{q.id}"

ActiveRecord::Base.connection.execute(sql)
... | ... | @@ -0,0 +1,25 @@ |
require 'fastercsv'

# Prints the pairwise vote matrix of one question, one comma-separated row
# per choice. The cell at (row L, column R) is the number of votes on the
# prompt that shows L on the left and R on the right; pairs with no prompt,
# and the diagonal, are printed as 0. Rows and columns are both in
# ascending numeric choice-id order.
question_id = 109
q = Question.find(question_id)

the_prompts = q.prompts_hash_by_choice_ids

# votes_by_pair[left_id][right_id] => votes on that prompt
votes_by_pair = Hash.new { |hash, left_id| hash[left_id] = {} }
choice_ids = []
the_prompts.each do |key, p|
  # Keys look like "12, 34"; convert to integers so that sorting is numeric
  # ("10" would otherwise sort before "9").
  left_id, right_id = key.split(", ").map { |part| part.to_i }
  votes_by_pair[left_id][right_id] = p.votes.size
  choice_ids << left_id << right_id
end
choice_ids = choice_ids.uniq.sort

choice_ids.each do |left_id|
  # fetch with an explicit default so a choice that only ever appeared on
  # the right still gets a (zero-filled) row without touching the hash.
  row = votes_by_pair.fetch(left_id, {})
  # Every row walks the same id list, so columns stay aligned even when a
  # pair has no prompt.
  rowarray = choice_ids.map { |right_id| row[right_id] || 0 }
  puts rowarray.join(", ")
end