# --- app/models/choice.rb: added to class Choice, just above its `protected` section ---

# Average Bradley-Terry probability that this choice beats each of the other
# choices listed in btprobs.
#
# btprobs - Hash of choice id => strength, in the shape returned by
#           Question#bradley_terry_probs. When omitted (or nil) it is computed
#           from self.question, so callers scoring many choices should compute
#           it once and pass it in.
#
# Returns a Float. Returns 0.0 when btprobs holds no other choice.
# Raises ArgumentError when btprobs has no entry for this choice.
def compute_bt_score(btprobs = nil)
  btprobs ||= self.question.bradley_terry_probs

  p_i = btprobs[self.id]
  raise ArgumentError, "no Bradley-Terry probability for choice #{self.id}" if p_i.nil?

  opponents = btprobs.size - 1
  # A lone choice has nobody to beat; dividing by (size - 1) would raise
  # ZeroDivisionError.
  return 0.0 if opponents < 1

  total = 0.0
  btprobs.each do |other_id, p_j|
    next if other_id == self.id

    pair_strength = p_i + p_j
    # Two zero-strength choices cannot be told apart: count the pairing as a
    # coin flip instead of letting 0.0 / 0.0 poison the average with NaN.
    total += pair_strength.zero? ? 0.5 : p_i.to_f / pair_strength
  end

  total / opponents
end

# --- app/models/question.rb: added to class Question ---

# Estimates a Bradley-Terry strength for every choice of this question by
# repeatedly re-solving each choice's strength from its win count and the
# number of votes on the prompts that pair it with every other choice.
#
# tolerance  - stop once no strength moved by more than this during a full
#              pass over all choices (exact Float equality may never be hit).
# max_sweeps - hard cap on the number of passes, so the method always returns.
#
# Relies on normalize!, which is defined outside the code shown here and is
# assumed to rescale the array in place (the return value is ignored).
#
# Returns a Hash of choice id => strength; {} when there are no choices.
def bradley_terry_probs(tolerance = 1e-9, max_sweeps = 1_000)
  # Sorting by id only provides a stable index <-> choice mapping; the order
  # itself is unimportant.
  the_choices = self.choices.sort_by { |c| c.id }
  n = the_choices.size
  return {} if n.zero?

  # Keyed by 'left_choice_id, right_choice_id'.
  the_prompts = prompts_hash_by_choice_ids
  votes_on = lambda do |left_id, right_id|
    prompt = the_prompts["#{left_id}, #{right_id}"]
    # A pair may exist in only one left/right orientation (or not at all).
    prompt ? prompt.votes.size : 0
  end

  # Read wins and pairwise vote counts once: they do not change while iterating.
  wins = the_choices.map { |c| c.wins.to_f }
  contests = the_choices.map do |a|
    the_choices.map do |b|
      a.id == b.id ? 0.0 : (votes_on.call(a.id, b.id) + votes_on.call(b.id, a.id)).to_f
    end
  end

  # Deterministic uniform starting point.
  probs = Array.new(n, 1.0 / n)

  max_sweeps.times do
    before_sweep = probs.dup

    n.times do |s|
      denominator = 0.0
      n.times do |j|
        next if j == s || contests[s][j].zero?

        denominator += contests[s][j] / (probs[s] + probs[j])
      end
      # A choice that was never compared keeps its current strength instead of
      # becoming NaN or Infinity.
      next if denominator.zero?

      probs[s] = wins[s] / denominator
      normalize!(probs)
    end

    # Judge convergence over a whole pass; a single coordinate standing still
    # says nothing about the others.
    drift = probs.zip(before_sweep).map { |now, was| (now - was).abs }.max
    break if drift <= tolerance
  end

  probs_hash = {}
  the_choices.each_with_index { |choice, index| probs_hash[choice.id] = probs[index] }
  probs_hash
end
# --- app/models/question.rb: added to class Question (continued) ---

# Prints one line per choice in the form
# "<choice id>: <number of votes> <Bradley-Terry score>".
# The probabilities are computed once and shared across all choices.
# Returns the probability hash (the receiver of #each).
def all_bt_scores
  btprobs = bradley_terry_probs
  btprobs.each do |choice_id, _strength|
    c = Choice.find(choice_id)
    puts "#{c.id}: #{c.votes.size} #{c.compute_bt_score(btprobs)}"
  end
end

# Indexes this question's prompts by the string
# "<left_choice_id>, <right_choice_id>" (comma followed by one space).
# Returns a Hash of that key => prompt.
def prompts_hash_by_choice_ids
  self.prompts.inject({}) do |by_pair, prompt|
    by_pair["#{prompt.left_choice_id}, #{prompt.right_choice_id}"] = prompt
    by_pair
  end
end

# (The question.rb hunk ends at the start of the existing method
# distinct_array_of_choice_ids, which is not shown in full here.)

# --- lib/tasks/import_bt_test_data.rb (new file) ---
# Bulk-loads Bradley-Terry test data (choices, prompts, votes) from CSV files
# into a freshly created question using multi-row INSERT statements.
require 'faster_csv'

# Directory holding the CSV fixtures; override with BT_TEST_DIR.
BASEDIR = ENV['BT_TEST_DIR'] || "/home/dhruv/CITP/bt_test/"
CSV_OPTIONS = {:headers => :first_row, :return_headers => false}

# Highest existing id of a model, or 0 when its table is empty
# (calling .id on a nil `last` would raise).
last_id_of = lambda do |model|
  latest = model.last
  latest ? latest.id : 0
end

q = Question.new(:name => "test for bt", :creator_id => 1, :site_id => 1)
q.save()

# Ids in the CSV files are relative; they are shifted past the rows that
# already exist. Presumably the database then hands out matching
# auto-increment ids -- verify against the target database.
choice_offset = last_id_of.call(Choice)

timestring = Time.now.to_s(:db)

inserts = []
totalchoices = 0
FasterCSV.foreach(File.join(BASEDIR, "choices_7000.txt"), CSV_OPTIONS) do |choice|
  # One VALUES tuple per choice, with a unique (shifted) item id.
  item_id = choice[0].to_i + choice_offset
  wins = choice[1].to_i
  inserts.push("(#{q.id}, #{item_id}, #{wins}, '#{timestring}', '#{timestring}')")
  totalchoices += 1
end

sql = "INSERT INTO `choices` (`question_id`, `item_id`, `votes_count`, `created_at`, `updated_at`) VALUES #{inserts.join(', ')}"

ActiveRecord::Base.connection.execute(sql)

inserts = []
# Taken before the prompts are inserted, so vote rows can reference them.
prompt_offset = last_id_of.call(Prompt)
totalprompts = 0
FasterCSV.foreach(File.join(BASEDIR, "prompts_7000.txt"), CSV_OPTIONS) do |prompt|
  left_choice_id = prompt[1].to_i + choice_offset
  right_choice_id = prompt[2].to_i + choice_offset
  votes_count = prompt[3].to_i

  inserts.push("(NULL, #{q.id}, NULL, #{left_choice_id}, '#{timestring}', '#{timestring}', NULL, #{votes_count}, #{right_choice_id}, NULL, NULL)")
  totalprompts += 1
end

sql = "INSERT INTO `prompts` (`algorithm_id`, `question_id`, `voter_id`, `left_choice_id`, `created_at`, `updated_at`, `tracking`, `votes_count`, `right_choice_id`, `active`, `randomkey`) VALUES #{inserts.join(', ')}"

ActiveRecord::Base.connection.execute(sql)

inserts = []
totalvotes = 0
FasterCSV.foreach(File.join(BASEDIR, "votes_7000.txt"), CSV_OPTIONS) do |vote|
  prompt_id = vote[1].to_i + prompt_offset
  choice_id = vote[2].to_i + choice_offset
  # The loser is a choice id too, so it needs the same shift as the winner;
  # left unshifted it would point at an unrelated, pre-existing choice.
  loser_choice_id = vote[3].to_i + choice_offset

  inserts.push("(#{prompt_id}, #{q.id}, #{choice_id}, #{loser_choice_id}, '#{timestring}', '#{timestring}')")
  totalvotes += 1
end

sql = "INSERT INTO `votes` (`prompt_id`, `question_id`, `choice_id`, `loser_choice_id`, `created_at`, `updated_at`) VALUES #{inserts.join(', ')}"

ActiveRecord::Base.connection.execute(sql)

# The bulk inserts bypass the models, so the question's counters are set by hand.
sql = "UPDATE questions SET votes_count=#{totalvotes}, prompts_count=#{totalprompts}, choices_count=#{totalchoices} WHERE id=#{q.id}"

ActiveRecord::Base.connection.execute(sql)

# --- lib/tasks/makecsvfromhash.rb (new file) ---
# Prints the per-prompt vote counts of question 109 as comma-separated rows:
# one row per left choice id, one cell per right choice id, 0 on the diagonal.
require 'fastercsv'

q = Question.find(109)

the_prompts = q.prompts_hash_by_choice_ids

# left choice id => { right choice id => number of votes on that prompt }
the_hash = {}
the_prompts.each do |key, prompt|
  left_id, right_id = key.split(", ")
  the_hash[left_id] ||= { left_id => 0 }
  the_hash[left_id][right_id] = prompt.votes.size
end

# NOTE(review): the ids are Strings here, so rows and cells are ordered
# lexicographically ("10" sorts before "9") -- confirm that is intended.
the_hash.sort.each do |_left_id, row|
  puts row.sort.map { |_right_id, cell| cell }.join(", ")
end

# (patch trailer: libgit2 0.21.2)