From f13bc74853837cd7fbc7719bc884ad85910a5ef8 Mon Sep 17 00:00:00 2001 From: Stephen Crosby Date: Wed, 16 Sep 2015 20:16:58 -0700 Subject: [PATCH] rework the problem cache and refingerprinting code --- app/interactors/notice_refingerprinter.rb | 36 ++++++++++++++++++++++++++++++++++++ app/interactors/problem_merge.rb | 8 +------- app/interactors/problem_recacher.rb | 24 ++++++++++++++++++++++++ app/interactors/problem_updater_cache.rb | 88 ---------------------------------------------------------------------------------------- app/models/notice.rb | 9 ++++++++- app/models/notice_fingerprinter.rb | 2 +- app/models/problem.rb | 18 ++++++++++++------ lib/tasks/errbit/database.rake | 93 +++++++++++++++++++++++++++------------------------------------------------------------------ spec/fabricators/backtrace_fabricator.rb | 19 +++++++++++++++++++ spec/fabricators/err_fabricator.rb | 26 -------------------------- spec/fabricators/notice_fabricator.rb | 14 ++++++++++++++ spec/interactors/notice_refingerprinter_spec.rb | 16 ++++++++++++++++ spec/interactors/problem_merge_spec.rb | 2 +- spec/interactors/problem_updater_cache_spec.rb | 145 ------------------------------------------------------------------------------------------------------------------------------------------------- spec/models/problem_spec.rb | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 15 files changed, 255 insertions(+), 341 deletions(-) create mode 100644 app/interactors/notice_refingerprinter.rb create mode 100644 app/interactors/problem_recacher.rb delete mode 100644 app/interactors/problem_updater_cache.rb create mode 100644 spec/fabricators/backtrace_fabricator.rb create mode 100644 spec/fabricators/notice_fabricator.rb create mode 100644 spec/interactors/notice_refingerprinter_spec.rb delete mode 100644 spec/interactors/problem_updater_cache_spec.rb diff --git a/app/interactors/notice_refingerprinter.rb b/app/interactors/notice_refingerprinter.rb new file mode 100644 index 0000000..bf2652b --- /dev/null +++ b/app/interactors/notice_refingerprinter.rb @@ -0,0 +1,36 @@ +class NoticeRefingerprinter + LOG_EVERY = 100 + LOG_ITR = '%.1f%% complete, %i notice(s) remaining' + LOG_START = 'Regenerating notice fingerprints for %i notices' + + def self.run + count = Notice.count + puts format(LOG_START, count) + + Notice.no_timeout.each_with_index do |notice, index| + refingerprint(notice) + + next unless (index + 1) % LOG_EVERY == 0 + puts format(LOG_ITR, (index * 100 / count), count - index) + end + + puts 'Finished generating notice fingerprints' + end + + def self.apps + @apps ||= App.all.index_by(&:id) + end + + def self.refingerprint(notice) + app = apps[notice.try(:err).try(:problem).try(:app_id)] + app.find_or_create_err!( + error_class: notice.error_class, + environment: notice.environment_name, + fingerprint: app.notice_fingerprinter.generate(app.api_key, notice, notice.backtrace) + ) + end + + def self.puts(*args) + Rails.logger.info(*args) + end +end diff --git a/app/interactors/problem_merge.rb b/app/interactors/problem_merge.rb index 7610fb2..75ba63a 100644 --- a/app/interactors/problem_merge.rb +++ b/app/interactors/problem_merge.rb @@ -16,13 +16,7 @@ class ProblemMerge problem.reload # deference all associate objet to avoid delete him after ProblemDestroy.execute(problem) end - reset_cached_attributes + merged_problem.recache merged_problem end - - private - - def reset_cached_attributes - ProblemUpdaterCache.new(merged_problem).update - end end diff --git a/app/interactors/problem_recacher.rb b/app/interactors/problem_recacher.rb new file mode 100644 index 0000000..5e1f7c1 --- /dev/null +++ b/app/interactors/problem_recacher.rb @@ -0,0 +1,24 @@ +class ProblemRecacher + LOG_EVERY = 100 + LOG_ITR = '%.1f%% complete, %i problem(s) remaining' + LOG_START = 'Re-caching problem attributes for %i problems' + + def self.run + count = Problem.count + puts format(LOG_START, count) + + Problem.no_timeout.each_with_index do |problem, index| + problem.recache + problem.destroy if problem.notices_count == 0 + + next unless (index + 1) % LOG_EVERY == 0 + puts format(LOG_ITR, (index * 100 / count), count - index) + end + + puts "Finished re-caching problem attributes" + end + + def self.puts(*args) + Rails.logger.info(*args) + end +end diff --git a/app/interactors/problem_updater_cache.rb b/app/interactors/problem_updater_cache.rb deleted file mode 100644 index 29891a8..0000000 --- a/app/interactors/problem_updater_cache.rb +++ /dev/null @@ -1,88 +0,0 @@ -class ProblemUpdaterCache - def initialize(problem, notice=nil) - @problem = problem - @notice = notice - end - attr_reader :problem - - ## - # Update cache information about child associate to this problem - # - # update the notices count, and some notice informations - # - # @return [ Problem ] the problem with this update - # - def update - update_notices_count - update_notices_cache - problem - end - - private - - def update_notices_count - if @notice - problem.inc(notices_count: 1) - else - problem.update_attribute( - :notices_count, problem.notices.count - ) - end - end - - ## - # Update problem statistique from some notice information - # - def update_notices_cache - first_notice = notices.first - last_notice = notices.last - notice ||= @notice || first_notice - - attrs = {} - attrs[:first_notice_at] = first_notice.created_at if first_notice - attrs[:last_notice_at] = last_notice.created_at if last_notice - attrs.merge!( - :message => notice.message, - :where => notice.where, - :messages => attribute_count(:message, messages), - :hosts => attribute_count(:host, hosts), - :user_agents => attribute_count(:user_agent_string, user_agents) - ) if notice - problem.update_attributes!(attrs) - end - - def notices - @notices ||= @notice ? [@notice].sort(&:created_at) : problem.notices.order_by([:created_at, :asc]) - end - - def messages - @notice ? problem.messages : {} - end - - def hosts - @notice ? problem.hosts : {} - end - - def user_agents - @notice ? problem.user_agents : {} - end - - private - - def attribute_count(value, init) - init.tap do |counts| - notices.each do |notice| - counts[attribute_index(notice.send(value))] ||= { - 'value' => notice.send(value), - 'count' => 0 - } - counts[attribute_index(notice.send(value))]['count'] += 1 - end - end - end - - def attribute_index(value) - @attributes_index ||= {} - @attributes_index[value.to_s] ||= Digest::MD5.hexdigest(value.to_s) - end -end diff --git a/app/models/notice.rb b/app/models/notice.rb index 7916260..889369b 100644 --- a/app/models/notice.rb +++ b/app/models/notice.rb @@ -48,7 +48,8 @@ class Notice end def environment_name - server_environment['server-environment'] || server_environment['environment-name'] + n = server_environment['server-environment'] || server_environment['environment-name'] + n.blank? ? 'development' : n end def component @@ -119,6 +120,12 @@ class Notice end end + # filter memory addresses out of object strings + # example: "#" becomes "#" + def filtered_message + message.gsub(/(#<.+?):[0-9a-f]x[0-9a-f]+(>)/, '\1\2') + end + protected def problem_recache diff --git a/app/models/notice_fingerprinter.rb b/app/models/notice_fingerprinter.rb index 0ce1cf8..f3aa115 100644 --- a/app/models/notice_fingerprinter.rb +++ b/app/models/notice_fingerprinter.rb @@ -16,7 +16,7 @@ class NoticeFingerprinter def generate(api_key, notice, backtrace) material = [] material << notice.error_class if error_class - material << notice.message if message + material << notice.filtered_message if message material << notice.component if component material << notice.action if action material << notice.environment_name if environment_name diff --git a/app/models/problem.rb b/app/models/problem.rb index c8be8ec..7314772 100644 --- a/app/models/problem.rb +++ b/app/models/problem.rb @@ -142,16 +142,22 @@ class Problem { "$match" => { err_id: { "$in" => err_ids } } }, { "$group" => { _id: "$#{v}", count: {"$sum" => 1} } } ]).each do |agg| - next if agg[:_id] == nil - - send(k)[Digest::MD5.hexdigest(agg[:_id])] = { - value: agg[:_id], - count: agg[:count] + send(k)[Digest::MD5.hexdigest(agg[:_id] || 'N/A')] = { + 'value' => agg[:_id] || 'N/A', + 'count' => agg[:count] } end end - self.notices_count = Notice.where({ err_id: { "$in" => err_ids }}).count + first_notice = notices.order_by([:created_at, :asc]).first + last_notice = notices.order_by([:created_at, :desc]).first + + self.notices_count = notices.count + self.first_notice_at = first_notice.created_at if first_notice + self.message = first_notice.message if first_notice + self.where = first_notice.where if first_notice + self.last_notice_at = last_notice.created_at if last_notice + save end diff --git a/lib/tasks/errbit/database.rake b/lib/tasks/errbit/database.rake index 940a7d1..b26c05f 100644 --- a/lib/tasks/errbit/database.rake +++ b/lib/tasks/errbit/database.rake @@ -1,74 +1,35 @@ -require 'digest/sha1' - namespace :errbit do - namespace :db do - - desc "Updates cached attributes on Problem" - task :update_problem_attrs => :environment do - puts "Updating problems" - Problem.no_timeout.all.each do |problem| - ProblemUpdaterCache.new(problem).update - end - end - - desc "Updates Problem#notices_count" - task :update_notices_count => :environment do - puts "Updating problem.notices_count" - Problem.no_timeout.all.each do |pr| - pr.update_attributes(:notices_count => pr.notices.count) - end - end - - desc "Delete resolved errors from the database. (Useful for limited heroku databases)" - task :clear_resolved => :environment do - require 'resolved_problem_clearer' - puts "=== Cleared #{ResolvedProblemClearer.new.execute} resolved errors from the database." - end - - desc "Regenerate fingerprints" - task :regenerate_fingerprints => :environment do - - def normalize_backtrace(backtrace) - backtrace[0...3].map do |trace| - trace.merge 'method' => trace['method'].to_s.gsub(/[0-9_]{10,}+/, "__FRAGMENT__") - end - end + desc "Updates cached attributes on Problem" + task :problem_recache => :environment do + ProblemRecacher.run + end - def fingerprint(source) - Digest::SHA1.hexdigest(source.to_s) - end + desc "Delete resolved errors from the database. (Useful for limited heroku databases)" + task :clear_resolved => :environment do + require 'resolved_problem_clearer' + puts "=== Cleared #{ResolvedProblemClearer.new.execute} resolved errors from the database." + end - puts "Regenerating Err fingerprints" - Err.create_indexes - Err.all.each do |err| - next if err.notices.count == 0 - source = { - :backtrace => normalize_backtrace(err.notices.first.backtrace).to_s, - :error_class => err.error_class, - :component => err.component, - :action => err.action, - :environment => err.environment, - :api_key => err.app.api_key - } - err.update_attributes(:fingerprint => fingerprint(source)) - end - end + desc "Regenerate fingerprints" + task :notice_refingerprint => :environment do + NoticeRefingerprinter.run + ProblemRecacher.run + end - desc "Remove notices in batch" - task :notices_delete, [ :problem_id ] => [ :environment ] do - BATCH_SIZE = 1000 - if args[:problem_id] - item_count = Problem.find(args[:problem_id]).notices.count - removed_count = 0 - puts "Notices to remove: #{item_count}" - while item_count > 0 - Problem.find(args[:problem_id]).notices.limit(BATCH_SIZE).each do |notice| - notice.remove - removed_count += 1 - end - item_count -= BATCH_SIZE - puts "Removed #{removed_count} notices" + desc "Remove notices in batch" + task :notices_delete, [ :problem_id ] => [ :environment ] do + BATCH_SIZE = 1000 + if args[:problem_id] + item_count = Problem.find(args[:problem_id]).notices.count + removed_count = 0 + puts "Notices to remove: #{item_count}" + while item_count > 0 + Problem.find(args[:problem_id]).notices.limit(BATCH_SIZE).each do |notice| + notice.remove + removed_count += 1 end + item_count -= BATCH_SIZE + puts "Removed #{removed_count} notices" end end end diff --git a/spec/fabricators/backtrace_fabricator.rb b/spec/fabricators/backtrace_fabricator.rb new file mode 100644 index 0000000..dc9e3bb --- /dev/null +++ b/spec/fabricators/backtrace_fabricator.rb @@ -0,0 +1,19 @@ +Fabricator :backtrace do + lines(:count => 99) do + { + number: rand(999), + file: "/path/to/file/#{SecureRandom.hex(4)}.rb", + method: ActiveSupport.methods.shuffle.first + } + end +end + +Fabricator :short_backtrace do + lines(:count => 5) do + { + number: rand(999), + file: "/path/to/file/#{SecureRandom.hex(4)}.rb", + method: ActiveSupport.methods.shuffle.first + } + end +end diff --git a/spec/fabricators/err_fabricator.rb b/spec/fabricators/err_fabricator.rb index 3a3dcb2..103557c 100644 --- a/spec/fabricators/err_fabricator.rb +++ b/spec/fabricators/err_fabricator.rb @@ -2,29 +2,3 @@ Fabricator :err do problem fingerprint 'some-finger-print' end - -Fabricator :notice do - app - err - error_class 'FooError' - message 'Too Much Bar' - backtrace - server_environment { {'environment-name' => 'production'} } - request {{ 'component' => 'foo', 'action' => 'bar' }} - notifier {{ 'name' => 'Notifier', 'version' => '1', 'url' => 'http://toad.com' }} - - after_create do - Problem.cache_notice(err.problem_id, self) - problem.reload - end -end - -Fabricator :backtrace do - lines(:count => 99) do - { - number: rand(999), - file: "/path/to/file/#{SecureRandom.hex(4)}.rb", - method: ActiveSupport.methods.shuffle.first - } - end -end diff --git a/spec/fabricators/notice_fabricator.rb b/spec/fabricators/notice_fabricator.rb new file mode 100644 index 0000000..90e79c7 --- /dev/null +++ b/spec/fabricators/notice_fabricator.rb @@ -0,0 +1,14 @@ +Fabricator :notice do + err + error_class 'FooError' + message 'Too Much Bar' + backtrace + server_environment {{ 'environment-name' => 'production' }} + request {{ 'component' => 'foo', 'action' => 'bar' }} + notifier {{ 'name' => 'Notifier', 'version' => '1', 'url' => 'http://toad.com' }} + + after_create do + Problem.cache_notice(err.problem_id, self) + problem.reload + end +end diff --git a/spec/interactors/notice_refingerprinter_spec.rb b/spec/interactors/notice_refingerprinter_spec.rb new file mode 100644 index 0000000..22c8d7c --- /dev/null +++ b/spec/interactors/notice_refingerprinter_spec.rb @@ -0,0 +1,16 @@ +describe NoticeRefingerprinter do + let(:backtrace) do + Fabricate(:backtrace) + end + + let(:notices) do + 5.times.map do + Fabricate(:notice, backtrace: backtrace) + end + end + + it 'shits' do + binding.pry + 1 + end +end diff --git a/spec/interactors/problem_merge_spec.rb b/spec/interactors/problem_merge_spec.rb index 9dcf653..2436b96 100644 --- a/spec/interactors/problem_merge_spec.rb +++ b/spec/interactors/problem_merge_spec.rb @@ -47,7 +47,7 @@ describe ProblemMerge do end it 'update problem cache' do - expect(ProblemUpdaterCache).to receive(:new).with(problem).and_return(double(:update => true)) + expect(problem).to receive(:recache) problem_merge.merge end diff --git a/spec/interactors/problem_updater_cache_spec.rb b/spec/interactors/problem_updater_cache_spec.rb deleted file mode 100644 index 5b56166..0000000 --- a/spec/interactors/problem_updater_cache_spec.rb +++ /dev/null @@ -1,145 +0,0 @@ -describe ProblemUpdaterCache do - let(:problem) { Fabricate(:problem_with_errs) } - let(:first_errs) { problem.errs } - let!(:notice) { Fabricate(:notice, :err => first_errs.first) } - - describe "#update" do - context "without notice pass args" do - before do - problem.update_attribute(:notices_count, 0) - end - - it 'update the notice_count' do - expect { - ProblemUpdaterCache.new(problem).update - }.to change{ - problem.notices_count - }.from(0).to(1) - end - - context "with only one notice" do - before do - problem.update_attributes!(:messages => {}) - ProblemUpdaterCache.new(problem).update - end - - it 'update information about this notice' do - expect(problem.message).to eq notice.message - expect(problem.where).to eq notice.where - end - - it 'update first_notice_at' do - expect(problem.first_notice_at).to eq notice.reload.created_at - end - - it 'update last_notice_at' do - expect(problem.last_notice_at).to eq notice.reload.created_at - end - - it 'update stats messages' do - expect(problem.messages).to eq({ - Digest::MD5.hexdigest(notice.message) => {'value' => notice.message, 'count' => 1} - }) - end - - it 'update stats hosts' do - expect(problem.hosts).to eq({ - Digest::MD5.hexdigest(notice.host) => {'value' => notice.host, 'count' => 1} - }) - end - - it 'update stats user_agents' do - expect(problem.user_agents).to eq({ - Digest::MD5.hexdigest(notice.user_agent_string) => {'value' => notice.user_agent_string, 'count' => 1} - }) - end - end - - context "with several notices" do - let!(:notice_2) { Fabricate(:notice, :err => first_errs.first) } - let!(:notice_3) { Fabricate(:notice, :err => first_errs.first) } - before do - problem.update_attributes!(:messages => {}) - ProblemUpdaterCache.new(problem).update - end - it 'update information about this notice' do - expect(problem.message).to eq notice.message - expect(problem.where).to eq notice.where - end - - it 'update first_notice_at' do - expect(problem.first_notice_at.to_i).to be_within(2).of(notice.created_at.to_i) - end - - it 'update last_notice_at' do - expect(problem.last_notice_at.to_i).to be_within(2).of(notice.created_at.to_i) - end - - it 'update stats messages' do - expect(problem.messages).to eq({ - Digest::MD5.hexdigest(notice.message) => {'value' => notice.message, 'count' => 3} - }) - end - - it 'update stats hosts' do - expect(problem.hosts).to eq({ - Digest::MD5.hexdigest(notice.host) => {'value' => notice.host, 'count' => 3} - }) - end - - it 'update stats user_agents' do - expect(problem.user_agents).to eq({ - Digest::MD5.hexdigest(notice.user_agent_string) => {'value' => notice.user_agent_string, 'count' => 3} - }) - end - - end - end - - context "with notice pass in args" do - - before do - ProblemUpdaterCache.new(problem, notice).update - end - - it 'increase notices_count by 1' do - expect { - ProblemUpdaterCache.new(problem, notice).update - }.to change{ - problem.notices_count - }.by(1) - end - - it 'update information about this notice' do - expect(problem.message).to eq notice.message - expect(problem.where).to eq notice.where - end - - it 'update first_notice_at' do - expect(problem.first_notice_at).to eq notice.created_at - end - - it 'update last_notice_at' do - expect(problem.last_notice_at).to eq notice.created_at - end - - it 'inc stats messages' do - expect(problem.messages).to eq({ - Digest::MD5.hexdigest(notice.message) => {'value' => notice.message, 'count' => 2} - }) - end - - it 'inc stats hosts' do - expect(problem.hosts).to eq({ - Digest::MD5.hexdigest(notice.host) => {'value' => notice.host, 'count' => 2} - }) - end - - it 'inc stats user_agents' do - expect(problem.user_agents).to eq({ - Digest::MD5.hexdigest(notice.user_agent_string) => {'value' => notice.user_agent_string, 'count' => 2} - }) - end - end - end -end diff --git a/spec/models/problem_spec.rb b/spec/models/problem_spec.rb index b05ebaa..9d38b8f 100644 --- a/spec/models/problem_spec.rb +++ b/spec/models/problem_spec.rb @@ -397,4 +397,100 @@ describe Problem, type: 'model' do end end end + + describe '#recache' do + let(:problem) { Fabricate(:problem_with_errs) } + let(:first_errs) { problem.errs } + let!(:notice) { Fabricate(:notice, :err => first_errs.first) } + + before do + problem.update_attribute(:notices_count, 0) + end + + it 'update the notice_count' do + expect { + problem.recache + }.to change{ + problem.notices_count + }.from(0).to(1) + end + + context "with only one notice" do + before do + problem.update_attributes!(:messages => {}) + problem.recache + end + + it 'update information about this notice' do + expect(problem.message).to eq notice.message + expect(problem.where).to eq notice.where + end + + it 'update first_notice_at' do + expect(problem.first_notice_at).to eq notice.reload.created_at + end + + it 'update last_notice_at' do + expect(problem.last_notice_at).to eq notice.reload.created_at + end + + it 'update stats messages' do + expect(problem.messages).to eq({ + Digest::MD5.hexdigest(notice.message) => {'value' => notice.message, 'count' => 1} + }) + end + + it 'update stats hosts' do + expect(problem.hosts).to eq({ + Digest::MD5.hexdigest(notice.host) => {'value' => notice.host, 'count' => 1} + }) + end + + it 'update stats user_agents' do + expect(problem.user_agents).to eq({ + Digest::MD5.hexdigest(notice.user_agent_string) => {'value' => notice.user_agent_string, 'count' => 1} + }) + end + end + + context "with several notices" do + let!(:notice_2) { Fabricate(:notice, :err => first_errs.first) } + let!(:notice_3) { Fabricate(:notice, :err => first_errs.first) } + before do + problem.update_attributes!(:messages => {}) + problem.recache + end + + it 'update information about this notice' do + expect(problem.message).to eq notice.message + expect(problem.where).to eq notice.where + end + + it 'update first_notice_at' do + expect(problem.first_notice_at.to_i).to be_within(2).of(notice.created_at.to_i) + end + + it 'update last_notice_at' do + expect(problem.last_notice_at.to_i).to be_within(2).of(notice.created_at.to_i) + end + + it 'update stats messages' do + expect(problem.messages).to eq({ + Digest::MD5.hexdigest(notice.message) => {'value' => notice.message, 'count' => 3} + }) + end + + it 'update stats hosts' do + expect(problem.hosts).to eq({ + Digest::MD5.hexdigest(notice.host) => {'value' => notice.host, 'count' => 3} + }) + end + + it 'update stats user_agents' do + expect(problem.user_agents).to eq({ + Digest::MD5.hexdigest(notice.user_agent_string) => {'value' => notice.user_agent_string, 'count' => 3} + }) + end + end + end end -- libgit2 0.21.2