From e0416de4174d637cc2e0e330662b59f3eaefefdf Mon Sep 17 00:00:00 2001 From: Braulio Bhavamitra Date: Wed, 29 Jun 2016 16:54:12 -0300 Subject: [PATCH] analytics: identify bots and filter them out by default --- plugins/analytics/Gemfile | 2 ++ plugins/analytics/controllers/myprofile/analytics_plugin/stats_controller.rb | 33 ++++++++++++++++++++++++++++++--- plugins/analytics/controllers/profile/analytics_plugin/time_on_page_controller.rb | 5 ++++- plugins/analytics/db/migrate/20151030122634_add_title_and_is_bot_to_analytics_plugin_page_view.rb | 40 ++++++++++++++++++++++++++++++++++++++++ plugins/analytics/lib/analytics_plugin.rb | 11 +++++++++++ plugins/analytics/lib/analytics_plugin/base.rb | 10 +++++----- plugins/analytics/lib/ext/profile.rb | 14 ++++++-------- plugins/analytics/locales/en.yml | 6 +++++- plugins/analytics/locales/pt.yml | 7 ++++++- plugins/analytics/models/analytics_plugin/page_view.rb | 61 ++++++++++++++++++++++++++++++++++++++++++++++++------------- plugins/analytics/models/analytics_plugin/visit.rb | 10 ++++++++-- plugins/analytics/public/javascripts/analytics.js | 7 ++++++- plugins/analytics/public/javascripts/views/settings.tag.slim | 33 +++++++++++++++++++++++++++++++++ plugins/analytics/test/functional/content_viewer_controller_test.rb | 4 ++-- plugins/analytics/views/analytics_plugin/stats/_table.html.slim | 48 +++++++++++++++++++++++------------------------- plugins/analytics/views/analytics_plugin/stats/index.html.slim | 15 +++++++++++---- 16 files changed, 240 insertions(+), 66 deletions(-) create mode 100644 plugins/analytics/Gemfile create mode 100644 plugins/analytics/db/migrate/20151030122634_add_title_and_is_bot_to_analytics_plugin_page_view.rb create mode 100644 plugins/analytics/public/javascripts/views/settings.tag.slim diff --git a/plugins/analytics/Gemfile b/plugins/analytics/Gemfile new file mode 100644 index 0000000..744e094 --- /dev/null +++ b/plugins/analytics/Gemfile @@ -0,0 +1,2 @@ +gem 'browser', '~> 2.2.0' + diff --git a/plugins/analytics/controllers/myprofile/analytics_plugin/stats_controller.rb b/plugins/analytics/controllers/myprofile/analytics_plugin/stats_controller.rb index 8ebd23b..d5ef7f6 100644 --- a/plugins/analytics/controllers/myprofile/analytics_plugin/stats_controller.rb +++ b/plugins/analytics/controllers/myprofile/analytics_plugin/stats_controller.rb @@ -7,12 +7,39 @@ class AnalyticsPlugin::StatsController < MyProfileController def index end + def edit + return render_access_denied unless user.has_permission? 'edit_profile', profile + + params[:analytics_settings][:enabled] = params[:analytics_settings][:enabled] == 'true' + params[:analytics_settings][:anonymous] = params[:analytics_settings][:anonymous] == 'true' + @settings = profile.analytics_settings params[:analytics_settings] || {} + @settings.save! + render nothing: true + end + + def view + params[:profile_ids] ||= [profile.id] + ids = params[:profile_ids].map(&:to_i) + user.adminships # FIXME just to cache #adminship_ids + ids = ids.select{ |id| id.in? user.adminship_ids } unless @user_is_admin + + @profiles = environment.profiles.find ids + @user = environment.people.find params[:user_id] + @visits = AnalyticsPlugin::Visit.eager_load(:users_page_views). + where(profile_id: ids, analytics_plugin_page_views: {user_id: @user.id}) + + render partial: 'table', locals: {visits: @visits} + + end + protected - def default_url_options - # avoid rails' use_relative_controller! - {use_route: '/'} + # inherit routes from core skipping use_relative_controller! + def url_for options + options[:controller] = "/#{options[:controller]}" if options.is_a? Hash and options[:controller] and not options[:controller].to_s.starts_with? '/' + super options end + helper_method :url_for def skip_page_view @analytics_skip_page_view = true diff --git a/plugins/analytics/controllers/profile/analytics_plugin/time_on_page_controller.rb b/plugins/analytics/controllers/profile/analytics_plugin/time_on_page_controller.rb index 471e2a2..f2a6d10 100644 --- a/plugins/analytics/controllers/profile/analytics_plugin/time_on_page_controller.rb +++ b/plugins/analytics/controllers/profile/analytics_plugin/time_on_page_controller.rb @@ -7,7 +7,10 @@ class AnalyticsPlugin::TimeOnPageController < ProfileController Noosfero::Scheduler::Defer.later do page_view = profile.page_views.where(request_id: params[:id]).first page_view.request = request - page_view.page_load! + AnalyticsPlugin::PageView.transaction do + page_view.page_load! Time.at(params[:time].to_i) + page_view.update_column :title, params[:title] if params[:title].present? + end end render nothing: true diff --git a/plugins/analytics/db/migrate/20151030122634_add_title_and_is_bot_to_analytics_plugin_page_view.rb b/plugins/analytics/db/migrate/20151030122634_add_title_and_is_bot_to_analytics_plugin_page_view.rb new file mode 100644 index 0000000..264020e --- /dev/null +++ b/plugins/analytics/db/migrate/20151030122634_add_title_and_is_bot_to_analytics_plugin_page_view.rb @@ -0,0 +1,40 @@ +class AddTitleAndIsBotToAnalyticsPluginPageView < ActiveRecord::Migration + + def up + add_column :analytics_plugin_page_views, :title, :text + add_column :analytics_plugin_page_views, :is_bot, :boolean + + # missing indexes for performance + add_index :analytics_plugin_page_views, :type + add_index :analytics_plugin_page_views, :visit_id + add_index :analytics_plugin_page_views, :request_started_at + add_index :analytics_plugin_page_views, :page_loaded_at + add_index :analytics_plugin_page_views, :is_bot + + AnalyticsPlugin::PageView.transaction do + AnalyticsPlugin::PageView.find_each do |page_view| + page_view.send :fill_is_bot + page_view.update_column :is_bot, page_view.is_bot + end + end + + change_table :analytics_plugin_visits do |t| + t.timestamps + end + AnalyticsPlugin::Visit.transaction do + AnalyticsPlugin::Visit.find_each do |visit| + visit.created_at = visit.page_views.first.request_started_at + visit.updated_at = visit.page_views.last.request_started_at + visit.save! + end + end + + # never used + remove_column :analytics_plugin_page_views, :track_id + end + + def down + say "this migration can't be reverted" + end + +end diff --git a/plugins/analytics/lib/analytics_plugin.rb b/plugins/analytics/lib/analytics_plugin.rb index 5332918..7c914c9 100644 --- a/plugins/analytics/lib/analytics_plugin.rb +++ b/plugins/analytics/lib/analytics_plugin.rb @@ -13,4 +13,15 @@ module AnalyticsPlugin I18n.t'analytics_plugin.lib.plugin.description' end + def self.clear_non_users + ActiveRecord::Base.transaction do + AnalyticsPlugin::PageView.bots.delete_all + AnalyticsPlugin::PageView.not_page_loaded.delete_all + # delete_all does not work here + AnalyticsPlugin::Visit.without_page_views.destroy_all + end + end + end + +Browser::Bot.detect_empty_ua! diff --git a/plugins/analytics/lib/analytics_plugin/base.rb b/plugins/analytics/lib/analytics_plugin/base.rb index 36bcfc6..f7e7d2f 100644 --- a/plugins/analytics/lib/analytics_plugin/base.rb +++ b/plugins/analytics/lib/analytics_plugin/base.rb @@ -3,6 +3,7 @@ class AnalyticsPlugin::Base < Noosfero::Plugin def body_ending return unless profile and profile.analytics_enabled? + return if @analytics_skip_page_view lambda do render 'analytics_plugin/body_ending' end @@ -12,6 +13,7 @@ class AnalyticsPlugin::Base < Noosfero::Plugin ['analytics'].map{ |j| "javascripts/#{j}" } end + # FIXME: not reloading on development, need server restart def application_controller_filters [{ type: 'around_filter', options: {}, block: -> &block do @@ -23,15 +25,12 @@ class AnalyticsPlugin::Base < Noosfero::Plugin return unless profile and profile.analytics_enabled? Noosfero::Scheduler::Defer.later 'analytics: register page view' do - page_view = profile.page_views.build request: request, profile_id: profile, + page_view = profile.page_views.build request: request, profile_id: profile.id, request_started_at: request_started_at, request_finished_at: request_finished_at - unless profile.analytics_anonymous? - session_id = session.id page_view.user = user - page_view.session_id = session_id + page_view.session_id = session.id end - page_view.save! end end, @@ -39,6 +38,7 @@ class AnalyticsPlugin::Base < Noosfero::Plugin end def control_panel_buttons + return unless user.is_admin? environment { title: I18n.t('analytics_plugin.lib.plugin.panel_button'), icon: 'analytics-access', diff --git a/plugins/analytics/lib/ext/profile.rb b/plugins/analytics/lib/ext/profile.rb index dd1da3b..bfa6bff 100644 --- a/plugins/analytics/lib/ext/profile.rb +++ b/plugins/analytics/lib/ext/profile.rb @@ -1,16 +1,14 @@ require_dependency 'profile' -require_dependency 'community' -([Profile] + Profile.descendants).each do |subclass| -subclass.class_eval do +class Profile - has_many :visits, foreign_key: :profile_id, class_name: 'AnalyticsPlugin::Visit' - has_many :page_views, foreign_key: :profile_id, class_name: 'AnalyticsPlugin::PageView' + has_many :users_visits, -> { latest.with_users_page_views }, foreign_key: :profile_id, class_name: 'AnalyticsPlugin::Visit' -end -end + has_many :visits, -> { latest.eager_load :page_views }, foreign_key: :profile_id, class_name: 'AnalyticsPlugin::Visit' + has_many :page_views, foreign_key: :profile_id, class_name: 'AnalyticsPlugin::PageView' -class Profile + has_many :user_visits, -> { latest.eager_load :page_views }, foreign_key: :user_id, class_name: 'AnalyticsPlugin::PageView' + has_many :user_page_views, foreign_key: :user_id, class_name: 'AnalyticsPlugin::PageView' def analytics_settings attrs = {} @analytics_settings ||= Noosfero::Plugin::Settings.new self, ::AnalyticsPlugin, attrs diff --git a/plugins/analytics/locales/en.yml b/plugins/analytics/locales/en.yml index 18f7f00..97f9240 100644 --- a/plugins/analytics/locales/en.yml +++ b/plugins/analytics/locales/en.yml @@ -9,10 +9,14 @@ en: &en views: stats: + enable: "Enable tracking on the profile '%{profile}'" + anonymous: "Don't associate users' login" + config_save: "Configuration saved" user: 'User' initial_time: 'Time' + ip: 'IP' pages: 'Pages' -en-US: +en_US: <<: *en diff --git a/plugins/analytics/locales/pt.yml b/plugins/analytics/locales/pt.yml index 0208077..90929a9 100644 --- a/plugins/analytics/locales/pt.yml +++ b/plugins/analytics/locales/pt.yml @@ -9,9 +9,14 @@ pt: &pt views: stats: + enable: "Ativar rastreio no perfil '%{profile}'" + anonymous: "Não associar login de usuários" + config_save: "Configuração salva" user: 'Usuário' initial_time: 'Horário' + ip: 'IP' pages: 'Páginas' -pt-BR: +pt_BR: <<: *pt + diff --git a/plugins/analytics/models/analytics_plugin/page_view.rb b/plugins/analytics/models/analytics_plugin/page_view.rb index b4c457f..4d4680b 100644 --- a/plugins/analytics/models/analytics_plugin/page_view.rb +++ b/plugins/analytics/models/analytics_plugin/page_view.rb @@ -10,22 +10,34 @@ class AnalyticsPlugin::PageView < ApplicationRecord acts_as_having_settings field: :options - belongs_to :visit, class_name: 'AnalyticsPlugin::Visit' - belongs_to :referer_page_view, class_name: 'AnalyticsPlugin::PageView' + belongs_to :profile, validate: true + belongs_to :visit, class_name: 'AnalyticsPlugin::Visit', touch: true, validate: true - belongs_to :user, class_name: 'Person' - belongs_to :session, primary_key: :session_id, foreign_key: :session_id, class_name: 'Session' - belongs_to :profile + belongs_to :referer_page_view, class_name: 'AnalyticsPlugin::PageView', validate: false - validates_presence_of :visit - validates_presence_of :request, on: :create - validates_presence_of :url + belongs_to :user, class_name: 'Person', validate: false + belongs_to :session, primary_key: :session_id, foreign_key: :session_id, class_name: 'Session', validate: false + + validates :request, presence: true, on: :create + validates :url, presence: true before_validation :extract_request_data, on: :create before_validation :fill_referer_page_view, on: :create before_validation :fill_visit, on: :create + before_validation :fill_is_bot, on: :create + + after_update :destroy_empty_visit + after_destroy :destroy_empty_visit + + scope :in_sequence, -> { order 'analytics_plugin_page_views.request_started_at ASC' } + + scope :page_loaded, -> { where 'analytics_plugin_page_views.page_loaded_at IS NOT NULL' } + scope :not_page_loaded, -> { where 'analytics_plugin_page_views.page_loaded_at IS NULL' } - scope :latest, -> { order 'request_started_at DESC' } + scope :no_bots, -> { where.not is_bot: true } + scope :bots, -> { where is_bot: true } + + scope :loaded_users, -> { in_sequence.page_loaded.no_bots } def request_duration self.request_finished_at - self.request_started_at @@ -43,8 +55,8 @@ class AnalyticsPlugin::PageView < ApplicationRecord Time.now < self.user_last_time_seen + AnalyticsPlugin::TimeOnPageUpdateInterval end - def page_load! - self.page_loaded_at = Time.now + def page_load! time + self.page_loaded_at = time self.update_column :page_loaded_at, self.page_loaded_at end @@ -56,6 +68,16 @@ class AnalyticsPlugin::PageView < ApplicationRecord self.update_column :time_on_page, self.time_on_page end + def find_referer_page_view + return if self.referer_url.blank? + AnalyticsPlugin::PageView.order('request_started_at DESC'). + where(url: self.referer_url, session_id: self.session_id, user_id: self.user_id, profile_id: self.profile_id).first + end + + def browser + @browser ||= Browser.new self.user_agent + end + protected def extract_request_data @@ -64,16 +86,29 @@ class AnalyticsPlugin::PageView < ApplicationRecord self.user_agent = self.request.headers['User-Agent'] self.request_id = self.request.env['action_dispatch.request_id'] self.remote_ip = self.request.remote_ip + true end def fill_referer_page_view - self.referer_page_view = AnalyticsPlugin::PageView.order('request_started_at DESC'). - where(url: self.referer_url, session_id: self.session_id, user_id: self.user_id, profile_id: self.profile_id).first if self.referer_url.present? + self.referer_page_view = self.find_referer_page_view + true end def fill_visit self.visit = self.referer_page_view.visit if self.referer_page_view and self.referer_page_view.user_on_page? self.visit ||= AnalyticsPlugin::Visit.new profile: profile + true + end + + def fill_is_bot + self.is_bot = self.browser.bot? + true + end + + def destroy_empty_visit + return unless self.visit_id_changed? + old_visit = AnalyticsPlugin::Visit.find self.visit_id_was + old_visit.destroy if old_visit.page_views.empty? end end diff --git a/plugins/analytics/models/analytics_plugin/visit.rb b/plugins/analytics/models/analytics_plugin/visit.rb index 4935d2f..5a223a3 100644 --- a/plugins/analytics/models/analytics_plugin/visit.rb +++ b/plugins/analytics/models/analytics_plugin/visit.rb @@ -5,10 +5,16 @@ class AnalyticsPlugin::Visit < ApplicationRecord belongs_to :profile has_many :page_views, class_name: 'AnalyticsPlugin::PageView', dependent: :destroy + has_many :users_page_views, -> { loaded_users }, class_name: 'AnalyticsPlugin::PageView', dependent: :destroy - default_scope -> { joins(:page_views).includes :page_views } + scope :latest, -> { order 'updated_at DESC' } - scope :latest, -> { order 'analytics_plugin_page_views.request_started_at DESC' } + scope :with_users_page_views, -> { + eager_load(:users_page_views).where.not analytics_plugin_page_views: {visit_id: nil} + } + scope :without_page_views, -> { + eager_load(:page_views).where analytics_plugin_page_views: {visit_id: nil} + } def first_page_view self.page_views.first diff --git a/plugins/analytics/public/javascripts/analytics.js b/plugins/analytics/public/javascripts/analytics.js index f40808b..df7ba69 100644 --- a/plugins/analytics/public/javascripts/analytics.js +++ b/plugins/analytics/public/javascripts/analytics.js @@ -1,4 +1,9 @@ analytics = { + + t: function (key, options) { + return I18n.t(key, $.extend(options, {scope: 'analytics_plugin'})) + }, + requestId: '', timeOnPage: { @@ -27,7 +32,7 @@ analytics = { pageLoad: function() { $.ajax(analytics.timeOnPage.baseUrl+'/page_load', { - type: 'POST', data: {id: analytics.requestId}, + type: 'POST', data: {id: analytics.requestId, title: document.title, time: Math.floor(Date.now()/1000)}, success: function(data) { }, }); diff --git a/plugins/analytics/public/javascripts/views/settings.tag.slim b/plugins/analytics/public/javascripts/views/settings.tag.slim new file mode 100644 index 0000000..9031d96 --- /dev/null +++ b/plugins/analytics/public/javascripts/views/settings.tag.slim @@ -0,0 +1,33 @@ +analytics-settings + .checkbox + label name='enabled' + input type='checkbox' name='enabled' value='1' checked='{settings.enabled}' onchange='{toggleEnabled}' + |{anl.t('views.stats.enable', {profile: noosfero.profile})} + + .checkbox if='{settings.enabled}' + label name='anonymous' + input type='checkbox' name='anonymous' value='1' checked='{settings.anonymous}' onchange='{toggleAnonymous}' + |{anl.t('views.stats.anonymous')} + + javascript: + this.anl = window.analytics + this.settings = opts.settings + this.updateUrl = Routes.analytics_plugin_stats_path({profile: noosfero.profile, action: 'edit'}) + + toggleEnabled (e) { + this.settings.enabled = !this.settings.enabled + this.update() + this.save(e) + } + toggleAnonymous (e) { + this.settings.anonymous = !this.settings.anonymous + this.save(e) + } + + save (e) { + var self = this + $.post(this.updateUrl, {analytics_settings: this.settings}, function() { + display_notice(self.anl.t('views.stats.config_save')) + }) + } + diff --git a/plugins/analytics/test/functional/content_viewer_controller_test.rb b/plugins/analytics/test/functional/content_viewer_controller_test.rb index a64bf45..1fafb53 100644 --- a/plugins/analytics/test/functional/content_viewer_controller_test.rb +++ b/plugins/analytics/test/functional/content_viewer_controller_test.rb @@ -37,7 +37,7 @@ class ContentViewerControllerTest < ActionController::TestCase @request.env['HTTP_REFERER'] = first_url get :view_page, profile: @community.identifier, page: @community.articles.last.path.split('/') assert_equal 2, @community.page_views.count - assert_equal 2, @community.visits.count + assert_equal 1, @community.visits.count second_page_view = @community.page_views.order(:id).last assert_equal first_page_view, second_page_view.referer_page_view @@ -48,7 +48,7 @@ class ContentViewerControllerTest < ActionController::TestCase future = Time.now + 2*AnalyticsPlugin::TimeOnPageUpdateInterval Time.stubs(:now).returns(future) get :view_page, profile: @community.identifier, page: @community.articles.last.path.split('/') - assert_equal 3, @community.visits.count + assert_equal 2, @community.visits.count end end diff --git a/plugins/analytics/views/analytics_plugin/stats/_table.html.slim b/plugins/analytics/views/analytics_plugin/stats/_table.html.slim index 5b6d7dc..01dbf9f 100644 --- a/plugins/analytics/views/analytics_plugin/stats/_table.html.slim +++ b/plugins/analytics/views/analytics_plugin/stats/_table.html.slim @@ -1,35 +1,33 @@ -table#analytics-stats.table data-toggle='table' data-striped='true' data-sortable='true' data-icons-prefix='fa' - thead - - unless profile.analytics_anonymous? +.table-responsive + table#analytics-stats.table data-toggle='table' data-striped='true' data-sortable='true' data-icons-prefix='fa' + thead th= t'analytics_plugin.views.stats.user' - th= t'analytics_plugin.views.stats.initial_time' - th= t'analytics_plugin.views.stats.pages' + th= t'analytics_plugin.views.stats.initial_time' + th= t'analytics_plugin.views.stats.ip' + th= t'analytics_plugin.views.stats.pages' - tbody - - profile.visits.each do |visit| - tr - td= link_to visit.user.name, visit.user.url - td - div data-toggle="tooltip" data-title='#{l visit.initial_time}' - = time_ago_in_words(visit.initial_time) - |  - = _'ago' - td - - visit.page_views.each do |page_view| - = link_to page_view.url, page_view.url - |  - = "(#{distance_of_time_in_words page_view.time_on_page})" - | ->  + tbody + - visits.each do |visit| + tr data-visit-id='#{visit.id}' + td= link_to visit.user.name, visit.user.url if visit.user + td + div data-toggle="tooltip" data-title='#{l visit.initial_time}' + = time_ago_in_words visit.initial_time + |  + = _'ago' + td= visit.users_page_views.first.remote_ip + td + ol + - visit.users_page_views.each do |page_view| + li + = link_to (if page_view.title.present? then page_view.title else page_view.url end), page_view.url, target: '_blank' + |  + = "(#{distance_of_time_in_words page_view.time_on_page})" javascript: $('#analytics-stats').bootstrapTable({ striped: true, - columns: [ - {sortable: true}, - {sortable: true}, - {sortable: true}, - ], }) $(document).ready(function() { diff --git a/plugins/analytics/views/analytics_plugin/stats/index.html.slim b/plugins/analytics/views/analytics_plugin/stats/index.html.slim index d0cd6d1..2eca84f 100644 --- a/plugins/analytics/views/analytics_plugin/stats/index.html.slim +++ b/plugins/analytics/views/analytics_plugin/stats/index.html.slim @@ -1,5 +1,12 @@ -- content_for :head - = javascript_include_tag 'https://cdnjs.cloudflare.com/ajax/libs/bootstrap-table/1.8.1/bootstrap-table-all.min.js' - = stylesheet_link_tag 'https://cdnjs.cloudflare.com/ajax/libs/bootstrap-table/1.8.1/bootstrap-table.css' += render 'shared/bootstrap_table' + += button :back, _('Back to control panel'), controller: 'profile_editor' + += js_translations_include plugin: :analytics += javascript_include_tag 'plugins/analytics/javascripts/views/settings' +analytics-settings data-opts="#{CGI.escapeHTML({settings: {enabled: profile.analytics_settings.enabled, anonymous: profile.analytics_settings.anonymous}}.to_json)}" data-riot='' +/ needs html_safe to work +/= riot_component :analytics_settings, settings: {enabled: profile.analytics_settings.enabled, anonymous: profile.analytics_settings.anonymous} + += render 'table', visits: profile.users_visits.limit(50) -= render 'table' -- libgit2 0.21.2