From 704011a1c03550dc680697b37c09a49b2baab005 Mon Sep 17 00:00:00 2001
From: Kyle Drake
Date: Sat, 2 May 2015 14:34:21 -0700
Subject: [PATCH] finish up pruning for stats

---
 Rakefile                |  5 ++++-
 models/stat.rb          | 26 +++++++------------------
 models/stat_location.rb |  8 ++++++--
 models/stat_path.rb     |  8 ++++++--
 models/stat_referrer.rb |  8 ++++++--
 tests/stat_tests.rb     | 43 ++++++++++++++++++++++++++++++++---------
 6 files changed, 63 insertions(+), 35 deletions(-)

diff --git a/Rakefile b/Rakefile
index c7fc9814..6533fe95 100644
--- a/Rakefile
+++ b/Rakefile
@@ -31,8 +31,11 @@ end
 
 desc "parse logs"
 task :parse_logs => [:environment] do
-  Stat.parse_logfiles $config['logs_path']
   Stat.prune!
+  StatLocation.prune!
+  StatReferrer.prune!
+  StatPath.prune!
+  Stat.parse_logfiles $config['logs_path']
 end
 
 desc 'Update banned IPs list'
diff --git a/models/stat.rb b/models/stat.rb
index 5ba31416..c6b6932b 100644
--- a/models/stat.rb
+++ b/models/stat.rb
@@ -1,6 +1,5 @@
 class Stat < Sequel::Model
-  FREE_RETAINMENT_DAYS = 7
-  REFERRAL_RETAINMENT_DAYS = 7
+  FREE_RETAINMENT_DAYS = 30
 
   many_to_one :site
   one_to_many :stat_referrers
@@ -9,23 +8,12 @@ class Stat < Sequel::Model
 
   class << self
     def prune!
-      supporter_site_ids = DB["select id from sites where plan_type is not null or plan_type != 'free'"].all.collect {|s| s[:id]}
-
-      delete_stats_dataset = where{created_at < (FREE_RETAINMENT_DAYS-1).days.ago.to_date.to_s}.exclude(site_id: supporter_site_ids)
-
-      deleted_stat_ids = delete_stats_dataset.select(:id).all.collect {|s| s.id}
-      delete_stats_dataset.delete
-
-      puts "TODO: stat_referrers/paths/locations needs created_at timestamp for pruning."
-
-      StatReferrer.where(stat_id: deleted_stat_ids).delete
-
-      #DB[
-      #  "DELETE FROM stats WHERE created_at < ? AND site_id NOT IN (SELECT id FROM sites WHERE plan_type IS NOT NULL OR plan_type != 'free')",
-      #  (FREE_RETAINMENT_DAYS-1).days.ago.to_date.to_s
-      #].first
-
-      #binding.pry
+      # Delete stats rows older than the free retention window. Supporter sites
+      # (plan_type set AND not 'free') are excluded and are never pruned here.
+      DB[
+        "DELETE FROM stats WHERE created_at < ? AND site_id NOT IN (SELECT id FROM sites WHERE plan_type IS NOT NULL AND plan_type != 'free')",
+        (FREE_RETAINMENT_DAYS-1).days.ago.to_date.to_s
+      ].delete
     end
 
     def parse_logfiles(path)
diff --git a/models/stat_location.rb b/models/stat_location.rb
index 02398ab1..f1a8e863 100644
--- a/models/stat_location.rb
+++ b/models/stat_location.rb
@@ -2,10 +2,14 @@ require 'geoip'
 
 class StatLocation < Sequel::Model
   GEOCITY_PATH = './files/GeoLiteCity.dat'
-  RETAINMENT_PERIOD = 7.days
+  RETAINMENT_DAYS = 7
 
   many_to_one :site
 
+  def self.prune!
+    where{created_at < (RETAINMENT_DAYS-2).days.ago.to_date}.delete
+  end
+
   def self.create_or_get(site_id, ip)
     geoip = GeoIP.new GEOCITY_PATH
     city = geoip.city ip
@@ -13,7 +17,7 @@ class StatLocation < Sequel::Model
     return nil if city.nil?
 
     opts = {site_id: site_id, country_code2: city.country_code2, region_name: city.region_name, city_name: city.city_name}
-    stat_location = where(opts).where{created_at > RETAINMENT_PERIOD.ago}.first
+    stat_location = where(opts).where{created_at > RETAINMENT_DAYS.days.ago}.first
     DB[table_name].lock('EXCLUSIVE') {
       stat_location = create opts.merge(latitude: city.latitude, longitude: city.longitude, created_at: Date.today)
     } if stat_location.nil?
diff --git a/models/stat_path.rb b/models/stat_path.rb
index 64fefb55..8dcd6308 100644
--- a/models/stat_path.rb
+++ b/models/stat_path.rb
@@ -1,11 +1,15 @@
 class StatPath < Sequel::Model
-  RETAINMENT_PERIOD = 7.days
+  RETAINMENT_DAYS = 7
 
   many_to_one :site
 
+  def self.prune!
+    where{created_at < (RETAINMENT_DAYS-2).days.ago.to_date}.delete
+  end
+
   def self.create_or_get(site_id, name)
     opts = {site_id: site_id, name: name}
-    stat_path = where(opts).where{created_at > RETAINMENT_PERIOD.ago}.first
+    stat_path = where(opts).where{created_at > RETAINMENT_DAYS.days.ago}.first
     DB[table_name].lock('EXCLUSIVE') {
       stat_path = create opts.merge created_at: Date.today
     } if stat_path.nil?
diff --git a/models/stat_referrer.rb b/models/stat_referrer.rb
index e5119bc0..edeb52c8 100644
--- a/models/stat_referrer.rb
+++ b/models/stat_referrer.rb
@@ -1,10 +1,14 @@
 class StatReferrer < Sequel::Model
   many_to_one :site
 
-  RETAINMENT_PERIOD = 7.days
+  RETAINMENT_DAYS = 7
+
+  def self.prune!
+    where{created_at < (RETAINMENT_DAYS-2).days.ago.to_date}.delete
+  end
 
   def self.create_or_get(site_id, url)
     opts = {site_id: site_id, url: url}
-    stat_referrer = where(opts).where{created_at > RETAINMENT_PERIOD.ago}.first
+    stat_referrer = where(opts).where{created_at > RETAINMENT_DAYS.days.ago}.first
     DB[table_name].lock('EXCLUSIVE') {
       stat_referrer = create opts.merge(created_at: Date.today)
diff --git a/tests/stat_tests.rb b/tests/stat_tests.rb
index a3ec7d31..8b85021e 100644
--- a/tests/stat_tests.rb
+++ b/tests/stat_tests.rb
@@ -26,7 +26,7 @@ describe 'stats' do
       file.write log.join("\n")
     end
   end
-=begin
+
   it 'prunes logs for free sites' do
     @free_site = Fabricate :site
     @supporter_site = Fabricate :site, plan_type: 'supporter'
@@ -42,19 +42,44 @@ describe 'stats' do
     count_site_ids = [@free_site.id, @supporter_site.id]
     expected_stat_count = (Stat::FREE_RETAINMENT_DAYS+1)*2
 
-    [@free_site, @supporter_site].each do |site|
-      site.stats.last.add_stat_referrer url: 'https://example.com'
-    end
-
     Stat.where(site_id: count_site_ids).count.must_equal expected_stat_count
     Stat.prune!
     Stat.where(site_id: count_site_ids).count.must_equal expected_stat_count-1
     Stat.where(site_id: @supporter_site.id).count.must_equal expected_stat_count/2
-
-    @free_site.stats.last.stat_referrers.length.must_equal 0
-    @supporter_site.stats.last.stat_referrers.length.must_equal 1
   end
-=end
+
+  it 'prunes referrers' do
+    fresh_referrer = @site_one.add_stat_referrer created_at: Date.today, url: 'http://example.com/now'
+    stale_referrer = @site_one.add_stat_referrer created_at: (StatReferrer::RETAINMENT_DAYS-1).days.ago, url: 'http://example.com'
+    StatReferrer[stale_referrer.id].wont_be_nil
+    @site_one.stat_referrers_dataset.count.must_equal 2
+    StatReferrer.prune!
+    @site_one.stat_referrers_dataset.count.must_equal 1
+    StatReferrer[stale_referrer.id].must_be_nil
+  end
+
+  it 'prunes locations' do
+    stale_location = @site_one.add_stat_location(
+      created_at: (StatLocation::RETAINMENT_DAYS-1).days.ago,
+      country_code2: 'US',
+      region_name: 'Minnesota',
+      city_name: 'Minneapolis'
+    )
+    StatLocation[stale_location.id].wont_be_nil
+    StatLocation.prune!
+    StatLocation[stale_location.id].must_be_nil
+  end
+
+  it 'prunes paths' do
+    stale_path = @site_one.add_stat_path(
+      created_at: (StatPath::RETAINMENT_DAYS-1).days.ago,
+      name: '/derpie.html'
+    )
+    StatPath[stale_path.id].wont_be_nil
+    StatPath.prune!
+    StatPath[stale_path.id].must_be_nil
+  end
+
   it 'parses logfile' do
     Stat.parse_logfiles STAT_LOGS_PATH
 