stat referrers, locations, and paths

This commit is contained in:
Kyle Drake 2015-05-02 02:34:24 -07:00
parent d9babf5cd6
commit 227b123fc9
10 changed files with 195 additions and 47 deletions

View file

@ -0,0 +1,18 @@
# This migration detaches stat_referrers, stat_locations and stat_paths
# from stats. Instead of stat_id, we'll add a created_at timestamp and remove
# after 7 days for both free and supporter plans (for now).
Sequel.migration do
up {
[:stat_referrers, :stat_paths, :stat_locations].each do |stat_table|
drop_column stat_table, :stat_id
add_column stat_table, :created_at, :date, index: true
end
}
down {
[:stat_referrers, :stat_paths, :stat_locations].each do |stat_table|
drop_column stat_table, :created_at
add_column stat_table, :stat_id, :integer, index: true
end
}
end

View file

@ -0,0 +1,16 @@
# This migration detaches stat_referrers, stat_locations and stat_paths
# from stats. Instead of stat_id, we'll add a created_at timestamp and remove
# after 7 days for both free and supporter plans (for now).
Sequel.migration do
up {
[:stat_referrers, :stat_paths, :stat_locations].each do |stat_table|
add_column stat_table, :site_id, :integer, index: true
end
}
down {
[:stat_referrers, :stat_paths, :stat_locations].each do |stat_table|
drop_column stat_table, :site_id
end
}
end

View file

@ -0,0 +1,12 @@
Sequel.migration do
up {
drop_column :stat_locations, :latitude
drop_column :stat_locations, :longitude
add_column :stat_locations, :latitude, :float
add_column :stat_locations, :longitude, :float
}
down {
# meh.
}
end

View file

@ -0,0 +1,9 @@
Sequel.migration do
up {
add_column :stats, :bandwidth, :bigint, default: 0
}
down {
drop_column :stats, :bandwidth
}
end

View file

@ -161,6 +161,11 @@ class Site < Sequel::Model
one_to_many :site_files
one_to_many :stats
one_to_many :stat_referrers
one_to_many :stat_locations
one_to_many :stat_paths
def account_sites_dataset
Site.where(Sequel.|({id: owner.id}, {parent_site_id: owner.id})).order(:parent_site_id.desc, :username)
end

View file

@ -1,6 +1,6 @@
class Stat < Sequel::Model
GEOCITY_PATH = './files/GeoLiteCity.dat'
FREE_RETAINMENT_DAYS = 7
REFERRAL_RETAINMENT_DAYS = 7
many_to_one :site
one_to_many :stat_referrers
@ -9,13 +9,23 @@ class Stat < Sequel::Model
class << self
def prune!
DB[
"DELETE FROM stats WHERE created_at < ? AND site_id NOT IN (SELECT id FROM sites WHERE plan_type IS NOT NULL OR plan_type != 'free')",
(FREE_RETAINMENT_DAYS-1).days.ago.to_date.to_s
].first
supporter_site_ids = DB["select id from sites where plan_type is not null or plan_type != 'free'"].all.collect {|s| s[:id]}
binding.pry
delete_stats_dataset = where{created_at < (FREE_RETAINMENT_DAYS-1).days.ago.to_date.to_s}.exclude(site_id: supporter_site_ids)
deleted_stat_ids = delete_stats_dataset.select(:id).all.collect {|s| s.id}
delete_stats_dataset.delete
puts "TODO: stat_referrers/paths/locations needs created_at timestamp for pruning."
StatReferrer.where(stat_id: deleted_stat_ids).delete
#DB[
# "DELETE FROM stats WHERE created_at < ? AND site_id NOT IN (SELECT id FROM sites WHERE plan_type IS NOT NULL OR plan_type != 'free')",
# (FREE_RETAINMENT_DAYS-1).days.ago.to_date.to_s
#].first
#binding.pry
end
def parse_logfiles(path)
@ -31,6 +41,7 @@ class Stat < Sequel::Model
site_logs[username] = {
hits: 0,
views: 0,
bandwidth: 0,
view_ips: [],
ips: [],
referrers: {},
@ -38,6 +49,7 @@ class Stat < Sequel::Model
} unless site_logs[username]
site_logs[username][:hits] += 1
site_logs[username][:bandwidth] += size.to_i
unless site_logs[username][:view_ips].include?(ip)
site_logs[username][:views] += 1
@ -76,19 +88,28 @@ class Stat < Sequel::Model
DB[:stats].lock('EXCLUSIVE') { stat = Stat.create opts } if stat.nil?
DB[
'update stats set hits=hits+?, views=views+? where site_id=?',
'update stats set hits=hits+?, views=views+?, bandwidth=bandwidth+? where site_id=?',
site_log[:hits],
site_log[:views],
site_log[:bandwidth],
site_log[:id]
].first
site_log[:referrers].each do |referrer,views|
opts = {stat_id: stat.id, url: referrer}
stat_referrer = StatReferrer.select(:id).where(opts).first
DB[:stat_referrers].lock('EXCLUSIVE') {
stat_referrer = StatReferrer.create opts
} if stat_referrer.nil?
DB['update stat_referrers set views=views+? where stat_id=?', views, stat.id].first
site_log[:referrers].each do |referrer, views|
stat_referrer = StatReferrer.create_or_get site_log[:id], referrer
DB['update stat_referrers set views=views+? where site_id=?', views, site_log[:id]].first
end
site_log[:view_ips].each do |ip|
site_location = StatLocation.create_or_get site_log[:id], ip
next if site_location.nil?
DB['update stat_locations set views=views+1 where id=?', site_location.id].first
end
site_log[:paths].each do |path, views|
site_path = StatPath.create_or_get site_log[:id], path
next if site_path.nil?
DB['update stat_paths set views=views+? where id=?', views, site_path.id].first
end
end
end

View file

@ -1,3 +1,23 @@
require 'geoip'
class StatLocation < Sequel::Model
many_to_one :stat
GEOCITY_PATH = './files/GeoLiteCity.dat'
RETAINMENT_PERIOD = 7.days
many_to_one :site
def self.create_or_get(site_id, ip)
geoip = GeoIP.new GEOCITY_PATH
city = geoip.city ip
return nil if city.nil?
opts = {site_id: site_id, country_code2: city.country_code2, region_name: city.region_name, city_name: city.city_name}
stat_location = where(opts).where{created_at > RETAINMENT_PERIOD.ago}.first
DB[table_name].lock('EXCLUSIVE') {
stat_location = create opts.merge(latitude: city.latitude, longitude: city.longitude, created_at: Date.today)
} if stat_location.nil?
stat_location
end
end

View file

@ -1,3 +1,15 @@
class StatPath < Sequel::Model
many_to_one :stat
RETAINMENT_PERIOD = 7.days
many_to_one :site
def self.create_or_get(site_id, name)
opts = {site_id: site_id, name: name}
stat_path = where(opts).where{created_at > RETAINMENT_PERIOD.ago}.first
DB[table_name].lock('EXCLUSIVE') {
stat_path = create opts.merge created_at: Date.today
} if stat_path.nil?
stat_path
end
end

View file

@ -1,3 +1,15 @@
class StatReferrer < Sequel::Model
many_to_one :stat
many_to_one :site
RETAINMENT_PERIOD = 7.days
def self.create_or_get(site_id, url)
opts = {site_id: site_id, url: url}
stat_referrer = where(opts).where{created_at > RETAINMENT_PERIOD.ago}.first
DB[table_name].lock('EXCLUSIVE') {
stat_referrer = create opts.merge(created_at: Date.today)
} if stat_referrer.nil?
stat_referrer
end
end

View file

@ -9,11 +9,24 @@ describe 'stats' do
@site_one = Fabricate :site
@site_two = Fabricate :site
@t = Time.now.iso8601
@s1u = @site_one.username
@s2u = @site_two.username
end
@time = Time.now
@time_iso8601 = @time.iso8601
log = [
"#{@time_iso8601} #{@site_one.username} 5000 / 67.180.75.140 http://example.com",
"#{@time_iso8601} #{@site_one.username} 5000 / 67.180.75.140 http://example.com",
"#{@time_iso8601} #{@site_one.username} 5000 / 172.56.16.152 http://example.com",
"#{@time_iso8601} #{@site_one.username} 5000 / 172.56.16.152 -",
"#{@time_iso8601} #{@site_two.username} 5000 / 67.180.75.140 http://example.com",
"#{@time_iso8601} #{@site_two.username} 5000 / 127.0.0.1 -",
"#{@time_iso8601} #{@site_two.username} 5000 /derp.html 127.0.0.2 https://example.com"
]
File.open("tests/stat_logs/#{SecureRandom.uuid}.log", 'w') do |file|
file.write log.join("\n")
end
end
=begin
it 'prunes logs for free sites' do
@free_site = Fabricate :site
@supporter_site = Fabricate :site, plan_type: 'supporter'
@ -29,28 +42,20 @@ describe 'stats' do
count_site_ids = [@free_site.id, @supporter_site.id]
expected_stat_count = (Stat::FREE_RETAINMENT_DAYS+1)*2
[@free_site, @supporter_site].each do |site|
site.stats.last.add_stat_referrer url: 'https://example.com'
end
Stat.where(site_id: count_site_ids).count.must_equal expected_stat_count
Stat.prune!
Stat.where(site_id: count_site_ids).count.must_equal expected_stat_count-1
Stat.where(site_id: @supporter_site.id).count.must_equal expected_stat_count/2
end
@free_site.stats.last.stat_referrers.length.must_equal 0
@supporter_site.stats.last.stat_referrers.length.must_equal 1
end
=end
it 'parses logfile' do
time = Time.now.iso8601
log = [
"#{time} #{@site_one.username} 5000 / 67.180.75.140 http://example.com",
"#{time} #{@site_one.username} 5000 / 67.180.75.140 http://example.com",
"#{time} #{@site_one.username} 5000 / 172.56.16.152 http://example.com",
"#{time} #{@site_one.username} 5000 / 172.56.16.152 -",
"#{time} #{@site_two.username} 5000 / 67.180.75.140 http://example.com",
"#{time} #{@site_two.username} 5000 / 127.0.0.1 -",
"#{time} #{@site_two.username} 5000 / 127.0.0.2 https://example.com"
]
File.open("tests/stat_logs/#{SecureRandom.uuid}.log", 'w') do |file|
file.write log.join("\n")
end
Stat.parse_logfiles STAT_LOGS_PATH
@site_one.reload
@ -59,9 +64,24 @@ describe 'stats' do
stat = @site_one.stats.first
stat.hits.must_equal 4
stat.views.must_equal 2
referrer = stat.stat_referrers.first
referrer.url.must_equal 'http://example.com'
referrer.views.must_equal 2
stat.bandwidth.must_equal 20_000
@site_one.stat_referrers.count.must_equal 1
stat_referrer = @site_one.stat_referrers.first
stat_referrer.url.must_equal 'http://example.com'
stat_referrer.created_at.must_equal @time.to_date
stat_referrer.views.must_equal 2
@site_one.stat_paths.length.must_equal 1
stat_path = @site_one.stat_paths.first
stat_path.name.must_equal '/'
stat_path.views.must_equal 4
@site_one.stat_locations.length.must_equal 2
stat_location = @site_one.stat_locations.first
stat_location.country_code2.must_equal 'US'
stat_location.region_name.must_equal 'CA'
stat_location.city_name.must_equal 'Menlo Park'
stat_location.views.must_equal 1
@site_two.reload
@site_two.hits.must_equal 3
@ -69,13 +89,16 @@ describe 'stats' do
stat = @site_two.stats.first
stat.hits.must_equal 3
stat.views.must_equal 3
stat.stat_referrers.length.must_equal 2
referrer = stat.stat_referrers.first
referrer.url.must_equal 'http://example.com'
referrer.views.must_equal 2
referrer = stat.stat_referrers.last
referrer.url.must_equal 'https://example.com'
referrer.views.must_equal 1
stat.bandwidth.must_equal 15_000
@site_two.stat_referrers.count.must_equal 2
stat_referrer = @site_two.stat_referrers.first
stat_referrer.url.must_equal 'http://example.com'
stat_referrer.views.must_equal 2
stat_paths = @site_two.stat_paths
stat_paths.length.must_equal 2
stat_paths.first.name.must_equal '/'
stat_paths.last.name.must_equal '/derp.html'
# [geoip.city('67.180.75.140'), geoip.city('172.56.16.152')]
end