Refactor log parsing: aggregate stats at one-day granularity, with tests

This commit is contained in:
Kyle Drake 2015-04-29 18:18:02 -07:00
parent 434deee366
commit 3bca5e8839
9 changed files with 353 additions and 37 deletions

View file

@ -31,39 +31,9 @@ end
desc "parse logs"
task :parse_logs => [:environment] do
  # Stat.parse_logfiles globs "<path>/*.log" itself, records the totals and
  # removes each file once it has been processed, so the task only needs to
  # hand over the configured log directory.
  Stat.parse_logfiles $config['logs_path']
end

View file

@ -0,0 +1,55 @@
# Rebuilds the stats table with a Date (rather than DateTime) created_at and
# extra per-row counters, and adds the stat_referrers, stat_locations and
# stat_paths tables, each carrying a stat_id and a views counter.
Sequel.migration do
  up {
    # create_table! replaces an existing table on its own, so no separate
    # drop_table is issued first (a bare drop_table raises when the table is
    # missing).
    DB.create_table! :stats do
      primary_key :id
      Integer :site_id, index: true
      Date :created_at, index: true
      Integer :hits, default: 0
      Integer :views, default: 0
      Integer :comments, default: 0
      Integer :follows, default: 0
      Integer :site_updates, default: 0
    end

    DB.create_table! :stat_referrers do
      primary_key :id
      Integer :stat_id, index: true
      String :url
      Integer :views, default: 0
    end

    DB.create_table! :stat_locations do
      primary_key :id
      Integer :stat_id, index: true
      String :country_code2
      String :region_name
      String :city_name
      Decimal :latitude
      Decimal :longitude
      Integer :views, default: 0
    end

    DB.create_table! :stat_paths do
      primary_key :id
      Integer :stat_id, index: true
      String :name
      Integer :views, default: 0
    end
  }

  down {
    # Restore the previous stats layout (DateTime created_at, hits/views only).
    DB.create_table! :stats do
      primary_key :id
      Integer :site_id, index: true
      Integer :hits, default: 0
      Integer :views, default: 0
      DateTime :created_at, index: true
    end

    DB.drop_table :stat_referrers
    DB.drop_table :stat_locations
    DB.drop_table :stat_paths
  }
end

View file

@ -1,6 +1,5 @@
# Sequel model for a file record; each SiteFile belongs to one Site.
class SiteFile < Sequel::Model
  unrestrict_primary_key
  plugin :update_primary_key
  many_to_one :site
end

View file

@ -1,3 +1,209 @@
# Traffic statistics for a site. Rows are looked up by site_id plus a
# created_at day, and each row has many referrers, locations and paths.
class Stat < Sequel::Model
  GEOCITY_PATH = './files/GeoLiteCity.dat'

  many_to_one :site
  one_to_many :stat_referrers
  one_to_many :stat_locations
  one_to_many :stat_paths

  class << self
    # Processes every *.log file directly under +path+: tallies hits and
    # views per username, adds them to the sites table and to the current
    # day's stats row for each site, then deletes the log file.
    #
    # A log line is whitespace-separated:
    #   time username size path ip referrer
    #
    # @param path [String] directory containing the log files
    def parse_logfiles(path)
      Dir["#{path}/*.log"].each do |log_path|
        site_logs = tally_logfile log_path

        current_day_string = Time.now.utc.to_date.to_s

        Site.select(:id, :username).where(username: site_logs.keys).all.each do |site|
          site_logs[site.username][:id] = site.id
        end

        DB.transaction do
          site_logs.each do |username, site_log|
            # A username with no site row has nothing to credit; skipping it
            # also avoids creating a stats row with a nil site_id.
            next unless site_log[:id]

            DB['update sites set hits=hits+?, views=views+? where username=?',
              site_log[:hits],
              site_log[:views],
              username
            ].first

            opts = {site_id: site_log[:id], created_at: current_day_string}
            stat = Stat.select(:id).where(opts).first
            DB[:stats].lock('EXCLUSIVE') { stat = Stat.create opts } if stat.nil?

            # Target the day's row by id. Filtering on site_id alone would
            # add these totals to every stats row the site has.
            DB[
              'update stats set hits=hits+?, views=views+? where id=?',
              site_log[:hits],
              site_log[:views],
              stat.id
            ].first
          end
        end

        FileUtils.rm log_path
      end
    end

    # NOTE(review): `opts` and `site_log` are neither parameters nor locals
    # here, and the record is never saved, so this looks unfinished. Nothing
    # visible in this file calls it; confirm before relying on it.
    def get_or_create
      DB[:stats].lock 'EXCLUSIVE' do
        stat = Stat.where(opts).first
        stat ||= Stat.new opts
        stat.hits += site_log[:hits]
        stat.views += site_log[:views]
      end
    end

    private

    # Reads one log file and returns {username => {hits:, views:, view_ips:}}.
    # Every counted line is a hit; the first line from each IP for a username
    # is also a view. Lines whose referrer contains "bot" are ignored.
    def tally_logfile(log_path)
      site_logs = {}

      # File.foreach closes the file even if a line raises.
      File.foreach(log_path) do |hit|
        _time, username, _size, _path, ip, referrer = hit.split ' '
        next if username.nil?              # blank line
        next if referrer.to_s =~ /bot/i    # referrer is nil on short lines

        site_log = (site_logs[username] ||= {hits: 0, views: 0, view_ips: {}})
        site_log[:hits] += 1

        # Hash keys give constant-time "seen this IP already?" checks.
        next if site_log[:view_ips].key?(ip)
        site_log[:views] += 1
        site_log[:view_ips][ip] = true
      end

      site_logs
    end
  end
end
=begin
require 'io/extra'
require 'geoip'
# Note: This isn't really a class right now.
module Stat
class << self
def parse_logfiles(path)
Dir["#{path}/*.log"].each do |logfile_path|
parse_logfile logfile_path
FileUtils.rm logfile_path
end
end
def parse_logfile(path)
geoip = GeoIP.new GEOCITY_PATH
logfile = File.open path, 'r'
hits = []
while hit = logfile.gets
time, username, size, path, ip, referrer = hit.split ' '
site = Site.select(:id).where(username: username).first
next unless site
paths_dataset = StatsDB[:paths]
path_record = paths_dataset[name: path]
path_id = path_record ? path_record[:id] : paths_dataset.insert(name: path)
referrers_dataset = StatsDB[:referrers]
referrer_record = referrers_dataset[name: referrer]
referrer_id = referrer_record ? referrer_record[:id] : referrers_dataset.insert(name: referrer)
location_id = nil
if city = geoip.city(ip)
locations_dataset = StatsDB[:locations].select(:id)
location_hash = {country_code2: city.country_code2, region_name: city.region_name, city_name: city.city_name}
location = locations_dataset.where(location_hash).first
location_id = location ? location[:id] : locations_dataset.insert(location_hash)
end
hits << [site.id, referrer_id, path_id, location_id, size, time]
end
StatsDB[:hits].import(
[:site_id, :referrer_id, :path_id, :location_id, :bytes_sent, :logged_at],
hits
)
end
end
end
=begin
def parse_logfile(path)
hits = {}
visits = {}
visit_ips = {}
logfile = File.open path, 'r'
while hit = logfile.gets
time, username, size, path, ip, referrer = hit.split ' '
hits[username] ||= 0
hits[username] += 1
visit_ips[username] = [] if !visit_ips[username]
unless visit_ips[username].include? ip
visits[username] ||= 0
visits[username] += 1
visit_ips[username] << ip
end
end
logfile.close
hits.each do |username,hitcount|
DB['update sites set hits=hits+? where username=?', hitcount, username].first
end
visits.each do |username,visitcount|
DB['update sites set views=views+? where username=?', visitcount, username].first
end
end
end
=end
=begin
def self.parse(logfile_path)
hits = {}
visits = {}
visit_ips = {}
logfile = File.open logfile_path, 'r'
while hit = logfile.gets
time, username, size, path, ip = hit.split ' '
hits[username] ||= 0
hits[username] += 1
visit_ips[username] = [] if !visit_ips[username]
unless visit_ips[username].include?(ip)
visits[username] ||= 0
visits[username] += 1
visit_ips[username] << ip
end
end
logfile.close
hits.each do |username,hitcount|
DB['update sites set hits=hits+? where username=?', hitcount, username].first
end
visits.each do |username,visitcount|
DB['update sites set views=views+? where username=?', visitcount, username].first
end
end
=end

3
models/stat_location.rb Normal file
View file

@ -0,0 +1,3 @@
# Sequel model for a location row; each one belongs to a single Stat.
class StatLocation < Sequel::Model
  many_to_one :stat
end

3
models/stat_path.rb Normal file
View file

@ -0,0 +1,3 @@
# Sequel model for a path row; each one belongs to a single Stat.
class StatPath < Sequel::Model
  many_to_one :stat
end

3
models/stat_referrer.rb Normal file
View file

@ -0,0 +1,3 @@
# Sequel model for a referrer row; each one belongs to a single Stat.
class StatReferrer < Sequel::Model
  many_to_one :stat
end

View file

@ -50,4 +50,4 @@ I18n.enforce_available_locales = true
# Use the :test delivery method for Mail in this environment.
Mail.defaults do
  delivery_method :test
end

77
tests/stat_tests.rb Normal file
View file

@ -0,0 +1,77 @@
require_relative './environment.rb'
# Directory the spec hands to Stat.parse_logfiles.
STAT_LOGS_PATH = 'tests/stat_logs'
# Glob matching every .log file in that directory; used to clear it before each example.
STAT_LOGS_DIR_MATCH = "#{STAT_LOGS_PATH}/*.log"
# Creates a uniquely named .log file under STAT_LOGS_PATH, opens it for
# writing and yields the file handle to the caller's block. The file is closed
# when the block returns, and the block's value is returned.
def log(&block)
  # Build the path from STAT_LOGS_PATH so the fixture always lands in the
  # directory the spec later passes to the parser.
  log_path = File.join STAT_LOGS_PATH, "#{SecureRandom.uuid}.log"
  File.open(log_path, 'w') { |f| yield f }
end
# Returns an ISO 8601 timestamp string for a moment up to 5000 seconds
# (exclusive) before now.
def random_time
  seconds_back = rand(5000)
  moment = Time.now - seconds_back
  moment.iso8601
end
# End-to-end spec: writes a synthetic log file, runs Stat.parse_logfiles over
# it and checks the hit/view totals recorded on the sites and stats rows.
describe 'stats' do
  before do
    # Clear out any log files left in the fixture directory.
    Dir[STAT_LOGS_DIR_MATCH].each { |leftover| FileUtils.rm leftover }
    @site_one = Fabricate :site
    @site_two = Fabricate :site
    @t = Time.now.iso8601
    @s1u = @site_one.username
    @s2u = @site_two.username
  end

  it 'parses multiple sets of logs' do
    geoip = GeoIP.new Stat::GEOCITY_PATH
    paths = ["/", "/#{SecureRandom.hex}", "/#{SecureRandom.hex}"]
    cities = [geoip.city('67.180.75.140'), geoip.city('172.56.16.152')]
    referrers = ['-', "http://#{@site_one.host}", "https://#{@site_one.host}", "http://insaneclownpossee.com"]
    sites = [@site_one, @site_two]

    # 100 hits assigned to a random site; odd and even indexes use the two
    # different visitor IPs.
    test_hits = (0...100).map do |i|
      visitor = i.odd? ? cities.first : cities.last
      {
        time: random_time,
        username: sites[rand(sites.length)].username,
        size: rand(5000),
        path: paths[rand(paths.length)],
        ip: visitor.ip,
        referrer: referrers[rand(referrers.length)]
      }
    end

    log do |f|
      test_hits.each do |h|
        f.puts "#{h[:time]} #{h[:username]} #{h[:size]} #{h[:path]} #{h[:ip]} #{h[:referrer]}"
      end
    end

    Stat.parse_logfiles STAT_LOGS_PATH

    # Processed log files are expected to be removed.
    Dir["#{STAT_LOGS_PATH}/*.log"].length.must_equal 0

    sites_total = [@site_one, @site_two].inject(0) do |total, site|
      site.reload
      running = total + site.hits
      site.views.must_equal 2
      running
    end
    sites_total.must_equal 100

    stats = Stat.where(site_id: [@site_one.id, @site_two.id]).all
    stats.length.must_equal 2
    stats.map(&:hits).inject(:+).must_equal 100
    stats.map(&:views).inject(:+).must_equal 4

    # Each site's stats row must carry exactly the hits generated for it.
    sites.each do |site|
      expected_hits = stats.find { |s| s.site.username == site.username }.hits
      test_hits.count { |h| h[:username] == site.username }.must_equal(expected_hits)
    end
  end
end