mirror of
https://github.com/neocities/neocities.git
synced 2025-04-24 17:22:35 +02:00
Refactor logs, granularity to one day, with testing
This commit is contained in:
parent
434deee366
commit
3bca5e8839
9 changed files with 353 additions and 37 deletions
36
Rakefile
36
Rakefile
|
@ -31,39 +31,9 @@ end
|
|||
|
||||
# Folds the pending access logs under $config['logs_path'] into the site and
# daily stat counters.
#
# Stat.parse_logfiles takes the directory itself: it globs for *.log files and
# removes each file after processing it, so the task only has to hand over the
# path. (Stat has no `parse` class method; only parse_logfiles is defined.)
desc "parse logs"
task :parse_logs => [:environment] do
  Stat.parse_logfiles $config['logs_path']
end
|
||||
|
||||
|
|
55
migrations/059_refactor_stats.rb
Normal file
55
migrations/059_refactor_stats.rb
Normal file
|
@ -0,0 +1,55 @@
|
|||
# Rebuilds the stats table with one-day granularity (created_at becomes a
# Date) and extra counters, and adds the referrer/location/path breakdown
# tables keyed by stat_id.
#
# NOTE: destructive in both directions -- the stats table is dropped and
# recreated, so existing rows are not carried over.
Sequel.migration do
  up do
    # create_table! drops an existing table before creating it, so a separate
    # drop_table (which raises when the table is missing) is not needed.
    DB.create_table! :stats do
      primary_key :id
      Integer :site_id, index: true
      Date    :created_at, index: true
      Integer :hits, default: 0
      Integer :views, default: 0
      Integer :comments, default: 0
      Integer :follows, default: 0
      Integer :site_updates, default: 0
    end

    DB.create_table! :stat_referrers do
      primary_key :id
      Integer :stat_id, index: true
      String  :url
      Integer :views, default: 0
    end

    DB.create_table! :stat_locations do
      primary_key :id
      Integer :stat_id, index: true
      String  :country_code2
      String  :region_name
      String  :city_name
      Decimal :latitude
      Decimal :longitude
      Integer :views, default: 0
    end

    DB.create_table! :stat_paths do
      primary_key :id
      Integer :stat_id, index: true
      String  :name
      Integer :views, default: 0
    end
  end

  down do
    # Restore the previous stats layout (timestamp-granular created_at).
    DB.create_table! :stats do
      primary_key :id
      Integer  :site_id, index: true
      Integer  :hits, default: 0
      Integer  :views, default: 0
      DateTime :created_at, index: true
    end

    DB.drop_table :stat_referrers, :stat_locations, :stat_paths
  end
end
|
|
@ -1,5 +1,4 @@
|
|||
class SiteFile < Sequel::Model
|
||||
|
||||
unrestrict_primary_key
|
||||
plugin :update_primary_key
|
||||
many_to_one :site
|
||||
|
|
206
models/stat.rb
206
models/stat.rb
|
@ -1,3 +1,209 @@
|
|||
# Per-site traffic statistics, one row per site per day (rows are looked up
# and created by site_id + created_at date).
class Stat < Sequel::Model
  GEOCITY_PATH = './files/GeoLiteCity.dat'

  many_to_one :site
  one_to_many :stat_referrers
  one_to_many :stat_locations
  one_to_many :stat_paths

  class << self
    # Processes every *.log file directly under +path+.
    #
    # Each log line is expected to be space separated:
    #   time username size path ip referrer
    #
    # For each file, hits and unique-IP views are tallied per username, added
    # to the matching sites row and to that site's stats row for the current
    # UTC day (created on demand), and then the file is deleted.
    #
    # path - String directory containing the log files.
    #
    # Returns the Array of log file paths that were processed.
    def parse_logfiles(path)
      Dir["#{path}/*.log"].each do |log_path|
        site_logs = tally_logfile log_path
        current_day_string = Time.now.utc.to_date.to_s

        Site.select(:id, :username).where(username: site_logs.keys).all.each do |site|
          site_logs[site.username][:id] = site.id
        end

        DB.transaction do
          site_logs.each do |username, site_log|
            # A username with no matching site has no id; recording it would
            # create a stats row with a NULL site_id.
            next unless site_log[:id]

            record_site_log username, site_log, current_day_string
          end
        end

        FileUtils.rm log_path
      end
    end

    # FIXME: unfinished. +opts+ and +site_log+ are not defined in this scope,
    # so calling this method raises NameError, and the Stat built by Stat.new
    # is never saved. It looks like an abandoned extraction of the
    # find-or-create step in parse_logfiles -- confirm before relying on it.
    def get_or_create
      DB[:stats].lock 'EXCLUSIVE' do
        stat = Stat.where(opts).first
        stat ||= Stat.new opts
        stat.hits += site_log[:hits]
        stat.views += site_log[:views]
      end
    end

    private

    # Reads one log file and tallies traffic per username.
    #
    # Returns a Hash of username => {hits:, views:, view_ips:}, where views
    # counts distinct IPs seen for that username within this file.
    def tally_logfile(log_path)
      site_logs = {}

      # File.foreach closes the file even if a line raises part-way through.
      File.foreach(log_path) do |hit|
        _time, username, _size, _path, ip, referrer = hit.split ' '

        # Blank lines have no username; short lines have no referrer.
        next unless username
        next if referrer && referrer =~ /bot/i

        site_log = (site_logs[username] ||= {hits: 0, views: 0, view_ips: {}})
        site_log[:hits] += 1

        # Hash keys act as a set so the seen-before check stays constant time.
        unless site_log[:view_ips].key?(ip)
          site_log[:views] += 1
          site_log[:view_ips][ip] = true
        end
      end

      site_logs
    end

    # Adds one site's tallies to its sites row and to its stats row for +day+,
    # creating the stats row if it does not exist yet.
    def record_site_log(username, site_log, day)
      # Datasets are lazy; calling .first is what executes the statement.
      DB['update sites set hits=hits+?, views=views+? where username=?',
        site_log[:hits],
        site_log[:views],
        username
      ].first

      opts = {site_id: site_log[:id], created_at: day}
      stat = Stat.select(:id).where(opts).first

      if stat.nil?
        DB[:stats].lock('EXCLUSIVE') do
          # Look again once the lock is held: another writer may have created
          # the row between the first lookup and acquiring the lock.
          stat = Stat.select(:id).where(opts).first || Stat.create(opts)
        end
      end

      # Target the day's row by primary key; filtering on site_id alone would
      # bump every daily row the site has.
      DB['update stats set hits=hits+?, views=views+? where id=?',
        site_log[:hits],
        site_log[:views],
        stat.id
      ].first
    end
  end
end
|
||||
|
||||
=begin
|
||||
require 'io/extra'
|
||||
require 'geoip'
|
||||
|
||||
# Note: This isn't really a class right now.
|
||||
module Stat
|
||||
|
||||
|
||||
class << self
|
||||
def parse_logfiles(path)
|
||||
Dir["#{path}/*.log"].each do |logfile_path|
|
||||
parse_logfile logfile_path
|
||||
FileUtils.rm logfile_path
|
||||
end
|
||||
end
|
||||
|
||||
def parse_logfile(path)
|
||||
geoip = GeoIP.new GEOCITY_PATH
|
||||
logfile = File.open path, 'r'
|
||||
|
||||
hits = []
|
||||
|
||||
while hit = logfile.gets
|
||||
time, username, size, path, ip, referrer = hit.split ' '
|
||||
|
||||
site = Site.select(:id).where(username: username).first
|
||||
next unless site
|
||||
|
||||
paths_dataset = StatsDB[:paths]
|
||||
path_record = paths_dataset[name: path]
|
||||
path_id = path_record ? path_record[:id] : paths_dataset.insert(name: path)
|
||||
|
||||
referrers_dataset = StatsDB[:referrers]
|
||||
referrer_record = referrers_dataset[name: referrer]
|
||||
referrer_id = referrer_record ? referrer_record[:id] : referrers_dataset.insert(name: referrer)
|
||||
|
||||
location_id = nil
|
||||
|
||||
if city = geoip.city(ip)
|
||||
locations_dataset = StatsDB[:locations].select(:id)
|
||||
location_hash = {country_code2: city.country_code2, region_name: city.region_name, city_name: city.city_name}
|
||||
|
||||
location = locations_dataset.where(location_hash).first
|
||||
location_id = location ? location[:id] : locations_dataset.insert(location_hash)
|
||||
end
|
||||
|
||||
hits << [site.id, referrer_id, path_id, location_id, size, time]
|
||||
end
|
||||
|
||||
StatsDB[:hits].import(
|
||||
[:site_id, :referrer_id, :path_id, :location_id, :bytes_sent, :logged_at],
|
||||
hits
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
=begin
|
||||
def parse_logfile(path)
|
||||
hits = {}
|
||||
visits = {}
|
||||
visit_ips = {}
|
||||
|
||||
logfile = File.open path, 'r'
|
||||
|
||||
while hit = logfile.gets
|
||||
time, username, size, path, ip, referrer = hit.split ' '
|
||||
|
||||
hits[username] ||= 0
|
||||
hits[username] += 1
|
||||
visit_ips[username] = [] if !visit_ips[username]
|
||||
|
||||
unless visit_ips[username].include? ip
|
||||
visits[username] ||= 0
|
||||
visits[username] += 1
|
||||
visit_ips[username] << ip
|
||||
end
|
||||
end
|
||||
|
||||
logfile.close
|
||||
|
||||
|
||||
hits.each do |username,hitcount|
|
||||
DB['update sites set hits=hits+? where username=?', hitcount, username].first
|
||||
end
|
||||
|
||||
visits.each do |username,visitcount|
|
||||
DB['update sites set views=views+? where username=?', visitcount, username].first
|
||||
end
|
||||
end
|
||||
end
|
||||
=end
|
||||
|
||||
=begin
|
||||
def self.parse(logfile_path)
|
||||
hits = {}
|
||||
visits = {}
|
||||
visit_ips = {}
|
||||
|
||||
logfile = File.open logfile_path, 'r'
|
||||
|
||||
while hit = logfile.gets
|
||||
time, username, size, path, ip = hit.split ' '
|
||||
|
||||
hits[username] ||= 0
|
||||
hits[username] += 1
|
||||
|
||||
visit_ips[username] = [] if !visit_ips[username]
|
||||
|
||||
unless visit_ips[username].include?(ip)
|
||||
visits[username] ||= 0
|
||||
visits[username] += 1
|
||||
visit_ips[username] << ip
|
||||
end
|
||||
end
|
||||
|
||||
logfile.close
|
||||
|
||||
hits.each do |username,hitcount|
|
||||
DB['update sites set hits=hits+? where username=?', hitcount, username].first
|
||||
end
|
||||
|
||||
visits.each do |username,visitcount|
|
||||
DB['update sites set views=views+? where username=?', visitcount, username].first
|
||||
end
|
||||
end
|
||||
=end
|
||||
|
|
3
models/stat_location.rb
Normal file
3
models/stat_location.rb
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Sequel model for a location breakdown row; each row belongs to one Stat.
class StatLocation < Sequel::Model
  many_to_one(:stat)
end
|
3
models/stat_path.rb
Normal file
3
models/stat_path.rb
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Sequel model for a requested-path breakdown row; each row belongs to one Stat.
class StatPath < Sequel::Model
  many_to_one(:stat)
end
|
3
models/stat_referrer.rb
Normal file
3
models/stat_referrer.rb
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Sequel model for a referrer breakdown row; each row belongs to one Stat.
class StatReferrer < Sequel::Model
  many_to_one(:stat)
end
|
77
tests/stat_tests.rb
Normal file
77
tests/stat_tests.rb
Normal file
|
@ -0,0 +1,77 @@
|
|||
require_relative './environment.rb'
|
||||
|
||||
# Directory the stat tests write their throwaway log files into, and the glob
# that matches those files.
STAT_LOGS_PATH = 'tests/stat_logs'
STAT_LOGS_DIR_MATCH = File.join(STAT_LOGS_PATH, '*.log')
|
||||
|
||||
# Creates a uniquely named .log file under STAT_LOGS_PATH, opens it for
# writing and yields the File to the caller's block. The file is closed when
# the block finishes.
#
# Returns whatever the block returns.
def log(&block)
  # Build the path from the shared constant so the helper and the cleanup glob
  # (STAT_LOGS_DIR_MATCH) cannot drift apart.
  File.open(File.join(STAT_LOGS_PATH, "#{SecureRandom.uuid}.log"), 'w') do |f|
    yield f
  end
end
|
||||
|
||||
# Returns an ISO 8601 timestamp String for a moment between 0 and 4999
# seconds before the current time.
def random_time
  seconds_ago = rand(5000)
  moment = Time.now - seconds_ago
  moment.iso8601
end
|
||||
|
||||
describe 'stats' do
  # Start each example with an empty log directory and two fresh sites.
  before do
    Dir[STAT_LOGS_DIR_MATCH].each { |stale_log| FileUtils.rm stale_log }

    @site_one = Fabricate :site
    @site_two = Fabricate :site

    @t = Time.now.iso8601
    @s1u = @site_one.username
    @s2u = @site_two.username
  end

  # Writes 100 random hits spread over two sites and two IPs, runs the parser,
  # then checks the log file is gone and the site and stat counters add up.
  it 'parses multiple sets of logs' do
    geoip = GeoIP.new Stat::GEOCITY_PATH

    paths = ["/", "/#{SecureRandom.hex}", "/#{SecureRandom.hex}"]
    cities = ['67.180.75.140', '172.56.16.152'].map { |address| geoip.city address }
    referrers = ['-', "http://#{@site_one.host}", "https://#{@site_one.host}", "http://insaneclownpossee.com"]
    sites = [@site_one, @site_two]

    # Hits alternate between the two IPs by index.
    test_hits = Array.new(100) do |i|
      {
        time: random_time,
        username: sites[rand(sites.length)].username,
        size: rand(5000),
        path: paths[rand(paths.length)],
        ip: (i.odd? ? cities.first : cities.last).ip,
        referrer: referrers[rand(referrers.length)]
      }
    end

    log do |f|
      test_hits.each do |hit|
        f.puts hit.values_at(:time, :username, :size, :path, :ip, :referrer).join(' ')
      end
    end

    Stat.parse_logfiles STAT_LOGS_PATH

    # The parser removes each log file once it has been processed.
    Dir[STAT_LOGS_DIR_MATCH].length.must_equal 0

    sites_total = sites.inject(0) do |running_total, site|
      site.reload
      site.views.must_equal 2
      running_total + site.hits
    end

    sites_total.must_equal 100

    stats = Stat.where(site_id: sites.map(&:id)).all
    stats.length.must_equal 2

    stats.map(&:hits).inject(:+).must_equal 100
    stats.map(&:views).inject(:+).must_equal 4

    # Each site's stat row must carry exactly the hits generated for that site.
    sites.each do |site|
      generated = test_hits.count { |hit| hit[:username] == site.username }
      recorded = stats.find { |stat| stat.site.username == site.username }
      generated.must_equal recorded.hits
    end
  end
end
|
Loading…
Add table
Reference in a new issue