Clump logs into a single hash before importing to the database

This commit is contained in:
Kyle Drake 2021-01-13 14:46:52 -06:00
parent 4115ebffa7
commit f33f661f08
2 changed files with 58 additions and 54 deletions

View file

@@ -23,9 +23,9 @@ class Stat < Sequel::Model
cache_control_ips = $config['cache_control_ips']
Dir["#{path}/*.log.gz"].each do |log_path|
site_logs = {}
site_logs = {}
Dir["#{path}/*.log.gz"].each do |log_path|
gzfile = File.open log_path, 'r'
logfile = Zlib::GzipReader.new gzfile
@@ -83,65 +83,58 @@ class Stat < Sequel::Model
site_logs[log_time][username][:paths][path] ||= 0
site_logs[log_time][username][:paths][path] += 1
end
logfile.close
FileUtils.rm log_path
rescue => e
puts "Log parse exception: #{e.inspect}"
logfile.close
FileUtils.mv log_path, log_path.gsub('.log', '.brokenlog')
next
end
logfile.close
DB[:stats].lock('EXCLUSIVE') do
DB.transaction do
site_logs.each do |log_time, usernames|
Site.select(:id, :username).where(username: usernames.keys).all.each do |site|
usernames[site.username][:id] = site.id
end
usernames.each do |username, site_log|
next unless site_log[:id]
opts = {site_id: site_log[:id], created_at: log_time.to_date.to_s}
stat = Stat.select(:id).where(opts).first
stat = Stat.create opts if stat.nil?
DB['update sites set hits=hits+?, views=views+? where id=?',
site_log[:hits],
site_log[:views],
site_log[:id]
].first
DB[
'update stats set hits=hits+?, views=views+?, bandwidth=bandwidth+? where id=?',
site_log[:hits],
site_log[:views],
site_log[:bandwidth],
stat.id
].first
end
end
end
end
FileUtils.rm log_path
#FileUtils.rm log_path
end
DB[:daily_site_stats].lock('EXCLUSIVE') do
DB.transaction do
total_site_stats.each do |time, stats|
opts = {created_at: time.to_date.to_s}
stat = DailySiteStat.select(:id).where(opts).first
stat = DailySiteStat.create opts if stat.nil?
DB[
'update daily_site_stats set hits=hits+?, views=views+?, bandwidth=bandwidth+? where created_at=?',
stats[:hits],
stats[:views],
stats[:bandwidth],
time.to_date
].first
end
site_logs.each do |log_time, usernames|
Site.select(:id, :username).where(username: usernames.keys).all.each do |site|
usernames[site.username][:id] = site.id
end
usernames.each do |username, site_log|
next unless site_log[:id]
opts = {site_id: site_log[:id], created_at: log_time.to_date.to_s}
stat = Stat.select(:id).where(opts).first
stat = Stat.create opts if stat.nil?
DB['update sites set hits=hits+?, views=views+? where id=?',
site_log[:hits],
site_log[:views],
site_log[:id]
].first
DB[
'update stats set hits=hits+?, views=views+?, bandwidth=bandwidth+? where id=?',
site_log[:hits],
site_log[:views],
site_log[:bandwidth],
stat.id
].first
end
end
total_site_stats.each do |time, stats|
opts = {created_at: time.to_date.to_s}
stat = DailySiteStat.select(:id).where(opts).first
stat = DailySiteStat.create opts if stat.nil?
DB[
'update daily_site_stats set hits=hits+?, views=views+?, bandwidth=bandwidth+? where created_at=?',
stats[:hits],
stats[:views],
stats[:bandwidth],
time.to_date
].first
end
end
end

View file

@@ -12,7 +12,7 @@ describe 'stats' do
@time = Time.now
@time_iso8601 = @time.iso8601
log = [
@log = [
"#{@time_iso8601}\t#{@site_one.username}\t5000\t/\t67.180.75.140\thttp://example.com",
"#{@time_iso8601}\t#{@site_one.username}\t5000\t/\t67.180.75.140\thttp://example.com",
"#{@time_iso8601}\t#{@site_one.username}\t5000\t/\t172.56.16.152\thttp://example.com",
@@ -23,10 +23,21 @@ describe 'stats' do
]
Zlib::GzipWriter.open("tests/stat_logs/#{SecureRandom.uuid}.log.gz") do |gz|
gz.write log.join("\n")
gz.write @log.join("\n")
end
end
it 'works with two logfiles' do
Zlib::GzipWriter.open("tests/stat_logs/#{SecureRandom.uuid}.log.gz") do |gz|
gz.write @log.join("\n")
end
Stat.parse_logfiles STAT_LOGS_PATH
stat = @site_one.stats.first
stat.hits.must_equal 8
stat.bandwidth.must_equal 40000
stat.views.must_equal 2
end
it 'deals with spaces in paths' do
@site = Fabricate :site