Refactored sql query for stats calculation

This commit is contained in:
Sergei Tsoganov 2023-12-15 15:49:29 +02:00
parent 533b10aad1
commit c1e48197cb

View file

@ -79,6 +79,195 @@ module Repp
end end
end end
def domains_by_registrar(date_to, date_from)
sql = <<-SQL
SELECT
registrar_id,
SUM(domain_count) AS total_domain_count
FROM (
-- Count domains from the domains table, excluding those in created_domains
SELECT
registrar_id::text AS registrar_id,
COUNT(*) AS domain_count
FROM
domains
WHERE
created_at >= :date_from
GROUP BY
registrar_id
UNION ALL
SELECT
(object_changes->'registrar_id'->>1)::text AS registrar_id,
COUNT(*) * -1 AS domain_count
FROM
log_domains
WHERE
event = 'create'
AND created_at > :date_to
GROUP BY
registrar_id
UNION ALL
-- Query for 'update' events and count domains transferred to a new registrar only for domains in created_domains
SELECT
(object_changes->'registrar_id'->>1)::text AS registrar_id,
COUNT(*) * -1 AS domain_count
FROM
log_domains
WHERE
event = 'update'
AND object_changes->'registrar_id' IS NOT NULL
AND created_at > :date_to
GROUP BY
registrar_id
UNION ALL
-- Query for 'update' events and count domains transferred from an old registrar only for domains in created_domains
SELECT
(object_changes->'registrar_id'->>0)::text AS registrar_id,
COUNT(*) AS domain_count
FROM
log_domains
WHERE
event = 'update'
AND object_changes->'registrar_id' IS NOT NULL
AND created_at > :date_to
GROUP BY
registrar_id
UNION ALL
-- Query for 'destroy' events and count the number of domains destroyed associated with each registrar only for domains in created_domains
SELECT
(object_changes->'registrar_id'->>0)::text AS registrar_id,
COUNT(*) AS domain_count
FROM
log_domains
WHERE
event = 'destroy'
AND created_at > :date_to
GROUP BY
registrar_id
) AS combined
GROUP BY
registrar_id;
SQL
results = ActiveRecord::Base.connection.execute(
ActiveRecord::Base.send(:sanitize_sql_array, [sql, date_from: date_from, date_to: date_to])
).each_with_object({}) do |row, hash|
hash[row['registrar_id']] = row['total_domain_count'].to_i
end
results
end
# def domains_by_registrar(date_to, date_from)
# p 'Count the number of domains created by each registrar'
# sql = <<-SQL
# WITH created_domains AS (
# SELECT
# (object_changes->'registrar_id'->>1)::text AS registrar_id,
# (object_changes->'name'->>1)::text AS domain_name
# FROM
# log_domains
# WHERE
# event = 'create'
# AND created_at BETWEEN :date_from AND :date_to
# )
# SELECT
# registrar_id,
# SUM(domain_count) AS total_domain_count
# FROM (
# -- Count domains from created_domains
# SELECT
# registrar_id,
# COUNT(*) AS domain_count
# FROM
# created_domains
# GROUP BY
# registrar_id
# UNION ALL
# -- Query for 'update' events and count domains transferred to a new registrar only for domains in created_domains
# SELECT
# (object_changes->'registrar_id'->>1)::text AS registrar_id,
# COUNT(*) AS domain_count
# FROM
# log_domains
# WHERE
# event = 'update'
# AND object_changes->'registrar_id' IS NOT NULL
# AND (object->'name')::text IN (SELECT domain_name FROM created_domains)
# AND created_at BETWEEN :date_from AND :date_to
# GROUP BY
# registrar_id
# UNION ALL
# -- Query for 'update' events and count domains transferred from an old registrar only for domains in created_domains
# SELECT
# (object_changes->'registrar_id'->>0)::text AS registrar_id,
# COUNT(*) * -1 AS domain_count
# FROM
# log_domains
# WHERE
# event = 'update'
# AND object_changes->'registrar_id' IS NOT NULL
# AND (object->'name')::text IN (SELECT domain_name FROM created_domains)
# AND created_at BETWEEN :date_from AND :date_to
# GROUP BY
# registrar_id
# UNION ALL
# -- Query for 'destroy' events and count the number of domains destroyed associated with each registrar only for domains in created_domains
# SELECT
# (object_changes->'registrar_id'->>0)::text AS registrar_id,
# COUNT(*) * -1 AS domain_count
# FROM
# log_domains
# WHERE
# event = 'destroy'
# AND (object_changes->'name'->>0)::text IN (SELECT domain_name FROM created_domains)
# AND created_at BETWEEN :date_from AND :date_to
# GROUP BY
# registrar_id
# UNION ALL
# -- Count domains from the domains table, excluding those in created_domains
# SELECT
# registrar_id::text AS registrar_id,
# COUNT(*) AS domain_count
# FROM
# domains
# WHERE
# name NOT IN (SELECT domain_name FROM created_domains)
# AND created_at BETWEEN :date_from AND :date_to
# GROUP BY
# registrar_id
# ) AS combined
# GROUP BY
# registrar_id;
# SQL
# results = ActiveRecord::Base.connection.execute(
# ActiveRecord::Base.send(:sanitize_sql_array, [sql, date_from: date_from, date_to: date_to])
# ).each_with_object({}) do |row, hash|
# hash[row['registrar_id']] = row['total_domain_count'].to_i
# end
# p results
# end
# def domains_by_registrar(date_to, date_from) # def domains_by_registrar(date_to, date_from)
# log_domains_del = log_domains(event: 'destroy', date_to: date_to, date_from: date_from) # log_domains_del = log_domains(event: 'destroy', date_to: date_to, date_from: date_from)
# log_domains_trans = log_domains(event: 'update', date_to: date_to, date_from: date_from) # log_domains_trans = log_domains(event: 'update', date_to: date_to, date_from: date_from)
@ -91,40 +280,40 @@ module Repp
# summarize([group(log_domains_del), group(log_domains_trans), domains_grouped]) # summarize([group(log_domains_del), group(log_domains_trans), domains_grouped])
# end # end
def domains_by_registrar(date_to, date_from) # def domains_by_registrar(date_to, date_from)
domain_versions = ::Version::DomainVersion.where('object_changes IS NOT NULL') # domain_versions = ::Version::DomainVersion.where('object_changes IS NOT NULL')
.where('created_at >= ? AND created_at <= ?', date_from, date_to) # .where('created_at >= ? AND created_at <= ?', date_from, date_to)
.select(:event, :object, :object_changes) # .select(:event, :object, :object_changes)
registrar_counts = Hash.new(0) # registrar_counts = Hash.new(0)
processed_domains = [] # processed_domains = []
Rails.logger.info "Processing total #{domain_versions.size} log_domain records" # Rails.logger.info "Processing total #{domain_versions.size} log_domain records"
domain_versions.each do |v| # domain_versions.each do |v|
registrar_ids = v.object_changes['registrar_id'] # registrar_ids = v.object_changes['registrar_id']
next if registrar_ids.nil? || registrar_ids.empty? # next if registrar_ids.nil? || registrar_ids.empty?
case v.event # case v.event
when 'create' # when 'create'
processed_domains << v.object_changes['name'][1] # processed_domains << v.object_changes['name'][1]
registrar_counts[registrar_ids[1].to_s] += 1 # registrar_counts[registrar_ids[1].to_s] += 1
when 'update' # when 'update'
if processed_domains.include?(v.object['name']) # if processed_domains.include?(v.object['name'])
registrar_counts[registrar_ids[0].to_s] -= 1 # registrar_counts[registrar_ids[0].to_s] -= 1
registrar_counts[registrar_ids[1].to_s] += 1 # registrar_counts[registrar_ids[1].to_s] += 1
else # else
registrar_counts[registrar_ids[1].to_s] += 1 # registrar_counts[registrar_ids[1].to_s] += 1
processed_domains << v.object['name'] # processed_domains << v.object['name']
end # end
when 'destroy' # when 'destroy'
registrar_counts[registrar_ids[0].to_s] -= 1 # registrar_counts[registrar_ids[0].to_s] -= 1
end # end
end # end
current_domains_grouped = ::Domain.where('created_at <= ? AND created_at >= ?', date_to, date_from) # current_domains_grouped = ::Domain.where('created_at <= ? AND created_at >= ?', date_to, date_from)
.where.not(name: processed_domains.uniq) # .where.not(name: processed_domains.uniq)
.group(:registrar_id).count.stringify_keys # .group(:registrar_id).count.stringify_keys
summarize([registrar_counts, current_domains_grouped]) # summarize([registrar_counts, current_domains_grouped])
end # end
def summarize(arr) def summarize(arr)
arr.inject { |memo, el| memo.merge(el) { |_, old_v, new_v| old_v + new_v } } arr.inject { |memo, el| memo.merge(el) { |_, old_v, new_v| old_v + new_v } }
@ -138,7 +327,7 @@ module Repp
.where("object ->> 'created_at' >= ?", date_from) .where("object ->> 'created_at' >= ?", date_from)
.select("DISTINCT ON (object ->> 'name') object, created_at") .select("DISTINCT ON (object ->> 'name') object, created_at")
.order(Arel.sql("object ->> 'name', created_at desc")) .order(Arel.sql("object ->> 'name', created_at desc"))
end end
def group(domains) def group(domains)
domains.group_by { |ld| ld.object['registrar_id'].to_s } domains.group_by { |ld| ld.object['registrar_id'].to_s }