Add transaction report generation code

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=167311547
This commit is contained in:
larryruili 2017-09-01 12:45:06 -07:00 committed by jianglai
parent f26bfbf632
commit 06f3215659
25 changed files with 1284 additions and 75 deletions

View file

@ -27,7 +27,7 @@ SELECT
SUM(IF(metricName = 'web-whois-queries', count, 0)) AS web_whois_queries,
-- We don't support searchable WHOIS.
0 AS searchable_whois_queries,
-- DNS queries for UDP/TCP are all assumed to be recevied/responded.
-- DNS queries for UDP/TCP are all assumed to be received/responded.
SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_received,
SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_responded,
SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_received,
@ -66,7 +66,7 @@ SELECT
-- towards a given TLD.
FROM (
SELECT tldStr as tld
FROM `%PROJECT_ID%.%LATEST_DATASTORE_EXPORT%.%REGISTRY_TABLE%`
FROM `%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRY_TABLE%`
WHERE tldType = 'REAL'
) as RealTlds
CROSS JOIN(
@ -97,4 +97,3 @@ CROSS JOIN(
WHERE RealTlds.tld = TldMetrics.tld OR TldMetrics.tld IS NULL
GROUP BY tld
ORDER BY tld

View file

@ -0,0 +1,73 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Determine the number of attempted adds each registrar made.
-- Since the specification requests all 'attempted' adds, we regex the
-- monthly App Engine logs, searching for all create commands and associating
-- them with their corresponding registrars.
-- Example log generated by FlowReporter in App Engine logs:
--google.registry.flows.FlowReporter
-- recordToLogs: FLOW-LOG-SIGNATURE-METADATA:
--{"serverTrid":"oNwL2J2eRya7bh7c9oHIzg==-2360a","clientId":"ipmirror"
-- ,"commandType":"hello", "resourceType":"","flowClassName":"HelloFlow"
-- ,"targetId":"","targetIds":[],"tld":"",
-- "tlds":[],"icannActivityReportField":""}
-- This outer select just converts the registrar's clientId to their name.
SELECT
tld,
registrar_table.registrarName AS registrar_name,
'ATTEMPTED_ADDS' AS metricName,
count AS metricValue
FROM (
SELECT
JSON_EXTRACT_SCALAR(json, '$.tld') AS tld,
JSON_EXTRACT_SCALAR(json, '$.clientId') AS clientId,
COUNT(json) AS count
FROM (
-- Extract JSON metadata package from monthly logs
SELECT
REGEXP_EXTRACT(logMessages, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM (
SELECT
protoPayload.resource AS requestPath,
ARRAY(
SELECT logMessage
FROM UNNEST(protoPayload.line)) AS logMessage
FROM
`%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%*`
WHERE _TABLE_SUFFIX
BETWEEN '%FIRST_DAY_OF_MONTH%'
AND '%LAST_DAY_OF_MONTH%')
JOIN UNNEST(logMessage) AS logMessages
-- Look for metadata logs from epp and registrar console requests
WHERE requestPath IN ('/_dr/epp', '/_dr/epptool', '/registrar-xhr')
AND STARTS_WITH(logMessages, "%METADATA_LOG_PREFIX%")
-- Look for domain creates
AND REGEXP_CONTAINS(
logMessages, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(
logMessages, r'"prober-[a-z]{2}-((any)|(canary))"') )
GROUP BY tld, clientId ) AS logs_table
JOIN
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRAR_TABLE%`
AS registrar_table
ON logs_table.clientId = registrar_table.__key__.name
ORDER BY tld, registrar_name

View file

@ -27,4 +27,4 @@ SELECT
FROM
`%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%*`
WHERE
_TABLE_SUFFIX BETWEEN '%START_OF_MONTH%' AND '%END_OF_MONTH%'
_TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%'

View file

@ -0,0 +1,30 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Gather a list of all tld-registrar pairs, with their IANA IDs.
-- This establishes which registrars will appear in the reports.
SELECT
allowed_tlds AS tld,
registrarName AS registrar_name,
ianaIdentifier AS iana_id
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRAR_TABLE%`,
UNNEST(allowedTlds) as allowed_tlds
WHERE (type = 'REAL' OR type = 'INTERNAL')
-- Filter out prober data
AND NOT ENDS_WITH(allowed_tlds, "test")
ORDER BY tld, registrarName

View file

@ -0,0 +1,38 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Determine the number of domains each registrar sponsors per tld.
-- This is just the number of fullyQualifiedDomainNames under each
-- tld-registrar pair.
SELECT
tld,
registrarName as registrar_name,
'TOTAL_DOMAINS' as metricName,
COUNT(fullyQualifiedDomainName) as metricValue
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%DOMAINBASE_TABLE%`
AS domain_table
JOIN
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRAR_TABLE%`
AS registrar_table
ON
currentSponsorClientId = registrar_table.__key__.name
WHERE
domain_table._d = "DomainResource"
AND (registrar_table.type = "REAL" OR registrar_table.type = "INTERNAL")
GROUP BY tld, registrarName
ORDER BY tld, registrarName

View file

@ -0,0 +1,56 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Determine the number of referenced nameservers for a registrar's domains.
-- We count the number of unique hosts under each tld-registrar combo by
-- collecting all domains' listed hosts that were still valid at the
-- end of the reporting month.
SELECT
tld,
registrarName AS registrar_name,
'TOTAL_NAMESERVERS' AS metricName,
COUNT(fullyQualifiedHostName) AS metricValue
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%HOSTRESOURCE_TABLE%` AS host_table
JOIN (
SELECT
__key__.name AS clientId,
registrarName
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRAR_TABLE%`
WHERE
type = 'REAL'
OR type = 'INTERNAL') AS registrar_table
ON
currentSponsorClientId = registrar_table.clientId
JOIN (
SELECT
tld,
hosts.name AS referencedHostName
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%DOMAINBASE_TABLE%`,
UNNEST(nsHosts) AS hosts
WHERE _d = 'DomainResource'
AND creationTime <= TIMESTAMP("%LATEST_REPORT_TIME%")
AND deletionTime > TIMESTAMP("%LATEST_REPORT_TIME%") ) AS domain_table
ON
host_table.__key__.name = domain_table.referencedHostName
WHERE creationTime <= TIMESTAMP("%LATEST_REPORT_TIME%")
AND deletionTime > TIMESTAMP("%LATEST_REPORT_TIME%")
GROUP BY tld, registrarName
ORDER BY tld, registrarName

View file

@ -0,0 +1,78 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Counts the number of mutating transactions each registrar made.
-- We populate the fields through explicit logging of
-- DomainTransactionRecords, which contain all necessary information for
-- reporting (such as reporting time, report field, report amount, etc.
-- A special note on transfers: we only record 'TRANSFER_SUCCESSFUL' or
-- 'TRANSFER_NACKED', and we can infer the gaining and losing parties
-- from the enclosing HistoryEntry's clientId and otherClientId
-- respectively. This query templates the client ID, field for transfer
-- success, field for transfer nacks and default field. This allows us to
-- create one query for TRANSFER_GAINING and the other report fields,
-- and one query for TRANSFER_LOSING fields from the same template.
-- This outer select just converts the registrar's clientId to their name.
SELECT
tld,
registrar_table.registrarName AS registrar_name,
metricName,
metricValue
FROM (
SELECT
tld,
clientId,
CASE
WHEN field = 'TRANSFER_SUCCESSFUL' THEN '%TRANSFER_SUCCESS_FIELD%'
WHEN field = 'TRANSFER_NACKED' THEN '%TRANSFER_NACKED_FIELD%'
ELSE %DEFAULT_FIELD%
END AS metricName,
SUM(amount) AS metricValue
FROM (
SELECT
entries.%CLIENT_ID% AS clientId,
entries.domainTransactionRecords.tld[SAFE_OFFSET(index)] AS tld,
entries.domainTransactionRecords.reportingTime[SAFE_OFFSET(index)]
AS reportingTime,
entries.domainTransactionRecords.reportField[SAFE_OFFSET(index)]
AS field,
entries.domainTransactionRecords.reportAmount[SAFE_OFFSET(index)]
AS amount
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%HISTORYENTRY_TABLE%`
AS entries,
-- This allows us to 'loop' through the arrays in parallel by index
UNNEST(GENERATE_ARRAY(0, ARRAY_LENGTH(
entries.domainTransactionRecords.tld) - 1)) AS index
-- Ignore null entries
WHERE entries.domainTransactionRecords IS NOT NULL )
-- Only look at this month's data
WHERE reportingTime
BETWEEN TIMESTAMP('%EARLIEST_REPORT_TIME%')
AND TIMESTAMP('%LATEST_REPORT_TIME%')
-- Ignore prober data
AND NOT ENDS_WITH(tld, "test")
GROUP BY
tld,
clientId,
field ) AS counts_table
JOIN
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRAR_TABLE%`
AS registrar_table
ON
counts_table.clientId = registrar_table.__key__.name

View file

@ -0,0 +1,92 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Construct the transaction reports' rows from the intermediary data views.
-- This query pulls from all intermediary tables to create the activity
-- report csv, via a table transpose and sum over all activity report fields.
SELECT
registrars.tld as tld,
registrars.registrar_name as registrar_name,
registrars.iana_id as iana_id,
SUM(IF(metrics.metricName = 'TOTAL_DOMAINS', metrics.metricValue, 0)) AS total_domains,
SUM(IF(metrics.metricName = 'TOTAL_NAMESERVERS', metrics.metricValue, 0)) AS total_nameservers,
SUM(IF(metrics.metricName = 'NET_ADDS_1_YR', metrics.metricValue, 0)) AS net_adds_1_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_2_YR', metrics.metricValue, 0)) AS net_adds_2_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_3_YR', metrics.metricValue, 0)) AS net_adds_3_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_4_YR', metrics.metricValue, 0)) AS net_adds_4_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_5_YR', metrics.metricValue, 0)) AS net_adds_5_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_6_YR', metrics.metricValue, 0)) AS net_adds_6_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_7_YR', metrics.metricValue, 0)) AS net_adds_7_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_8_YR', metrics.metricValue, 0)) AS net_adds_8_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_9_YR', metrics.metricValue, 0)) AS net_adds_9_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_10_Yr', metrics.metricValue, 0)) AS net_adds_10_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_1_YR', metrics.metricValue, 0)) AS net_renews_1_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_2_YR', metrics.metricValue, 0)) AS net_renews_2_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_3_YR', metrics.metricValue, 0)) AS net_renews_3_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_4_YR', metrics.metricValue, 0)) AS net_renews_4_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_5_YR', metrics.metricValue, 0)) AS net_renews_5_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_6_YR', metrics.metricValue, 0)) AS net_renews_6_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_7_YR', metrics.metricValue, 0)) AS net_renews_7_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_8_YR', metrics.metricValue, 0)) AS net_renews_8_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_9_YR', metrics.metricValue, 0)) AS net_renews_9_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_10_YR', metrics.metricValue, 0)) AS net_renews_10_yr,
SUM(IF(metrics.metricName = 'TRANSFER_GAINING_SUCCESSFUL', metrics.metricValue, 0)) AS transfer_gaining_successful,
SUM(IF(metrics.metricName = 'TRANSFER_GAINING_NACKED', metrics.metricValue, 0)) AS transfer_gaining_nacked,
SUM(IF(metrics.metricName = 'TRANSFER_LOSING_SUCCESSFUL', metrics.metricValue, 0)) AS transfer_losing_successful,
SUM(IF(metrics.metricName = 'TRANSFER_LOSING_NACKED', metrics.metricValue, 0)) AS transfer_losing_nacked,
-- We don't interact with transfer disputes
0 AS transfer_disputed_won,
0 AS transfer_disputed_lost,
0 AS transfer_disputed_nodecision,
SUM(IF(metrics.metricName = 'DELETED_DOMAINS_GRACE', metrics.metricValue, 0)) AS deleted_domains_grace,
SUM(IF(metrics.metricName = 'DELETED_DOMAINS_NOGRACE', metrics.metricValue, 0)) AS deleted_domains_nograce,
SUM(IF(metrics.metricName = 'RESTORED_DOMAINS', metrics.metricValue, 0)) AS restored_domains,
-- We don't require restore reports
0 AS restored_noreport,
-- We don't enforce AGP limits right now
0 AS agp_exemption_requests,
0 AS agp_exemptions_granted,
0 AS agp_exempted_domains,
SUM(IF(metrics.metricName = 'ATTEMPTED_ADDS', metrics.metricValue, 0)) AS attempted_adds
FROM (
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%REGISTRAR_IANA_ID_TABLE%`) AS registrars
-- We LEFT JOIN to produce reports even if the registrar made no transactions
LEFT OUTER JOIN (
-- Gather all intermediary data views
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%TOTAL_DOMAINS_TABLE%`
UNION ALL
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%TOTAL_NAMESERVERS_TABLE%`
UNION ALL
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%TRANSACTION_COUNTS_TABLE%`
UNION ALL
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%TRANSACTION_TRANSFER_LOSING_TABLE%`
UNION ALL
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%ATTEMPTED_ADDS_TABLE%` ) AS metrics
-- Join on tld and registrar name
ON registrars.tld = metrics.tld
AND registrars.registrar_name = metrics.registrar_name
GROUP BY
tld, registrar_name, iana_id
ORDER BY
tld, registrar_name