Add activity reporting SQL query generation code

This allows us to have a modular view of all tables used in activity reporting, to facilitate generating reports in BigQuery.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=161849007
This commit is contained in:
larryruili 2017-07-21 12:45:28 -04:00 committed by Ben McIlwain
parent 2521409e39
commit 4887811fc3
17 changed files with 743 additions and 1 deletions

View file

@ -0,0 +1,93 @@
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- This query pulls from all intermediary tables to create the activity
-- report csv, via a table transpose and sum over all activity report fields.
-- Transposes the (tld, metricName, count) rows produced by the intermediary
-- tables into one output row per TLD, with one column per activity report
-- field. Fields we do not support are emitted as constant 0 columns.
SELECT
  Tld.tld AS tld,
  SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars,
  SUM(IF(metricName = 'ramp-up-registrars', count, 0)) AS ramp_up_registrars,
  SUM(IF(metricName = 'pre-ramp-up-registrars', count, 0)) AS pre_ramp_up_registrars,
  -- We don't support ZFA over SFTP, only AXFR.
  0 AS zfa_passwords,
  SUM(IF(metricName = 'whois-43-queries', count, 0)) AS whois_43_queries,
  SUM(IF(metricName = 'web-whois-queries', count, 0)) AS web_whois_queries,
  -- We don't support searchable WHOIS.
  0 AS searchable_whois_queries,
  -- DNS queries for UDP/TCP are all assumed to be received/responded, so the
  -- same metric feeds both the "received" and the "responded" column.
  SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_received,
  SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_responded,
  SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_received,
  SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_responded,
  -- SRS metrics.
  SUM(IF(metricName = 'srs-dom-check', count, 0)) AS srs_dom_check,
  SUM(IF(metricName = 'srs-dom-create', count, 0)) AS srs_dom_create,
  SUM(IF(metricName = 'srs-dom-delete', count, 0)) AS srs_dom_delete,
  SUM(IF(metricName = 'srs-dom-info', count, 0)) AS srs_dom_info,
  SUM(IF(metricName = 'srs-dom-renew', count, 0)) AS srs_dom_renew,
  SUM(IF(metricName = 'srs-dom-rgp-restore-report', count, 0)) AS srs_dom_rgp_restore_report,
  SUM(IF(metricName = 'srs-dom-rgp-restore-request', count, 0)) AS srs_dom_rgp_restore_request,
  SUM(IF(metricName = 'srs-dom-transfer-approve', count, 0)) AS srs_dom_transfer_approve,
  SUM(IF(metricName = 'srs-dom-transfer-cancel', count, 0)) AS srs_dom_transfer_cancel,
  SUM(IF(metricName = 'srs-dom-transfer-query', count, 0)) AS srs_dom_transfer_query,
  SUM(IF(metricName = 'srs-dom-transfer-reject', count, 0)) AS srs_dom_transfer_reject,
  SUM(IF(metricName = 'srs-dom-transfer-request', count, 0)) AS srs_dom_transfer_request,
  SUM(IF(metricName = 'srs-dom-update', count, 0)) AS srs_dom_update,
  SUM(IF(metricName = 'srs-host-check', count, 0)) AS srs_host_check,
  SUM(IF(metricName = 'srs-host-create', count, 0)) AS srs_host_create,
  SUM(IF(metricName = 'srs-host-delete', count, 0)) AS srs_host_delete,
  SUM(IF(metricName = 'srs-host-info', count, 0)) AS srs_host_info,
  SUM(IF(metricName = 'srs-host-update', count, 0)) AS srs_host_update,
  SUM(IF(metricName = 'srs-cont-check', count, 0)) AS srs_cont_check,
  SUM(IF(metricName = 'srs-cont-create', count, 0)) AS srs_cont_create,
  SUM(IF(metricName = 'srs-cont-delete', count, 0)) AS srs_cont_delete,
  SUM(IF(metricName = 'srs-cont-info', count, 0)) AS srs_cont_info,
  SUM(IF(metricName = 'srs-cont-transfer-approve', count, 0)) AS srs_cont_transfer_approve,
  SUM(IF(metricName = 'srs-cont-transfer-cancel', count, 0)) AS srs_cont_transfer_cancel,
  SUM(IF(metricName = 'srs-cont-transfer-query', count, 0)) AS srs_cont_transfer_query,
  SUM(IF(metricName = 'srs-cont-transfer-reject', count, 0)) AS srs_cont_transfer_reject,
  SUM(IF(metricName = 'srs-cont-transfer-request', count, 0)) AS srs_cont_transfer_request,
  SUM(IF(metricName = 'srs-cont-update', count, 0)) AS srs_cont_update,
-- Cross join a list of all TLDs against TLD-specific metrics and then
-- filter so that only metrics with that TLD or a NULL TLD are counted
-- towards a given TLD.
FROM (
  SELECT
    tldStr AS tld
  FROM
    [%LATEST_SNAPSHOT_DATA_SET%.%REGISTRY_TABLE%]
  -- Include all real TLDs that are not in pre-delegation testing.
  WHERE
    tldType = 'REAL'
  OMIT
    RECORD IF SOME(tldStateTransitions.tldState = 'PDT') ) AS Tld
-- NOTE: a plain equality join (ON Tld.tld = TldMetrics.tld) must NOT be used
-- here. Registrar, DNS and WHOIS metrics carry a NULL tld meaning "applies to
-- every TLD", and NULL never satisfies an equality join condition, so those
-- metrics would silently be reported as 0.
-- TODO(larryruili): Switch to a LEFT JOIN only once the all-TLD metrics no
-- longer rely on the NULL tld magic value. Also obsoletes dummy data.
CROSS JOIN (
  SELECT
    tld,
    metricName,
    count
  FROM
    -- Dummy data source that ensures that all TLDs appear in the report,
    -- since each TLD then has at least one joined row (NULL tld, zero count)
    -- that survives the WHERE filter below.
    (SELECT STRING(NULL) AS tld, STRING(NULL) AS metricName, 0 AS count),
    -- BEGIN INTERMEDIARY DATA SOURCES --
    [%ACTIVITY_REPORTING_DATA_SET%.%REGISTRAR_OPERATING_STATUS_TABLE%],
    [%ACTIVITY_REPORTING_DATA_SET%.%DNS_COUNTS_TABLE%],
    [%ACTIVITY_REPORTING_DATA_SET%.%EPP_METRICS_TABLE%],
    [%ACTIVITY_REPORTING_DATA_SET%.%WHOIS_COUNTS_TABLE%],
    -- END INTERMEDIARY DATA SOURCES --
) AS TldMetrics
-- Keep metrics that apply to every TLD (NULL tld) plus those recorded for
-- this specific TLD.
WHERE
  TldMetrics.tld IS NULL OR TldMetrics.tld = Tld.tld
GROUP BY
  tld
ORDER BY
  tld

View file

@ -0,0 +1,33 @@
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Query for DNS metrics.
-- Our DNS provider exports logs for all queries received. However, these
-- tables only have a TTL of 7 days. We make daily exports of the data
-- relevant to us, which allows us to get the full month's UDP and TCP
-- queries when generating activity reports.
-- Emits one row per DNS metric name (UDP and TCP query counts).
SELECT
  -- A NULL tld is the magic value marking a metric that applies to all TLDs.
  STRING(NULL) AS tld,
  metricName,
  -- Placeholder value until the real DNS tables can be queried.
  -- TODO(b/63388735): Change this to actually query the DNS tables when ready.
  -1 AS count
FROM
  -- Two single-row inline tables, combined by the comma operator.
  (SELECT 'dns-udp-queries' AS metricName),
  (SELECT 'dns-tcp-queries' AS metricName)

View file

@ -0,0 +1,64 @@
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Query FlowReporter JSON log messages and calculate SRS metrics.
-- We use regex's over the monthly appengine logs to determine how many
-- EPP requests we received for each command.
-- Produces one row per (tld, metricName) pair, where metricName is the
-- ICANN activity report field logged by the flow and count is the number
-- of matching flattened log rows.
SELECT
tld,
activityReportField AS metricName,
-- Manual INTEGER cast to work around a BigQuery bug (b/14560012).
INTEGER(COUNT(*)) AS count,
FROM
-- Flatten the "tld" column (repeated) so that domain checks for names
-- across multiple TLDs are counted towards each checked TLD as though
-- there were one copy of this row per TLD (the effect of flattening).
FLATTEN((
SELECT
-- Use some ugly regex hackery to convert JSON list of strings into
-- repeated string values, since there's no built-in for this.
-- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF
-- once we can use GoogleSQL; example in b/37629674#comment2.
-- e.g. JSON:"{"commandType":"check"...,"targetIds":["ais.a.how"],
-- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
-- Steps, innermost first:
--   1. JSON_EXTRACT pulls the raw '$.tlds' array text out of the payload.
--   2. The inner REGEXP_EXTRACT strips the surrounding [ and ] brackets.
--   3. SPLIT breaks the remaining text into one value per list element.
--   4. The outer REGEXP_EXTRACT strips the double quotes around each value.
REGEXP_EXTRACT(
SPLIT(
REGEXP_EXTRACT(
JSON_EXTRACT(json, '$.tlds'),
r'^\[(.*)\]$')),
'^"(.*)"$') AS tld,
-- TODO(b/XXX): remove rawTlds after June 2017 (see below).
-- NOTE(review): no "rawTlds" column is selected anywhere in this query, so
-- the TODO above looks stale -- confirm and remove.
JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType,
JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField')
AS activityReportField,
FROM (
SELECT
-- Extract JSON payload following log signature.
REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json,
FROM
[%MONTHLY_LOGS_DATA_SET%.%MONTHLY_LOGS_TABLE%]
-- Only log lines carrying the flow metadata signature are considered.
WHERE logMessage CONTAINS 'FLOW-LOG-SIGNATURE-METADATA'
)
),
-- Second argument to flatten (see above).
tld)
-- Exclude cases that can't be tabulated correctly - activity report field
-- is null/empty, or the TLD is null/empty even though it's a domain flow.
WHERE
activityReportField != '' AND (tld != '' OR resourceType != 'domain')
GROUP BY tld, metricName
ORDER BY tld, metricName

View file

@ -0,0 +1,28 @@
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Query to fetch AppEngine request logs for the report month.
-- START_OF_MONTH and END_OF_MONTH should be in YYYY-MM-01 format.
-- Projects the request path and the log message out of every AppEngine
-- request log table that falls inside the report month.
SELECT
  protoPayload.resource AS requestPath,
  protoPayload.line.logMessage AS logMessage
FROM
  TABLE_DATE_RANGE_STRICT(
    [%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%],
    -- Range start: first day of the report month.
    TIMESTAMP('%START_OF_MONTH%'),
    -- Range end is inclusive, and the END_OF_MONTH timestamp represents the
    -- start of the next month, so step back one day to stay within the month.
    DATE_ADD(TIMESTAMP('%END_OF_MONTH%'), -1, 'DAY'))

View file

@ -0,0 +1,32 @@
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Query for all registrar statuses:
-- production, ramping up (OTE), or pre-ramp-up (requested).
-- Counts registrars per operating status, one output row per status bucket.
SELECT
  -- A NULL tld is the magic value marking a metric that applies to all TLDs.
  STRING(NULL) AS tld,
  CASE
    -- The import process is imprecise; rows without a registrar name are
    -- invalid and land in the catch-all bucket.
    WHEN registrar_name IS NULL THEN 'not-applicable'
    WHEN access_type = 'PROD' THEN 'operational-registrars'
    WHEN access_type = 'OTE' THEN 'ramp-up-registrars'
    WHEN access_type IS NULL THEN 'pre-ramp-up-registrars'
    -- Any other access type is likewise treated as invalid.
    ELSE 'not-applicable'
  END AS metricName,
  INTEGER(COUNT(registrar_id)) AS count
FROM
  [%REGISTRAR_DATA_SET%.%REGISTRAR_STATUS_TABLE%]
GROUP BY
  metricName

View file

@ -0,0 +1,33 @@
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Query for WHOIS metrics.
-- This searches the monthly appengine logs for Whois requests, and
-- counts the number of hits via both endpoints (port 43 and the web).
-- Tallies WHOIS requests found in the monthly logs, split by endpoint.
SELECT
  -- A NULL tld is the magic value marking a metric that applies to all TLDs.
  STRING(NULL) AS tld,
  -- Web queries come in via /whois/<params>, while port 43 queries get
  -- forwarded by the proxy to /_dr/whois. Any other path yields a NULL
  -- metricName and is dropped by the HAVING clause below.
  CASE
    WHEN LEFT(requestPath, 7) = '/whois/' THEN 'web-whois-queries'
    WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries'
  END AS metricName,
  INTEGER(COUNT(requestPath)) AS count
FROM
  [%MONTHLY_LOGS_DATA_SET%.%MONTHLY_LOGS_TABLE%]
GROUP BY
  metricName
HAVING
  metricName IS NOT NULL