Prepare ICANN reporting for production

This originally started as a small change, but quickly grew into a major refactor as I realized the original parameter structure wasn't conducive to a cron task and manual re-runs.

The changes are as follows:
1. Adds DNS metrics to activity reports, thanks to Nick's work with the Zoneman Dremel -> #plx workflow.
2. Surrounds registrar names in transactions reports with quotes, to escape possible commas.
3. Factors out the report generation logic into IcannReportingStager.
4. Assigns default values to the three main parameters:
  - yearMonth defaults to the previous month
  - subdir defaults to "icann/monthly/yearMonth", i.e. "gs://domain-registry-reporting/icann/monthly/yyyy-MM"
  - reportType defaults to both reports
5. Adds "Total" row generation logic to transactions reports
  - This was a previously overlooked requirement.
6. Adds "MANIFEST.txt" generation and upload logic.
  - The MANIFEST lists out which files need to be uploaded in the subdirectory.
7. Increases urlfetch timeout from 5s to 10s in backend tasks.
  - Backend tasks should be more latency-tolerant anyway, and this reduces the number of incorrect timeouts we see for services like BigQuery, which might take some time to respond.

TESTED=Extensive testing in alpha, and ran FOSS test.
TODO: send out an e-mail for report generation and upload, and add reporting to cron.xml

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=172738344
This commit is contained in:
larryruili 2017-10-19 07:02:22 -07:00 committed by jianglai
parent 06f0ec4f2f
commit f1c76d035f
39 changed files with 1092 additions and 589 deletions

View file

@ -19,8 +19,6 @@
SELECT
RealTlds.tld AS tld,
SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars,
SUM(IF(metricName = 'ramp-up-registrars', count, 0)) AS ramp_up_registrars,
SUM(IF(metricName = 'pre-ramp-up-registrars', count, 0)) AS pre_ramp_up_registrars,
-- We use the Centralized Zone Data Service.
"CZDS" AS zfa_passwords,
SUM(IF(metricName = 'whois-43-queries', count, 0)) AS whois_43_queries,
@ -65,7 +63,7 @@ SELECT
-- filter so that only metrics with that TLD or a NULL TLD are counted
-- towards a given TLD.
FROM (
SELECT tldStr as tld
SELECT tldStr AS tld
FROM `domain-registry-alpha.latest_datastore_export.Registry`
WHERE tldType = 'REAL'
) as RealTlds
@ -82,16 +80,16 @@ CROSS JOIN(
SELECT STRING(NULL) AS tld, STRING(NULL) AS metricName, 0 as count
UNION ALL
SELECT * FROM
`domain-registry-alpha.icann_reporting.registrar_operating_status_201706`
`domain-registry-alpha.icann_reporting.registrar_operating_status_201709`
UNION ALL
SELECT * FROM
`domain-registry-alpha.icann_reporting.dns_counts_201706`
`domain-registry-alpha.icann_reporting.dns_counts_201709`
UNION ALL
SELECT * FROM
`domain-registry-alpha.icann_reporting.epp_metrics_201706`
`domain-registry-alpha.icann_reporting.epp_metrics_201709`
UNION ALL
SELECT * FROM
`domain-registry-alpha.icann_reporting.whois_counts_201706`
`domain-registry-alpha.icann_reporting.whois_counts_201709`
-- END INTERMEDIARY DATA SOURCES --
)) AS TldMetrics
WHERE RealTlds.tld = TldMetrics.tld OR TldMetrics.tld IS NULL

View file

@ -52,8 +52,8 @@ FROM (
FROM
`domain-registry-alpha.appengine_logs.appengine_googleapis_com_request_log_*`
WHERE _TABLE_SUFFIX
BETWEEN '20170601'
AND '20170630')
BETWEEN '20170901'
AND '20170930')
JOIN UNNEST(logMessage) AS logMessages
-- Look for metadata logs from epp and registrar console requests
WHERE requestPath IN ('/_dr/epp', '/_dr/epptool', '/registrar-xhr')

View file

@ -0,0 +1,24 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Per-TLD DNS query counts.
-- Passes through every row and column of the dns_counts_from_plx table
-- unchanged. This is a stopgap that lets reporting consume DNS counts
-- produced by the internal-only #plx workflow; other references to
-- b/67301320 in the codebase show the rest of the workaround.
-- TODO(b/67301320): Delete this when we can make open-source DNS metrics.
SELECT
  *
FROM
  `domain-registry-alpha.icann_reporting.dns_counts_from_plx`

View file

@ -15,14 +15,13 @@
-- Query for DNS metrics.
-- This is a no-op until after we transition to Google Cloud DNS, which
-- will likely export metrics via Stackdriver.
-- You must configure this yourself to enable activity reporting, according
-- to whatever metrics your DNS provider makes available. We hope to make
-- this available in the open-source build in the near future.
SELECT
-- DNS metrics apply to all tlds, which requires the 'null' magic value.
STRING(NULL) AS tld,
metricName,
-- TODO(b/63388735): Change this to actually query Google Cloud DNS when ready.
-1 AS count
FROM ((
SELECT 'dns-udp-queries' AS metricName)

View file

@ -39,7 +39,7 @@ FROM (
-- Extract the logged JSON payload.
REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `domain-registry-alpha.icann_reporting.monthly_logs_201706` AS logs
FROM `domain-registry-alpha.icann_reporting.monthly_logs_201709` AS logs
JOIN
UNNEST(logs.logMessage) AS logMessage
WHERE

View file

@ -27,4 +27,4 @@ SELECT
FROM
`domain-registry-alpha.appengine_logs.appengine_googleapis_com_request_log_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170601' AND '20170630'
_TABLE_SUFFIX BETWEEN '20170901' AND '20170930'

View file

@ -26,5 +26,5 @@ FROM
UNNEST(allowedTlds) as allowed_tlds
WHERE (type = 'REAL' OR type = 'INTERNAL')
-- Filter out prober data
AND NOT ENDS_WITH(allowed_tlds, "test")
AND NOT ENDS_WITH(allowed_tlds, ".test")
ORDER BY tld, registrarName

View file

@ -23,5 +23,5 @@ SELECT
FROM
`domain-registry-alpha.latest_datastore_export.Registrar`
WHERE
type = 'REAL'
(type = 'REAL' OR type = 'INTERNAL')
GROUP BY metricName

View file

@ -32,7 +32,7 @@ JOIN
ON
currentSponsorClientId = registrar_table.__key__.name
WHERE
domain_table._d = "DomainResource"
AND (registrar_table.type = "REAL" OR registrar_table.type = "INTERNAL")
domain_table._d = 'DomainResource'
AND (registrar_table.type = 'REAL' OR registrar_table.type = 'INTERNAL')
GROUP BY tld, registrarName
ORDER BY tld, registrarName

View file

@ -45,12 +45,12 @@ JOIN (
`domain-registry-alpha.latest_datastore_export.DomainBase`,
UNNEST(nsHosts) AS hosts
WHERE _d = 'DomainResource'
AND creationTime <= TIMESTAMP("2017-06-30 23:59:59")
AND deletionTime > TIMESTAMP("2017-06-30 23:59:59") ) AS domain_table
AND creationTime <= TIMESTAMP("2017-09-30 23:59:59")
AND deletionTime > TIMESTAMP("2017-09-30 23:59:59") ) AS domain_table
ON
host_table.__key__.name = domain_table.referencedHostName
WHERE creationTime <= TIMESTAMP("2017-06-30 23:59:59")
AND deletionTime > TIMESTAMP("2017-06-30 23:59:59")
WHERE creationTime <= TIMESTAMP("2017-09-30 23:59:59")
AND deletionTime > TIMESTAMP("2017-09-30 23:59:59")
GROUP BY tld, registrarName
ORDER BY tld, registrarName

View file

@ -63,10 +63,8 @@ FROM (
WHERE entries.domainTransactionRecords IS NOT NULL )
-- Only look at this month's data
WHERE reportingTime
BETWEEN TIMESTAMP('2017-06-01 00:00:00')
AND TIMESTAMP('2017-06-30 23:59:59')
-- Ignore prober data
AND NOT ENDS_WITH(tld, "test")
BETWEEN TIMESTAMP('2017-09-01 00:00:00')
AND TIMESTAMP('2017-09-30 23:59:59')
GROUP BY
tld,
clientId,

View file

@ -63,10 +63,8 @@ FROM (
WHERE entries.domainTransactionRecords IS NOT NULL )
-- Only look at this month's data
WHERE reportingTime
BETWEEN TIMESTAMP('2017-06-01 00:00:00')
AND TIMESTAMP('2017-06-30 23:59:59')
-- Ignore prober data
AND NOT ENDS_WITH(tld, "test")
BETWEEN TIMESTAMP('2017-09-01 00:00:00')
AND TIMESTAMP('2017-09-30 23:59:59')
GROUP BY
tld,
clientId,

View file

@ -20,7 +20,8 @@
SELECT
registrars.tld as tld,
registrars.registrar_name as registrar_name,
-- Surround registrar names with quotes to handle names containing a comma.
FORMAT("\"%s\"", registrars.registrar_name) as registrar_name,
registrars.iana_id as iana_id,
SUM(IF(metrics.metricName = 'TOTAL_DOMAINS', metrics.metricValue, 0)) AS total_domains,
SUM(IF(metrics.metricName = 'TOTAL_NAMESERVERS', metrics.metricValue, 0)) AS total_nameservers,
@ -62,26 +63,33 @@ SELECT
0 AS agp_exemptions_granted,
0 AS agp_exempted_domains,
SUM(IF(metrics.metricName = 'ATTEMPTED_ADDS', metrics.metricValue, 0)) AS attempted_adds
FROM (
SELECT *
FROM `domain-registry-alpha.icann_reporting.registrar_iana_id_201706`) AS registrars
FROM
-- Only produce reports for real TLDs
(SELECT tldStr AS tld
FROM `domain-registry-alpha.latest_datastore_export.Registry`
WHERE tldType = 'REAL') AS registries
JOIN
(SELECT *
FROM `domain-registry-alpha.icann_reporting.registrar_iana_id_201709`)
AS registrars
ON registries.tld = registrars.tld
-- We LEFT JOIN to produce reports even if the registrar made no transactions
LEFT OUTER JOIN (
-- Gather all intermediary data views
SELECT *
FROM `domain-registry-alpha.icann_reporting.total_domains_201706`
FROM `domain-registry-alpha.icann_reporting.total_domains_201709`
UNION ALL
SELECT *
FROM `domain-registry-alpha.icann_reporting.total_nameservers_201706`
FROM `domain-registry-alpha.icann_reporting.total_nameservers_201709`
UNION ALL
SELECT *
FROM `domain-registry-alpha.icann_reporting.transaction_counts_201706`
FROM `domain-registry-alpha.icann_reporting.transaction_counts_201709`
UNION ALL
SELECT *
FROM `domain-registry-alpha.icann_reporting.transaction_transfer_losing_201706`
FROM `domain-registry-alpha.icann_reporting.transaction_transfer_losing_201709`
UNION ALL
SELECT *
FROM `domain-registry-alpha.icann_reporting.attempted_adds_201706` ) AS metrics
FROM `domain-registry-alpha.icann_reporting.attempted_adds_201709` ) AS metrics
-- Join on tld and registrar name
ON registrars.tld = metrics.tld
AND registrars.registrar_name = metrics.registrar_name

View file

@ -26,7 +26,7 @@ SELECT
END AS metricName,
COUNT(requestPath) AS count
FROM
`domain-registry-alpha.icann_reporting.monthly_logs_201706`
`domain-registry-alpha.icann_reporting.monthly_logs_201709`
GROUP BY
metricName
HAVING