Delete obsolete ICANN reporting python code
Now that we've successfully run ICANN reporting in production with the new
Java code, we can safely delete all the code it obsoletes. I've also added a
small README to explain why we no longer have report copies in Piper.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=175197665
parent ff1ab08302
commit 8f0c37fc4a
4 changed files with 0 additions and 677 deletions
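For reference, the deleted builder was exercised end to end by the golden test below: construct an IcannReportQueryBuilder and ask it for the activity-report SQL for a given month. A minimal sketch of that invocation (module, class, and argument names are taken from the deleted files; registrar_count=None mirrors the golden test, which leaves the all-registrars count unresolved):

    from google.registry.reporting import icann_report_query_builder

    # Build the June 2016 activity report query, as the deleted golden test did.
    builder = icann_report_query_builder.IcannReportQueryBuilder()
    activity_sql = builder.BuildActivityReportQuery(
        month='2016-06', registrar_count=None)
    print(activity_sql)  # One multi-part BigQuery (legacy SQL) statement.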
BUILD
@@ -1,17 +0,0 @@
package(default_visibility = ["//java/google/registry:registry_project"])

licenses(["notice"])  # Apache 2.0

py_library(
    name = "icann_report_query_builder",
    srcs = ["icann_report_query_builder.py"],
    deps = ["//python:python_directory_import"],
)

py_test(
    name = "icann_report_query_builder_test",
    size = "small",
    srcs = ["icann_report_query_builder_test.py"],
    data = ["testdata/golden_activity_query.sql"],
    deps = [":icann_report_query_builder"],
)
icann_report_query_builder.py
@@ -1,348 +0,0 @@
# Copyright 2017 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""ICANN reporting BigQuery query construction logic.

The IcannReportQueryBuilder class contains logic for constructing the
multi-part BigQuery queries used to produce ICANN monthly reports. These
queries are fairly complicated; see the design doc published to
nomulus-discuss@googlegroups.com for an overview.

Currently, this class only supports building the query for activity
reports (not transaction reports).
"""
import datetime

# This signature must match the one logged by FlowReporter - see
# cs/symbol:google.registry.flows.FlowReporter.METADATA_LOG_SIGNATURE
FLOWREPORTER_LOG_SIGNATURE = 'FLOW-LOG-SIGNATURE-METADATA'


class IcannReportQueryBuilder(object):
  """Container for methods to build BigQuery queries for ICANN reporting."""

  def BuildActivityReportQuery(self, month, registrar_count):
    """Returns the assembled activity report query for a given month.

    Specifically, we instantiate the outermost activity report query by pointing
    it at the union of a series of "data source" queries that each produce data
    used to generate certain metrics. These queries in turn rely on some common
    lower-level data source queries (monthly logs, both raw and EPP-parsed).

    Args:
      month: (str) month of the report to generate, in YYYY-MM format
      registrar_count: (int) total number of registrars in the registry system

    Returns:
      (str) the fully-instantiated activity report query SQL
    """
    # Construct some date-related parameters from the given month.
    this_month_date = datetime.datetime.strptime(month, '%Y-%m').date()
    # Hacky way to compute the start of the next month - add enough days to get
    # to the next month (e.g. 31), then set the day to 1. It'd be cleaner to
    # use dateutils.relativedelta(months=1) but the dependency is a pain.
    month_delta = datetime.timedelta(days=31)
    next_month_date = (this_month_date + month_delta).replace(day=1)
    this_yearmonth = this_month_date.strftime('%Y-%m')
    next_yearmonth = next_month_date.strftime('%Y-%m')

    # Construct the queries themselves.
    logs_query = self._MakeMonthlyLogsQuery(this_yearmonth, next_yearmonth)
    data_source_queries = [
        self._MakeActivityOperationalRegistrarsQuery(next_yearmonth),
        self._MakeActivityAllRampedUpRegistrarsQuery(next_yearmonth),
        self._MakeActivityAllRegistrarsQuery(registrar_count),
        self._MakeActivityWhoisQuery(logs_query),
        self._MakeActivityDnsQuery(),
        self._MakeActivityEppSrsMetricsQuery(logs_query)
    ]
    return _StripTrailingWhitespaceFromLines(self._MakeActivityReportQuery(
        data_source_queries))

  def _MakeMonthlyLogsQuery(self, this_yearmonth, next_yearmonth):
    # TODO(b/20725722): add a real docstring.
    # pylint: disable=missing-docstring
    query = r"""
-- Query AppEngine request logs for the report month.
SELECT
  protoPayload.resource AS requestPath,
  protoPayload.line.logMessage AS logMessage,
FROM
  TABLE_DATE_RANGE_STRICT(
    [appengine_logs.appengine_googleapis_com_request_log_],
    TIMESTAMP('%(this_yearmonth)s-01'),
    -- End timestamp is inclusive, so subtract 1 second from the
    -- timestamp representing the start of the next month.
    DATE_ADD(TIMESTAMP('%(next_yearmonth)s-01'), -1, 'SECOND'))
    """
    return query % {'this_yearmonth': this_yearmonth,
                    'next_yearmonth': next_yearmonth}

  def _MakeActivityReportQuery(self, data_source_queries):
    """Make the overall activity report query.

    Args:
      data_source_queries: list of BigQuery SQL strings to use
        as source 'tables' for the main query; each of these
        queries must output a schema as follows:

          STRING tld / STRING metricName / INTEGER count

        A null TLD indicates that the metric counts towards
        all TLDs.

    Returns:
      query as a string of BigQuery SQL
    """
    query = r"""
SELECT
  Tld.tld AS tld,
  SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars,
  -- Compute ramp-up-registrars as all-ramped-up-registrars
  -- minus operational-registrars, with a floor of 0.
  GREATEST(0, SUM(
    CASE
      WHEN metricName = 'operational-registrars' THEN -count
      WHEN metricName = 'all-ramped-up-registrars' THEN count
      ELSE 0
    END)) AS ramp_up_registrars,
  -- Compute pre-ramp-up-registrars as all-registrars minus
  -- all-ramp-up-registrars, with a floor of 0.
  GREATEST(0, SUM(
    CASE
      WHEN metricName = 'all-ramped-up-registrars' THEN -count
      WHEN metricName = 'all-registrars' THEN count
      ELSE 0
    END)) AS pre_ramp_up_registrars,
  -- We don't support ZFA over SFTP, only AXFR.
  0 AS zfa_passwords,
  SUM(IF(metricName = 'whois-43-queries', count, 0)) AS whois_43_queries,
  SUM(IF(metricName = 'web-whois-queries', count, 0)) AS web_whois_queries,
  -- We don't support searchable WHOIS.
  0 AS searchable_whois_queries,
  -- DNS queries for UDP/TCP are all assumed to be received/responded.
  SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_received,
  SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_responded,
  SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_received,
  SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_responded,
  -- SRS metrics.
  SUM(IF(metricName = 'srs-dom-check', count, 0)) AS srs_dom_check,
  SUM(IF(metricName = 'srs-dom-create', count, 0)) AS srs_dom_create,
  SUM(IF(metricName = 'srs-dom-delete', count, 0)) AS srs_dom_delete,
  SUM(IF(metricName = 'srs-dom-info', count, 0)) AS srs_dom_info,
  SUM(IF(metricName = 'srs-dom-renew', count, 0)) AS srs_dom_renew,
  SUM(IF(metricName = 'srs-dom-rgp-restore-report', count, 0)) AS srs_dom_rgp_restore_report,
  SUM(IF(metricName = 'srs-dom-rgp-restore-request', count, 0)) AS srs_dom_rgp_restore_request,
  SUM(IF(metricName = 'srs-dom-transfer-approve', count, 0)) AS srs_dom_transfer_approve,
  SUM(IF(metricName = 'srs-dom-transfer-cancel', count, 0)) AS srs_dom_transfer_cancel,
  SUM(IF(metricName = 'srs-dom-transfer-query', count, 0)) AS srs_dom_transfer_query,
  SUM(IF(metricName = 'srs-dom-transfer-reject', count, 0)) AS srs_dom_transfer_reject,
  SUM(IF(metricName = 'srs-dom-transfer-request', count, 0)) AS srs_dom_transfer_request,
  SUM(IF(metricName = 'srs-dom-update', count, 0)) AS srs_dom_update,
  SUM(IF(metricName = 'srs-host-check', count, 0)) AS srs_host_check,
  SUM(IF(metricName = 'srs-host-create', count, 0)) AS srs_host_create,
  SUM(IF(metricName = 'srs-host-delete', count, 0)) AS srs_host_delete,
  SUM(IF(metricName = 'srs-host-info', count, 0)) AS srs_host_info,
  SUM(IF(metricName = 'srs-host-update', count, 0)) AS srs_host_update,
  SUM(IF(metricName = 'srs-cont-check', count, 0)) AS srs_cont_check,
  SUM(IF(metricName = 'srs-cont-create', count, 0)) AS srs_cont_create,
  SUM(IF(metricName = 'srs-cont-delete', count, 0)) AS srs_cont_delete,
  SUM(IF(metricName = 'srs-cont-info', count, 0)) AS srs_cont_info,
  SUM(IF(metricName = 'srs-cont-transfer-approve', count, 0)) AS srs_cont_transfer_approve,
  SUM(IF(metricName = 'srs-cont-transfer-cancel', count, 0)) AS srs_cont_transfer_cancel,
  SUM(IF(metricName = 'srs-cont-transfer-query', count, 0)) AS srs_cont_transfer_query,
  SUM(IF(metricName = 'srs-cont-transfer-reject', count, 0)) AS srs_cont_transfer_reject,
  SUM(IF(metricName = 'srs-cont-transfer-request', count, 0)) AS srs_cont_transfer_request,
  SUM(IF(metricName = 'srs-cont-update', count, 0)) AS srs_cont_update,
-- Cross join a list of all TLDs against TLD-specific metrics and then
-- filter so that only metrics with that TLD or a NULL TLD are counted
-- towards a given TLD.
FROM (
  SELECT tldStr AS tld
  FROM [latest_snapshot.Registry]
  -- Include all real TLDs that are not in pre-delegation testing.
  WHERE tldType = 'REAL'
  OMIT RECORD IF SOME(tldStateTransitions.tldState = 'PDT')
) AS Tld
CROSS JOIN (
  SELECT
    tld, metricName, count
  FROM
    -- Dummy data source that ensures that all TLDs appear in report,
    -- since they'll all have at least 1 joined row that survives.
    (SELECT STRING(NULL) AS tld, STRING(NULL) AS metricName, 0 AS count),
    -- BEGIN JOINED DATA SOURCES --
    %(joined_data_sources)s
    -- END JOINED DATA SOURCES --
) AS TldMetrics
WHERE Tld.tld = TldMetrics.tld OR TldMetrics.tld IS NULL
GROUP BY tld
ORDER BY tld
    """
    # Turn each data source query into a subquery in parentheses, and join
    # them together with commas (representing a table union).
    joined_data_sources = '\n' + ',\n'.join(
        '(\n%s\n)' % query for query in data_source_queries)
    return query % {'joined_data_sources': joined_data_sources}

  def _MakeActivityOperationalRegistrarsQuery(self, next_yearmonth):
    # TODO(b/20725722): add a real docstring.
    # pylint: disable=missing-docstring
    query = r"""
-- Query for operational-registrars metric.
SELECT
  allowedTlds AS tld,
  'operational-registrars' AS metricName,
  INTEGER(COUNT(__key__.name)) AS count,
FROM [domain-registry:latest_snapshot.Registrar]
WHERE type = 'REAL'
  AND creationTime < TIMESTAMP('%(next_yearmonth)s-01')
GROUP BY tld
    """
    return query % {'next_yearmonth': next_yearmonth}

  def _MakeActivityAllRampedUpRegistrarsQuery(self, next_yearmonth):
    # TODO(b/20725722): add a real docstring.
    # pylint: disable=missing-docstring
    query = r"""
-- Query for all-ramped-up-registrars metric.
SELECT
  STRING(NULL) AS tld,  -- Applies to all TLDs.
  'all-ramped-up-registrars' AS metricName,
  -- Sandbox OT&E registrar names can have either '-{1,2,3,4}' or '{,2,3}'
  -- as suffixes - strip all of these off to get the "real" name.
  INTEGER(EXACT_COUNT_DISTINCT(
    REGEXP_EXTRACT(__key__.name, r'(.+?)(?:-?\d)?$'))) AS count,
FROM [domain-registry-sandbox:latest_snapshot.Registrar]
WHERE type = 'OTE'
  AND creationTime < TIMESTAMP('%(next_yearmonth)s-01')
    """
    return query % {'next_yearmonth': next_yearmonth}

  def _MakeActivityAllRegistrarsQuery(self, registrar_count):
    # TODO(b/20725722): add a real docstring.
    # pylint: disable=missing-docstring
    query = """
-- Query for all-registrars metric.
SELECT
  STRING(NULL) AS tld,  -- Applies to all TLDs.
  'all-registrars' AS metricName,
  INTEGER('%(registrar_count)s') AS count,
    """
    return query % {'registrar_count': registrar_count}

  def _MakeActivityWhoisQuery(self, logs_query):
    # TODO(b/20725722): add a real docstring.
    # pylint: disable=missing-docstring
    query = r"""
-- Query for WHOIS metrics.
SELECT
  STRING(NULL) AS tld,  -- Applies to all TLDs.
  -- Whois queries over port 43 get forwarded by the proxy to /_dr/whois,
  -- while web queries come in via /whois/<params>.
  CASE WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries'
       WHEN LEFT(requestPath, 7) = '/whois/' THEN 'web-whois-queries'
  END AS metricName,
  INTEGER(COUNT(requestPath)) AS count,
FROM (
  -- BEGIN LOGS QUERY --
  %(logs_query)s
  -- END LOGS QUERY --
)
GROUP BY metricName
HAVING metricName IS NOT NULL
    """
    return query % {'logs_query': logs_query}

  def _MakeActivityDnsQuery(self):
    # TODO(b/20725722): add a real docstring.
    # pylint: disable=missing-docstring
    query = r"""
-- Query for DNS metrics.
SELECT
  STRING(NULL) AS tld,
  metricName,
  -1 AS count,
FROM
  (SELECT 'dns-udp-queries' AS metricName),
  (SELECT 'dns-tcp-queries' AS metricName)
    """
    return query

  def _MakeActivityEppSrsMetricsQuery(self, logs_query):
    # TODO(b/20725722): add a real docstring.
    # pylint: disable=missing-docstring
    query = r"""
-- Query FlowReporter JSON log messages and calculate SRS metrics.
SELECT
  tld,
  activityReportField AS metricName,
  -- Manual INTEGER cast to work around a BigQuery bug (b/14560012).
  INTEGER(COUNT(*)) AS count,
FROM
  -- Flatten the "tld" column (repeated) so that domain checks for names
  -- across multiple TLDs are counted towards each checked TLD as though
  -- there were one copy of this row per TLD (the effect of flattening).
  FLATTEN((
    SELECT
      -- Use some ugly regex hackery to convert JSON list of strings into
      -- repeated string values, since there's no built-in for this.
      -- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF
      -- once we can use GoogleSQL; example in b/37629674#comment2.
      REGEXP_EXTRACT(
        SPLIT(
          REGEXP_EXTRACT(
            JSON_EXTRACT(json, '$.tlds'),
            r'^\[(.*)\]$')),
        '^"(.*)"$') AS tld,
      -- TODO(b/XXX): remove rawTlds after June 2017 (see below).
      JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType,
      JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField')
          AS activityReportField,
    FROM (
      SELECT
        -- Extract JSON payload following log signature.
        REGEXP_EXTRACT(logMessage, r'%(log_signature)s: (.*)\n?$')
            AS json,
      FROM (
        -- BEGIN LOGS QUERY --
        %(logs_query)s
        -- END LOGS QUERY --
      )
      WHERE logMessage CONTAINS '%(log_signature)s'
    )
  ),
  -- Second argument to flatten (see above).
  tld)
-- Exclude cases that can't be tabulated correctly - activity report field
-- is null/empty, or the TLD is null/empty even though it's a domain flow.
WHERE
  activityReportField != '' AND (tld != '' OR resourceType != 'domain')
GROUP BY tld, metricName
ORDER BY tld, metricName
    """
    return query % {'logs_query': logs_query,
                    'log_signature': FLOWREPORTER_LOG_SIGNATURE}


def _StripTrailingWhitespaceFromLines(string):
  """Strips trailing whitespace from each line of the provided string.

  Args:
    string: (str) string to remove trailing whitespace from

  Returns:
    (str) input string, with trailing whitespace stripped from each line
  """
  return '\n'.join(line.rstrip() for line in string.split('\n'))
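The "hacky" next-month computation in BuildActivityReportQuery above is safe because the parsed date is always the first of the month, so adding 31 days always lands somewhere in the following month before the day is reset to 1. A quick standalone check (standard library only, not part of the deleted code):

    import datetime

    first_of_month = datetime.datetime.strptime('2016-06', '%Y-%m').date()  # 2016-06-01
    next_month = (first_of_month + datetime.timedelta(days=31)).replace(day=1)
    assert next_month == datetime.date(2016, 7, 1)  # matches TIMESTAMP('2016-07-01') in the golden query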
icann_report_query_builder_test.py
@@ -1,69 +0,0 @@
# Copyright 2017 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for google.registry.reporting.icann_report_query_builder."""

import logging
import os
import unittest

from google.registry.reporting import icann_report_query_builder


class IcannReportQueryBuilderTest(unittest.TestCase):

  testdata_path = None

  def setUp(self):
    # Using __file__ is a bit of a hack, but it's the only way that "just works"
    # for internal and external versions of the code, and it's fine for tests.
    self.testdata_path = os.path.join(os.path.dirname(__file__), 'testdata')

  def testActivityQuery_matchesGoldenQuery(self):
    self.maxDiff = None  # Show long diffs
    query_builder = icann_report_query_builder.IcannReportQueryBuilder()
    golden_activity_query_path = os.path.join(self.testdata_path,
                                              'golden_activity_query.sql')
    with open(golden_activity_query_path, 'r') as golden_activity_query:
      golden_file_contents = golden_activity_query.read()
    # Remove golden file copyright header by stripping until END OF HEADER.
    golden_file_sql = golden_file_contents.split('-- END OF HEADER\n')[1]
    actual_sql = query_builder.BuildActivityReportQuery(
        month='2016-06', registrar_count=None)
    try:
      self.assertMultiLineEqual(golden_file_sql, actual_sql)
    except AssertionError as e:
      # Print the actual SQL generated so that it's easy to copy-paste into
      # the golden file when updating the query.
      sep = '=' * 50 + '\n'
      logging.warning(
          'Generated activity query SQL:\n' + sep + actual_sql + sep)
      raise e

  def testStringTrailingWhitespaceFromLines(self):
    def do_test(expected, original):
      self.assertEqual(
          expected,
          icann_report_query_builder._StripTrailingWhitespaceFromLines(
              original))
    do_test('foo\nbar\nbaz\n', 'foo\nbar\nbaz\n')
    do_test('foo\nbar\nbaz\n', 'foo \nbar \nbaz \n')
    do_test('foo\nbar\nbaz', 'foo \nbar \nbaz ')
    do_test('\nfoo\nbar\nbaz', '\nfoo\nbar\nbaz')
    do_test('foo\n\n', 'foo\n \n')
    do_test('foo\n', 'foo\n ')


if __name__ == '__main__':
  unittest.main()
testdata/golden_activity_query.sql
@@ -1,243 +0,0 @@
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
--     http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

-- END OF HEADER

SELECT
  Tld.tld AS tld,
  SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars,
  -- Compute ramp-up-registrars as all-ramped-up-registrars
  -- minus operational-registrars, with a floor of 0.
  GREATEST(0, SUM(
    CASE
      WHEN metricName = 'operational-registrars' THEN -count
      WHEN metricName = 'all-ramped-up-registrars' THEN count
      ELSE 0
    END)) AS ramp_up_registrars,
  -- Compute pre-ramp-up-registrars as all-registrars minus
  -- all-ramp-up-registrars, with a floor of 0.
  GREATEST(0, SUM(
    CASE
      WHEN metricName = 'all-ramped-up-registrars' THEN -count
      WHEN metricName = 'all-registrars' THEN count
      ELSE 0
    END)) AS pre_ramp_up_registrars,
  -- We don't support ZFA over SFTP, only AXFR.
  0 AS zfa_passwords,
  SUM(IF(metricName = 'whois-43-queries', count, 0)) AS whois_43_queries,
  SUM(IF(metricName = 'web-whois-queries', count, 0)) AS web_whois_queries,
  -- We don't support searchable WHOIS.
  0 AS searchable_whois_queries,
  -- DNS queries for UDP/TCP are all assumed to be received/responded.
  SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_received,
  SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_responded,
  SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_received,
  SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_responded,
  -- SRS metrics.
  SUM(IF(metricName = 'srs-dom-check', count, 0)) AS srs_dom_check,
  SUM(IF(metricName = 'srs-dom-create', count, 0)) AS srs_dom_create,
  SUM(IF(metricName = 'srs-dom-delete', count, 0)) AS srs_dom_delete,
  SUM(IF(metricName = 'srs-dom-info', count, 0)) AS srs_dom_info,
  SUM(IF(metricName = 'srs-dom-renew', count, 0)) AS srs_dom_renew,
  SUM(IF(metricName = 'srs-dom-rgp-restore-report', count, 0)) AS srs_dom_rgp_restore_report,
  SUM(IF(metricName = 'srs-dom-rgp-restore-request', count, 0)) AS srs_dom_rgp_restore_request,
  SUM(IF(metricName = 'srs-dom-transfer-approve', count, 0)) AS srs_dom_transfer_approve,
  SUM(IF(metricName = 'srs-dom-transfer-cancel', count, 0)) AS srs_dom_transfer_cancel,
  SUM(IF(metricName = 'srs-dom-transfer-query', count, 0)) AS srs_dom_transfer_query,
  SUM(IF(metricName = 'srs-dom-transfer-reject', count, 0)) AS srs_dom_transfer_reject,
  SUM(IF(metricName = 'srs-dom-transfer-request', count, 0)) AS srs_dom_transfer_request,
  SUM(IF(metricName = 'srs-dom-update', count, 0)) AS srs_dom_update,
  SUM(IF(metricName = 'srs-host-check', count, 0)) AS srs_host_check,
  SUM(IF(metricName = 'srs-host-create', count, 0)) AS srs_host_create,
  SUM(IF(metricName = 'srs-host-delete', count, 0)) AS srs_host_delete,
  SUM(IF(metricName = 'srs-host-info', count, 0)) AS srs_host_info,
  SUM(IF(metricName = 'srs-host-update', count, 0)) AS srs_host_update,
  SUM(IF(metricName = 'srs-cont-check', count, 0)) AS srs_cont_check,
  SUM(IF(metricName = 'srs-cont-create', count, 0)) AS srs_cont_create,
  SUM(IF(metricName = 'srs-cont-delete', count, 0)) AS srs_cont_delete,
  SUM(IF(metricName = 'srs-cont-info', count, 0)) AS srs_cont_info,
  SUM(IF(metricName = 'srs-cont-transfer-approve', count, 0)) AS srs_cont_transfer_approve,
  SUM(IF(metricName = 'srs-cont-transfer-cancel', count, 0)) AS srs_cont_transfer_cancel,
  SUM(IF(metricName = 'srs-cont-transfer-query', count, 0)) AS srs_cont_transfer_query,
  SUM(IF(metricName = 'srs-cont-transfer-reject', count, 0)) AS srs_cont_transfer_reject,
  SUM(IF(metricName = 'srs-cont-transfer-request', count, 0)) AS srs_cont_transfer_request,
  SUM(IF(metricName = 'srs-cont-update', count, 0)) AS srs_cont_update,
-- Cross join a list of all TLDs against TLD-specific metrics and then
-- filter so that only metrics with that TLD or a NULL TLD are counted
-- towards a given TLD.
FROM (
  SELECT tldStr AS tld
  FROM [latest_snapshot.Registry]
  -- Include all real TLDs that are not in pre-delegation testing.
  WHERE tldType = 'REAL'
  OMIT RECORD IF SOME(tldStateTransitions.tldState = 'PDT')
) AS Tld
CROSS JOIN (
  SELECT
    tld, metricName, count
  FROM
    -- Dummy data source that ensures that all TLDs appear in report,
    -- since they'll all have at least 1 joined row that survives.
    (SELECT STRING(NULL) AS tld, STRING(NULL) AS metricName, 0 AS count),
    -- BEGIN JOINED DATA SOURCES --

(

-- Query for operational-registrars metric.
SELECT
  allowedTlds AS tld,
  'operational-registrars' AS metricName,
  INTEGER(COUNT(__key__.name)) AS count,
FROM [domain-registry:latest_snapshot.Registrar]
WHERE type = 'REAL'
  AND creationTime < TIMESTAMP('2016-07-01')
GROUP BY tld

),
(

-- Query for all-ramped-up-registrars metric.
SELECT
  STRING(NULL) AS tld,  -- Applies to all TLDs.
  'all-ramped-up-registrars' AS metricName,
  -- Sandbox OT&E registrar names can have either '-{1,2,3,4}' or '{,2,3}'
  -- as suffixes - strip all of these off to get the "real" name.
  INTEGER(EXACT_COUNT_DISTINCT(
    REGEXP_EXTRACT(__key__.name, r'(.+?)(?:-?\d)?$'))) AS count,
FROM [domain-registry-sandbox:latest_snapshot.Registrar]
WHERE type = 'OTE'
  AND creationTime < TIMESTAMP('2016-07-01')

),
(

-- Query for all-registrars metric.
SELECT
  STRING(NULL) AS tld,  -- Applies to all TLDs.
  'all-registrars' AS metricName,
  INTEGER('None') AS count,

),
(

-- Query for WHOIS metrics.
SELECT
  STRING(NULL) AS tld,  -- Applies to all TLDs.
  -- Whois queries over port 43 get forwarded by the proxy to /_dr/whois,
  -- while web queries come in via /whois/<params>.
  CASE WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries'
       WHEN LEFT(requestPath, 7) = '/whois/' THEN 'web-whois-queries'
  END AS metricName,
  INTEGER(COUNT(requestPath)) AS count,
FROM (
  -- BEGIN LOGS QUERY --

-- Query AppEngine request logs for the report month.
SELECT
  protoPayload.resource AS requestPath,
  protoPayload.line.logMessage AS logMessage,
FROM
  TABLE_DATE_RANGE_STRICT(
    [appengine_logs.appengine_googleapis_com_request_log_],
    TIMESTAMP('2016-06-01'),
    -- End timestamp is inclusive, so subtract 1 second from the
    -- timestamp representing the start of the next month.
    DATE_ADD(TIMESTAMP('2016-07-01'), -1, 'SECOND'))

  -- END LOGS QUERY --
)
GROUP BY metricName
HAVING metricName IS NOT NULL

),
(

-- Query for DNS metrics.
SELECT
  STRING(NULL) AS tld,
  metricName,
  -1 AS count,
FROM
  (SELECT 'dns-udp-queries' AS metricName),
  (SELECT 'dns-tcp-queries' AS metricName)

),
(

-- Query FlowReporter JSON log messages and calculate SRS metrics.
SELECT
  tld,
  activityReportField AS metricName,
  -- Manual INTEGER cast to work around a BigQuery bug (b/14560012).
  INTEGER(COUNT(*)) AS count,
FROM
  -- Flatten the "tld" column (repeated) so that domain checks for names
  -- across multiple TLDs are counted towards each checked TLD as though
  -- there were one copy of this row per TLD (the effect of flattening).
  FLATTEN((
    SELECT
      -- Use some ugly regex hackery to convert JSON list of strings into
      -- repeated string values, since there's no built-in for this.
      -- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF
      -- once we can use GoogleSQL; example in b/37629674#comment2.
      REGEXP_EXTRACT(
        SPLIT(
          REGEXP_EXTRACT(
            JSON_EXTRACT(json, '$.tlds'),
            r'^\[(.*)\]$')),
        '^"(.*)"$') AS tld,
      -- TODO(b/XXX): remove rawTlds after June 2017 (see below).
      JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType,
      JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField')
          AS activityReportField,
    FROM (
      SELECT
        -- Extract JSON payload following log signature.
        REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
            AS json,
      FROM (
        -- BEGIN LOGS QUERY --

-- Query AppEngine request logs for the report month.
SELECT
  protoPayload.resource AS requestPath,
  protoPayload.line.logMessage AS logMessage,
FROM
  TABLE_DATE_RANGE_STRICT(
    [appengine_logs.appengine_googleapis_com_request_log_],
    TIMESTAMP('2016-06-01'),
    -- End timestamp is inclusive, so subtract 1 second from the
    -- timestamp representing the start of the next month.
    DATE_ADD(TIMESTAMP('2016-07-01'), -1, 'SECOND'))

        -- END LOGS QUERY --
      )
      WHERE logMessage CONTAINS 'FLOW-LOG-SIGNATURE-METADATA'
    )
  ),
  -- Second argument to flatten (see above).
  tld)
-- Exclude cases that can't be tabulated correctly - activity report field
-- is null/empty, or the TLD is null/empty even though it's a domain flow.
WHERE
  activityReportField != '' AND (tld != '' OR resourceType != 'domain')
GROUP BY tld, metricName
ORDER BY tld, metricName

)
    -- END JOINED DATA SOURCES --
) AS TldMetrics
WHERE Tld.tld = TldMetrics.tld OR TldMetrics.tld IS NULL
GROUP BY tld
ORDER BY tld
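The ramp-up and pre-ramp-up columns in this golden query rely on a signed-sum trick: each metric row contributes its count with a positive or negative sign so that the SUM becomes a difference, which GREATEST then floors at zero. A minimal Python illustration of the same arithmetic (hypothetical counts, not real report data):

    # Rows of (metricName, count) as the joined data sources would emit them.
    rows = [('operational-registrars', 7), ('all-ramped-up-registrars', 9)]

    # Mirrors GREATEST(0, SUM(CASE WHEN ... THEN -count WHEN ... THEN count ELSE 0 END)).
    signed_sum = sum(-c if name == 'operational-registrars' else c for name, c in rows)
    ramp_up_registrars = max(0, signed_sum)
    assert ramp_up_registrars == 2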