diff --git a/python/google/registry/reporting/BUILD b/python/google/registry/reporting/BUILD deleted file mode 100644 index 5ab025da6..000000000 --- a/python/google/registry/reporting/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -package(default_visibility = ["//java/google/registry:registry_project"]) - -licenses(["notice"]) # Apache 2.0 - -py_library( - name = "icann_report_query_builder", - srcs = ["icann_report_query_builder.py"], - deps = ["//python:python_directory_import"], -) - -py_test( - name = "icann_report_query_builder_test", - size = "small", - srcs = ["icann_report_query_builder_test.py"], - data = ["testdata/golden_activity_query.sql"], - deps = [":icann_report_query_builder"], -) diff --git a/python/google/registry/reporting/icann_report_query_builder.py b/python/google/registry/reporting/icann_report_query_builder.py deleted file mode 100644 index d107db907..000000000 --- a/python/google/registry/reporting/icann_report_query_builder.py +++ /dev/null @@ -1,348 +0,0 @@ -# Copyright 2017 The Nomulus Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""ICANN reporting BigQuery query construction logic. - -The IcannReportQueryBuilder class contains logic for constructing the -multi-part BigQuery queries used to produce ICANN monthly reports. These -queries are fairly complicated; see the design doc published to -nomulus-discuss@googlegroups.com for an overview. - -Currently, this class only supports building the query for activity -reports (not transaction reports). -""" -import datetime - -# This signature must match the one logged by FlowReporter - see -# cs/symbol:google.registry.flows.FlowReporter.METADATA_LOG_SIGNATURE -FLOWREPORTER_LOG_SIGNATURE = 'FLOW-LOG-SIGNATURE-METADATA' - - -class IcannReportQueryBuilder(object): - """Container for methods to build BigQuery queries for ICANN reporting.""" - - def BuildActivityReportQuery(self, month, registrar_count): - """Returns the assembled activity report query for a given month. - - Specifically, we instantiate the outermost activity report query by pointing - it at the union of a series of "data source" queries that each produce data - used to generate certain metrics. These queries in turn rely on some common - lower-level data source queries (monthly logs, both raw and EPP-parsed). - - Args: - month: (str) month of the report to generate, in YYYY-MM format - registrar_count: (int) total number of registrars in the registry system - - Returns: - (str) the fully-instantiated activity report query SQL - """ - # Construct some date-related parameters from the given month. - this_month_date = datetime.datetime.strptime(month, '%Y-%m').date() - # Hacky way to compute the start of the next month - add enough days to get - # to the next month (e.g. 31), then set the day to 1. It'd be cleaner to - # use dateutils.relativedelta(months=1) but the dependency is a pain. - month_delta = datetime.timedelta(days=31) - next_month_date = (this_month_date + month_delta).replace(day=1) - this_yearmonth = this_month_date.strftime('%Y-%m') - next_yearmonth = next_month_date.strftime('%Y-%m') - - # Construct the queries themselves. - logs_query = self._MakeMonthlyLogsQuery(this_yearmonth, next_yearmonth) - data_source_queries = [ - self._MakeActivityOperationalRegistrarsQuery(next_yearmonth), - self._MakeActivityAllRampedUpRegistrarsQuery(next_yearmonth), - self._MakeActivityAllRegistrarsQuery(registrar_count), - self._MakeActivityWhoisQuery(logs_query), - self._MakeActivityDnsQuery(), - self._MakeActivityEppSrsMetricsQuery(logs_query) - ] - return _StripTrailingWhitespaceFromLines(self._MakeActivityReportQuery( - data_source_queries)) - - def _MakeMonthlyLogsQuery(self, this_yearmonth, next_yearmonth): - # TODO(b/20725722): add a real docstring. - # pylint: disable=missing-docstring - query = r""" - -- Query AppEngine request logs for the report month. - SELECT - protoPayload.resource AS requestPath, - protoPayload.line.logMessage AS logMessage, - FROM - TABLE_DATE_RANGE_STRICT( - [appengine_logs.appengine_googleapis_com_request_log_], - TIMESTAMP('%(this_yearmonth)s-01'), - -- End timestamp is inclusive, so subtract 1 second from the - -- timestamp representing the start of the next month. - DATE_ADD(TIMESTAMP('%(next_yearmonth)s-01'), -1, 'SECOND')) - """ - return query % {'this_yearmonth': this_yearmonth, - 'next_yearmonth': next_yearmonth} - - def _MakeActivityReportQuery(self, data_source_queries): - """Make the overall activity report query. - - Args: - data_source_queries: list of BigQuery SQL strings to use - as source 'tables' for the main query; each of these - queries must output a schema as follows: - - STRING tld / STRING metricName / INTEGER count - - A null TLD indicates that the metric counts towards - all TLDs. - - Returns: - query as a string of BigQuery SQL - """ - query = r""" - SELECT - Tld.tld AS tld, - SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars, - -- Compute ramp-up-registrars as all-ramped-up-registrars - -- minus operational-registrars, with a floor of 0. - GREATEST(0, SUM( - CASE - WHEN metricName = 'operational-registrars' THEN -count - WHEN metricName = 'all-ramped-up-registrars' THEN count - ELSE 0 - END)) AS ramp_up_registrars, - -- Compute pre-ramp-up-registrars as all-registrars minus - -- all-ramp-up-registrars, with a floor of 0. - GREATEST(0, SUM( - CASE - WHEN metricName = 'all-ramped-up-registrars' THEN -count - WHEN metricName = 'all-registrars' THEN count - ELSE 0 - END)) AS pre_ramp_up_registrars, - -- We don't support ZFA over SFTP, only AXFR. - 0 AS zfa_passwords, - SUM(IF(metricName = 'whois-43-queries', count, 0)) AS whois_43_queries, - SUM(IF(metricName = 'web-whois-queries', count, 0)) AS web_whois_queries, - -- We don't support searchable WHOIS. - 0 AS searchable_whois_queries, - -- DNS queries for UDP/TCP are all assumed to be recevied/responded. - SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_received, - SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_responded, - SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_received, - SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_responded, - -- SRS metrics. - SUM(IF(metricName = 'srs-dom-check', count, 0)) AS srs_dom_check, - SUM(IF(metricName = 'srs-dom-create', count, 0)) AS srs_dom_create, - SUM(IF(metricName = 'srs-dom-delete', count, 0)) AS srs_dom_delete, - SUM(IF(metricName = 'srs-dom-info', count, 0)) AS srs_dom_info, - SUM(IF(metricName = 'srs-dom-renew', count, 0)) AS srs_dom_renew, - SUM(IF(metricName = 'srs-dom-rgp-restore-report', count, 0)) AS srs_dom_rgp_restore_report, - SUM(IF(metricName = 'srs-dom-rgp-restore-request', count, 0)) AS srs_dom_rgp_restore_request, - SUM(IF(metricName = 'srs-dom-transfer-approve', count, 0)) AS srs_dom_transfer_approve, - SUM(IF(metricName = 'srs-dom-transfer-cancel', count, 0)) AS srs_dom_transfer_cancel, - SUM(IF(metricName = 'srs-dom-transfer-query', count, 0)) AS srs_dom_transfer_query, - SUM(IF(metricName = 'srs-dom-transfer-reject', count, 0)) AS srs_dom_transfer_reject, - SUM(IF(metricName = 'srs-dom-transfer-request', count, 0)) AS srs_dom_transfer_request, - SUM(IF(metricName = 'srs-dom-update', count, 0)) AS srs_dom_update, - SUM(IF(metricName = 'srs-host-check', count, 0)) AS srs_host_check, - SUM(IF(metricName = 'srs-host-create', count, 0)) AS srs_host_create, - SUM(IF(metricName = 'srs-host-delete', count, 0)) AS srs_host_delete, - SUM(IF(metricName = 'srs-host-info', count, 0)) AS srs_host_info, - SUM(IF(metricName = 'srs-host-update', count, 0)) AS srs_host_update, - SUM(IF(metricName = 'srs-cont-check', count, 0)) AS srs_cont_check, - SUM(IF(metricName = 'srs-cont-create', count, 0)) AS srs_cont_create, - SUM(IF(metricName = 'srs-cont-delete', count, 0)) AS srs_cont_delete, - SUM(IF(metricName = 'srs-cont-info', count, 0)) AS srs_cont_info, - SUM(IF(metricName = 'srs-cont-transfer-approve', count, 0)) AS srs_cont_transfer_approve, - SUM(IF(metricName = 'srs-cont-transfer-cancel', count, 0)) AS srs_cont_transfer_cancel, - SUM(IF(metricName = 'srs-cont-transfer-query', count, 0)) AS srs_cont_transfer_query, - SUM(IF(metricName = 'srs-cont-transfer-reject', count, 0)) AS srs_cont_transfer_reject, - SUM(IF(metricName = 'srs-cont-transfer-request', count, 0)) AS srs_cont_transfer_request, - SUM(IF(metricName = 'srs-cont-update', count, 0)) AS srs_cont_update, - -- Cross join a list of all TLDs against TLD-specific metrics and then - -- filter so that only metrics with that TLD or a NULL TLD are counted - -- towards a given TLD. - FROM ( - SELECT tldStr AS tld - FROM [latest_snapshot.Registry] - -- Include all real TLDs that are not in pre-delegation testing. - WHERE tldType = 'REAL' - OMIT RECORD IF SOME(tldStateTransitions.tldState = 'PDT') - ) AS Tld - CROSS JOIN ( - SELECT - tld, metricName, count - FROM - -- Dummy data source that ensures that all TLDs appear in report, - -- since they'll all have at least 1 joined row that survives. - (SELECT STRING(NULL) AS tld, STRING(NULL) AS metricName, 0 AS count), - -- BEGIN JOINED DATA SOURCES -- - %(joined_data_sources)s - -- END JOINED DATA SOURCES -- - ) AS TldMetrics - WHERE Tld.tld = TldMetrics.tld OR TldMetrics.tld IS NULL - GROUP BY tld - ORDER BY tld - """ - # Turn each data source query into a subquery in parentheses, and join - # them together with comments (representing a table union). - joined_data_sources = '\n' + ',\n'.join( - '(\n%s\n)' % query for query in data_source_queries) - return query % {'joined_data_sources': joined_data_sources} - - def _MakeActivityOperationalRegistrarsQuery(self, next_yearmonth): - # TODO(b/20725722): add a real docstring. - # pylint: disable=missing-docstring - query = r""" - -- Query for operational-registrars metric. - SELECT - allowedTlds AS tld, - 'operational-registrars' AS metricName, - INTEGER(COUNT(__key__.name)) AS count, - FROM [domain-registry:latest_snapshot.Registrar] - WHERE type = 'REAL' - AND creationTime < TIMESTAMP('%(next_yearmonth)s-01') - GROUP BY tld - """ - return query % {'next_yearmonth': next_yearmonth} - - def _MakeActivityAllRampedUpRegistrarsQuery(self, next_yearmonth): - # TODO(b/20725722): add a real docstring. - # pylint: disable=missing-docstring - query = r""" - -- Query for all-ramped-up-registrars metric. - SELECT - STRING(NULL) AS tld, -- Applies to all TLDs. - 'all-ramped-up-registrars' AS metricName, - -- Sandbox OT&E registrar names can have either '-{1,2,3,4}' or '{,2,3}' - -- as suffixes - strip all of these off to get the "real" name. - INTEGER(EXACT_COUNT_DISTINCT( - REGEXP_EXTRACT(__key__.name, r'(.+?)(?:-?\d)?$'))) AS count, - FROM [domain-registry-sandbox:latest_snapshot.Registrar] - WHERE type = 'OTE' - AND creationTime < TIMESTAMP('%(next_yearmonth)s-01') - """ - return query % {'next_yearmonth': next_yearmonth} - - def _MakeActivityAllRegistrarsQuery(self, registrar_count): - # TODO(b/20725722): add a real docstring. - # pylint: disable=missing-docstring - query = """ - -- Query for all-registrars metric. - SELECT - STRING(NULL) AS tld, -- Applies to all TLDs. - 'all-registrars' AS metricName, - INTEGER('%(registrar_count)s') AS count, - """ - return query % {'registrar_count': registrar_count} - - def _MakeActivityWhoisQuery(self, logs_query): - # TODO(b/20725722): add a real docstring. - # pylint: disable=missing-docstring - query = r""" - -- Query for WHOIS metrics. - SELECT - STRING(NULL) AS tld, -- Applies to all TLDs. - -- Whois queries over port 43 get forwarded by the proxy to /_dr/whois, - -- while web queries come in via /whois/. - CASE WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries' - WHEN LEFT(requestPath, 7) = '/whois/' THEN 'web-whois-queries' - END AS metricName, - INTEGER(COUNT(requestPath)) AS count, - FROM ( - -- BEGIN LOGS QUERY -- - %(logs_query)s - -- END LOGS QUERY -- - ) - GROUP BY metricName - HAVING metricName IS NOT NULL - """ - return query % {'logs_query': logs_query} - - def _MakeActivityDnsQuery(self): - # TODO(b/20725722): add a real docstring. - # pylint: disable=missing-docstring - query = r""" - -- Query for DNS metrics. - SELECT - STRING(NULL) AS tld, - metricName, - -1 AS count, - FROM - (SELECT 'dns-udp-queries' AS metricName), - (SELECT 'dns-tcp-queries' AS metricName) - """ - return query - - def _MakeActivityEppSrsMetricsQuery(self, logs_query): - # TODO(b/20725722): add a real docstring. - # pylint: disable=missing-docstring - query = r""" - -- Query FlowReporter JSON log messages and calculate SRS metrics. - SELECT - tld, - activityReportField AS metricName, - -- Manual INTEGER cast to work around a BigQuery bug (b/14560012). - INTEGER(COUNT(*)) AS count, - FROM - -- Flatten the "tld" column (repeated) so that domain checks for names - -- across multiple TLDs are counted towards each checked TLD as though - -- there were one copy of this row per TLD (the effect of flattening). - FLATTEN(( - SELECT - -- Use some ugly regex hackery to convert JSON list of strings into - -- repeated string values, since there's no built-in for this. - -- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF - -- once we can use GoogleSQL; example in b/37629674#comment2. - REGEXP_EXTRACT( - SPLIT( - REGEXP_EXTRACT( - JSON_EXTRACT(json, '$.tlds'), - r'^\[(.*)\]$')), - '^"(.*)"$') AS tld, - -- TODO(b/XXX): remove rawTlds after June 2017 (see below). - JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType, - JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField') - AS activityReportField, - FROM ( - SELECT - -- Extract JSON payload following log signature. - REGEXP_EXTRACT(logMessage, r'%(log_signature)s: (.*)\n?$') - AS json, - FROM ( - -- BEGIN LOGS QUERY -- - %(logs_query)s - -- END LOGS QUERY -- - ) - WHERE logMessage CONTAINS '%(log_signature)s' - ) - ), - -- Second argument to flatten (see above). - tld) - -- Exclude cases that can't be tabulated correctly - activity report field - -- is null/empty, or the TLD is null/empty even though it's a domain flow. - WHERE - activityReportField != '' AND (tld != '' OR resourceType != 'domain') - GROUP BY tld, metricName - ORDER BY tld, metricName - """ - return query % {'logs_query': logs_query, - 'log_signature': FLOWREPORTER_LOG_SIGNATURE} - - -def _StripTrailingWhitespaceFromLines(string): - """Strips trailing whitespace from each line of the provided string. - - Args: - string: (str) string to remove trailing whitespace from - - Returns: - (str) input string, with trailing whitespace stripped from each line - """ - return '\n'.join(line.rstrip() for line in string.split('\n')) diff --git a/python/google/registry/reporting/icann_report_query_builder_test.py b/python/google/registry/reporting/icann_report_query_builder_test.py deleted file mode 100644 index b7b93ee66..000000000 --- a/python/google/registry/reporting/icann_report_query_builder_test.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2017 The Nomulus Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for google.registry.reporting.icann_report_query_builder.""" - -import logging -import os -import unittest - -from google.registry.reporting import icann_report_query_builder - - -class IcannReportQueryBuilderTest(unittest.TestCase): - - testdata_path = None - - def setUp(self): - # Using __file__ is a bit of a hack, but it's the only way that "just works" - # for internal and external versions of the code, and it's fine for tests. - self.testdata_path = os.path.join(os.path.dirname(__file__), 'testdata') - - def testActivityQuery_matchesGoldenQuery(self): - self.maxDiff = None # Show long diffs - query_builder = icann_report_query_builder.IcannReportQueryBuilder() - golden_activity_query_path = os.path.join(self.testdata_path, - 'golden_activity_query.sql') - with open(golden_activity_query_path, 'r') as golden_activity_query: - golden_file_contents = golden_activity_query.read() - # Remove golden file copyright header by stripping until END OF HEADER. - golden_file_sql = golden_file_contents.split('-- END OF HEADER\n')[1] - actual_sql = query_builder.BuildActivityReportQuery( - month='2016-06', registrar_count=None) - try: - self.assertMultiLineEqual(golden_file_sql, actual_sql) - except AssertionError as e: - # Print the actual SQL generated so that it's easy to copy-paste into - # the golden file when updating the query. - sep = '=' * 50 + '\n' - logging.warning( - 'Generated activity query SQL:\n' + sep + actual_sql + sep) - raise e - - def testStringTrailingWhitespaceFromLines(self): - def do_test(expected, original): - self.assertEqual( - expected, - icann_report_query_builder._StripTrailingWhitespaceFromLines( - original)) - do_test('foo\nbar\nbaz\n', 'foo\nbar\nbaz\n') - do_test('foo\nbar\nbaz\n', 'foo \nbar \nbaz \n') - do_test('foo\nbar\nbaz', 'foo \nbar \nbaz ') - do_test('\nfoo\nbar\nbaz', '\nfoo\nbar\nbaz') - do_test('foo\n\n', 'foo\n \n') - do_test('foo\n', 'foo\n ') - - -if __name__ == '__main__': - unittest.main() diff --git a/python/google/registry/reporting/testdata/golden_activity_query.sql b/python/google/registry/reporting/testdata/golden_activity_query.sql deleted file mode 100644 index c81885f63..000000000 --- a/python/google/registry/reporting/testdata/golden_activity_query.sql +++ /dev/null @@ -1,243 +0,0 @@ --- Copyright 2017 The Nomulus Authors. All Rights Reserved. --- --- Licensed under the Apache License, Version 2.0 (the "License"); --- you may not use this file except in compliance with the License. --- You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. - --- END OF HEADER - - SELECT - Tld.tld AS tld, - SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars, - -- Compute ramp-up-registrars as all-ramped-up-registrars - -- minus operational-registrars, with a floor of 0. - GREATEST(0, SUM( - CASE - WHEN metricName = 'operational-registrars' THEN -count - WHEN metricName = 'all-ramped-up-registrars' THEN count - ELSE 0 - END)) AS ramp_up_registrars, - -- Compute pre-ramp-up-registrars as all-registrars minus - -- all-ramp-up-registrars, with a floor of 0. - GREATEST(0, SUM( - CASE - WHEN metricName = 'all-ramped-up-registrars' THEN -count - WHEN metricName = 'all-registrars' THEN count - ELSE 0 - END)) AS pre_ramp_up_registrars, - -- We don't support ZFA over SFTP, only AXFR. - 0 AS zfa_passwords, - SUM(IF(metricName = 'whois-43-queries', count, 0)) AS whois_43_queries, - SUM(IF(metricName = 'web-whois-queries', count, 0)) AS web_whois_queries, - -- We don't support searchable WHOIS. - 0 AS searchable_whois_queries, - -- DNS queries for UDP/TCP are all assumed to be recevied/responded. - SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_received, - SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_responded, - SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_received, - SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_responded, - -- SRS metrics. - SUM(IF(metricName = 'srs-dom-check', count, 0)) AS srs_dom_check, - SUM(IF(metricName = 'srs-dom-create', count, 0)) AS srs_dom_create, - SUM(IF(metricName = 'srs-dom-delete', count, 0)) AS srs_dom_delete, - SUM(IF(metricName = 'srs-dom-info', count, 0)) AS srs_dom_info, - SUM(IF(metricName = 'srs-dom-renew', count, 0)) AS srs_dom_renew, - SUM(IF(metricName = 'srs-dom-rgp-restore-report', count, 0)) AS srs_dom_rgp_restore_report, - SUM(IF(metricName = 'srs-dom-rgp-restore-request', count, 0)) AS srs_dom_rgp_restore_request, - SUM(IF(metricName = 'srs-dom-transfer-approve', count, 0)) AS srs_dom_transfer_approve, - SUM(IF(metricName = 'srs-dom-transfer-cancel', count, 0)) AS srs_dom_transfer_cancel, - SUM(IF(metricName = 'srs-dom-transfer-query', count, 0)) AS srs_dom_transfer_query, - SUM(IF(metricName = 'srs-dom-transfer-reject', count, 0)) AS srs_dom_transfer_reject, - SUM(IF(metricName = 'srs-dom-transfer-request', count, 0)) AS srs_dom_transfer_request, - SUM(IF(metricName = 'srs-dom-update', count, 0)) AS srs_dom_update, - SUM(IF(metricName = 'srs-host-check', count, 0)) AS srs_host_check, - SUM(IF(metricName = 'srs-host-create', count, 0)) AS srs_host_create, - SUM(IF(metricName = 'srs-host-delete', count, 0)) AS srs_host_delete, - SUM(IF(metricName = 'srs-host-info', count, 0)) AS srs_host_info, - SUM(IF(metricName = 'srs-host-update', count, 0)) AS srs_host_update, - SUM(IF(metricName = 'srs-cont-check', count, 0)) AS srs_cont_check, - SUM(IF(metricName = 'srs-cont-create', count, 0)) AS srs_cont_create, - SUM(IF(metricName = 'srs-cont-delete', count, 0)) AS srs_cont_delete, - SUM(IF(metricName = 'srs-cont-info', count, 0)) AS srs_cont_info, - SUM(IF(metricName = 'srs-cont-transfer-approve', count, 0)) AS srs_cont_transfer_approve, - SUM(IF(metricName = 'srs-cont-transfer-cancel', count, 0)) AS srs_cont_transfer_cancel, - SUM(IF(metricName = 'srs-cont-transfer-query', count, 0)) AS srs_cont_transfer_query, - SUM(IF(metricName = 'srs-cont-transfer-reject', count, 0)) AS srs_cont_transfer_reject, - SUM(IF(metricName = 'srs-cont-transfer-request', count, 0)) AS srs_cont_transfer_request, - SUM(IF(metricName = 'srs-cont-update', count, 0)) AS srs_cont_update, - -- Cross join a list of all TLDs against TLD-specific metrics and then - -- filter so that only metrics with that TLD or a NULL TLD are counted - -- towards a given TLD. - FROM ( - SELECT tldStr AS tld - FROM [latest_snapshot.Registry] - -- Include all real TLDs that are not in pre-delegation testing. - WHERE tldType = 'REAL' - OMIT RECORD IF SOME(tldStateTransitions.tldState = 'PDT') - ) AS Tld - CROSS JOIN ( - SELECT - tld, metricName, count - FROM - -- Dummy data source that ensures that all TLDs appear in report, - -- since they'll all have at least 1 joined row that survives. - (SELECT STRING(NULL) AS tld, STRING(NULL) AS metricName, 0 AS count), - -- BEGIN JOINED DATA SOURCES -- - -( - - -- Query for operational-registrars metric. - SELECT - allowedTlds AS tld, - 'operational-registrars' AS metricName, - INTEGER(COUNT(__key__.name)) AS count, - FROM [domain-registry:latest_snapshot.Registrar] - WHERE type = 'REAL' - AND creationTime < TIMESTAMP('2016-07-01') - GROUP BY tld - -), -( - - -- Query for all-ramped-up-registrars metric. - SELECT - STRING(NULL) AS tld, -- Applies to all TLDs. - 'all-ramped-up-registrars' AS metricName, - -- Sandbox OT&E registrar names can have either '-{1,2,3,4}' or '{,2,3}' - -- as suffixes - strip all of these off to get the "real" name. - INTEGER(EXACT_COUNT_DISTINCT( - REGEXP_EXTRACT(__key__.name, r'(.+?)(?:-?\d)?$'))) AS count, - FROM [domain-registry-sandbox:latest_snapshot.Registrar] - WHERE type = 'OTE' - AND creationTime < TIMESTAMP('2016-07-01') - -), -( - - -- Query for all-registrars metric. - SELECT - STRING(NULL) AS tld, -- Applies to all TLDs. - 'all-registrars' AS metricName, - INTEGER('None') AS count, - -), -( - - -- Query for WHOIS metrics. - SELECT - STRING(NULL) AS tld, -- Applies to all TLDs. - -- Whois queries over port 43 get forwarded by the proxy to /_dr/whois, - -- while web queries come in via /whois/. - CASE WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries' - WHEN LEFT(requestPath, 7) = '/whois/' THEN 'web-whois-queries' - END AS metricName, - INTEGER(COUNT(requestPath)) AS count, - FROM ( - -- BEGIN LOGS QUERY -- - - -- Query AppEngine request logs for the report month. - SELECT - protoPayload.resource AS requestPath, - protoPayload.line.logMessage AS logMessage, - FROM - TABLE_DATE_RANGE_STRICT( - [appengine_logs.appengine_googleapis_com_request_log_], - TIMESTAMP('2016-06-01'), - -- End timestamp is inclusive, so subtract 1 second from the - -- timestamp representing the start of the next month. - DATE_ADD(TIMESTAMP('2016-07-01'), -1, 'SECOND')) - - -- END LOGS QUERY -- - ) - GROUP BY metricName - HAVING metricName IS NOT NULL - -), -( - - -- Query for DNS metrics. - SELECT - STRING(NULL) AS tld, - metricName, - -1 AS count, - FROM - (SELECT 'dns-udp-queries' AS metricName), - (SELECT 'dns-tcp-queries' AS metricName) - -), -( - - -- Query FlowReporter JSON log messages and calculate SRS metrics. - SELECT - tld, - activityReportField AS metricName, - -- Manual INTEGER cast to work around a BigQuery bug (b/14560012). - INTEGER(COUNT(*)) AS count, - FROM - -- Flatten the "tld" column (repeated) so that domain checks for names - -- across multiple TLDs are counted towards each checked TLD as though - -- there were one copy of this row per TLD (the effect of flattening). - FLATTEN(( - SELECT - -- Use some ugly regex hackery to convert JSON list of strings into - -- repeated string values, since there's no built-in for this. - -- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF - -- once we can use GoogleSQL; example in b/37629674#comment2. - REGEXP_EXTRACT( - SPLIT( - REGEXP_EXTRACT( - JSON_EXTRACT(json, '$.tlds'), - r'^\[(.*)\]$')), - '^"(.*)"$') AS tld, - -- TODO(b/XXX): remove rawTlds after June 2017 (see below). - JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType, - JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField') - AS activityReportField, - FROM ( - SELECT - -- Extract JSON payload following log signature. - REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') - AS json, - FROM ( - -- BEGIN LOGS QUERY -- - - -- Query AppEngine request logs for the report month. - SELECT - protoPayload.resource AS requestPath, - protoPayload.line.logMessage AS logMessage, - FROM - TABLE_DATE_RANGE_STRICT( - [appengine_logs.appengine_googleapis_com_request_log_], - TIMESTAMP('2016-06-01'), - -- End timestamp is inclusive, so subtract 1 second from the - -- timestamp representing the start of the next month. - DATE_ADD(TIMESTAMP('2016-07-01'), -1, 'SECOND')) - - -- END LOGS QUERY -- - ) - WHERE logMessage CONTAINS 'FLOW-LOG-SIGNATURE-METADATA' - ) - ), - -- Second argument to flatten (see above). - tld) - -- Exclude cases that can't be tabulated correctly - activity report field - -- is null/empty, or the TLD is null/empty even though it's a domain flow. - WHERE - activityReportField != '' AND (tld != '' OR resourceType != 'domain') - GROUP BY tld, metricName - ORDER BY tld, metricName - -) - -- END JOINED DATA SOURCES -- - ) AS TldMetrics - WHERE Tld.tld = TldMetrics.tld OR TldMetrics.tld IS NULL - GROUP BY tld - ORDER BY tld