mirror of
https://github.com/google/nomulus.git
synced 2025-05-01 12:37:52 +02:00
We only log FLOW-LOG-SIGNATURE-METADATA from one place: FlowRunner. As a result, we can swap the generalized regex for a prefix-only regex, saving a lot of processing for our EPP query (which is the most expensive of the bunch). I've also changed the test dates from 2017-05 to 2017-06, allowing us to copy-paste the test data into BigQuery to verify that the queries function correctly. 2017-06 in particular was chosen because June was the first month that populated all the metadata necessary to generate these reports. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=165391715
58 lines
2.2 KiB
SQL
58 lines
2.2 KiB
SQL
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
--     http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

-- Query FlowReporter JSON log messages and calculate SRS metrics.
--
-- We use ugly regexes over the monthly App Engine logs to determine how many
-- EPP requests we received for each command. For example:
--   {"commandType":"check"...,"targetIds":["ais.a.how"],
--   "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
--
-- Output: one row per (tld, metricName) pair, with `count` holding the number
-- of (log entry, tld) combinations that produced that pair.
--
-- NOTE(review): the %...% tokens look like template placeholders that are
-- substituted before the query is run -- confirm against the caller.

WITH metadata_json AS (
  -- Extract the logged JSON payload from every metadata log message.
  SELECT
    REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
      AS json
  FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%MONTHLY_LOGS_TABLE%` AS logs
  JOIN
    -- One output row per element of the logs.logMessage array.
    UNNEST(logs.logMessage) AS logMessage
  WHERE
    -- Prefix test first, so the regex above only runs on metadata messages.
    STARTS_WITH(logMessage, "%METADATA_LOG_PREFIX%")
),

flow_fields AS (
  -- Pull the fields needed for tabulation out of the JSON payload.
  SELECT
    -- TODO(b/32486667): Replace with JSON.parse() UDF when available for views
    -- Strip the enclosing [ ] from the JSON array text, then split what is
    -- left on SPLIT's default delimiter (a comma). Each element keeps its
    -- surrounding quotation marks; an empty array yields a single '' element.
    SPLIT(
      REGEXP_EXTRACT(JSON_EXTRACT(json, '$.tlds'), r'^\[(.*)\]$')) AS tlds,
    JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType,
    JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField')
      AS activityReportField
  FROM metadata_json
)

SELECT
  -- Remove quotation marks from tld fields.
  REGEXP_EXTRACT(tld, '^"(.*)"$') AS tld,
  activityReportField AS metricName,
  COUNT(*) AS count
FROM flow_fields AS regexes
JOIN
  -- Unnest the JSON-parsed tlds.
  UNNEST(regexes.tlds) AS tld
-- Exclude cases that can't be tabulated correctly, where activityReportField
-- is null/empty, or TLD is null/empty despite being a domain flow.
WHERE
  activityReportField != ''
  AND (tld != '' OR resourceType != 'domain')
-- NOTE(review): `tld` names both the raw unnested element and the
-- quote-stripped output column. Left exactly as in the original; TODO confirm
-- which one GROUP BY / ORDER BY resolve to and consider distinct names.
GROUP BY
  tld, metricName
ORDER BY
  tld, metricName