mirror of
https://github.com/google/nomulus.git
synced 2025-05-13 16:07:15 +02:00
Change ICANN SRS/EPP activity query to use new JSON log line
This finally fixes b/37629674 by cutting over the ICANN activity reporting query for EPP/SRS metrics to use the new JSON-based structured log line in FlowReporter, which is much easier to parse and interpret correctly than the old XML logging which was not designed to be ingested in BigQuery. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=159633467
This commit is contained in:
parent
02a5e3d20f
commit
dce0daafc3
2 changed files with 95 additions and 210 deletions
|
@ -24,18 +24,9 @@ reports (not transaction reports).
|
|||
"""
|
||||
import datetime
|
||||
|
||||
# This regex pattern matches the full signature of the 'EPP Command' log line
|
||||
# from FlowRunner.run(), i.e. it matches the logging class/method that prefixes
|
||||
# the log message, plus the 'EPP Command' string, up to the newline.
|
||||
# Queries used below depend on matching this log line and parsing its
|
||||
# exact format, so it must be kept in sync with the logging site.
|
||||
# TODO(b/20725722): make the log statement format more robust.
|
||||
FLOWRUNNER_LOG_SIGNATURE_PATTERN = '(?:{}): EPP Command'.format('|'.join([
|
||||
'google.registry.flows.FlowRunner run',
|
||||
'com.google.domain.registry.flows.FlowRunner run',
|
||||
# TODO(b/29397966): figure out why this is FormattingLogger vs FlowRunner.
|
||||
'com.google.domain.registry.util.FormattingLogger log',
|
||||
'google.registry.util.FormattingLogger log']))
|
||||
# This signature must match the one logged by FlowReporter - see
|
||||
# cs/symbol:google.registry.flows.FlowReporter.METADATA_LOG_SIGNATURE
|
||||
FLOWREPORTER_LOG_SIGNATURE = 'FLOW-LOG-SIGNATURE-METADATA'
|
||||
|
||||
|
||||
class IcannReportQueryBuilder(object):
|
||||
|
@ -68,14 +59,13 @@ class IcannReportQueryBuilder(object):
|
|||
|
||||
# Construct the queries themselves.
|
||||
logs_query = self._MakeMonthlyLogsQuery(this_yearmonth, next_yearmonth)
|
||||
epp_xml_logs_query = self._MakeEppXmlLogsQuery(logs_query)
|
||||
data_source_queries = [
|
||||
self._MakeActivityOperationalRegistrarsQuery(next_yearmonth),
|
||||
self._MakeActivityAllRampedUpRegistrarsQuery(next_yearmonth),
|
||||
self._MakeActivityAllRegistrarsQuery(registrar_count),
|
||||
self._MakeActivityWhoisQuery(logs_query),
|
||||
self._MakeActivityDnsQuery(),
|
||||
self._MakeActivityEppSrsMetricsQuery(epp_xml_logs_query)
|
||||
self._MakeActivityEppSrsMetricsQuery(logs_query)
|
||||
]
|
||||
return _StripTrailingWhitespaceFromLines(self._MakeActivityReportQuery(
|
||||
data_source_queries))
|
||||
|
@ -99,32 +89,6 @@ class IcannReportQueryBuilder(object):
|
|||
return query % {'this_yearmonth': this_yearmonth,
|
||||
'next_yearmonth': next_yearmonth}
|
||||
|
||||
def _MakeEppXmlLogsQuery(self, logs_query):
|
||||
# TODO(b/20725722): add a real docstring.
|
||||
# pylint: disable=missing-docstring
|
||||
# This query relies on regex-parsing the precise format of the 'EPP Command'
|
||||
# log line from FlowRunner.run(), so it must be kept in sync.
|
||||
# TODO(b/20725722): make the log statement format more robust.
|
||||
query = r"""
|
||||
-- Query EPP request logs and extract the clientId and raw EPP XML.
|
||||
SELECT
|
||||
REGEXP_EXTRACT(logMessage, r'^%(log_signature)s\n\t.*\n\t(.*)\n') AS clientId,
|
||||
REGEXP_EXTRACT(logMessage, r'^%(log_signature)s\n\t.*\n\t.*\n\t.*\n\t((?s).*)$') AS xml,
|
||||
FROM (
|
||||
-- BEGIN LOGS QUERY --
|
||||
%(logs_query)s
|
||||
-- END LOGS QUERY --
|
||||
)
|
||||
WHERE
|
||||
-- EPP endpoints from the proxy, regtool, and console respectively.
|
||||
(requestPath IN ('/_dr/epp', '/_dr/epptool', '/registrar-xhr')
|
||||
OR LEFT(requestPath, 7) = '/check?')
|
||||
AND REGEXP_MATCH(logMessage, r'^%(log_signature)s')
|
||||
AND NOT logMessage CONTAINS 'DRY_RUN'
|
||||
"""
|
||||
return query % {'logs_query': logs_query,
|
||||
'log_signature': FLOWRUNNER_LOG_SIGNATURE_PATTERN}
|
||||
|
||||
def _MakeActivityReportQuery(self, data_source_queries):
|
||||
"""Make the overall activity report query.
|
||||
|
||||
|
@ -316,91 +280,60 @@ class IcannReportQueryBuilder(object):
|
|||
"""
|
||||
return query
|
||||
|
||||
def _MakeActivityEppSrsMetricsQuery(self, epp_xml_logs_query):
|
||||
def _MakeActivityEppSrsMetricsQuery(self, logs_query):
|
||||
# TODO(b/20725722): add a real docstring.
|
||||
# pylint: disable=missing-docstring
|
||||
query = r"""
|
||||
-- Query EPP XML messages and calculate SRS metrics.
|
||||
SELECT * FROM (
|
||||
-- Query FlowReporter JSON log messages and calculate SRS metrics.
|
||||
SELECT
|
||||
LOWER(domainTld) AS tld,
|
||||
-- SRS metric names follow a set pattern corresponding to the EPP
|
||||
-- protocol elements. First we extract the 'inner' command element in
|
||||
-- EPP, e.g. <domain:create>, which is the resource type followed by
|
||||
-- the standard EPP command type. To get the metric name, we add the
|
||||
-- prefix 'srs-', abbreviate 'domain' as 'dom' and 'contact' as 'cont',
|
||||
-- and replace ':' with '-' to produce 'srs-dom-create'.
|
||||
--
|
||||
-- Transfers have subcommands indicated by an 'op' attribute, which we
|
||||
-- extract and add as an extra suffix for transfer commands, so e.g.
|
||||
-- 'srs-cont-transfer-approve'. Domain restores are domain updates
|
||||
-- with a special <rgp:restore> element; if present, the command counts
|
||||
-- under the srs-dom-rgp-restore-{request,report} metric (depending on
|
||||
-- the value of the 'op' attribute) instead of srs-dom-update.
|
||||
CONCAT(
|
||||
'srs-',
|
||||
REPLACE(REPLACE(REPLACE(
|
||||
CASE
|
||||
WHEN NOT restoreOp IS NULL THEN CONCAT('domain-rgp-restore-', restoreOp)
|
||||
WHEN commandType = 'transfer' THEN CONCAT(innerCommand, '-', commandOpArg)
|
||||
ELSE innerCommand
|
||||
END,
|
||||
':', '-'), 'domain', 'dom'), 'contact', 'cont')
|
||||
) AS metricName,
|
||||
INTEGER(COUNT(xml)) AS count,
|
||||
FROM (
|
||||
SELECT
|
||||
-- Extract salient bits of the EPP XML using regexes. This is fairly
|
||||
-- safe since the EPP gets schema-validated and pretty-printed before
|
||||
-- getting logged, and so it looks something like this:
|
||||
--
|
||||
-- <command>
|
||||
-- <transfer op="request">
|
||||
-- <domain:transfer ...
|
||||
--
|
||||
-- From that, we parse out 'transfer' as the command type from the name
|
||||
-- of the first element after <command>, 'request' as the value of the
|
||||
-- 'op' attribute of that element (if any), and 'domain:transfer' as
|
||||
-- the inner command from the name of the subsequent element.
|
||||
--
|
||||
-- Domain commands all have at least one <domain:name> element (more
|
||||
-- than one for domain checks, but we just count the first), from which
|
||||
-- we extract the domain TLD as everything after the first dot in the
|
||||
-- element value. This won't work if the client mistakenly sends a
|
||||
-- hostname (e.g. 'www.foo.example') as the domain name, but we prefer
|
||||
-- this over taking everything after the last dot so that multipart
|
||||
-- TLDs like 'co.uk' can be supported.
|
||||
--
|
||||
-- Domain restores are indicated by an <rgp:restore> element, from
|
||||
-- which we extract the value of the 'op' attribute.
|
||||
--
|
||||
-- TODO(b/20725722): preprocess the XML in FlowRunner so we don't need
|
||||
-- regex parsing of XML here (http://stackoverflow.com/a/1732454).
|
||||
--
|
||||
REGEXP_EXTRACT(xml, '(?s)<command>.*?<([a-z]+)') AS commandType,
|
||||
REGEXP_EXTRACT(xml, '(?s)<command>.*?<[a-z]+ op="(.+?)"') AS commandOpArg,
|
||||
REGEXP_EXTRACT(xml, '(?s)<command>.*?<.+?>.*?<([a-z]+:[a-z]+)') AS innerCommand,
|
||||
REGEXP_EXTRACT(xml, '<domain:name.*?>[^.]*[.](.+)</domain:name>') AS domainTld,
|
||||
REGEXP_EXTRACT(xml, '<rgp:restore op="(.+?)"/>') AS restoreOp,
|
||||
xml,
|
||||
FROM (
|
||||
-- BEGIN EPP XML LOGS QUERY --
|
||||
%(epp_xml_logs_query)s
|
||||
-- END EPP XML LOGS QUERY --
|
||||
)
|
||||
-- Filter to just XML that contains a <command> element (no <hello>s).
|
||||
WHERE xml CONTAINS '<command>'
|
||||
)
|
||||
-- Whitelist of EPP command types that we care about for metrics;
|
||||
-- excludes login, logout, and poll.
|
||||
WHERE commandType IN ('check', 'create', 'delete', 'info', 'renew', 'transfer', 'update')
|
||||
tld,
|
||||
activityReportField AS metricName,
|
||||
-- Manual INTEGER cast to work around a BigQuery bug (b/14560012).
|
||||
INTEGER(COUNT(*)) AS count,
|
||||
FROM
|
||||
-- Flatten the "tld" column (repeated) so that domain checks for names
|
||||
-- across multiple TLDs are counted towards each checked TLD as though
|
||||
-- there were one copy of this row per TLD (the effect of flattening).
|
||||
FLATTEN((
|
||||
SELECT
|
||||
-- Use some ugly regex hackery to convert JSON list of strings into
|
||||
-- repeated string values, since there's no built-in for this.
|
||||
-- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF
|
||||
-- once we can use GoogleSQL; example in b/37629674#comment2.
|
||||
REGEXP_EXTRACT(
|
||||
SPLIT(
|
||||
REGEXP_EXTRACT(
|
||||
JSON_EXTRACT(json, '$.tlds'),
|
||||
r'^\[(.*)\]$')),
|
||||
'^"(.*)"$') AS tld,
|
||||
-- TODO(b/XXX): remove rawTlds after June 2017 (see below).
|
||||
JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType,
|
||||
JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField')
|
||||
AS activityReportField,
|
||||
FROM (
|
||||
SELECT
|
||||
-- Extract JSON payload following log signature.
|
||||
REGEXP_EXTRACT(logMessage, r'%(log_signature)s: (.*)\n?$')
|
||||
AS json,
|
||||
FROM (
|
||||
-- BEGIN LOGS QUERY --
|
||||
%(logs_query)s
|
||||
-- END LOGS QUERY --
|
||||
)
|
||||
WHERE logMessage CONTAINS '%(log_signature)s'
|
||||
)
|
||||
),
|
||||
-- Second argument to flatten (see above).
|
||||
tld)
|
||||
-- Exclude cases that can't be tabulated correctly - activity report field
|
||||
-- is null/empty, or the TLD is null/empty even though it's a domain flow.
|
||||
WHERE
|
||||
activityReportField != '' AND (tld != '' OR resourceType != 'domain')
|
||||
GROUP BY tld, metricName
|
||||
)
|
||||
-- Exclude domain-related EPP requests with no parsed TLD, otherwise
|
||||
-- a NULL tld will make them apply to all TLDs like contact/host requests.
|
||||
WHERE NOT (metricName CONTAINS 'srs-dom' AND tld IS NULL)
|
||||
ORDER BY tld, metricName
|
||||
"""
|
||||
return query % {'epp_xml_logs_query': epp_xml_logs_query}
|
||||
return query % {'logs_query': logs_query,
|
||||
'log_signature': FLOWREPORTER_LOG_SIGNATURE}
|
||||
|
||||
|
||||
def _StripTrailingWhitespaceFromLines(string):
|
||||
|
|
|
@ -175,78 +175,39 @@
|
|||
),
|
||||
(
|
||||
|
||||
-- Query EPP XML messages and calculate SRS metrics.
|
||||
SELECT * FROM (
|
||||
-- Query FlowReporter JSON log messages and calculate SRS metrics.
|
||||
SELECT
|
||||
LOWER(domainTld) AS tld,
|
||||
-- SRS metric names follow a set pattern corresponding to the EPP
|
||||
-- protocol elements. First we extract the 'inner' command element in
|
||||
-- EPP, e.g. <domain:create>, which is the resource type followed by
|
||||
-- the standard EPP command type. To get the metric name, we add the
|
||||
-- prefix 'srs-', abbreviate 'domain' as 'dom' and 'contact' as 'cont',
|
||||
-- and replace ':' with '-' to produce 'srs-dom-create'.
|
||||
--
|
||||
-- Transfers have subcommands indicated by an 'op' attribute, which we
|
||||
-- extract and add as an extra suffix for transfer commands, so e.g.
|
||||
-- 'srs-cont-transfer-approve'. Domain restores are domain updates
|
||||
-- with a special <rgp:restore> element; if present, the command counts
|
||||
-- under the srs-dom-rgp-restore-{request,report} metric (depending on
|
||||
-- the value of the 'op' attribute) instead of srs-dom-update.
|
||||
CONCAT(
|
||||
'srs-',
|
||||
REPLACE(REPLACE(REPLACE(
|
||||
CASE
|
||||
WHEN NOT restoreOp IS NULL THEN CONCAT('domain-rgp-restore-', restoreOp)
|
||||
WHEN commandType = 'transfer' THEN CONCAT(innerCommand, '-', commandOpArg)
|
||||
ELSE innerCommand
|
||||
END,
|
||||
':', '-'), 'domain', 'dom'), 'contact', 'cont')
|
||||
) AS metricName,
|
||||
INTEGER(COUNT(xml)) AS count,
|
||||
FROM (
|
||||
SELECT
|
||||
-- Extract salient bits of the EPP XML using regexes. This is fairly
|
||||
-- safe since the EPP gets schema-validated and pretty-printed before
|
||||
-- getting logged, and so it looks something like this:
|
||||
--
|
||||
-- <command>
|
||||
-- <transfer op="request">
|
||||
-- <domain:transfer ...
|
||||
--
|
||||
-- From that, we parse out 'transfer' as the command type from the name
|
||||
-- of the first element after <command>, 'request' as the value of the
|
||||
-- 'op' attribute of that element (if any), and 'domain:transfer' as
|
||||
-- the inner command from the name of the subsequent element.
|
||||
--
|
||||
-- Domain commands all have at least one <domain:name> element (more
|
||||
-- than one for domain checks, but we just count the first), from which
|
||||
-- we extract the domain TLD as everything after the first dot in the
|
||||
-- element value. This won't work if the client mistakenly sends a
|
||||
-- hostname (e.g. 'www.foo.example') as the domain name, but we prefer
|
||||
-- this over taking everything after the last dot so that multipart
|
||||
-- TLDs like 'co.uk' can be supported.
|
||||
--
|
||||
-- Domain restores are indicated by an <rgp:restore> element, from
|
||||
-- which we extract the value of the 'op' attribute.
|
||||
--
|
||||
-- TODO(b/20725722): preprocess the XML in FlowRunner so we don't need
|
||||
-- regex parsing of XML here (http://stackoverflow.com/a/1732454).
|
||||
--
|
||||
REGEXP_EXTRACT(xml, '(?s)<command>.*?<([a-z]+)') AS commandType,
|
||||
REGEXP_EXTRACT(xml, '(?s)<command>.*?<[a-z]+ op="(.+?)"') AS commandOpArg,
|
||||
REGEXP_EXTRACT(xml, '(?s)<command>.*?<.+?>.*?<([a-z]+:[a-z]+)') AS innerCommand,
|
||||
REGEXP_EXTRACT(xml, '<domain:name.*?>[^.]*[.](.+)</domain:name>') AS domainTld,
|
||||
REGEXP_EXTRACT(xml, '<rgp:restore op="(.+?)"/>') AS restoreOp,
|
||||
xml,
|
||||
FROM (
|
||||
-- BEGIN EPP XML LOGS QUERY --
|
||||
|
||||
-- Query EPP request logs and extract the clientId and raw EPP XML.
|
||||
SELECT
|
||||
REGEXP_EXTRACT(logMessage, r'^(?:google.registry.flows.FlowRunner run|com.google.domain.registry.flows.FlowRunner run|com.google.domain.registry.util.FormattingLogger log|google.registry.util.FormattingLogger log): EPP Command\n\t.*\n\t(.*)\n') AS clientId,
|
||||
REGEXP_EXTRACT(logMessage, r'^(?:google.registry.flows.FlowRunner run|com.google.domain.registry.flows.FlowRunner run|com.google.domain.registry.util.FormattingLogger log|google.registry.util.FormattingLogger log): EPP Command\n\t.*\n\t.*\n\t.*\n\t((?s).*)$') AS xml,
|
||||
FROM (
|
||||
-- BEGIN LOGS QUERY --
|
||||
tld,
|
||||
activityReportField AS metricName,
|
||||
-- Manual INTEGER cast to work around a BigQuery bug (b/14560012).
|
||||
INTEGER(COUNT(*)) AS count,
|
||||
FROM
|
||||
-- Flatten the "tld" column (repeated) so that domain checks for names
|
||||
-- across multiple TLDs are counted towards each checked TLD as though
|
||||
-- there were one copy of this row per TLD (the effect of flattening).
|
||||
FLATTEN((
|
||||
SELECT
|
||||
-- Use some ugly regex hackery to convert JSON list of strings into
|
||||
-- repeated string values, since there's no built-in for this.
|
||||
-- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF
|
||||
-- once we can use GoogleSQL; example in b/37629674#comment2.
|
||||
REGEXP_EXTRACT(
|
||||
SPLIT(
|
||||
REGEXP_EXTRACT(
|
||||
JSON_EXTRACT(json, '$.tlds'),
|
||||
r'^\[(.*)\]$')),
|
||||
'^"(.*)"$') AS tld,
|
||||
-- TODO(b/XXX): remove rawTlds after June 2017 (see below).
|
||||
JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType,
|
||||
JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField')
|
||||
AS activityReportField,
|
||||
FROM (
|
||||
SELECT
|
||||
-- Extract JSON payload following log signature.
|
||||
REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
|
||||
AS json,
|
||||
FROM (
|
||||
-- BEGIN LOGS QUERY --
|
||||
|
||||
-- Query AppEngine request logs for the report month.
|
||||
SELECT
|
||||
|
@ -260,28 +221,19 @@
|
|||
-- timestamp representing the start of the next month.
|
||||
DATE_ADD(TIMESTAMP('2016-07-01'), -1, 'SECOND'))
|
||||
|
||||
-- END LOGS QUERY --
|
||||
)
|
||||
-- END LOGS QUERY --
|
||||
)
|
||||
WHERE logMessage CONTAINS 'FLOW-LOG-SIGNATURE-METADATA'
|
||||
)
|
||||
),
|
||||
-- Second argument to flatten (see above).
|
||||
tld)
|
||||
-- Exclude cases that can't be tabulated correctly - activity report field
|
||||
-- is null/empty, or the TLD is null/empty even though it's a domain flow.
|
||||
WHERE
|
||||
-- EPP endpoints from the proxy, regtool, and console respectively.
|
||||
(requestPath IN ('/_dr/epp', '/_dr/epptool', '/registrar-xhr')
|
||||
OR LEFT(requestPath, 7) = '/check?')
|
||||
AND REGEXP_MATCH(logMessage, r'^(?:google.registry.flows.FlowRunner run|com.google.domain.registry.flows.FlowRunner run|com.google.domain.registry.util.FormattingLogger log|google.registry.util.FormattingLogger log): EPP Command')
|
||||
AND NOT logMessage CONTAINS 'DRY_RUN'
|
||||
|
||||
-- END EPP XML LOGS QUERY --
|
||||
)
|
||||
-- Filter to just XML that contains a <command> element (no <hello>s).
|
||||
WHERE xml CONTAINS '<command>'
|
||||
)
|
||||
-- Whitelist of EPP command types that we care about for metrics;
|
||||
-- excludes login, logout, and poll.
|
||||
WHERE commandType IN ('check', 'create', 'delete', 'info', 'renew', 'transfer', 'update')
|
||||
activityReportField != '' AND (tld != '' OR resourceType != 'domain')
|
||||
GROUP BY tld, metricName
|
||||
)
|
||||
-- Exclude domain-related EPP requests with no parsed TLD, otherwise
|
||||
-- a NULL tld will make them apply to all TLDs like contact/host requests.
|
||||
WHERE NOT (metricName CONTAINS 'srs-dom' AND tld IS NULL)
|
||||
ORDER BY tld, metricName
|
||||
|
||||
)
|
||||
-- END JOINED DATA SOURCES --
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue