Upgrade activity reporting queries to StandardSQL

This also brings the SQL template parameters in-line with the anticipated Bigquery dataset format, and switches from DateTime to the more appropriate LocalDate (since we only need monthly granularity).

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=162747692
This commit is contained in:
larryruili 2017-07-21 09:13:50 -07:00 committed by Ben McIlwain
parent 8a921f08ed
commit 33eb5f1c87
14 changed files with 160 additions and 171 deletions

View file

@ -20,7 +20,7 @@ import google.registry.util.ResourceUtils;
import google.registry.util.SqlTemplate; import google.registry.util.SqlTemplate;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
import org.joda.time.DateTime; import org.joda.time.LocalDate;
import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.DateTimeFormatter;
@ -30,28 +30,30 @@ import org.joda.time.format.DateTimeFormatter;
public final class ActivityReportingQueryBuilder { public final class ActivityReportingQueryBuilder {
// Names for intermediary tables for overall activity reporting query. // Names for intermediary tables for overall activity reporting query.
static final String ACTIVITY_REPORTING = "activity_reporting"; static final String ICANN_REPORTING_DATA_SET = "icann_reporting";
static final String MONTHLY_LOGS = "monthly_logs"; static final String MONTHLY_LOGS_TABLE = "monthly_logs";
static final String REGISTRAR_OPERATING_STATUS = "registrar_operating_status"; static final String REGISTRAR_OPERATING_STATUS = "registrar_operating_status";
static final String DNS_COUNTS = "dns_counts"; static final String DNS_COUNTS = "dns_counts";
static final String EPP_METRICS = "epp_metrics"; static final String EPP_METRICS = "epp_metrics";
static final String WHOIS_COUNTS = "whois_counts"; static final String WHOIS_COUNTS = "whois_counts";
/** Sets the month we're doing activity reporting for, and initializes the query map. */ /** Sets the month we're doing activity reporting for, and initializes the query map. */
static ImmutableMap<String, String> getQueryMap(DateTime reportingMonth) throws IOException { static ImmutableMap<String, String> getQueryMap(
// Convert the DateTime reportingMonth into YYYY-MM-01 format for start and end of month LocalDate reportingMonth, String projectId) throws IOException {
DateTimeFormatter formatter = DateTimeFormat.forPattern("YYYY-MM-01"); // Convert reportingMonth into YYYYMM01 format for Bigquery table partition pattern-matching.
DateTimeFormatter formatter = DateTimeFormat.forPattern("YYYYMM01");
String startOfMonth = formatter.print(reportingMonth); String startOfMonth = formatter.print(reportingMonth);
String endOfMonth = formatter.print(reportingMonth.plusMonths(1)); String endOfMonth = formatter.print(reportingMonth.plusMonths(1));
return createQueryMap(startOfMonth, endOfMonth); return createQueryMap(startOfMonth, endOfMonth, projectId);
} }
private static ImmutableMap<String, String> createQueryMap( private static ImmutableMap<String, String> createQueryMap(
String startOfMonth, String endOfMonth) throws IOException { String startOfMonth, String endOfMonth, String projectId) throws IOException {
ImmutableMap.Builder<String, String> queriesBuilder = ImmutableMap.builder(); ImmutableMap.Builder<String, String> queriesBuilder = ImmutableMap.builder();
String operationalRegistrarsQuery = String operationalRegistrarsQuery =
SqlTemplate.create(getQueryFromFile("registrar_operating_status.sql")) SqlTemplate.create(getQueryFromFile("registrar_operating_status.sql"))
.put("PROJECT_ID", projectId)
.put("REGISTRAR_DATA_SET", "registrar_data") .put("REGISTRAR_DATA_SET", "registrar_data")
.put("REGISTRAR_STATUS_TABLE", "registrar_status") .put("REGISTRAR_STATUS_TABLE", "registrar_status")
.build(); .build();
@ -65,29 +67,33 @@ public final class ActivityReportingQueryBuilder {
// The monthly logs query is a shared dependency for epp counts and whois metrics // The monthly logs query is a shared dependency for epp counts and whois metrics
String monthlyLogsQuery = String monthlyLogsQuery =
SqlTemplate.create(getQueryFromFile("monthly_logs.sql")) SqlTemplate.create(getQueryFromFile("monthly_logs.sql"))
.put("START_OF_MONTH", startOfMonth).put("END_OF_MONTH", endOfMonth) .put("PROJECT_ID", projectId)
.put("APPENGINE_LOGS_DATA_SET", "appengine_logs") .put("APPENGINE_LOGS_DATA_SET", "appengine_logs")
.put("REQUEST_TABLE", "appengine_googleapis_com_request_log_") .put("REQUEST_TABLE", "appengine_googleapis_com_request_log_")
.put("START_OF_MONTH", startOfMonth)
.put("END_OF_MONTH", endOfMonth)
.build(); .build();
queriesBuilder.put("monthly_logs", monthlyLogsQuery); queriesBuilder.put("monthly_logs", monthlyLogsQuery);
String eppQuery = String eppQuery =
SqlTemplate.create(getQueryFromFile("epp_metrics.sql")) SqlTemplate.create(getQueryFromFile("epp_metrics.sql"))
.put("MONTHLY_LOGS_DATA_SET", MONTHLY_LOGS) .put("PROJECT_ID", projectId)
.put("MONTHLY_LOGS_TABLE", MONTHLY_LOGS + "_table") .put("ICANN_REPORTING_DATA_SET", ICANN_REPORTING_DATA_SET)
.put("MONTHLY_LOGS_TABLE", MONTHLY_LOGS_TABLE)
.build(); .build();
queriesBuilder.put(EPP_METRICS, eppQuery); queriesBuilder.put(EPP_METRICS, eppQuery);
String whoisQuery = String whoisQuery =
SqlTemplate.create(getQueryFromFile("whois_counts.sql")) SqlTemplate.create(getQueryFromFile("whois_counts.sql"))
.put("MONTHLY_LOGS_DATA_SET", MONTHLY_LOGS) .put("PROJECT_ID", projectId)
.put("MONTHLY_LOGS_TABLE", MONTHLY_LOGS + "_table") .put("ICANN_REPORTING_DATA_SET", MONTHLY_LOGS_TABLE)
.put("MONTHLY_LOGS_TABLE", MONTHLY_LOGS_TABLE)
.build(); .build();
queriesBuilder.put(WHOIS_COUNTS, whoisQuery); queriesBuilder.put(WHOIS_COUNTS, whoisQuery);
String activityQuery = String activityQuery =
SqlTemplate.create(getQueryFromFile("activity_report_aggregation.sql")) SqlTemplate.create(getQueryFromFile("activity_report_aggregation.sql"))
.put("ACTIVITY_REPORTING_DATA_SET", ACTIVITY_REPORTING) .put("ICANN_REPORTING_DATA_SET", ICANN_REPORTING_DATA_SET)
.put("REGISTRAR_OPERATING_STATUS_TABLE", REGISTRAR_OPERATING_STATUS) .put("REGISTRAR_OPERATING_STATUS_TABLE", REGISTRAR_OPERATING_STATUS)
.put("DNS_COUNTS_TABLE", DNS_COUNTS) .put("DNS_COUNTS_TABLE", DNS_COUNTS)
.put("EPP_METRICS_TABLE", EPP_METRICS) .put("EPP_METRICS_TABLE", EPP_METRICS)

View file

@ -11,8 +11,10 @@
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and -- See the License for the specific language governing permissions and
-- limitations under the License. -- limitations under the License.
-- This query pulls from all intermediary tables to create the activity -- This query pulls from all intermediary tables to create the activity
-- report csv, via a table transpose and sum over all activity report fields. -- report csv, via a table transpose and sum over all activity report fields.
SELECT SELECT
Tld.tld AS tld, Tld.tld AS tld,
SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars, SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars,
@ -79,10 +81,10 @@ LEFT OUTER JOIN (
metricName, metricName,
count FROM count FROM
-- BEGIN INTERMEDIARY DATA SOURCES -- -- BEGIN INTERMEDIARY DATA SOURCES --
[%ACTIVITY_REPORTING_DATA_SET%.%REGISTRAR_OPERATING_STATUS_TABLE%], [%ICANN_REPORTING_DATA_SET%.%REGISTRAR_OPERATING_STATUS_TABLE%],
[%ACTIVITY_REPORTING_DATA_SET%.%DNS_COUNTS_TABLE%], [%ICANN_REPORTING_DATA_SET%.%DNS_COUNTS_TABLE%],
[%ACTIVITY_REPORTING_DATA_SET%.%EPP_METRICS_TABLE%], [%ICANN_REPORTING_DATA_SET%.%EPP_METRICS_TABLE%],
[%ACTIVITY_REPORTING_DATA_SET%.%WHOIS_COUNTS_TABLE%], [%ICANN_REPORTING_DATA_SET%.%WHOIS_COUNTS_TABLE%],
-- END INTERMEDIARY DATA SOURCES -- -- END INTERMEDIARY DATA SOURCES --
) AS TldMetrics ) AS TldMetrics
ON ON

View file

@ -14,20 +14,16 @@
-- Query for DNS metrics. -- Query for DNS metrics.
-- Our DNS provider exports logs for all queries received. However, these -- This is a no-op until after we transition to Google Cloud DNS, which
-- tables only have a TTL of 7 days. We make daily exports of the data -- will likely export metrics via Stackdriver.
-- relevant to us, which allows us to get the full month's UDP and TCP
-- queries when generating activity reports.
SELECT SELECT
-- DNS metrics apply to all tlds, which requires the 'null' magic value. -- DNS metrics apply to all tlds, which requires the 'null' magic value.
STRING(NULL) AS tld, STRING(NULL) AS tld,
metricName, metricName,
-- TODO(b/63388735): Change this to actually query the DNS tables when ready. -- TODO(b/63388735): Change this to actually query Google Cloud DNS when ready.
-1 AS count, -1 AS count
FROM ( FROM ((
SELECT SELECT 'dns-udp-queries' AS metricName)
'dns-udp-queries' AS metricName), UNION ALL
( (SELECT 'dns-tcp-queries' AS metricName))
SELECT
'dns-tcp-queries' AS metricName)

View file

@ -14,51 +14,44 @@
-- Query FlowReporter JSON log messages and calculate SRS metrics. -- Query FlowReporter JSON log messages and calculate SRS metrics.
-- We use regex's over the monthly appengine logs to determine how many -- We use ugly regex's over the monthly appengine logs to determine how many
-- EPP requests we received for each command. -- EPP requests we received for each command. For example:
-- {"commandType":"check"...,"targetIds":["ais.a.how"],
-- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
SELECT SELECT
tld, -- Remove quotation marks from tld fields.
REGEXP_EXTRACT(tld, '^"(.*)"$') AS tld,
activityReportField AS metricName, activityReportField AS metricName,
-- Manual INTEGER cast to work around a BigQuery bug (b/14560012). COUNT(*) AS count
INTEGER(COUNT(*)) AS count, FROM (
FROM
-- Flatten the "tld" column (repeated) so that domain checks for names
-- across multiple TLDs are counted towards each checked TLD as though
-- there were one copy of this row per TLD (the effect of flattening).
FLATTEN((
SELECT SELECT
-- Use some ugly regex hackery to convert JSON list of strings into -- TODO(b/32486667): Replace with JSON.parse() UDF when available for views
-- repeated string values, since there's no built-in for this.
-- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF
-- once we can use GoogleSQL; example in b/37629674#comment2.
-- e.g. JSON:"{"commandType":"check"...,"targetIds":["ais.a.how"],
-- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
REGEXP_EXTRACT(
SPLIT( SPLIT(
REGEXP_EXTRACT( REGEXP_EXTRACT(JSON_EXTRACT(json, '$.tlds'), r'^\[(.*)\]$')) AS tlds,
JSON_EXTRACT(json, '$.tlds'), JSON_EXTRACT_SCALAR(json,
r'^\[(.*)\]$')), '$.resourceType') AS resourceType,
'^"(.*)"$') AS tld, JSON_EXTRACT_SCALAR(json,
-- TODO(b/XXX): remove rawTlds after June 2017 (see below). '$.icannActivityReportField') AS activityReportField
JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType,
JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField')
AS activityReportField,
FROM ( FROM (
SELECT SELECT
-- Extract JSON payload following log signature. -- Extract the logged JSON payload.
REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json, AS json
FROM FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%MONTHLY_LOGS_TABLE%` AS logs
[%MONTHLY_LOGS_DATA_SET%.%MONTHLY_LOGS_TABLE%] JOIN
WHERE logMessage CONTAINS 'FLOW-LOG-SIGNATURE-METADATA' UNNEST(logs.logMessage) AS logMessage
) WHERE
), logMessage LIKE "%FLOW-LOG-SIGNATURE-METADATA%")) AS regexes
-- Second argument to flatten (see above). JOIN
tld) -- Unnest the JSON-parsed tlds.
-- Exclude cases that can't be tabulated correctly - activity report field UNNEST(regexes.tlds) AS tld
-- is null/empty, or the TLD is null/empty even though it's a domain flow. -- Exclude cases that can't be tabulated correctly, where activityReportField
-- is null/empty, or TLD is null/empty despite being a domain flow.
WHERE WHERE
activityReportField != '' AND (tld != '' OR resourceType != 'domain') activityReportField != ''
GROUP BY tld, metricName AND (tld != '' OR resourceType != 'domain')
ORDER BY tld, metricName GROUP BY
tld, metricName
ORDER BY
tld, metricName

View file

@ -14,15 +14,16 @@
-- Query to fetch AppEngine request logs for the report month. -- Query to fetch AppEngine request logs for the report month.
-- START_OF_MONTH and END_OF_MONTH should be in YYYY-MM-01 format. -- START_OF_MONTH and END_OF_MONTH should be in YYYYMM01 format.
SELECT SELECT
protoPayload.resource AS requestPath, protoPayload.resource AS requestPath,
protoPayload.line.logMessage AS logMessage, ARRAY(
SELECT
logMessage
FROM
UNNEST(protoPayload.line)) AS logMessage
FROM FROM
TABLE_DATE_RANGE_STRICT( `%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%*`
[%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%], WHERE
TIMESTAMP('%START_OF_MONTH%'), _TABLE_SUFFIX BETWEEN '%START_OF_MONTH%' AND '%END_OF_MONTH%'
-- End timestamp is inclusive, so subtract 1 day from the
-- timestamp representing the start of the next month.
DATE_ADD(TIMESTAMP('%END_OF_MONTH%'), -1, 'DAY'))

View file

@ -26,7 +26,7 @@ SELECT
THEN 'pre-ramp-up-registrars' THEN 'pre-ramp-up-registrars'
-- The import process is imprecise; filter out invalid rows. -- The import process is imprecise; filter out invalid rows.
ELSE 'not-applicable' END AS metricName, ELSE 'not-applicable' END AS metricName,
INTEGER(COUNT(registrar_id)) AS count COUNT(registrar_id) AS count
FROM FROM
[%REGISTRAR_DATA_SET%.%REGISTRAR_STATUS_TABLE%] `%PROJECT_ID%.%REGISTRAR_DATA_SET%.%REGISTRAR_STATUS_TABLE%`
GROUP BY metricName GROUP BY metricName

View file

@ -18,16 +18,15 @@
-- counts the number of hits via both endpoints (port 43 and the web). -- counts the number of hits via both endpoints (port 43 and the web).
SELECT SELECT
-- Whois applies to all TLDs, hence the 'null' magic value.
STRING(NULL) AS tld, STRING(NULL) AS tld,
-- Whois queries over port 43 get forwarded by the proxy to /_dr/whois,
-- while web queries come in via /whois/<params>.
CASE CASE
WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries' WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries'
WHEN LEFT(requestPath, 7) = '/whois/' THEN 'web-whois-queries' WHEN SUBSTR(requestPath, 0, 7) = '/whois/' THEN 'web-whois-queries'
END AS metricName, END AS metricName,
INTEGER(COUNT(requestPath)) AS count, COUNT(requestPath) AS count
FROM FROM
[%MONTHLY_LOGS_DATA_SET%.%MONTHLY_LOGS_TABLE%] `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%MONTHLY_LOGS_TABLE%`
GROUP BY metricName GROUP BY
HAVING metricName IS NOT NULL metricName
HAVING
metricName IS NOT NULL

View file

@ -19,7 +19,7 @@ import static com.google.common.truth.Truth.assertThat;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import java.io.IOException; import java.io.IOException;
import org.joda.time.DateTime; import org.joda.time.LocalDate;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
import org.junit.runners.JUnit4; import org.junit.runners.JUnit4;
@ -34,7 +34,7 @@ public class ActivityReportingQueryBuilderTest {
ImmutableList.of( ImmutableList.of(
ActivityReportingQueryBuilder.REGISTRAR_OPERATING_STATUS, ActivityReportingQueryBuilder.REGISTRAR_OPERATING_STATUS,
ActivityReportingQueryBuilder.DNS_COUNTS, ActivityReportingQueryBuilder.DNS_COUNTS,
ActivityReportingQueryBuilder.MONTHLY_LOGS, ActivityReportingQueryBuilder.MONTHLY_LOGS_TABLE,
ActivityReportingQueryBuilder.EPP_METRICS, ActivityReportingQueryBuilder.EPP_METRICS,
ActivityReportingQueryBuilder.WHOIS_COUNTS, ActivityReportingQueryBuilder.WHOIS_COUNTS,
"activity_report_aggregation"); "activity_report_aggregation");
@ -46,7 +46,8 @@ public class ActivityReportingQueryBuilderTest {
} }
ImmutableMap<String, String> testQueries = testQueryBuilder.build(); ImmutableMap<String, String> testQueries = testQueryBuilder.build();
ImmutableMap<String, String> queries = ImmutableMap<String, String> queries =
ActivityReportingQueryBuilder.getQueryMap(new DateTime(2017, 05, 15, 0, 0)); ActivityReportingQueryBuilder.getQueryMap(
new LocalDate(2017, 05, 15), "domain-registry-alpha");
for (String query : queryNames) { for (String query : queryNames) {
assertThat(queries.get(query)).isEqualTo(testQueries.get(query)); assertThat(queries.get(query)).isEqualTo(testQueries.get(query));
} }

View file

@ -11,8 +11,10 @@
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and -- See the License for the specific language governing permissions and
-- limitations under the License. -- limitations under the License.
-- This query pulls from all intermediary tables to create the activity -- This query pulls from all intermediary tables to create the activity
-- report csv, via a table transpose and sum over all activity report fields. -- report csv, via a table transpose and sum over all activity report fields.
SELECT SELECT
Tld.tld AS tld, Tld.tld AS tld,
SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars, SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars,
@ -79,10 +81,10 @@ LEFT OUTER JOIN (
metricName, metricName,
count FROM count FROM
-- BEGIN INTERMEDIARY DATA SOURCES -- -- BEGIN INTERMEDIARY DATA SOURCES --
[activity_reporting.registrar_operating_status], [icann_reporting.registrar_operating_status],
[activity_reporting.dns_counts], [icann_reporting.dns_counts],
[activity_reporting.epp_metrics], [icann_reporting.epp_metrics],
[activity_reporting.whois_counts], [icann_reporting.whois_counts],
-- END INTERMEDIARY DATA SOURCES -- -- END INTERMEDIARY DATA SOURCES --
) AS TldMetrics ) AS TldMetrics
ON ON

View file

@ -14,20 +14,16 @@
-- Query for DNS metrics. -- Query for DNS metrics.
-- Our DNS provider exports logs for all queries received. However, these -- This is a no-op until after we transition to Google Cloud DNS, which
-- tables only have a TTL of 7 days. We make daily exports of the data -- will likely export metrics via Stackdriver.
-- relevant to us, which allows us to get the full month's UDP and TCP
-- queries when generating activity reports.
SELECT SELECT
-- DNS metrics apply to all tlds, which requires the 'null' magic value. -- DNS metrics apply to all tlds, which requires the 'null' magic value.
STRING(NULL) AS tld, STRING(NULL) AS tld,
metricName, metricName,
-- TODO(b/63388735): Change this to actually query the DNS tables when ready. -- TODO(b/63388735): Change this to actually query Google Cloud DNS when ready.
-1 AS count, -1 AS count
FROM ( FROM ((
SELECT SELECT 'dns-udp-queries' AS metricName)
'dns-udp-queries' AS metricName), UNION ALL
( (SELECT 'dns-tcp-queries' AS metricName))
SELECT
'dns-tcp-queries' AS metricName)

View file

@ -14,51 +14,44 @@
-- Query FlowReporter JSON log messages and calculate SRS metrics. -- Query FlowReporter JSON log messages and calculate SRS metrics.
-- We use regex's over the monthly appengine logs to determine how many -- We use ugly regex's over the monthly appengine logs to determine how many
-- EPP requests we received for each command. -- EPP requests we received for each command. For example:
-- {"commandType":"check"...,"targetIds":["ais.a.how"],
-- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
SELECT SELECT
tld, -- Remove quotation marks from tld fields.
REGEXP_EXTRACT(tld, '^"(.*)"$') AS tld,
activityReportField AS metricName, activityReportField AS metricName,
-- Manual INTEGER cast to work around a BigQuery bug (b/14560012). COUNT(*) AS count
INTEGER(COUNT(*)) AS count, FROM (
FROM
-- Flatten the "tld" column (repeated) so that domain checks for names
-- across multiple TLDs are counted towards each checked TLD as though
-- there were one copy of this row per TLD (the effect of flattening).
FLATTEN((
SELECT SELECT
-- Use some ugly regex hackery to convert JSON list of strings into -- TODO(b/32486667): Replace with JSON.parse() UDF when available for views
-- repeated string values, since there's no built-in for this.
-- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF
-- once we can use GoogleSQL; example in b/37629674#comment2.
-- e.g. JSON:"{"commandType":"check"...,"targetIds":["ais.a.how"],
-- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
REGEXP_EXTRACT(
SPLIT( SPLIT(
REGEXP_EXTRACT( REGEXP_EXTRACT(JSON_EXTRACT(json, '$.tlds'), r'^\[(.*)\]$')) AS tlds,
JSON_EXTRACT(json, '$.tlds'), JSON_EXTRACT_SCALAR(json,
r'^\[(.*)\]$')), '$.resourceType') AS resourceType,
'^"(.*)"$') AS tld, JSON_EXTRACT_SCALAR(json,
-- TODO(b/XXX): remove rawTlds after June 2017 (see below). '$.icannActivityReportField') AS activityReportField
JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType,
JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField')
AS activityReportField,
FROM ( FROM (
SELECT SELECT
-- Extract JSON payload following log signature. -- Extract the logged JSON payload.
REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json, AS json
FROM FROM `domain-registry-alpha.icann_reporting.monthly_logs` AS logs
[monthly_logs.monthly_logs_table] JOIN
WHERE logMessage CONTAINS 'FLOW-LOG-SIGNATURE-METADATA' UNNEST(logs.logMessage) AS logMessage
) WHERE
), logMessage LIKE "%FLOW-LOG-SIGNATURE-METADATA%")) AS regexes
-- Second argument to flatten (see above). JOIN
tld) -- Unnest the JSON-parsed tlds.
-- Exclude cases that can't be tabulated correctly - activity report field UNNEST(regexes.tlds) AS tld
-- is null/empty, or the TLD is null/empty even though it's a domain flow. -- Exclude cases that can't be tabulated correctly, where activityReportField
-- is null/empty, or TLD is null/empty despite being a domain flow.
WHERE WHERE
activityReportField != '' AND (tld != '' OR resourceType != 'domain') activityReportField != ''
GROUP BY tld, metricName AND (tld != '' OR resourceType != 'domain')
ORDER BY tld, metricName GROUP BY
tld, metricName
ORDER BY
tld, metricName

View file

@ -14,15 +14,16 @@
-- Query to fetch AppEngine request logs for the report month. -- Query to fetch AppEngine request logs for the report month.
-- START_OF_MONTH and END_OF_MONTH should be in YYYY-MM-01 format. -- START_OF_MONTH and END_OF_MONTH should be in YYYYMM01 format.
SELECT SELECT
protoPayload.resource AS requestPath, protoPayload.resource AS requestPath,
protoPayload.line.logMessage AS logMessage, ARRAY(
SELECT
logMessage
FROM
UNNEST(protoPayload.line)) AS logMessage
FROM FROM
TABLE_DATE_RANGE_STRICT( `domain-registry-alpha.appengine_logs.appengine_googleapis_com_request_log_*`
[appengine_logs.appengine_googleapis_com_request_log_], WHERE
TIMESTAMP('2017-05-01'), _TABLE_SUFFIX BETWEEN '20170501' AND '20170601'
-- End timestamp is inclusive, so subtract 1 day from the
-- timestamp representing the start of the next month.
DATE_ADD(TIMESTAMP('2017-06-01'), -1, 'DAY'))

View file

@ -26,7 +26,7 @@ SELECT
THEN 'pre-ramp-up-registrars' THEN 'pre-ramp-up-registrars'
-- The import process is imprecise; filter out invalid rows. -- The import process is imprecise; filter out invalid rows.
ELSE 'not-applicable' END AS metricName, ELSE 'not-applicable' END AS metricName,
INTEGER(COUNT(registrar_id)) AS count COUNT(registrar_id) AS count
FROM FROM
[registrar_data.registrar_status] `domain-registry-alpha.registrar_data.registrar_status`
GROUP BY metricName GROUP BY metricName

View file

@ -18,16 +18,15 @@
-- counts the number of hits via both endpoints (port 43 and the web). -- counts the number of hits via both endpoints (port 43 and the web).
SELECT SELECT
-- Whois applies to all TLDs, hence the 'null' magic value.
STRING(NULL) AS tld, STRING(NULL) AS tld,
-- Whois queries over port 43 get forwarded by the proxy to /_dr/whois,
-- while web queries come in via /whois/<params>.
CASE CASE
WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries' WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries'
WHEN LEFT(requestPath, 7) = '/whois/' THEN 'web-whois-queries' WHEN SUBSTR(requestPath, 0, 7) = '/whois/' THEN 'web-whois-queries'
END AS metricName, END AS metricName,
INTEGER(COUNT(requestPath)) AS count, COUNT(requestPath) AS count
FROM FROM
[monthly_logs.monthly_logs_table] `domain-registry-alpha.monthly_logs.monthly_logs`
GROUP BY metricName GROUP BY
HAVING metricName IS NOT NULL metricName
HAVING
metricName IS NOT NULL