mirror of
https://github.com/google/nomulus.git
synced 2025-05-14 16:37:13 +02:00
Upgrade activity reporting queries to StandardSQL
This also brings the SQL template parameters in-line with the anticipated Bigquery dataset format, and switches from DateTime to the more appropriate LocalDate (since we only need monthly granularity). ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=162747692
This commit is contained in:
parent
8a921f08ed
commit
33eb5f1c87
14 changed files with 160 additions and 171 deletions
|
@ -20,7 +20,7 @@ import google.registry.util.ResourceUtils;
|
||||||
import google.registry.util.SqlTemplate;
|
import google.registry.util.SqlTemplate;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.LocalDate;
|
||||||
import org.joda.time.format.DateTimeFormat;
|
import org.joda.time.format.DateTimeFormat;
|
||||||
import org.joda.time.format.DateTimeFormatter;
|
import org.joda.time.format.DateTimeFormatter;
|
||||||
|
|
||||||
|
@ -30,28 +30,30 @@ import org.joda.time.format.DateTimeFormatter;
|
||||||
public final class ActivityReportingQueryBuilder {
|
public final class ActivityReportingQueryBuilder {
|
||||||
|
|
||||||
// Names for intermediary tables for overall activity reporting query.
|
// Names for intermediary tables for overall activity reporting query.
|
||||||
static final String ACTIVITY_REPORTING = "activity_reporting";
|
static final String ICANN_REPORTING_DATA_SET = "icann_reporting";
|
||||||
static final String MONTHLY_LOGS = "monthly_logs";
|
static final String MONTHLY_LOGS_TABLE = "monthly_logs";
|
||||||
static final String REGISTRAR_OPERATING_STATUS = "registrar_operating_status";
|
static final String REGISTRAR_OPERATING_STATUS = "registrar_operating_status";
|
||||||
static final String DNS_COUNTS = "dns_counts";
|
static final String DNS_COUNTS = "dns_counts";
|
||||||
static final String EPP_METRICS = "epp_metrics";
|
static final String EPP_METRICS = "epp_metrics";
|
||||||
static final String WHOIS_COUNTS = "whois_counts";
|
static final String WHOIS_COUNTS = "whois_counts";
|
||||||
|
|
||||||
/** Sets the month we're doing activity reporting for, and initializes the query map. */
|
/** Sets the month we're doing activity reporting for, and initializes the query map. */
|
||||||
static ImmutableMap<String, String> getQueryMap(DateTime reportingMonth) throws IOException {
|
static ImmutableMap<String, String> getQueryMap(
|
||||||
// Convert the DateTime reportingMonth into YYYY-MM-01 format for start and end of month
|
LocalDate reportingMonth, String projectId) throws IOException {
|
||||||
DateTimeFormatter formatter = DateTimeFormat.forPattern("YYYY-MM-01");
|
// Convert reportingMonth into YYYYMM01 format for Bigquery table partition pattern-matching.
|
||||||
|
DateTimeFormatter formatter = DateTimeFormat.forPattern("YYYYMM01");
|
||||||
String startOfMonth = formatter.print(reportingMonth);
|
String startOfMonth = formatter.print(reportingMonth);
|
||||||
String endOfMonth = formatter.print(reportingMonth.plusMonths(1));
|
String endOfMonth = formatter.print(reportingMonth.plusMonths(1));
|
||||||
return createQueryMap(startOfMonth, endOfMonth);
|
return createQueryMap(startOfMonth, endOfMonth, projectId);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ImmutableMap<String, String> createQueryMap(
|
private static ImmutableMap<String, String> createQueryMap(
|
||||||
String startOfMonth, String endOfMonth) throws IOException {
|
String startOfMonth, String endOfMonth, String projectId) throws IOException {
|
||||||
|
|
||||||
ImmutableMap.Builder<String, String> queriesBuilder = ImmutableMap.builder();
|
ImmutableMap.Builder<String, String> queriesBuilder = ImmutableMap.builder();
|
||||||
String operationalRegistrarsQuery =
|
String operationalRegistrarsQuery =
|
||||||
SqlTemplate.create(getQueryFromFile("registrar_operating_status.sql"))
|
SqlTemplate.create(getQueryFromFile("registrar_operating_status.sql"))
|
||||||
|
.put("PROJECT_ID", projectId)
|
||||||
.put("REGISTRAR_DATA_SET", "registrar_data")
|
.put("REGISTRAR_DATA_SET", "registrar_data")
|
||||||
.put("REGISTRAR_STATUS_TABLE", "registrar_status")
|
.put("REGISTRAR_STATUS_TABLE", "registrar_status")
|
||||||
.build();
|
.build();
|
||||||
|
@ -65,29 +67,33 @@ public final class ActivityReportingQueryBuilder {
|
||||||
// The monthly logs query is a shared dependency for epp counts and whois metrics
|
// The monthly logs query is a shared dependency for epp counts and whois metrics
|
||||||
String monthlyLogsQuery =
|
String monthlyLogsQuery =
|
||||||
SqlTemplate.create(getQueryFromFile("monthly_logs.sql"))
|
SqlTemplate.create(getQueryFromFile("monthly_logs.sql"))
|
||||||
.put("START_OF_MONTH", startOfMonth).put("END_OF_MONTH", endOfMonth)
|
.put("PROJECT_ID", projectId)
|
||||||
.put("APPENGINE_LOGS_DATA_SET", "appengine_logs")
|
.put("APPENGINE_LOGS_DATA_SET", "appengine_logs")
|
||||||
.put("REQUEST_TABLE", "appengine_googleapis_com_request_log_")
|
.put("REQUEST_TABLE", "appengine_googleapis_com_request_log_")
|
||||||
|
.put("START_OF_MONTH", startOfMonth)
|
||||||
|
.put("END_OF_MONTH", endOfMonth)
|
||||||
.build();
|
.build();
|
||||||
queriesBuilder.put("monthly_logs", monthlyLogsQuery);
|
queriesBuilder.put("monthly_logs", monthlyLogsQuery);
|
||||||
|
|
||||||
String eppQuery =
|
String eppQuery =
|
||||||
SqlTemplate.create(getQueryFromFile("epp_metrics.sql"))
|
SqlTemplate.create(getQueryFromFile("epp_metrics.sql"))
|
||||||
.put("MONTHLY_LOGS_DATA_SET", MONTHLY_LOGS)
|
.put("PROJECT_ID", projectId)
|
||||||
.put("MONTHLY_LOGS_TABLE", MONTHLY_LOGS + "_table")
|
.put("ICANN_REPORTING_DATA_SET", ICANN_REPORTING_DATA_SET)
|
||||||
|
.put("MONTHLY_LOGS_TABLE", MONTHLY_LOGS_TABLE)
|
||||||
.build();
|
.build();
|
||||||
queriesBuilder.put(EPP_METRICS, eppQuery);
|
queriesBuilder.put(EPP_METRICS, eppQuery);
|
||||||
|
|
||||||
String whoisQuery =
|
String whoisQuery =
|
||||||
SqlTemplate.create(getQueryFromFile("whois_counts.sql"))
|
SqlTemplate.create(getQueryFromFile("whois_counts.sql"))
|
||||||
.put("MONTHLY_LOGS_DATA_SET", MONTHLY_LOGS)
|
.put("PROJECT_ID", projectId)
|
||||||
.put("MONTHLY_LOGS_TABLE", MONTHLY_LOGS + "_table")
|
.put("ICANN_REPORTING_DATA_SET", MONTHLY_LOGS_TABLE)
|
||||||
|
.put("MONTHLY_LOGS_TABLE", MONTHLY_LOGS_TABLE)
|
||||||
.build();
|
.build();
|
||||||
queriesBuilder.put(WHOIS_COUNTS, whoisQuery);
|
queriesBuilder.put(WHOIS_COUNTS, whoisQuery);
|
||||||
|
|
||||||
String activityQuery =
|
String activityQuery =
|
||||||
SqlTemplate.create(getQueryFromFile("activity_report_aggregation.sql"))
|
SqlTemplate.create(getQueryFromFile("activity_report_aggregation.sql"))
|
||||||
.put("ACTIVITY_REPORTING_DATA_SET", ACTIVITY_REPORTING)
|
.put("ICANN_REPORTING_DATA_SET", ICANN_REPORTING_DATA_SET)
|
||||||
.put("REGISTRAR_OPERATING_STATUS_TABLE", REGISTRAR_OPERATING_STATUS)
|
.put("REGISTRAR_OPERATING_STATUS_TABLE", REGISTRAR_OPERATING_STATUS)
|
||||||
.put("DNS_COUNTS_TABLE", DNS_COUNTS)
|
.put("DNS_COUNTS_TABLE", DNS_COUNTS)
|
||||||
.put("EPP_METRICS_TABLE", EPP_METRICS)
|
.put("EPP_METRICS_TABLE", EPP_METRICS)
|
||||||
|
|
|
@ -11,8 +11,10 @@
|
||||||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
-- See the License for the specific language governing permissions and
|
-- See the License for the specific language governing permissions and
|
||||||
-- limitations under the License.
|
-- limitations under the License.
|
||||||
|
|
||||||
-- This query pulls from all intermediary tables to create the activity
|
-- This query pulls from all intermediary tables to create the activity
|
||||||
-- report csv, via a table transpose and sum over all activity report fields.
|
-- report csv, via a table transpose and sum over all activity report fields.
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
Tld.tld AS tld,
|
Tld.tld AS tld,
|
||||||
SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars,
|
SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars,
|
||||||
|
@ -79,10 +81,10 @@ LEFT OUTER JOIN (
|
||||||
metricName,
|
metricName,
|
||||||
count FROM
|
count FROM
|
||||||
-- BEGIN INTERMEDIARY DATA SOURCES --
|
-- BEGIN INTERMEDIARY DATA SOURCES --
|
||||||
[%ACTIVITY_REPORTING_DATA_SET%.%REGISTRAR_OPERATING_STATUS_TABLE%],
|
[%ICANN_REPORTING_DATA_SET%.%REGISTRAR_OPERATING_STATUS_TABLE%],
|
||||||
[%ACTIVITY_REPORTING_DATA_SET%.%DNS_COUNTS_TABLE%],
|
[%ICANN_REPORTING_DATA_SET%.%DNS_COUNTS_TABLE%],
|
||||||
[%ACTIVITY_REPORTING_DATA_SET%.%EPP_METRICS_TABLE%],
|
[%ICANN_REPORTING_DATA_SET%.%EPP_METRICS_TABLE%],
|
||||||
[%ACTIVITY_REPORTING_DATA_SET%.%WHOIS_COUNTS_TABLE%],
|
[%ICANN_REPORTING_DATA_SET%.%WHOIS_COUNTS_TABLE%],
|
||||||
-- END INTERMEDIARY DATA SOURCES --
|
-- END INTERMEDIARY DATA SOURCES --
|
||||||
) AS TldMetrics
|
) AS TldMetrics
|
||||||
ON
|
ON
|
||||||
|
|
|
@ -14,20 +14,16 @@
|
||||||
|
|
||||||
-- Query for DNS metrics.
|
-- Query for DNS metrics.
|
||||||
|
|
||||||
-- Our DNS provider exports logs for all queries received. However, these
|
-- This is a no-op until after we transition to Google Cloud DNS, which
|
||||||
-- tables only have a TTL of 7 days. We make daily exports of the data
|
-- will likely export metrics via Stackdriver.
|
||||||
-- relevant to us, which allows us to get the full month's UDP and TCP
|
|
||||||
-- queries when generating activity reports.
|
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
-- DNS metrics apply to all tlds, which requires the 'null' magic value.
|
-- DNS metrics apply to all tlds, which requires the 'null' magic value.
|
||||||
STRING(NULL) AS tld,
|
STRING(NULL) AS tld,
|
||||||
metricName,
|
metricName,
|
||||||
-- TODO(b/63388735): Change this to actually query the DNS tables when ready.
|
-- TODO(b/63388735): Change this to actually query Google Cloud DNS when ready.
|
||||||
-1 AS count,
|
-1 AS count
|
||||||
FROM (
|
FROM ((
|
||||||
SELECT
|
SELECT 'dns-udp-queries' AS metricName)
|
||||||
'dns-udp-queries' AS metricName),
|
UNION ALL
|
||||||
(
|
(SELECT 'dns-tcp-queries' AS metricName))
|
||||||
SELECT
|
|
||||||
'dns-tcp-queries' AS metricName)
|
|
||||||
|
|
|
@ -14,51 +14,44 @@
|
||||||
|
|
||||||
-- Query FlowReporter JSON log messages and calculate SRS metrics.
|
-- Query FlowReporter JSON log messages and calculate SRS metrics.
|
||||||
|
|
||||||
-- We use regex's over the monthly appengine logs to determine how many
|
-- We use ugly regex's over the monthly appengine logs to determine how many
|
||||||
-- EPP requests we received for each command.
|
-- EPP requests we received for each command. For example:
|
||||||
|
-- {"commandType":"check"...,"targetIds":["ais.a.how"],
|
||||||
|
-- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
tld,
|
-- Remove quotation marks from tld fields.
|
||||||
|
REGEXP_EXTRACT(tld, '^"(.*)"$') AS tld,
|
||||||
activityReportField AS metricName,
|
activityReportField AS metricName,
|
||||||
-- Manual INTEGER cast to work around a BigQuery bug (b/14560012).
|
COUNT(*) AS count
|
||||||
INTEGER(COUNT(*)) AS count,
|
FROM (
|
||||||
FROM
|
|
||||||
-- Flatten the "tld" column (repeated) so that domain checks for names
|
|
||||||
-- across multiple TLDs are counted towards each checked TLD as though
|
|
||||||
-- there were one copy of this row per TLD (the effect of flattening).
|
|
||||||
FLATTEN((
|
|
||||||
SELECT
|
SELECT
|
||||||
-- Use some ugly regex hackery to convert JSON list of strings into
|
-- TODO(b/32486667): Replace with JSON.parse() UDF when available for views
|
||||||
-- repeated string values, since there's no built-in for this.
|
|
||||||
-- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF
|
|
||||||
-- once we can use GoogleSQL; example in b/37629674#comment2.
|
|
||||||
-- e.g. JSON:"{"commandType":"check"...,"targetIds":["ais.a.how"],
|
|
||||||
-- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
|
|
||||||
REGEXP_EXTRACT(
|
|
||||||
SPLIT(
|
SPLIT(
|
||||||
REGEXP_EXTRACT(
|
REGEXP_EXTRACT(JSON_EXTRACT(json, '$.tlds'), r'^\[(.*)\]$')) AS tlds,
|
||||||
JSON_EXTRACT(json, '$.tlds'),
|
JSON_EXTRACT_SCALAR(json,
|
||||||
r'^\[(.*)\]$')),
|
'$.resourceType') AS resourceType,
|
||||||
'^"(.*)"$') AS tld,
|
JSON_EXTRACT_SCALAR(json,
|
||||||
-- TODO(b/XXX): remove rawTlds after June 2017 (see below).
|
'$.icannActivityReportField') AS activityReportField
|
||||||
JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType,
|
|
||||||
JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField')
|
|
||||||
AS activityReportField,
|
|
||||||
FROM (
|
FROM (
|
||||||
SELECT
|
SELECT
|
||||||
-- Extract JSON payload following log signature.
|
-- Extract the logged JSON payload.
|
||||||
REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
|
REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
|
||||||
AS json,
|
AS json
|
||||||
FROM
|
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%MONTHLY_LOGS_TABLE%` AS logs
|
||||||
[%MONTHLY_LOGS_DATA_SET%.%MONTHLY_LOGS_TABLE%]
|
JOIN
|
||||||
WHERE logMessage CONTAINS 'FLOW-LOG-SIGNATURE-METADATA'
|
UNNEST(logs.logMessage) AS logMessage
|
||||||
)
|
WHERE
|
||||||
),
|
logMessage LIKE "%FLOW-LOG-SIGNATURE-METADATA%")) AS regexes
|
||||||
-- Second argument to flatten (see above).
|
JOIN
|
||||||
tld)
|
-- Unnest the JSON-parsed tlds.
|
||||||
-- Exclude cases that can't be tabulated correctly - activity report field
|
UNNEST(regexes.tlds) AS tld
|
||||||
-- is null/empty, or the TLD is null/empty even though it's a domain flow.
|
-- Exclude cases that can't be tabulated correctly, where activityReportField
|
||||||
|
-- is null/empty, or TLD is null/empty despite being a domain flow.
|
||||||
WHERE
|
WHERE
|
||||||
activityReportField != '' AND (tld != '' OR resourceType != 'domain')
|
activityReportField != ''
|
||||||
GROUP BY tld, metricName
|
AND (tld != '' OR resourceType != 'domain')
|
||||||
ORDER BY tld, metricName
|
GROUP BY
|
||||||
|
tld, metricName
|
||||||
|
ORDER BY
|
||||||
|
tld, metricName
|
||||||
|
|
|
@ -14,15 +14,16 @@
|
||||||
|
|
||||||
-- Query to fetch AppEngine request logs for the report month.
|
-- Query to fetch AppEngine request logs for the report month.
|
||||||
|
|
||||||
-- START_OF_MONTH and END_OF_MONTH should be in YYYY-MM-01 format.
|
-- START_OF_MONTH and END_OF_MONTH should be in YYYYMM01 format.
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
protoPayload.resource AS requestPath,
|
protoPayload.resource AS requestPath,
|
||||||
protoPayload.line.logMessage AS logMessage,
|
ARRAY(
|
||||||
|
SELECT
|
||||||
|
logMessage
|
||||||
|
FROM
|
||||||
|
UNNEST(protoPayload.line)) AS logMessage
|
||||||
FROM
|
FROM
|
||||||
TABLE_DATE_RANGE_STRICT(
|
`%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%*`
|
||||||
[%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%],
|
WHERE
|
||||||
TIMESTAMP('%START_OF_MONTH%'),
|
_TABLE_SUFFIX BETWEEN '%START_OF_MONTH%' AND '%END_OF_MONTH%'
|
||||||
-- End timestamp is inclusive, so subtract 1 day from the
|
|
||||||
-- timestamp representing the start of the next month.
|
|
||||||
DATE_ADD(TIMESTAMP('%END_OF_MONTH%'), -1, 'DAY'))
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ SELECT
|
||||||
THEN 'pre-ramp-up-registrars'
|
THEN 'pre-ramp-up-registrars'
|
||||||
-- The import process is imprecise; filter out invalid rows.
|
-- The import process is imprecise; filter out invalid rows.
|
||||||
ELSE 'not-applicable' END AS metricName,
|
ELSE 'not-applicable' END AS metricName,
|
||||||
INTEGER(COUNT(registrar_id)) AS count
|
COUNT(registrar_id) AS count
|
||||||
FROM
|
FROM
|
||||||
[%REGISTRAR_DATA_SET%.%REGISTRAR_STATUS_TABLE%]
|
`%PROJECT_ID%.%REGISTRAR_DATA_SET%.%REGISTRAR_STATUS_TABLE%`
|
||||||
GROUP BY metricName
|
GROUP BY metricName
|
||||||
|
|
|
@ -18,16 +18,15 @@
|
||||||
-- counts the number of hits via both endpoints (port 43 and the web).
|
-- counts the number of hits via both endpoints (port 43 and the web).
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
-- Whois applies to all TLDs, hence the 'null' magic value.
|
|
||||||
STRING(NULL) AS tld,
|
STRING(NULL) AS tld,
|
||||||
-- Whois queries over port 43 get forwarded by the proxy to /_dr/whois,
|
|
||||||
-- while web queries come in via /whois/<params>.
|
|
||||||
CASE
|
CASE
|
||||||
WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries'
|
WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries'
|
||||||
WHEN LEFT(requestPath, 7) = '/whois/' THEN 'web-whois-queries'
|
WHEN SUBSTR(requestPath, 0, 7) = '/whois/' THEN 'web-whois-queries'
|
||||||
END AS metricName,
|
END AS metricName,
|
||||||
INTEGER(COUNT(requestPath)) AS count,
|
COUNT(requestPath) AS count
|
||||||
FROM
|
FROM
|
||||||
[%MONTHLY_LOGS_DATA_SET%.%MONTHLY_LOGS_TABLE%]
|
`%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%MONTHLY_LOGS_TABLE%`
|
||||||
GROUP BY metricName
|
GROUP BY
|
||||||
HAVING metricName IS NOT NULL
|
metricName
|
||||||
|
HAVING
|
||||||
|
metricName IS NOT NULL
|
||||||
|
|
|
@ -19,7 +19,7 @@ import static com.google.common.truth.Truth.assertThat;
|
||||||
import com.google.common.collect.ImmutableList;
|
import com.google.common.collect.ImmutableList;
|
||||||
import com.google.common.collect.ImmutableMap;
|
import com.google.common.collect.ImmutableMap;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.LocalDate;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.runner.RunWith;
|
import org.junit.runner.RunWith;
|
||||||
import org.junit.runners.JUnit4;
|
import org.junit.runners.JUnit4;
|
||||||
|
@ -34,7 +34,7 @@ public class ActivityReportingQueryBuilderTest {
|
||||||
ImmutableList.of(
|
ImmutableList.of(
|
||||||
ActivityReportingQueryBuilder.REGISTRAR_OPERATING_STATUS,
|
ActivityReportingQueryBuilder.REGISTRAR_OPERATING_STATUS,
|
||||||
ActivityReportingQueryBuilder.DNS_COUNTS,
|
ActivityReportingQueryBuilder.DNS_COUNTS,
|
||||||
ActivityReportingQueryBuilder.MONTHLY_LOGS,
|
ActivityReportingQueryBuilder.MONTHLY_LOGS_TABLE,
|
||||||
ActivityReportingQueryBuilder.EPP_METRICS,
|
ActivityReportingQueryBuilder.EPP_METRICS,
|
||||||
ActivityReportingQueryBuilder.WHOIS_COUNTS,
|
ActivityReportingQueryBuilder.WHOIS_COUNTS,
|
||||||
"activity_report_aggregation");
|
"activity_report_aggregation");
|
||||||
|
@ -46,7 +46,8 @@ public class ActivityReportingQueryBuilderTest {
|
||||||
}
|
}
|
||||||
ImmutableMap<String, String> testQueries = testQueryBuilder.build();
|
ImmutableMap<String, String> testQueries = testQueryBuilder.build();
|
||||||
ImmutableMap<String, String> queries =
|
ImmutableMap<String, String> queries =
|
||||||
ActivityReportingQueryBuilder.getQueryMap(new DateTime(2017, 05, 15, 0, 0));
|
ActivityReportingQueryBuilder.getQueryMap(
|
||||||
|
new LocalDate(2017, 05, 15), "domain-registry-alpha");
|
||||||
for (String query : queryNames) {
|
for (String query : queryNames) {
|
||||||
assertThat(queries.get(query)).isEqualTo(testQueries.get(query));
|
assertThat(queries.get(query)).isEqualTo(testQueries.get(query));
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,8 +11,10 @@
|
||||||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
-- See the License for the specific language governing permissions and
|
-- See the License for the specific language governing permissions and
|
||||||
-- limitations under the License.
|
-- limitations under the License.
|
||||||
|
|
||||||
-- This query pulls from all intermediary tables to create the activity
|
-- This query pulls from all intermediary tables to create the activity
|
||||||
-- report csv, via a table transpose and sum over all activity report fields.
|
-- report csv, via a table transpose and sum over all activity report fields.
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
Tld.tld AS tld,
|
Tld.tld AS tld,
|
||||||
SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars,
|
SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars,
|
||||||
|
@ -79,10 +81,10 @@ LEFT OUTER JOIN (
|
||||||
metricName,
|
metricName,
|
||||||
count FROM
|
count FROM
|
||||||
-- BEGIN INTERMEDIARY DATA SOURCES --
|
-- BEGIN INTERMEDIARY DATA SOURCES --
|
||||||
[activity_reporting.registrar_operating_status],
|
[icann_reporting.registrar_operating_status],
|
||||||
[activity_reporting.dns_counts],
|
[icann_reporting.dns_counts],
|
||||||
[activity_reporting.epp_metrics],
|
[icann_reporting.epp_metrics],
|
||||||
[activity_reporting.whois_counts],
|
[icann_reporting.whois_counts],
|
||||||
-- END INTERMEDIARY DATA SOURCES --
|
-- END INTERMEDIARY DATA SOURCES --
|
||||||
) AS TldMetrics
|
) AS TldMetrics
|
||||||
ON
|
ON
|
||||||
|
|
|
@ -14,20 +14,16 @@
|
||||||
|
|
||||||
-- Query for DNS metrics.
|
-- Query for DNS metrics.
|
||||||
|
|
||||||
-- Our DNS provider exports logs for all queries received. However, these
|
-- This is a no-op until after we transition to Google Cloud DNS, which
|
||||||
-- tables only have a TTL of 7 days. We make daily exports of the data
|
-- will likely export metrics via Stackdriver.
|
||||||
-- relevant to us, which allows us to get the full month's UDP and TCP
|
|
||||||
-- queries when generating activity reports.
|
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
-- DNS metrics apply to all tlds, which requires the 'null' magic value.
|
-- DNS metrics apply to all tlds, which requires the 'null' magic value.
|
||||||
STRING(NULL) AS tld,
|
STRING(NULL) AS tld,
|
||||||
metricName,
|
metricName,
|
||||||
-- TODO(b/63388735): Change this to actually query the DNS tables when ready.
|
-- TODO(b/63388735): Change this to actually query Google Cloud DNS when ready.
|
||||||
-1 AS count,
|
-1 AS count
|
||||||
FROM (
|
FROM ((
|
||||||
SELECT
|
SELECT 'dns-udp-queries' AS metricName)
|
||||||
'dns-udp-queries' AS metricName),
|
UNION ALL
|
||||||
(
|
(SELECT 'dns-tcp-queries' AS metricName))
|
||||||
SELECT
|
|
||||||
'dns-tcp-queries' AS metricName)
|
|
||||||
|
|
|
@ -14,51 +14,44 @@
|
||||||
|
|
||||||
-- Query FlowReporter JSON log messages and calculate SRS metrics.
|
-- Query FlowReporter JSON log messages and calculate SRS metrics.
|
||||||
|
|
||||||
-- We use regex's over the monthly appengine logs to determine how many
|
-- We use ugly regex's over the monthly appengine logs to determine how many
|
||||||
-- EPP requests we received for each command.
|
-- EPP requests we received for each command. For example:
|
||||||
|
-- {"commandType":"check"...,"targetIds":["ais.a.how"],
|
||||||
|
-- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
tld,
|
-- Remove quotation marks from tld fields.
|
||||||
|
REGEXP_EXTRACT(tld, '^"(.*)"$') AS tld,
|
||||||
activityReportField AS metricName,
|
activityReportField AS metricName,
|
||||||
-- Manual INTEGER cast to work around a BigQuery bug (b/14560012).
|
COUNT(*) AS count
|
||||||
INTEGER(COUNT(*)) AS count,
|
FROM (
|
||||||
FROM
|
|
||||||
-- Flatten the "tld" column (repeated) so that domain checks for names
|
|
||||||
-- across multiple TLDs are counted towards each checked TLD as though
|
|
||||||
-- there were one copy of this row per TLD (the effect of flattening).
|
|
||||||
FLATTEN((
|
|
||||||
SELECT
|
SELECT
|
||||||
-- Use some ugly regex hackery to convert JSON list of strings into
|
-- TODO(b/32486667): Replace with JSON.parse() UDF when available for views
|
||||||
-- repeated string values, since there's no built-in for this.
|
|
||||||
-- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF
|
|
||||||
-- once we can use GoogleSQL; example in b/37629674#comment2.
|
|
||||||
-- e.g. JSON:"{"commandType":"check"...,"targetIds":["ais.a.how"],
|
|
||||||
-- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
|
|
||||||
REGEXP_EXTRACT(
|
|
||||||
SPLIT(
|
SPLIT(
|
||||||
REGEXP_EXTRACT(
|
REGEXP_EXTRACT(JSON_EXTRACT(json, '$.tlds'), r'^\[(.*)\]$')) AS tlds,
|
||||||
JSON_EXTRACT(json, '$.tlds'),
|
JSON_EXTRACT_SCALAR(json,
|
||||||
r'^\[(.*)\]$')),
|
'$.resourceType') AS resourceType,
|
||||||
'^"(.*)"$') AS tld,
|
JSON_EXTRACT_SCALAR(json,
|
||||||
-- TODO(b/XXX): remove rawTlds after June 2017 (see below).
|
'$.icannActivityReportField') AS activityReportField
|
||||||
JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType,
|
|
||||||
JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField')
|
|
||||||
AS activityReportField,
|
|
||||||
FROM (
|
FROM (
|
||||||
SELECT
|
SELECT
|
||||||
-- Extract JSON payload following log signature.
|
-- Extract the logged JSON payload.
|
||||||
REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
|
REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
|
||||||
AS json,
|
AS json
|
||||||
FROM
|
FROM `domain-registry-alpha.icann_reporting.monthly_logs` AS logs
|
||||||
[monthly_logs.monthly_logs_table]
|
JOIN
|
||||||
WHERE logMessage CONTAINS 'FLOW-LOG-SIGNATURE-METADATA'
|
UNNEST(logs.logMessage) AS logMessage
|
||||||
)
|
WHERE
|
||||||
),
|
logMessage LIKE "%FLOW-LOG-SIGNATURE-METADATA%")) AS regexes
|
||||||
-- Second argument to flatten (see above).
|
JOIN
|
||||||
tld)
|
-- Unnest the JSON-parsed tlds.
|
||||||
-- Exclude cases that can't be tabulated correctly - activity report field
|
UNNEST(regexes.tlds) AS tld
|
||||||
-- is null/empty, or the TLD is null/empty even though it's a domain flow.
|
-- Exclude cases that can't be tabulated correctly, where activityReportField
|
||||||
|
-- is null/empty, or TLD is null/empty despite being a domain flow.
|
||||||
WHERE
|
WHERE
|
||||||
activityReportField != '' AND (tld != '' OR resourceType != 'domain')
|
activityReportField != ''
|
||||||
GROUP BY tld, metricName
|
AND (tld != '' OR resourceType != 'domain')
|
||||||
ORDER BY tld, metricName
|
GROUP BY
|
||||||
|
tld, metricName
|
||||||
|
ORDER BY
|
||||||
|
tld, metricName
|
||||||
|
|
|
@ -14,15 +14,16 @@
|
||||||
|
|
||||||
-- Query to fetch AppEngine request logs for the report month.
|
-- Query to fetch AppEngine request logs for the report month.
|
||||||
|
|
||||||
-- START_OF_MONTH and END_OF_MONTH should be in YYYY-MM-01 format.
|
-- START_OF_MONTH and END_OF_MONTH should be in YYYYMM01 format.
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
protoPayload.resource AS requestPath,
|
protoPayload.resource AS requestPath,
|
||||||
protoPayload.line.logMessage AS logMessage,
|
ARRAY(
|
||||||
|
SELECT
|
||||||
|
logMessage
|
||||||
|
FROM
|
||||||
|
UNNEST(protoPayload.line)) AS logMessage
|
||||||
FROM
|
FROM
|
||||||
TABLE_DATE_RANGE_STRICT(
|
`domain-registry-alpha.appengine_logs.appengine_googleapis_com_request_log_*`
|
||||||
[appengine_logs.appengine_googleapis_com_request_log_],
|
WHERE
|
||||||
TIMESTAMP('2017-05-01'),
|
_TABLE_SUFFIX BETWEEN '20170501' AND '20170601'
|
||||||
-- End timestamp is inclusive, so subtract 1 day from the
|
|
||||||
-- timestamp representing the start of the next month.
|
|
||||||
DATE_ADD(TIMESTAMP('2017-06-01'), -1, 'DAY'))
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ SELECT
|
||||||
THEN 'pre-ramp-up-registrars'
|
THEN 'pre-ramp-up-registrars'
|
||||||
-- The import process is imprecise; filter out invalid rows.
|
-- The import process is imprecise; filter out invalid rows.
|
||||||
ELSE 'not-applicable' END AS metricName,
|
ELSE 'not-applicable' END AS metricName,
|
||||||
INTEGER(COUNT(registrar_id)) AS count
|
COUNT(registrar_id) AS count
|
||||||
FROM
|
FROM
|
||||||
[registrar_data.registrar_status]
|
`domain-registry-alpha.registrar_data.registrar_status`
|
||||||
GROUP BY metricName
|
GROUP BY metricName
|
||||||
|
|
|
@ -18,16 +18,15 @@
|
||||||
-- counts the number of hits via both endpoints (port 43 and the web).
|
-- counts the number of hits via both endpoints (port 43 and the web).
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
-- Whois applies to all TLDs, hence the 'null' magic value.
|
|
||||||
STRING(NULL) AS tld,
|
STRING(NULL) AS tld,
|
||||||
-- Whois queries over port 43 get forwarded by the proxy to /_dr/whois,
|
|
||||||
-- while web queries come in via /whois/<params>.
|
|
||||||
CASE
|
CASE
|
||||||
WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries'
|
WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries'
|
||||||
WHEN LEFT(requestPath, 7) = '/whois/' THEN 'web-whois-queries'
|
WHEN SUBSTR(requestPath, 0, 7) = '/whois/' THEN 'web-whois-queries'
|
||||||
END AS metricName,
|
END AS metricName,
|
||||||
INTEGER(COUNT(requestPath)) AS count,
|
COUNT(requestPath) AS count
|
||||||
FROM
|
FROM
|
||||||
[monthly_logs.monthly_logs_table]
|
`domain-registry-alpha.monthly_logs.monthly_logs`
|
||||||
GROUP BY metricName
|
GROUP BY
|
||||||
HAVING metricName IS NOT NULL
|
metricName
|
||||||
|
HAVING
|
||||||
|
metricName IS NOT NULL
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue