diff --git a/java/google/registry/reporting/ActivityReportingQueryBuilder.java b/java/google/registry/reporting/ActivityReportingQueryBuilder.java new file mode 100644 index 000000000..729e39d16 --- /dev/null +++ b/java/google/registry/reporting/ActivityReportingQueryBuilder.java @@ -0,0 +1,111 @@ +// Copyright 2017 The Nomulus Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package google.registry.reporting; + +import com.google.common.collect.ImmutableMap; +import com.google.common.io.Resources; +import google.registry.util.ResourceUtils; +import google.registry.util.SqlTemplate; +import java.io.IOException; +import java.net.URL; +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; + +/** + * Utility class that produces SQL queries used to generate activity reports from Bigquery. + */ +public final class ActivityReportingQueryBuilder { + + // Names for intermediary tables for overall activity reporting query. + static final String ACTIVITY_REPORTING = "activity_reporting"; + static final String MONTHLY_LOGS = "monthly_logs"; + static final String REGISTRAR_OPERATING_STATUS = "registrar_operating_status"; + static final String DNS_COUNTS = "dns_counts"; + static final String EPP_METRICS = "epp_metrics"; + static final String WHOIS_COUNTS = "whois_counts"; + + /** Sets the month we're doing activity reporting for, and initializes the query map. */ + static ImmutableMap getQueryMap(DateTime reportingMonth) throws IOException { + // Convert the DateTime reportingMonth into YYYY-MM-01 format for start and end of month + DateTimeFormatter formatter = DateTimeFormat.forPattern("YYYY-MM-01"); + String startOfMonth = formatter.print(reportingMonth); + String endOfMonth = formatter.print(reportingMonth.plusMonths(1)); + return createQueryMap(startOfMonth, endOfMonth); + } + + private static ImmutableMap createQueryMap( + String startOfMonth, String endOfMonth) throws IOException { + + ImmutableMap.Builder queriesBuilder = ImmutableMap.builder(); + String operationalRegistrarsQuery = + SqlTemplate.create(getQueryFromFile("registrar_operating_status.sql")) + .put("REGISTRAR_DATA_SET", "registrar_data") + .put("REGISTRAR_STATUS_TABLE", "registrar_status") + .build(); + queriesBuilder.put(REGISTRAR_OPERATING_STATUS, operationalRegistrarsQuery); + + // TODO(b/62626209): Make this use the CloudDNS counts instead. + String dnsCountsQuery = + SqlTemplate.create(getQueryFromFile("dns_counts.sql")).build(); + queriesBuilder.put(DNS_COUNTS, dnsCountsQuery); + + // The monthly logs query is a shared dependency for epp counts and whois metrics + String monthlyLogsQuery = + SqlTemplate.create(getQueryFromFile("monthly_logs.sql")) + .put("START_OF_MONTH", startOfMonth).put("END_OF_MONTH", endOfMonth) + .put("APPENGINE_LOGS_DATA_SET", "appengine_logs") + .put("REQUEST_TABLE", "appengine_googleapis_com_request_log_") + .build(); + queriesBuilder.put("monthly_logs", monthlyLogsQuery); + + String eppQuery = + SqlTemplate.create(getQueryFromFile("epp_metrics.sql")) + .put("MONTHLY_LOGS_DATA_SET", MONTHLY_LOGS) + .put("MONTHLY_LOGS_TABLE", MONTHLY_LOGS + "_table") + .build(); + queriesBuilder.put(EPP_METRICS, eppQuery); + + String whoisQuery = + SqlTemplate.create(getQueryFromFile("whois_counts.sql")) + .put("MONTHLY_LOGS_DATA_SET", MONTHLY_LOGS) + .put("MONTHLY_LOGS_TABLE", MONTHLY_LOGS + "_table") + .build(); + queriesBuilder.put(WHOIS_COUNTS, whoisQuery); + + String activityQuery = + SqlTemplate.create(getQueryFromFile("activity_report_aggregation.sql")) + .put("ACTIVITY_REPORTING_DATA_SET", ACTIVITY_REPORTING) + .put("REGISTRAR_OPERATING_STATUS_TABLE", REGISTRAR_OPERATING_STATUS) + .put("DNS_COUNTS_TABLE", DNS_COUNTS) + .put("EPP_METRICS_TABLE", EPP_METRICS) + .put("WHOIS_COUNTS_TABLE", WHOIS_COUNTS) + .put("LATEST_SNAPSHOT_DATA_SET", "latest_snapshot") + .put("REGISTRY_TABLE", "Registry") + .build(); + queriesBuilder.put("activity_report_aggregation", activityQuery); + + return queriesBuilder.build(); + } + + /** Returns {@link String} for file in {@code reporting/sql/} directory. */ + private static String getQueryFromFile(String filename) throws IOException { + return ResourceUtils.readResourceUtf8(getUrl(filename)); + } + + private static URL getUrl(String filename) { + return Resources.getResource(ActivityReportingQueryBuilder.class, "sql/" + filename); + } +} diff --git a/java/google/registry/reporting/BUILD b/java/google/registry/reporting/BUILD index 75e85ddb6..6b58cf91c 100644 --- a/java/google/registry/reporting/BUILD +++ b/java/google/registry/reporting/BUILD @@ -7,6 +7,7 @@ licenses(["notice"]) # Apache 2.0 java_library( name = "reporting", srcs = glob(["*.java"]), + resources = glob(["sql/*"]), deps = [ "//java/google/registry/config", "//java/google/registry/gcs", @@ -23,5 +24,6 @@ java_library( "@com_google_guava", "@com_google_http_client", "@javax_servlet_api", + "@joda_time", ], ) diff --git a/java/google/registry/reporting/sql/activity_report_aggregation.sql b/java/google/registry/reporting/sql/activity_report_aggregation.sql new file mode 100644 index 000000000..7a999a606 --- /dev/null +++ b/java/google/registry/reporting/sql/activity_report_aggregation.sql @@ -0,0 +1,93 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + -- This query pulls from all intermediary tables to create the activity + -- report csv, via a table transpose and sum over all activity report fields. +SELECT + Tld.tld AS tld, + SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars, + SUM(IF(metricName = 'ramp-up-registrars', count, 0)) AS ramp_up_registrars, + SUM(IF(metricName = 'pre-ramp-up-registrars', count, 0)) AS pre_ramp_up_registrars, + -- We don't support ZFA over SFTP, only AXFR. + 0 AS zfa_passwords, + SUM(IF(metricName = 'whois-43-queries', count, 0)) AS whois_43_queries, + SUM(IF(metricName = 'web-whois-queries', count, 0)) AS web_whois_queries, + -- We don't support searchable WHOIS. + 0 AS searchable_whois_queries, + -- DNS queries for UDP/TCP are all assumed to be recevied/responded. + SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_received, + SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_responded, + SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_received, + SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_responded, + -- SRS metrics. + SUM(IF(metricName = 'srs-dom-check', count, 0)) AS srs_dom_check, + SUM(IF(metricName = 'srs-dom-create', count, 0)) AS srs_dom_create, + SUM(IF(metricName = 'srs-dom-delete', count, 0)) AS srs_dom_delete, + SUM(IF(metricName = 'srs-dom-info', count, 0)) AS srs_dom_info, + SUM(IF(metricName = 'srs-dom-renew', count, 0)) AS srs_dom_renew, + SUM(IF(metricName = 'srs-dom-rgp-restore-report', count, 0)) AS srs_dom_rgp_restore_report, + SUM(IF(metricName = 'srs-dom-rgp-restore-request', count, 0)) AS srs_dom_rgp_restore_request, + SUM(IF(metricName = 'srs-dom-transfer-approve', count, 0)) AS srs_dom_transfer_approve, + SUM(IF(metricName = 'srs-dom-transfer-cancel', count, 0)) AS srs_dom_transfer_cancel, + SUM(IF(metricName = 'srs-dom-transfer-query', count, 0)) AS srs_dom_transfer_query, + SUM(IF(metricName = 'srs-dom-transfer-reject', count, 0)) AS srs_dom_transfer_reject, + SUM(IF(metricName = 'srs-dom-transfer-request', count, 0)) AS srs_dom_transfer_request, + SUM(IF(metricName = 'srs-dom-update', count, 0)) AS srs_dom_update, + SUM(IF(metricName = 'srs-host-check', count, 0)) AS srs_host_check, + SUM(IF(metricName = 'srs-host-create', count, 0)) AS srs_host_create, + SUM(IF(metricName = 'srs-host-delete', count, 0)) AS srs_host_delete, + SUM(IF(metricName = 'srs-host-info', count, 0)) AS srs_host_info, + SUM(IF(metricName = 'srs-host-update', count, 0)) AS srs_host_update, + SUM(IF(metricName = 'srs-cont-check', count, 0)) AS srs_cont_check, + SUM(IF(metricName = 'srs-cont-create', count, 0)) AS srs_cont_create, + SUM(IF(metricName = 'srs-cont-delete', count, 0)) AS srs_cont_delete, + SUM(IF(metricName = 'srs-cont-info', count, 0)) AS srs_cont_info, + SUM(IF(metricName = 'srs-cont-transfer-approve', count, 0)) AS srs_cont_transfer_approve, + SUM(IF(metricName = 'srs-cont-transfer-cancel', count, 0)) AS srs_cont_transfer_cancel, + SUM(IF(metricName = 'srs-cont-transfer-query', count, 0)) AS srs_cont_transfer_query, + SUM(IF(metricName = 'srs-cont-transfer-reject', count, 0)) AS srs_cont_transfer_reject, + SUM(IF(metricName = 'srs-cont-transfer-request', count, 0)) AS srs_cont_transfer_request, + SUM(IF(metricName = 'srs-cont-update', count, 0)) AS srs_cont_update, + -- Cross join a list of all TLDs against TLD-specific metrics and then + -- filter so that only metrics with that TLD or a NULL TLD are counted + -- towards a given TLD. +FROM ( + SELECT + tldStr AS tld + FROM + [%LATEST_SNAPSHOT_DATA_SET%.%REGISTRY_TABLE%] + -- Include all real TLDs that are not in pre-delegation testing. + WHERE + tldType = 'REAL' + OMIT + RECORD IF SOME(tldStateTransitions.tldState = 'PDT') ) AS Tld + -- TODO(larryruili): Use LEFT JOIN on Tld.tld = TldMetrics.tld instead. + -- Also obsoletes dummy data. +LEFT OUTER JOIN ( + SELECT + tld, + metricName, + count FROM + -- BEGIN INTERMEDIARY DATA SOURCES -- + [%ACTIVITY_REPORTING_DATA_SET%.%REGISTRAR_OPERATING_STATUS_TABLE%], + [%ACTIVITY_REPORTING_DATA_SET%.%DNS_COUNTS_TABLE%], + [%ACTIVITY_REPORTING_DATA_SET%.%EPP_METRICS_TABLE%], + [%ACTIVITY_REPORTING_DATA_SET%.%WHOIS_COUNTS_TABLE%], + -- END INTERMEDIARY DATA SOURCES -- + ) AS TldMetrics +ON + Tld.tld = TldMetrics.tld +GROUP BY + tld +ORDER BY + tld diff --git a/java/google/registry/reporting/sql/dns_counts.sql b/java/google/registry/reporting/sql/dns_counts.sql new file mode 100644 index 000000000..e5e2ebbe3 --- /dev/null +++ b/java/google/registry/reporting/sql/dns_counts.sql @@ -0,0 +1,33 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + + -- Query for DNS metrics. + + -- Our DNS provider exports logs for all queries received. However, these + -- tables only have a TTL of 7 days. We make daily exports of the data + -- relevant to us, which allows us to get the full month's UDP and TCP + -- queries when generating activity reports. + +SELECT + -- DNS metrics apply to all tlds, which requires the 'null' magic value. + STRING(NULL) AS tld, + metricName, + -- TODO(b/63388735): Change this to actually query the DNS tables when ready. + -1 AS count, +FROM ( + SELECT + 'dns-udp-queries' AS metricName), + ( + SELECT + 'dns-tcp-queries' AS metricName) diff --git a/java/google/registry/reporting/sql/epp_metrics.sql b/java/google/registry/reporting/sql/epp_metrics.sql new file mode 100644 index 000000000..49b84948e --- /dev/null +++ b/java/google/registry/reporting/sql/epp_metrics.sql @@ -0,0 +1,64 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + + -- Query FlowReporter JSON log messages and calculate SRS metrics. + + -- We use regex's over the monthly appengine logs to determine how many + -- EPP requests we received for each command. + +SELECT + tld, + activityReportField AS metricName, + -- Manual INTEGER cast to work around a BigQuery bug (b/14560012). + INTEGER(COUNT(*)) AS count, +FROM + -- Flatten the "tld" column (repeated) so that domain checks for names + -- across multiple TLDs are counted towards each checked TLD as though + -- there were one copy of this row per TLD (the effect of flattening). + FLATTEN(( + SELECT + -- Use some ugly regex hackery to convert JSON list of strings into + -- repeated string values, since there's no built-in for this. + -- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF + -- once we can use GoogleSQL; example in b/37629674#comment2. + -- e.g. JSON:"{"commandType":"check"...,"targetIds":["ais.a.how"], + -- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"} + REGEXP_EXTRACT( + SPLIT( + REGEXP_EXTRACT( + JSON_EXTRACT(json, '$.tlds'), + r'^\[(.*)\]$')), + '^"(.*)"$') AS tld, + -- TODO(b/XXX): remove rawTlds after June 2017 (see below). + JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType, + JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField') + AS activityReportField, + FROM ( + SELECT + -- Extract JSON payload following log signature. + REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') + AS json, + FROM + [%MONTHLY_LOGS_DATA_SET%.%MONTHLY_LOGS_TABLE%] + WHERE logMessage CONTAINS 'FLOW-LOG-SIGNATURE-METADATA' + ) + ), + -- Second argument to flatten (see above). + tld) +-- Exclude cases that can't be tabulated correctly - activity report field +-- is null/empty, or the TLD is null/empty even though it's a domain flow. +WHERE + activityReportField != '' AND (tld != '' OR resourceType != 'domain') +GROUP BY tld, metricName +ORDER BY tld, metricName diff --git a/java/google/registry/reporting/sql/monthly_logs.sql b/java/google/registry/reporting/sql/monthly_logs.sql new file mode 100644 index 000000000..7298895de --- /dev/null +++ b/java/google/registry/reporting/sql/monthly_logs.sql @@ -0,0 +1,28 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + + -- Query to fetch AppEngine request logs for the report month. + + -- START_OF_MONTH and END_OF_MONTH should be in YYYY-MM-01 format. + +SELECT + protoPayload.resource AS requestPath, + protoPayload.line.logMessage AS logMessage, +FROM + TABLE_DATE_RANGE_STRICT( + [%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%], + TIMESTAMP('%START_OF_MONTH%'), + -- End timestamp is inclusive, so subtract 1 day from the + -- timestamp representing the start of the next month. + DATE_ADD(TIMESTAMP('%END_OF_MONTH%'), -1, 'DAY')) diff --git a/java/google/registry/reporting/sql/registrar_operating_status.sql b/java/google/registry/reporting/sql/registrar_operating_status.sql new file mode 100644 index 000000000..a0d434d9a --- /dev/null +++ b/java/google/registry/reporting/sql/registrar_operating_status.sql @@ -0,0 +1,32 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + + -- Query for all registrar statuses: + -- production, ramping up (OTE), or pre-ramp-up (requested). + +SELECT + -- Applies to all TLDs, hence the 'null' magic value. + STRING(NULL) AS tld, + CASE WHEN access_type = 'PROD' AND registrar_name IS NOT NULL + THEN 'operational-registrars' + WHEN access_type = 'OTE' AND registrar_name IS NOT NULL + THEN 'ramp-up-registrars' + WHEN access_type IS NULL AND registrar_name IS NOT NULL + THEN 'pre-ramp-up-registrars' + -- The import process is imprecise; filter out invalid rows. + ELSE 'not-applicable' END AS metricName, + INTEGER(COUNT(registrar_id)) AS count +FROM + [%REGISTRAR_DATA_SET%.%REGISTRAR_STATUS_TABLE%] +GROUP BY metricName diff --git a/java/google/registry/reporting/sql/whois_counts.sql b/java/google/registry/reporting/sql/whois_counts.sql new file mode 100644 index 000000000..cb37bcac5 --- /dev/null +++ b/java/google/registry/reporting/sql/whois_counts.sql @@ -0,0 +1,33 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + + -- Query for WHOIS metrics. + + -- This searches the monthly appengine logs for Whois requests, and + -- counts the number of hits via both endpoints (port 43 and the web). + +SELECT + -- Whois applies to all TLDs, hence the 'null' magic value. + STRING(NULL) AS tld, + -- Whois queries over port 43 get forwarded by the proxy to /_dr/whois, + -- while web queries come in via /whois/. + CASE + WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries' + WHEN LEFT(requestPath, 7) = '/whois/' THEN 'web-whois-queries' + END AS metricName, + INTEGER(COUNT(requestPath)) AS count, +FROM + [%MONTHLY_LOGS_DATA_SET%.%MONTHLY_LOGS_TABLE%] +GROUP BY metricName +HAVING metricName IS NOT NULL diff --git a/javatests/google/registry/reporting/ActivityReportingQueryBuilderTest.java b/javatests/google/registry/reporting/ActivityReportingQueryBuilderTest.java new file mode 100644 index 000000000..c0f374e52 --- /dev/null +++ b/javatests/google/registry/reporting/ActivityReportingQueryBuilderTest.java @@ -0,0 +1,54 @@ +// Copyright 2017 The Nomulus Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package google.registry.reporting; + +import static com.google.common.truth.Truth.assertThat; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import java.io.IOException; +import org.joda.time.DateTime; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link ActivityReportingQueryBuilder}. */ +@RunWith(JUnit4.class) +public class ActivityReportingQueryBuilderTest { + + @Test + public void testQueryMatch() throws IOException { + ImmutableList queryNames = + ImmutableList.of( + ActivityReportingQueryBuilder.REGISTRAR_OPERATING_STATUS, + ActivityReportingQueryBuilder.DNS_COUNTS, + ActivityReportingQueryBuilder.MONTHLY_LOGS, + ActivityReportingQueryBuilder.EPP_METRICS, + ActivityReportingQueryBuilder.WHOIS_COUNTS, + "activity_report_aggregation"); + + ImmutableMap.Builder testQueryBuilder = ImmutableMap.builder(); + for (String queryName : queryNames) { + String testFilename = String.format("%s_test.sql", queryName); + testQueryBuilder.put(queryName, ReportingTestData.getString(testFilename)); + } + ImmutableMap testQueries = testQueryBuilder.build(); + ImmutableMap queries = + ActivityReportingQueryBuilder.getQueryMap(new DateTime(2017, 05, 15, 0, 0)); + for (String query : queryNames) { + assertThat(queries.get(query)).isEqualTo(testQueries.get(query)); + } + } +} diff --git a/javatests/google/registry/reporting/BUILD b/javatests/google/registry/reporting/BUILD index f60b83c80..419b3d0d8 100644 --- a/javatests/google/registry/reporting/BUILD +++ b/javatests/google/registry/reporting/BUILD @@ -24,6 +24,7 @@ java_library( "@com_google_http_client", "@com_google_truth", "@javax_servlet_api", + "@joda_time", "@junit", "@org_mockito_all", ], diff --git a/javatests/google/registry/reporting/ReportingTestData.java b/javatests/google/registry/reporting/ReportingTestData.java index 0eb919942..ea3d3cd07 100644 --- a/javatests/google/registry/reporting/ReportingTestData.java +++ b/javatests/google/registry/reporting/ReportingTestData.java @@ -14,8 +14,11 @@ package google.registry.reporting; +import static java.nio.charset.StandardCharsets.UTF_8; + import com.google.common.io.ByteSource; import com.google.common.io.Resources; +import java.io.IOException; import java.net.URL; /** Utility class providing easy access to contents of the {@code testdata/} directory. */ @@ -26,7 +29,12 @@ public final class ReportingTestData { return Resources.asByteSource(getUrl(filename)); } - private static URL getUrl(String filename) { + /** Returns a {@link String} from a file in the {@code reporting/testdata/} directory. */ + public static String getString(String filename) throws IOException { + return Resources.asCharSource(getUrl(filename), UTF_8).read(); + } + + private static URL getUrl(String filename) { return Resources.getResource(ReportingTestData.class, "testdata/" + filename); } } diff --git a/javatests/google/registry/reporting/testdata/activity_report_aggregation_test.sql b/javatests/google/registry/reporting/testdata/activity_report_aggregation_test.sql new file mode 100644 index 000000000..391bd3963 --- /dev/null +++ b/javatests/google/registry/reporting/testdata/activity_report_aggregation_test.sql @@ -0,0 +1,93 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + -- This query pulls from all intermediary tables to create the activity + -- report csv, via a table transpose and sum over all activity report fields. +SELECT + Tld.tld AS tld, + SUM(IF(metricName = 'operational-registrars', count, 0)) AS operational_registrars, + SUM(IF(metricName = 'ramp-up-registrars', count, 0)) AS ramp_up_registrars, + SUM(IF(metricName = 'pre-ramp-up-registrars', count, 0)) AS pre_ramp_up_registrars, + -- We don't support ZFA over SFTP, only AXFR. + 0 AS zfa_passwords, + SUM(IF(metricName = 'whois-43-queries', count, 0)) AS whois_43_queries, + SUM(IF(metricName = 'web-whois-queries', count, 0)) AS web_whois_queries, + -- We don't support searchable WHOIS. + 0 AS searchable_whois_queries, + -- DNS queries for UDP/TCP are all assumed to be recevied/responded. + SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_received, + SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_responded, + SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_received, + SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_responded, + -- SRS metrics. + SUM(IF(metricName = 'srs-dom-check', count, 0)) AS srs_dom_check, + SUM(IF(metricName = 'srs-dom-create', count, 0)) AS srs_dom_create, + SUM(IF(metricName = 'srs-dom-delete', count, 0)) AS srs_dom_delete, + SUM(IF(metricName = 'srs-dom-info', count, 0)) AS srs_dom_info, + SUM(IF(metricName = 'srs-dom-renew', count, 0)) AS srs_dom_renew, + SUM(IF(metricName = 'srs-dom-rgp-restore-report', count, 0)) AS srs_dom_rgp_restore_report, + SUM(IF(metricName = 'srs-dom-rgp-restore-request', count, 0)) AS srs_dom_rgp_restore_request, + SUM(IF(metricName = 'srs-dom-transfer-approve', count, 0)) AS srs_dom_transfer_approve, + SUM(IF(metricName = 'srs-dom-transfer-cancel', count, 0)) AS srs_dom_transfer_cancel, + SUM(IF(metricName = 'srs-dom-transfer-query', count, 0)) AS srs_dom_transfer_query, + SUM(IF(metricName = 'srs-dom-transfer-reject', count, 0)) AS srs_dom_transfer_reject, + SUM(IF(metricName = 'srs-dom-transfer-request', count, 0)) AS srs_dom_transfer_request, + SUM(IF(metricName = 'srs-dom-update', count, 0)) AS srs_dom_update, + SUM(IF(metricName = 'srs-host-check', count, 0)) AS srs_host_check, + SUM(IF(metricName = 'srs-host-create', count, 0)) AS srs_host_create, + SUM(IF(metricName = 'srs-host-delete', count, 0)) AS srs_host_delete, + SUM(IF(metricName = 'srs-host-info', count, 0)) AS srs_host_info, + SUM(IF(metricName = 'srs-host-update', count, 0)) AS srs_host_update, + SUM(IF(metricName = 'srs-cont-check', count, 0)) AS srs_cont_check, + SUM(IF(metricName = 'srs-cont-create', count, 0)) AS srs_cont_create, + SUM(IF(metricName = 'srs-cont-delete', count, 0)) AS srs_cont_delete, + SUM(IF(metricName = 'srs-cont-info', count, 0)) AS srs_cont_info, + SUM(IF(metricName = 'srs-cont-transfer-approve', count, 0)) AS srs_cont_transfer_approve, + SUM(IF(metricName = 'srs-cont-transfer-cancel', count, 0)) AS srs_cont_transfer_cancel, + SUM(IF(metricName = 'srs-cont-transfer-query', count, 0)) AS srs_cont_transfer_query, + SUM(IF(metricName = 'srs-cont-transfer-reject', count, 0)) AS srs_cont_transfer_reject, + SUM(IF(metricName = 'srs-cont-transfer-request', count, 0)) AS srs_cont_transfer_request, + SUM(IF(metricName = 'srs-cont-update', count, 0)) AS srs_cont_update, + -- Cross join a list of all TLDs against TLD-specific metrics and then + -- filter so that only metrics with that TLD or a NULL TLD are counted + -- towards a given TLD. +FROM ( + SELECT + tldStr AS tld + FROM + [latest_snapshot.Registry] + -- Include all real TLDs that are not in pre-delegation testing. + WHERE + tldType = 'REAL' + OMIT + RECORD IF SOME(tldStateTransitions.tldState = 'PDT') ) AS Tld + -- TODO(larryruili): Use LEFT JOIN on Tld.tld = TldMetrics.tld instead. + -- Also obsoletes dummy data. +LEFT OUTER JOIN ( + SELECT + tld, + metricName, + count FROM + -- BEGIN INTERMEDIARY DATA SOURCES -- + [activity_reporting.registrar_operating_status], + [activity_reporting.dns_counts], + [activity_reporting.epp_metrics], + [activity_reporting.whois_counts], + -- END INTERMEDIARY DATA SOURCES -- + ) AS TldMetrics +ON + Tld.tld = TldMetrics.tld +GROUP BY + tld +ORDER BY + tld diff --git a/javatests/google/registry/reporting/testdata/dns_counts_test.sql b/javatests/google/registry/reporting/testdata/dns_counts_test.sql new file mode 100644 index 000000000..e5e2ebbe3 --- /dev/null +++ b/javatests/google/registry/reporting/testdata/dns_counts_test.sql @@ -0,0 +1,33 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + + -- Query for DNS metrics. + + -- Our DNS provider exports logs for all queries received. However, these + -- tables only have a TTL of 7 days. We make daily exports of the data + -- relevant to us, which allows us to get the full month's UDP and TCP + -- queries when generating activity reports. + +SELECT + -- DNS metrics apply to all tlds, which requires the 'null' magic value. + STRING(NULL) AS tld, + metricName, + -- TODO(b/63388735): Change this to actually query the DNS tables when ready. + -1 AS count, +FROM ( + SELECT + 'dns-udp-queries' AS metricName), + ( + SELECT + 'dns-tcp-queries' AS metricName) diff --git a/javatests/google/registry/reporting/testdata/epp_metrics_test.sql b/javatests/google/registry/reporting/testdata/epp_metrics_test.sql new file mode 100644 index 000000000..d8d085349 --- /dev/null +++ b/javatests/google/registry/reporting/testdata/epp_metrics_test.sql @@ -0,0 +1,64 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + + -- Query FlowReporter JSON log messages and calculate SRS metrics. + + -- We use regex's over the monthly appengine logs to determine how many + -- EPP requests we received for each command. + +SELECT + tld, + activityReportField AS metricName, + -- Manual INTEGER cast to work around a BigQuery bug (b/14560012). + INTEGER(COUNT(*)) AS count, +FROM + -- Flatten the "tld" column (repeated) so that domain checks for names + -- across multiple TLDs are counted towards each checked TLD as though + -- there were one copy of this row per TLD (the effect of flattening). + FLATTEN(( + SELECT + -- Use some ugly regex hackery to convert JSON list of strings into + -- repeated string values, since there's no built-in for this. + -- TODO(b/20829992): replace with "JSON.parse()" inside a JS UDF + -- once we can use GoogleSQL; example in b/37629674#comment2. + -- e.g. JSON:"{"commandType":"check"...,"targetIds":["ais.a.how"], + -- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"} + REGEXP_EXTRACT( + SPLIT( + REGEXP_EXTRACT( + JSON_EXTRACT(json, '$.tlds'), + r'^\[(.*)\]$')), + '^"(.*)"$') AS tld, + -- TODO(b/XXX): remove rawTlds after June 2017 (see below). + JSON_EXTRACT_SCALAR(json, '$.resourceType') AS resourceType, + JSON_EXTRACT_SCALAR(json, '$.icannActivityReportField') + AS activityReportField, + FROM ( + SELECT + -- Extract JSON payload following log signature. + REGEXP_EXTRACT(logMessage, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') + AS json, + FROM + [monthly_logs.monthly_logs_table] + WHERE logMessage CONTAINS 'FLOW-LOG-SIGNATURE-METADATA' + ) + ), + -- Second argument to flatten (see above). + tld) +-- Exclude cases that can't be tabulated correctly - activity report field +-- is null/empty, or the TLD is null/empty even though it's a domain flow. +WHERE + activityReportField != '' AND (tld != '' OR resourceType != 'domain') +GROUP BY tld, metricName +ORDER BY tld, metricName diff --git a/javatests/google/registry/reporting/testdata/monthly_logs_test.sql b/javatests/google/registry/reporting/testdata/monthly_logs_test.sql new file mode 100644 index 000000000..2b1cd69f6 --- /dev/null +++ b/javatests/google/registry/reporting/testdata/monthly_logs_test.sql @@ -0,0 +1,28 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + + -- Query to fetch AppEngine request logs for the report month. + + -- START_OF_MONTH and END_OF_MONTH should be in YYYY-MM-01 format. + +SELECT + protoPayload.resource AS requestPath, + protoPayload.line.logMessage AS logMessage, +FROM + TABLE_DATE_RANGE_STRICT( + [appengine_logs.appengine_googleapis_com_request_log_], + TIMESTAMP('2017-05-01'), + -- End timestamp is inclusive, so subtract 1 day from the + -- timestamp representing the start of the next month. + DATE_ADD(TIMESTAMP('2017-06-01'), -1, 'DAY')) diff --git a/javatests/google/registry/reporting/testdata/registrar_operating_status_test.sql b/javatests/google/registry/reporting/testdata/registrar_operating_status_test.sql new file mode 100644 index 000000000..e55de2c5a --- /dev/null +++ b/javatests/google/registry/reporting/testdata/registrar_operating_status_test.sql @@ -0,0 +1,32 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + + -- Query for all registrar statuses: + -- production, ramping up (OTE), or pre-ramp-up (requested). + +SELECT + -- Applies to all TLDs, hence the 'null' magic value. + STRING(NULL) AS tld, + CASE WHEN access_type = 'PROD' AND registrar_name IS NOT NULL + THEN 'operational-registrars' + WHEN access_type = 'OTE' AND registrar_name IS NOT NULL + THEN 'ramp-up-registrars' + WHEN access_type IS NULL AND registrar_name IS NOT NULL + THEN 'pre-ramp-up-registrars' + -- The import process is imprecise; filter out invalid rows. + ELSE 'not-applicable' END AS metricName, + INTEGER(COUNT(registrar_id)) AS count +FROM + [registrar_data.registrar_status] +GROUP BY metricName diff --git a/javatests/google/registry/reporting/testdata/whois_counts_test.sql b/javatests/google/registry/reporting/testdata/whois_counts_test.sql new file mode 100644 index 000000000..bd1b258ac --- /dev/null +++ b/javatests/google/registry/reporting/testdata/whois_counts_test.sql @@ -0,0 +1,33 @@ + -- Copyright 2017 The Nomulus Authors. All Rights Reserved. + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + + -- Query for WHOIS metrics. + + -- This searches the monthly appengine logs for Whois requests, and + -- counts the number of hits via both endpoints (port 43 and the web). + +SELECT + -- Whois applies to all TLDs, hence the 'null' magic value. + STRING(NULL) AS tld, + -- Whois queries over port 43 get forwarded by the proxy to /_dr/whois, + -- while web queries come in via /whois/. + CASE + WHEN requestPath = '/_dr/whois' THEN 'whois-43-queries' + WHEN LEFT(requestPath, 7) = '/whois/' THEN 'web-whois-queries' + END AS metricName, + INTEGER(COUNT(requestPath)) AS count, +FROM + [monthly_logs.monthly_logs_table] +GROUP BY metricName +HAVING metricName IS NOT NULL