Add transaction report generation code

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=167311547
This commit is contained in:
larryruili 2017-09-01 12:45:06 -07:00 committed by jianglai
parent f26bfbf632
commit 06f3215659
25 changed files with 1284 additions and 75 deletions

View file

@ -14,6 +14,9 @@
package google.registry.reporting;
import static google.registry.reporting.IcannReportingModule.DATASTORE_EXPORT_DATA_SET;
import static google.registry.reporting.IcannReportingModule.ICANN_REPORTING_DATA_SET;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Resources;
import google.registry.config.RegistryConfig.Config;
@ -30,10 +33,9 @@ import org.joda.time.format.DateTimeFormatter;
/**
* Utility class that produces SQL queries used to generate activity reports from Bigquery.
*/
public final class ActivityReportingQueryBuilder {
public final class ActivityReportingQueryBuilder implements QueryBuilder {
// Names for intermediary tables for overall activity reporting query.
static final String ICANN_REPORTING_DATA_SET = "icann_reporting";
static final String ACTIVITY_REPORT_AGGREGATION = "activity_report_aggregation";
static final String MONTHLY_LOGS = "monthly_logs";
static final String REGISTRAR_OPERATING_STATUS = "registrar_operating_status";
@ -46,7 +48,8 @@ public final class ActivityReportingQueryBuilder {
@Inject ActivityReportingQueryBuilder() {}
/** Returns the aggregate query which generates the activity report from the saved view. */
String getActivityReportQuery() throws IOException {
@Override
public String getReportQuery() throws IOException {
return String.format(
"#standardSQL\nSELECT * FROM `%s.%s.%s`",
projectId,
@ -54,24 +57,20 @@ public final class ActivityReportingQueryBuilder {
getTableName(ACTIVITY_REPORT_AGGREGATION));
}
/** Returns the table name of the query, suffixed with the yearMonth in _YYYYMM format. */
private String getTableName(String queryName) {
return String.format("%s_%s", queryName, yearMonth.replace("-", ""));
}
/** Sets the month we're doing activity reporting for, and returns the view query map. */
ImmutableMap<String, String> getViewQueryMap() throws IOException {
LocalDate reportDate = DateTimeFormat.forPattern("yyyy-MM").parseLocalDate(yearMonth);
// Convert reportingMonth into YYYYMM01 format for Bigquery table partition pattern-matching.
DateTimeFormatter formatter = DateTimeFormat.forPattern("YYYYMM01");
String startOfMonth = formatter.print(reportDate);
String endOfMonth = formatter.print(reportDate.plusMonths(1));
return createQueryMap(startOfMonth, endOfMonth);
@Override
public ImmutableMap<String, String> getViewQueryMap() throws IOException {
LocalDate reportDate =
DateTimeFormat.forPattern("yyyy-MM").parseLocalDate(yearMonth).withDayOfMonth(1);
LocalDate firstDayOfMonth = reportDate;
// The pattern-matching is inclusive, so we subtract 1 day to only report that month's data.
LocalDate lastDayOfMonth = reportDate.plusMonths(1).minusDays(1);
return createQueryMap(firstDayOfMonth, lastDayOfMonth);
}
/** Returns a map from view name to its associated SQL query. */
private ImmutableMap<String, String> createQueryMap(
String startOfMonth, String endOfMonth) throws IOException {
LocalDate firstDayOfMonth, LocalDate lastDayOfMonth) throws IOException {
ImmutableMap.Builder<String, String> queriesBuilder = ImmutableMap.builder();
String operationalRegistrarsQuery =
@ -87,14 +86,16 @@ public final class ActivityReportingQueryBuilder {
SqlTemplate.create(getQueryFromFile("dns_counts.sql")).build();
queriesBuilder.put(getTableName(DNS_COUNTS), dnsCountsQuery);
// The monthly logs query is a shared dependency for epp counts and whois metrics
// Convert reportingMonth into YYYYMMDD format for Bigquery table partition pattern-matching.
DateTimeFormatter logTableFormatter = DateTimeFormat.forPattern("yyyyMMdd");
// The monthly logs are a shared dependency for epp counts and whois metrics
String monthlyLogsQuery =
SqlTemplate.create(getQueryFromFile("monthly_logs.sql"))
.put("PROJECT_ID", projectId)
.put("APPENGINE_LOGS_DATA_SET", "appengine_logs")
.put("REQUEST_TABLE", "appengine_googleapis_com_request_log_")
.put("START_OF_MONTH", startOfMonth)
.put("END_OF_MONTH", endOfMonth)
.put("FIRST_DAY_OF_MONTH", logTableFormatter.print(firstDayOfMonth))
.put("LAST_DAY_OF_MONTH", logTableFormatter.print(lastDayOfMonth))
.build();
queriesBuilder.put(getTableName(MONTHLY_LOGS), monthlyLogsQuery);
@ -126,8 +127,7 @@ public final class ActivityReportingQueryBuilder {
.put("DNS_COUNTS_TABLE", getTableName(DNS_COUNTS))
.put("EPP_METRICS_TABLE", getTableName(EPP_METRICS))
.put("WHOIS_COUNTS_TABLE", getTableName(WHOIS_COUNTS))
// TODO(larryruili): Change to "latest_datastore_export" when cl/163124895 in prod.
.put("LATEST_DATASTORE_EXPORT", "latest_datastore_views")
.put("DATASTORE_EXPORT_DATA_SET", DATASTORE_EXPORT_DATA_SET)
.put("REGISTRY_TABLE", "Registry")
.build();
queriesBuilder.put(getTableName(ACTIVITY_REPORT_AGGREGATION), aggregateQuery);
@ -135,6 +135,11 @@ public final class ActivityReportingQueryBuilder {
return queriesBuilder.build();
}
/** Returns the table name of the query, suffixed with the yearMonth in _YYYYMM format. */
private String getTableName(String queryName) {
return String.format("%s_%s", queryName, yearMonth.replace("-", ""));
}
/** Returns {@link String} for file in {@code reporting/sql/} directory. */
private static String getQueryFromFile(String filename) throws IOException {
return ResourceUtils.readResourceUtf8(getUrl(filename));

View file

@ -44,6 +44,8 @@ public final class IcannReportingModule {
static final String PARAM_YEAR_MONTH = "yearMonth";
static final String PARAM_REPORT_TYPE = "reportType";
static final String PARAM_SUBDIR = "subdir";
static final String ICANN_REPORTING_DATA_SET = "icann_reporting";
static final String DATASTORE_EXPORT_DATA_SET = "latest_datastore_export";
private static final String BIGQUERY_SCOPE = "https://www.googleapis.com/auth/bigquery";
private static final String DRIVE_SCOPE = "https://www.googleapis.com/auth/drive.readonly";
@ -65,6 +67,14 @@ public final class IcannReportingModule {
return extractOptionalParameter(req, PARAM_SUBDIR);
}
@Provides
static QueryBuilder provideQueryBuilder(
@Parameter(PARAM_REPORT_TYPE) ReportType reportType,
ActivityReportingQueryBuilder activityBuilder,
TransactionsReportingQueryBuilder transactionsBuilder) {
return reportType == ReportType.ACTIVITY ? activityBuilder : transactionsBuilder;
}
/**
* Constructs a BigqueryConnection with default settings.
*
@ -81,7 +91,7 @@ public final class IcannReportingModule {
BigqueryConnection connection = new BigqueryConnection.Builder()
.setExecutorService(Executors.newFixedThreadPool(20))
.setCredential(credential.createScoped(ImmutableList.of(BIGQUERY_SCOPE, DRIVE_SCOPE)))
.setDatasetId(ActivityReportingQueryBuilder.ICANN_REPORTING_DATA_SET)
.setDatasetId(ICANN_REPORTING_DATA_SET)
.setOverwrite(true)
.setPollInterval(Duration.standardSeconds(1))
.build();

View file

@ -25,10 +25,14 @@ import com.google.appengine.tools.cloudstorage.GcsFilename;
import com.google.common.base.Function;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Iterables;
import com.google.common.collect.ListMultimap;
import com.google.common.net.MediaType;
import google.registry.bigquery.BigqueryConnection;
import google.registry.bigquery.BigqueryUtils.TableType;
@ -42,7 +46,9 @@ import google.registry.request.auth.Auth;
import google.registry.util.FormattingLogger;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ExecutionException;
@ -68,7 +74,8 @@ public final class IcannReportingStagingAction implements Runnable {
@Inject @Config("icannReportingBucket") String reportingBucket;
@Inject @Parameter(IcannReportingModule.PARAM_YEAR_MONTH) String yearMonth;
@Inject @Parameter(IcannReportingModule.PARAM_SUBDIR) Optional<String> subdir;
@Inject ActivityReportingQueryBuilder queryBuilder;
@Inject @Parameter(IcannReportingModule.PARAM_REPORT_TYPE) ReportType reportType;
@Inject QueryBuilder queryBuilder;
@Inject BigqueryConnection bigquery;
@Inject GcsUtils gcsUtils;
@Inject Response response;
@ -77,29 +84,24 @@ public final class IcannReportingStagingAction implements Runnable {
@Override
public void run() {
try {
ImmutableMap<String, String> viewQueryMap = queryBuilder.getViewQueryMap();
// Generate intermediary views
ImmutableMap<String, String> activityQueries =
queryBuilder.getViewQueryMap();
for (Entry<String, String> entry : activityQueries.entrySet()) {
for (Entry<String, String> entry : viewQueryMap.entrySet()) {
createIntermediaryTableView(entry.getKey(), entry.getValue());
}
// Get an in-memory table of the activity report query
// Get an in-memory table of the aggregate query's result
ImmutableTable<Integer, TableFieldSchema, Object> reportTable =
bigquery.queryToLocalTableSync(queryBuilder.getActivityReportQuery());
bigquery.queryToLocalTableSync(queryBuilder.getReportQuery());
// Get report headers from the table schema and convert into CSV format
String headerRow = constructActivityReportRow(getHeaders(reportTable.columnKeySet()));
String headerRow = constructRow(getHeaders(reportTable.columnKeySet()));
logger.infofmt("Headers: %s", headerRow);
// Create a report csv for each tld from query table, and upload to GCS
for (Map<TableFieldSchema, Object> row : reportTable.rowMap().values()) {
// Get the tld (first cell in each row)
String tld = row.values().iterator().next().toString();
if (isNullOrEmpty(tld)) {
throw new RuntimeException("Found an empty row in the activity report table!");
}
uploadReport(tld, createReport(headerRow, row));
if (reportType == ReportType.ACTIVITY) {
stageActivityReports(headerRow, reportTable.rowMap().values());
} else {
stageTransactionsReports(headerRow, reportTable.rowMap().values());
}
response.setStatus(SC_OK);
response.setContentType(MediaType.PLAIN_TEXT_UTF_8);
@ -108,7 +110,9 @@ public final class IcannReportingStagingAction implements Runnable {
logger.warning(Throwables.getStackTraceAsString(e));
response.setStatus(SC_INTERNAL_SERVER_ERROR);
response.setContentType(MediaType.PLAIN_TEXT_UTF_8);
response.setPayload("Caught exception:\n" + e.getMessage());
response.setPayload(
String.format("Caught exception:\n%s\n%s", e.getMessage(),
Arrays.toString(e.getStackTrace())));
}
}
@ -118,7 +122,8 @@ public final class IcannReportingStagingAction implements Runnable {
bigquery.query(
query,
bigquery.buildDestinationTable(queryName)
.description("An intermediary view to generate activity reports for this month.")
.description(String.format(
"An intermediary view to generate %s reports for this month.", reportType))
.type(TableType.VIEW)
.build()
).get();
@ -137,13 +142,48 @@ public final class IcannReportingStagingAction implements Runnable {
);
}
private void stageActivityReports (
String headerRow, ImmutableCollection<Map<TableFieldSchema, Object>> rows)
throws IOException {
// Create a report csv for each tld from query table, and upload to GCS
for (Map<TableFieldSchema, Object> row : rows) {
// Get the tld (first cell in each row)
String tld = row.values().iterator().next().toString();
if (isNullOrEmpty(tld)) {
throw new RuntimeException("Found an empty row in the activity report table!");
}
ImmutableList<String> rowStrings = ImmutableList.of(constructRow(row.values()));
// Create and upload the activity report with a single row
uploadReport(tld, createReport(headerRow, rowStrings));
}
}
private void stageTransactionsReports(
String headerRow, ImmutableCollection<Map<TableFieldSchema, Object>> rows)
throws IOException {
// Map from tld to rows
ListMultimap<String, String> tldToRows = ArrayListMultimap.create();
for (Map<TableFieldSchema, Object> row : rows) {
// Get the tld (first cell in each row)
String tld = row.values().iterator().next().toString();
if (isNullOrEmpty(tld)) {
throw new RuntimeException("Found an empty row in the activity report table!");
}
tldToRows.put(tld, constructRow(row.values()));
}
// Create and upload a transactions report for each tld via its rows
for (String tld : tldToRows.keySet()) {
uploadReport(tld, createReport(headerRow, tldToRows.get(tld)));
}
}
/**
* Makes a row of the report by appending the string representation of all objects in an iterable
* with commas separating individual fields.
*
* <p>This discards the first object, which is assumed to be the TLD field.
* */
private String constructActivityReportRow(Iterable<? extends Object> iterable) {
private String constructRow(Iterable<? extends Object> iterable) {
Iterator<? extends Object> rowIter = iterable.iterator();
StringBuilder rowString = new StringBuilder();
// Skip the TLD column
@ -156,13 +196,20 @@ public final class IcannReportingStagingAction implements Runnable {
return rowString.toString();
}
private String createReport(String headers, Map<TableFieldSchema, Object> row) {
/**
* Constructs a report given its headers and rows as a string.
*
* <p>Note that activity reports will only have one row, while transactions reports may have
* multiple rows.
*/
private String createReport(String headers, List<String> rows) {
StringBuilder reportCsv = new StringBuilder(headers);
// Add CRLF between rows per ICANN specification
reportCsv.append("\r\n");
String valuesRow = constructActivityReportRow(row.values());
reportCsv.append(valuesRow);
logger.infofmt("Created report %s", reportCsv.toString());
for (String row : rows) {
// Add CRLF between rows per ICANN specification
reportCsv.append("\r\n");
reportCsv.append(row);
}
logger.infofmt("Created %s report:\n%s", reportType, reportCsv.toString());
return reportCsv.toString();
}
@ -170,7 +217,7 @@ public final class IcannReportingStagingAction implements Runnable {
// Upload resulting CSV file to GCS
byte[] reportBytes = reportCsv.getBytes(UTF_8);
String reportFilename =
IcannReportingUploadAction.createFilename(tld, yearMonth, ReportType.ACTIVITY);
IcannReportingUploadAction.createFilename(tld, yearMonth, reportType);
String reportBucketname =
IcannReportingUploadAction.createReportingBucketName(reportingBucket, subdir, yearMonth);
final GcsFilename gcsFilename = new GcsFilename(reportBucketname, reportFilename);

View file

@ -0,0 +1,28 @@
// Copyright 2017 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.reporting;
import com.google.common.collect.ImmutableMap;
import java.io.IOException;
/** Interface defining the necessary methods to construct ICANN reporting SQL queries. */
public interface QueryBuilder {
/** Returns a map from an intermediary view's table name to the query that generates it. */
ImmutableMap<String, String> getViewQueryMap() throws IOException;
/** Returns a query that retrieves the overall report from the previously generated view. */
String getReportQuery() throws IOException;
}

View file

@ -0,0 +1,189 @@
// Copyright 2017 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.reporting;
import static google.registry.reporting.IcannReportingModule.DATASTORE_EXPORT_DATA_SET;
import static google.registry.reporting.IcannReportingModule.ICANN_REPORTING_DATA_SET;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Resources;
import google.registry.config.RegistryConfig.Config;
import google.registry.request.Parameter;
import google.registry.util.ResourceUtils;
import google.registry.util.SqlTemplate;
import java.io.IOException;
import java.net.URL;
import javax.inject.Inject;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
/**
* Utility class that produces SQL queries used to generate activity reports from Bigquery.
*/
public final class TransactionsReportingQueryBuilder implements QueryBuilder {
@Inject @Config("projectId") String projectId;
@Inject @Parameter(IcannReportingModule.PARAM_YEAR_MONTH) String yearMonth;
@Inject TransactionsReportingQueryBuilder() {}
static final String TRANSACTIONS_REPORT_AGGREGATION = "transactions_report_aggregation";
static final String REGISTRAR_IANA_ID = "registrar_iana_id";
static final String TOTAL_DOMAINS = "total_domains";
static final String TOTAL_NAMESERVERS = "total_nameservers";
static final String TRANSACTION_COUNTS = "transaction_counts";
static final String TRANSACTION_TRANSFER_LOSING = "transaction_transfer_losing";
static final String ATTEMPTED_ADDS = "attempted_adds";
/** Returns the aggregate query which generates the transactions report from the saved view. */
@Override
public String getReportQuery() throws IOException {
return String.format(
"#standardSQL\nSELECT * FROM `%s.%s.%s`",
projectId,
ICANN_REPORTING_DATA_SET,
getTableName(TRANSACTIONS_REPORT_AGGREGATION));
}
/** Sets the month we're doing transactions reporting for, and returns the view query map. */
@Override
public ImmutableMap<String, String> getViewQueryMap() throws IOException {
// Set the earliest date to to yearMonth on day 1 at 00:00:00
DateTime earliestReportTime =
DateTimeFormat.forPattern("yyyy-MM")
.parseDateTime(yearMonth)
.withDayOfMonth(1)
.withHourOfDay(0)
.withMinuteOfHour(0)
.withSecondOfMinute(0);
// Set the latest date to yearMonth on the last day at 23:59:59
DateTime latestReportTime = earliestReportTime.plusMonths(1).minusSeconds(1);
return createQueryMap(earliestReportTime, latestReportTime);
}
/** Returns a map from view name to its associated SQL query. */
private ImmutableMap<String, String> createQueryMap(
DateTime earliestReportTime, DateTime latestReportTime) throws IOException {
ImmutableMap.Builder<String, String> queriesBuilder = ImmutableMap.builder();
String registrarIanaIdQuery =
SqlTemplate.create(getQueryFromFile("registrar_iana_id.sql"))
.put("PROJECT_ID", projectId)
.put("DATASTORE_EXPORT_DATA_SET", DATASTORE_EXPORT_DATA_SET)
.put("REGISTRAR_TABLE", "Registrar")
.build();
queriesBuilder.put(getTableName(REGISTRAR_IANA_ID), registrarIanaIdQuery);
String totalDomainsQuery =
SqlTemplate.create(getQueryFromFile("total_domains.sql"))
.put("PROJECT_ID", projectId)
.put("DATASTORE_EXPORT_DATA_SET", DATASTORE_EXPORT_DATA_SET)
.put("DOMAINBASE_TABLE", "DomainBase")
.put("REGISTRAR_TABLE", "Registrar")
.build();
queriesBuilder.put(getTableName(TOTAL_DOMAINS), totalDomainsQuery);
DateTimeFormatter timestampFormatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
String totalNameserversQuery =
SqlTemplate.create(getQueryFromFile("total_nameservers.sql"))
.put("PROJECT_ID", projectId)
.put("DATASTORE_EXPORT_DATA_SET", DATASTORE_EXPORT_DATA_SET)
.put("HOSTRESOURCE_TABLE", "HostResource")
.put("DOMAINBASE_TABLE", "DomainBase")
.put("REGISTRAR_TABLE", "Registrar")
.put("LATEST_REPORT_TIME", timestampFormatter.print(latestReportTime))
.build();
queriesBuilder.put(getTableName(TOTAL_NAMESERVERS), totalNameserversQuery);
String transactionCountsQuery =
SqlTemplate.create(getQueryFromFile("transaction_counts.sql"))
.put("PROJECT_ID", projectId)
.put("DATASTORE_EXPORT_DATA_SET", DATASTORE_EXPORT_DATA_SET)
.put("REGISTRAR_TABLE", "Registrar")
.put("HISTORYENTRY_TABLE", "HistoryEntry")
.put("EARLIEST_REPORT_TIME", timestampFormatter.print(earliestReportTime))
.put("LATEST_REPORT_TIME", timestampFormatter.print(latestReportTime))
.put("CLIENT_ID", "clientId")
.put("TRANSFER_SUCCESS_FIELD", "TRANSFER_GAINING_SUCCESSFUL")
.put("TRANSFER_NACKED_FIELD", "TRANSFER_GAINING_NACKED")
.put("DEFAULT_FIELD", "field")
.build();
queriesBuilder.put(getTableName(TRANSACTION_COUNTS), transactionCountsQuery);
String transactionTransferLosingQuery =
SqlTemplate.create(getQueryFromFile("transaction_counts.sql"))
.put("PROJECT_ID", projectId)
.put("DATASTORE_EXPORT_DATA_SET", DATASTORE_EXPORT_DATA_SET)
.put("REGISTRAR_TABLE", "Registrar")
.put("HISTORYENTRY_TABLE", "HistoryEntry")
.put("EARLIEST_REPORT_TIME", timestampFormatter.print(earliestReportTime))
.put("LATEST_REPORT_TIME", timestampFormatter.print(latestReportTime))
.put("CLIENT_ID", "otherClientId")
.put("TRANSFER_SUCCESS_FIELD", "TRANSFER_LOSING_SUCCESSFUL")
.put("TRANSFER_NACKED_FIELD", "TRANSFER_LOSING_NACKED")
.put("DEFAULT_FIELD", "NULL")
.build();
queriesBuilder.put(getTableName(TRANSACTION_TRANSFER_LOSING), transactionTransferLosingQuery);
// App Engine log table suffixes use YYYYMMDD format
DateTimeFormatter logTableFormatter = DateTimeFormat.forPattern("yyyyMMdd");
String attemptedAddsQuery =
SqlTemplate.create(getQueryFromFile("attempted_adds.sql"))
.put("PROJECT_ID", projectId)
.put("DATASTORE_EXPORT_DATA_SET", DATASTORE_EXPORT_DATA_SET)
.put("REGISTRAR_TABLE", "Registrar")
.put("APPENGINE_LOGS_DATA_SET", "appengine_logs")
.put("REQUEST_TABLE", "appengine_googleapis_com_request_log_")
.put("FIRST_DAY_OF_MONTH", logTableFormatter.print(earliestReportTime))
.put("LAST_DAY_OF_MONTH", logTableFormatter.print(latestReportTime))
// All metadata logs for reporting come from google.registry.flows.FlowReporter.
.put(
"METADATA_LOG_PREFIX",
"google.registry.flows.FlowReporter recordToLogs: FLOW-LOG-SIGNATURE-METADATA")
.build();
queriesBuilder.put(getTableName(ATTEMPTED_ADDS), attemptedAddsQuery);
String aggregateQuery =
SqlTemplate.create(getQueryFromFile("transactions_report_aggregation.sql"))
.put("PROJECT_ID", projectId)
.put("ICANN_REPORTING_DATA_SET", ICANN_REPORTING_DATA_SET)
.put("REGISTRAR_IANA_ID_TABLE", getTableName(REGISTRAR_IANA_ID))
.put("TOTAL_DOMAINS_TABLE", getTableName(TOTAL_DOMAINS))
.put("TOTAL_NAMESERVERS_TABLE", getTableName(TOTAL_NAMESERVERS))
.put("TRANSACTION_COUNTS_TABLE", getTableName(TRANSACTION_COUNTS))
.put("TRANSACTION_TRANSFER_LOSING_TABLE", getTableName(TRANSACTION_TRANSFER_LOSING))
.put("ATTEMPTED_ADDS_TABLE", getTableName(ATTEMPTED_ADDS))
.build();
queriesBuilder.put(getTableName(TRANSACTIONS_REPORT_AGGREGATION), aggregateQuery);
return queriesBuilder.build();
}
/** Returns the table name of the query, suffixed with the yearMonth in _YYYYMM format. */
private String getTableName(String queryName) {
return String.format("%s_%s", queryName, yearMonth.replace("-", ""));
}
/** Returns {@link String} for file in {@code reporting/sql/} directory. */
private static String getQueryFromFile(String filename) throws IOException {
return ResourceUtils.readResourceUtf8(getUrl(filename));
}
private static URL getUrl(String filename) {
return Resources.getResource(
ActivityReportingQueryBuilder.class, "sql/" + filename);
}
}

View file

@ -27,7 +27,7 @@ SELECT
SUM(IF(metricName = 'web-whois-queries', count, 0)) AS web_whois_queries,
-- We don't support searchable WHOIS.
0 AS searchable_whois_queries,
-- DNS queries for UDP/TCP are all assumed to be recevied/responded.
-- DNS queries for UDP/TCP are all assumed to be received/responded.
SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_received,
SUM(IF(metricName = 'dns-udp-queries', count, 0)) AS dns_udp_queries_responded,
SUM(IF(metricName = 'dns-tcp-queries', count, 0)) AS dns_tcp_queries_received,
@ -66,7 +66,7 @@ SELECT
-- towards a given TLD.
FROM (
SELECT tldStr as tld
FROM `%PROJECT_ID%.%LATEST_DATASTORE_EXPORT%.%REGISTRY_TABLE%`
FROM `%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRY_TABLE%`
WHERE tldType = 'REAL'
) as RealTlds
CROSS JOIN(
@ -97,4 +97,3 @@ CROSS JOIN(
WHERE RealTlds.tld = TldMetrics.tld OR TldMetrics.tld IS NULL
GROUP BY tld
ORDER BY tld

View file

@ -0,0 +1,73 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Determine the number of attempted adds each registrar made.
-- Since the specification requests all 'attempted' adds, we regex the
-- monthly App Engine logs, searching for all create commands and associating
-- them with their corresponding registrars.
-- Example log generated by FlowReporter in App Engine logs:
--google.registry.flows.FlowReporter
-- recordToLogs: FLOW-LOG-SIGNATURE-METADATA:
--{"serverTrid":"oNwL2J2eRya7bh7c9oHIzg==-2360a","clientId":"ipmirror"
-- ,"commandType":"hello", "resourceType":"","flowClassName":"HelloFlow"
-- ,"targetId":"","targetIds":[],"tld":"",
-- "tlds":[],"icannActivityReportField":""}
-- This outer select just converts the registrar's clientId to their name.
SELECT
tld,
registrar_table.registrarName AS registrar_name,
'ATTEMPTED_ADDS' AS metricName,
count AS metricValue
FROM (
SELECT
JSON_EXTRACT_SCALAR(json, '$.tld') AS tld,
JSON_EXTRACT_SCALAR(json, '$.clientId') AS clientId,
COUNT(json) AS count
FROM (
-- Extract JSON metadata package from monthly logs
SELECT
REGEXP_EXTRACT(logMessages, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM (
SELECT
protoPayload.resource AS requestPath,
ARRAY(
SELECT logMessage
FROM UNNEST(protoPayload.line)) AS logMessage
FROM
`%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%*`
WHERE _TABLE_SUFFIX
BETWEEN '%FIRST_DAY_OF_MONTH%'
AND '%LAST_DAY_OF_MONTH%')
JOIN UNNEST(logMessage) AS logMessages
-- Look for metadata logs from epp and registrar console requests
WHERE requestPath IN ('/_dr/epp', '/_dr/epptool', '/registrar-xhr')
AND STARTS_WITH(logMessages, "%METADATA_LOG_PREFIX%")
-- Look for domain creates
AND REGEXP_CONTAINS(
logMessages, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(
logMessages, r'"prober-[a-z]{2}-((any)|(canary))"') )
GROUP BY tld, clientId ) AS logs_table
JOIN
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRAR_TABLE%`
AS registrar_table
ON logs_table.clientId = registrar_table.__key__.name
ORDER BY tld, registrar_name

View file

@ -27,4 +27,4 @@ SELECT
FROM
`%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%*`
WHERE
_TABLE_SUFFIX BETWEEN '%START_OF_MONTH%' AND '%END_OF_MONTH%'
_TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%'

View file

@ -0,0 +1,30 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Gather a list of all tld-registrar pairs, with their IANA IDs.
-- This establishes which registrars will appear in the reports.
SELECT
allowed_tlds AS tld,
registrarName AS registrar_name,
ianaIdentifier AS iana_id
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRAR_TABLE%`,
UNNEST(allowedTlds) as allowed_tlds
WHERE (type = 'REAL' OR type = 'INTERNAL')
-- Filter out prober data
AND NOT ENDS_WITH(allowed_tlds, "test")
ORDER BY tld, registrarName

View file

@ -0,0 +1,38 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Determine the number of domains each registrar sponsors per tld.
-- This is just the number of fullyQualifiedDomainNames under each
-- tld-registrar pair.
SELECT
tld,
registrarName as registrar_name,
'TOTAL_DOMAINS' as metricName,
COUNT(fullyQualifiedDomainName) as metricValue
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%DOMAINBASE_TABLE%`
AS domain_table
JOIN
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRAR_TABLE%`
AS registrar_table
ON
currentSponsorClientId = registrar_table.__key__.name
WHERE
domain_table._d = "DomainResource"
AND (registrar_table.type = "REAL" OR registrar_table.type = "INTERNAL")
GROUP BY tld, registrarName
ORDER BY tld, registrarName

View file

@ -0,0 +1,56 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Determine the number of referenced nameservers for a registrar's domains.
-- We count the number of unique hosts under each tld-registrar combo by
-- collecting all domains' listed hosts that were still valid at the
-- end of the reporting month.
SELECT
tld,
registrarName AS registrar_name,
'TOTAL_NAMESERVERS' AS metricName,
COUNT(fullyQualifiedHostName) AS metricValue
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%HOSTRESOURCE_TABLE%` AS host_table
JOIN (
SELECT
__key__.name AS clientId,
registrarName
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRAR_TABLE%`
WHERE
type = 'REAL'
OR type = 'INTERNAL') AS registrar_table
ON
currentSponsorClientId = registrar_table.clientId
JOIN (
SELECT
tld,
hosts.name AS referencedHostName
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%DOMAINBASE_TABLE%`,
UNNEST(nsHosts) AS hosts
WHERE _d = 'DomainResource'
AND creationTime <= TIMESTAMP("%LATEST_REPORT_TIME%")
AND deletionTime > TIMESTAMP("%LATEST_REPORT_TIME%") ) AS domain_table
ON
host_table.__key__.name = domain_table.referencedHostName
WHERE creationTime <= TIMESTAMP("%LATEST_REPORT_TIME%")
AND deletionTime > TIMESTAMP("%LATEST_REPORT_TIME%")
GROUP BY tld, registrarName
ORDER BY tld, registrarName

View file

@ -0,0 +1,78 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Counts the number of mutating transactions each registrar made.
-- We populate the fields through explicit logging of
-- DomainTransactionRecords, which contain all necessary information for
-- reporting (such as reporting time, report field, report amount, etc.
-- A special note on transfers: we only record 'TRANSFER_SUCCESSFUL' or
-- 'TRANSFER_NACKED', and we can infer the gaining and losing parties
-- from the enclosing HistoryEntry's clientId and otherClientId
-- respectively. This query templates the client ID, field for transfer
-- success, field for transfer nacks and default field. This allows us to
-- create one query for TRANSFER_GAINING and the other report fields,
-- and one query for TRANSFER_LOSING fields from the same template.
-- This outer select just converts the registrar's clientId to their name.
SELECT
tld,
registrar_table.registrarName AS registrar_name,
metricName,
metricValue
FROM (
SELECT
tld,
clientId,
CASE
WHEN field = 'TRANSFER_SUCCESSFUL' THEN '%TRANSFER_SUCCESS_FIELD%'
WHEN field = 'TRANSFER_NACKED' THEN '%TRANSFER_NACKED_FIELD%'
ELSE %DEFAULT_FIELD%
END AS metricName,
SUM(amount) AS metricValue
FROM (
SELECT
entries.%CLIENT_ID% AS clientId,
entries.domainTransactionRecords.tld[SAFE_OFFSET(index)] AS tld,
entries.domainTransactionRecords.reportingTime[SAFE_OFFSET(index)]
AS reportingTime,
entries.domainTransactionRecords.reportField[SAFE_OFFSET(index)]
AS field,
entries.domainTransactionRecords.reportAmount[SAFE_OFFSET(index)]
AS amount
FROM
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%HISTORYENTRY_TABLE%`
AS entries,
-- This allows us to 'loop' through the arrays in parallel by index
UNNEST(GENERATE_ARRAY(0, ARRAY_LENGTH(
entries.domainTransactionRecords.tld) - 1)) AS index
-- Ignore null entries
WHERE entries.domainTransactionRecords IS NOT NULL )
-- Only look at this month's data
WHERE reportingTime
BETWEEN TIMESTAMP('%EARLIEST_REPORT_TIME%')
AND TIMESTAMP('%LATEST_REPORT_TIME%')
-- Ignore prober data
AND NOT ENDS_WITH(tld, "test")
GROUP BY
tld,
clientId,
field ) AS counts_table
JOIN
`%PROJECT_ID%.%DATASTORE_EXPORT_DATA_SET%.%REGISTRAR_TABLE%`
AS registrar_table
ON
counts_table.clientId = registrar_table.__key__.name

View file

@ -0,0 +1,92 @@
#standardSQL
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Construct the transaction reports' rows from the intermediary data views.
-- This query pulls from all intermediary tables to create the activity
-- report csv, via a table transpose and sum over all activity report fields.
SELECT
registrars.tld as tld,
registrars.registrar_name as registrar_name,
registrars.iana_id as iana_id,
SUM(IF(metrics.metricName = 'TOTAL_DOMAINS', metrics.metricValue, 0)) AS total_domains,
SUM(IF(metrics.metricName = 'TOTAL_NAMESERVERS', metrics.metricValue, 0)) AS total_nameservers,
SUM(IF(metrics.metricName = 'NET_ADDS_1_YR', metrics.metricValue, 0)) AS net_adds_1_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_2_YR', metrics.metricValue, 0)) AS net_adds_2_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_3_YR', metrics.metricValue, 0)) AS net_adds_3_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_4_YR', metrics.metricValue, 0)) AS net_adds_4_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_5_YR', metrics.metricValue, 0)) AS net_adds_5_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_6_YR', metrics.metricValue, 0)) AS net_adds_6_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_7_YR', metrics.metricValue, 0)) AS net_adds_7_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_8_YR', metrics.metricValue, 0)) AS net_adds_8_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_9_YR', metrics.metricValue, 0)) AS net_adds_9_yr,
SUM(IF(metrics.metricName = 'NET_ADDS_10_Yr', metrics.metricValue, 0)) AS net_adds_10_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_1_YR', metrics.metricValue, 0)) AS net_renews_1_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_2_YR', metrics.metricValue, 0)) AS net_renews_2_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_3_YR', metrics.metricValue, 0)) AS net_renews_3_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_4_YR', metrics.metricValue, 0)) AS net_renews_4_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_5_YR', metrics.metricValue, 0)) AS net_renews_5_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_6_YR', metrics.metricValue, 0)) AS net_renews_6_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_7_YR', metrics.metricValue, 0)) AS net_renews_7_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_8_YR', metrics.metricValue, 0)) AS net_renews_8_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_9_YR', metrics.metricValue, 0)) AS net_renews_9_yr,
SUM(IF(metrics.metricName = 'NET_RENEWS_10_YR', metrics.metricValue, 0)) AS net_renews_10_yr,
SUM(IF(metrics.metricName = 'TRANSFER_GAINING_SUCCESSFUL', metrics.metricValue, 0)) AS transfer_gaining_successful,
SUM(IF(metrics.metricName = 'TRANSFER_GAINING_NACKED', metrics.metricValue, 0)) AS transfer_gaining_nacked,
SUM(IF(metrics.metricName = 'TRANSFER_LOSING_SUCCESSFUL', metrics.metricValue, 0)) AS transfer_losing_successful,
SUM(IF(metrics.metricName = 'TRANSFER_LOSING_NACKED', metrics.metricValue, 0)) AS transfer_losing_nacked,
-- We don't interact with transfer disputes
0 AS transfer_disputed_won,
0 AS transfer_disputed_lost,
0 AS transfer_disputed_nodecision,
SUM(IF(metrics.metricName = 'DELETED_DOMAINS_GRACE', metrics.metricValue, 0)) AS deleted_domains_grace,
SUM(IF(metrics.metricName = 'DELETED_DOMAINS_NOGRACE', metrics.metricValue, 0)) AS deleted_domains_nograce,
SUM(IF(metrics.metricName = 'RESTORED_DOMAINS', metrics.metricValue, 0)) AS restored_domains,
-- We don't require restore reports
0 AS restored_noreport,
-- We don't enforce AGP limits right now
0 AS agp_exemption_requests,
0 AS agp_exemptions_granted,
0 AS agp_exempted_domains,
SUM(IF(metrics.metricName = 'ATTEMPTED_ADDS', metrics.metricValue, 0)) AS attempted_adds
FROM (
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%REGISTRAR_IANA_ID_TABLE%`) AS registrars
-- We LEFT JOIN to produce reports even if the registrar made no transactions
LEFT OUTER JOIN (
-- Gather all intermediary data views
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%TOTAL_DOMAINS_TABLE%`
UNION ALL
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%TOTAL_NAMESERVERS_TABLE%`
UNION ALL
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%TRANSACTION_COUNTS_TABLE%`
UNION ALL
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%TRANSACTION_TRANSFER_LOSING_TABLE%`
UNION ALL
SELECT *
FROM `%PROJECT_ID%.%ICANN_REPORTING_DATA_SET%.%ATTEMPTED_ADDS_TABLE%` ) AS metrics
-- Join on tld and registrar name
ON registrars.tld = metrics.tld
AND registrars.registrar_name = metrics.registrar_name
GROUP BY
tld, registrar_name, iana_id
ORDER BY
tld, registrar_name