Make FlowReporter log tld and various other fields

As part of b/36599833, this makes FlowReporter log the tld(s) of every domain
flow it executes, so we can provide ICANN reporting totals on a per-TLD basis.

It also adds several other fields that we're computing anyway and which seem
useful, particularly for debugging any issues we see in production with the data
that we're attempting to record for ICANN reporting.  The full set of fields is:

  - commandType (e.g. "create", "info", "transfer")
  - resourceType* (e.g. "domain", "contact", "host")
  - flowClassName (e.g. "ContactCreateFlow", "DomainRestoreRequestFlow")
  - targetId* (e.g. "ns1.foo.com", "bar.org", "contact-1234")
  - targetIds* - plural of the above, for multi-resource checks
  - tld** (e.g. "com", "co.uk") - extracted from targetId, lowercased
  - tlds** - plural of the above, deduplicated, for multi-resource checks

* = only non-empty for resource flows (not e.g. login, logout, poll)
** = only non-empty for domain flows

Note that TLD extraction is deliberately very lenient to avoid the complexity
overhead of double-validation of the domain names in the common case.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=154070794
This commit is contained in:
nickfelt 2017-04-24 11:08:29 -07:00 committed by Ben McIlwain
parent c596d23523
commit f296b225af
21 changed files with 215 additions and 17 deletions

View file

@ -16,12 +16,18 @@ package google.registry.flows;
import static com.google.common.io.BaseEncoding.base64;
import static google.registry.xml.XmlTransformer.prettyPrint;
import static java.util.Collections.EMPTY_LIST;
import com.google.common.base.Ascii;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import google.registry.flows.FlowModule.ClientId;
import google.registry.flows.FlowModule.InputXml;
import google.registry.flows.annotations.ReportingSpec;
import google.registry.model.eppcommon.Trid;
import google.registry.model.eppinput.EppInput;
import google.registry.util.FormattingLogger;
import javax.inject.Inject;
import org.json.simple.JSONValue;
@ -48,6 +54,7 @@ public class FlowReporter {
// Values supplied by dependency injection; the reporter reads them when writing its log entries.
// Transaction identifier; its server transaction id is written to the metadata log entry.
@Inject Trid trid;
// Client identifier, written to the metadata log entry under the "clientId" key.
@Inject @ClientId String clientId;
// Bytes bound to @InputXml -- presumably the raw incoming EPP XML; confirm against FlowModule.
@Inject @InputXml byte[] inputXmlBytes;
// EPP input object, queried for the command type, resource type, and target id(s) that get logged.
@Inject EppInput eppInput;
// Class of the flow being reported on; supplies the logged class name and activity report field.
@Inject Class<? extends Flow> flowClass;
// Injectable constructor with an empty body; all state arrives through the fields above.
@Inject FlowReporter() {}
@ -64,13 +71,57 @@ public class FlowReporter {
// Explicitly log flow metadata separately from the EPP XML itself so that it stays compact
// enough to be sure to fit in a single log entry (the XML part in rare cases could be long
// enough to overflow into multiple log entries, breaking routine parsing of the JSON format).
String resourceType = eppInput.getResourceType().or("");
boolean isDomain = "domain".equals(resourceType);
String singleTargetId = eppInput.getSingleTargetId().or("");
ImmutableList<String> targetIds = eppInput.getTargetIds();
logger.infofmt(
"%s: %s",
METADATA_LOG_SIGNATURE,
JSONValue.toJSONString(ImmutableMap.<String, Object>of(
"trid", trid.getServerTransactionId(),
"clientId", clientId,
"icannActivityReportField", extractActivityReportField(flowClass))));
JSONValue.toJSONString(new ImmutableMap.Builder<String, Object>()
.put("serverTrid", trid.getServerTransactionId())
.put("clientId", clientId)
.put("commandType", eppInput.getCommandType())
.put("resourceType", resourceType)
.put("flowClassName", flowClass.getSimpleName())
.put("targetId", singleTargetId)
.put("targetIds", targetIds)
.put("tld", isDomain ? extractTld(singleTargetId).or("") : "")
.put("tlds", isDomain ? extractTlds(targetIds).asList() : EMPTY_LIST)
.put("icannActivityReportField", extractActivityReportField(flowClass))
.build()));
}
/**
 * Makes a best-effort guess at the TLD of a domain name that is assumed to be second-level.
 *
 * <p>No validation is performed: whatever follows the first period is treated as the TLD, and its
 * ASCII letters are lowercased. This leniency is deliberate, because the method is called on
 * not-yet-validated EPP XML where nearly anything could be supplied, and the result only needs to
 * be roughly correct, so validating a second time here would be wasted effort.
 *
 * @param domainName the (possibly malformed) domain name to inspect
 * @return the lowercased text after the first period, or absent when the name contains no period
 */
private static final Optional<String> extractTld(String domainName) {
  int firstDot = domainName.indexOf('.');
  if (firstDot < 0) {
    // No period anywhere in the name, so there is nothing to treat as a TLD.
    return Optional.absent();
  }
  String afterFirstDot = domainName.substring(firstDot + 1);
  return Optional.of(Ascii.toLowerCase(afterFirstDot));
}
/**
 * Applies {@link #extractTld} to every given domain name and collects the distinct TLDs found.
 *
 * <p>Names for which no TLD could be detected contribute nothing to the result.
 *
 * @param domainNames the domain names to inspect
 * @return the deduplicated set of extracted TLDs
 */
private static final ImmutableSet<String> extractTlds(Iterable<String> domainNames) {
  ImmutableSet.Builder<String> uniqueTlds = ImmutableSet.builder();
  for (String name : domainNames) {
    // asSet() is empty for an absent result, so names without a detectable TLD are skipped.
    uniqueTlds.addAll(extractTld(name).asSet());
  }
  return uniqueTlds.build();
}
/**