mirror of
https://github.com/google/nomulus.git
synced 2025-05-15 00:47:11 +02:00
Update input/output of Spec11 pipeline to final format
This changes the BigQuery input to the fields we ultimately want (fqdn, registrarName, registrarEmailAddress) and the output to a structured POJO holding the results from the API.

This POJO is then converted to its final text output, i.e. a map from registrar e-mail to the list of threat-detected subdomains:

{"registrarEmail": "c@fake.com", "threats": [{"url": "a.com", "threatType": "MALWARE"}]}
{"registrarEmail": "d@fake.com", "threats": [{"url": "x.com", "threatType": "MALWARE"}, {"url": "y.com", "threatType": "MALWARE"}]}

This gives us all the data we want in a JSON-structured format, to be acted upon downstream by the to-be-constructed PublishSpec11ReportAction. Ideally, we would send an e-mail directly from the Beam pipeline, but this is only possible through third-party providers (as opposed to App Engine itself).

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=209416880
This commit is contained in:
parent
7dcadaecf6
commit
f7bc17fbe8
11 changed files with 393 additions and 130 deletions
|
@ -22,9 +22,6 @@ import com.google.common.annotations.VisibleForTesting;
|
|||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.flogger.FluentLogger;
|
||||
import java.io.Serializable;
|
||||
import java.time.Instant;
|
||||
import java.time.ZoneId;
|
||||
import java.time.ZonedDateTime;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord;
|
||||
|
||||
|
@ -42,14 +39,14 @@ public abstract class Subdomain implements Serializable {
|
|||
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
|
||||
|
||||
private static final ImmutableList<String> FIELD_NAMES =
|
||||
ImmutableList.of("fullyQualifiedDomainName", "statuses", "creationTime");
|
||||
ImmutableList.of("fullyQualifiedDomainName", "registrarName", "registrarEmailAddress");
|
||||
|
||||
/** Returns the fully qualified domain name. */
|
||||
abstract String fullyQualifiedDomainName();
|
||||
/** Returns the UTC DateTime this domain was created. */
|
||||
abstract ZonedDateTime creationTime();
|
||||
/** Returns the space-delimited list of statuses on this domain. */
|
||||
abstract String statuses();
|
||||
/** Returns the name of the associated registrar for this domain. */
|
||||
abstract String registrarName();
|
||||
/** Returns the email address of the registrar associated with this domain. */
|
||||
abstract String registrarEmailAddress();
|
||||
|
||||
/**
|
||||
* Constructs a {@link Subdomain} from an Apache Avro {@code SchemaAndRecord}.
|
||||
|
@ -63,10 +60,8 @@ public abstract class Subdomain implements Serializable {
|
|||
GenericRecord record = schemaAndRecord.getRecord();
|
||||
return create(
|
||||
extractField(record, "fullyQualifiedDomainName"),
|
||||
// Bigquery provides UNIX timestamps with microsecond precision.
|
||||
Instant.ofEpochMilli(Long.parseLong(extractField(record, "creationTime")) / 1000)
|
||||
.atZone(ZoneId.of("UTC")),
|
||||
extractField(record, "statuses"));
|
||||
extractField(record, "registrarName"),
|
||||
extractField(record, "registrarEmailAddress"));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -77,8 +72,8 @@ public abstract class Subdomain implements Serializable {
|
|||
*/
|
||||
@VisibleForTesting
|
||||
static Subdomain create(
|
||||
String fullyQualifiedDomainName, ZonedDateTime creationTime, String statuses) {
|
||||
return new AutoValue_Subdomain(fullyQualifiedDomainName, creationTime, statuses);
|
||||
String fullyQualifiedDomainName, String registrarName, String registrarEmailAddress) {
|
||||
return new AutoValue_Subdomain(fullyQualifiedDomainName, registrarName, registrarEmailAddress);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue