Rename Spec11Pipeline's Subdomain -> DomainNameInfo (#1318)

* Rename Spec11Pipeline's Subdomain -> DomainNameInfo

"Subdomain" never made any sense as a class name because these are all
second-level domain names, along with a little bit of metadata such as some
registrar info. "DomainNameInfo" is a better fit.
This commit is contained in:
Ben McIlwain 2021-09-14 14:07:26 -04:00 committed by GitHub
parent d282c35c64
commit fd4a94b9e7
7 changed files with 100 additions and 91 deletions

View file

@ -25,7 +25,7 @@ import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord; import org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord;
/** /**
* A POJO representing a single subdomain, parsed from a {@code SchemaAndRecord}. * A POJO representing a domain name and associated info, parsed from a {@code SchemaAndRecord}.
* *
* <p>This is a trivially serializable class that allows Beam to transform the results of a Bigquery * <p>This is a trivially serializable class that allows Beam to transform the results of a Bigquery
* query into a standard Java representation, giving us the type guarantees and ease of manipulation * query into a standard Java representation, giving us the type guarantees and ease of manipulation
@ -33,28 +33,31 @@ import org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord;
* function. * function.
*/ */
@AutoValue @AutoValue
public abstract class Subdomain implements Serializable { public abstract class DomainNameInfo implements Serializable {
private static final ImmutableList<String> FIELD_NAMES = private static final ImmutableList<String> FIELD_NAMES =
ImmutableList.of("domainName", "domainRepoId", "registrarId", "registrarEmailAddress"); ImmutableList.of("domainName", "domainRepoId", "registrarId", "registrarEmailAddress");
/** Returns the fully qualified domain name. */ /** Returns the fully qualified domain name. */
abstract String domainName(); abstract String domainName();
/** Returns the domain repo ID (the primary key of the domain table). */ /** Returns the domain repo ID (the primary key of the domain table). */
abstract String domainRepoId(); abstract String domainRepoId();
/** Returns the registrar ID of the associated registrar for this domain. */ /** Returns the registrar ID of the associated registrar for this domain. */
abstract String registrarId(); abstract String registrarId();
/** Returns the email address of the registrar associated with this domain. */ /** Returns the email address of the registrar associated with this domain. */
abstract String registrarEmailAddress(); abstract String registrarEmailAddress();
/** /**
* Constructs a {@link Subdomain} from an Apache Avro {@code SchemaAndRecord}. * Constructs a {@link DomainNameInfo} from an Apache Avro {@code SchemaAndRecord}.
* *
* @see <a * @see <a
* href=http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericData.Record.html> * href=http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericData.Record.html>
* Apache AVRO GenericRecord</a> * Apache AVRO GenericRecord</a>
*/ */
static Subdomain parseFromRecord(SchemaAndRecord schemaAndRecord) { static DomainNameInfo parseFromRecord(SchemaAndRecord schemaAndRecord) {
checkFieldsNotNull(FIELD_NAMES, schemaAndRecord); checkFieldsNotNull(FIELD_NAMES, schemaAndRecord);
GenericRecord record = schemaAndRecord.getRecord(); GenericRecord record = schemaAndRecord.getRecord();
return create( return create(
@ -65,18 +68,15 @@ public abstract class Subdomain implements Serializable {
} }
/** /**
* Creates a concrete {@link Subdomain}. * Creates a concrete {@link DomainNameInfo}.
* *
* <p>This should only be used outside this class for testing; instances of {@link Subdomain} * <p>This should only be used outside this class for testing; instances of {@link DomainNameInfo}
* should otherwise come from {@link #parseFromRecord}. * should otherwise come from {@link #parseFromRecord}.
*/ */
@VisibleForTesting @VisibleForTesting
static Subdomain create( static DomainNameInfo create(
String domainName, String domainName, String domainRepoId, String registrarId, String registrarEmailAddress) {
String domainRepoId, return new AutoValue_DomainNameInfo(
String registrarId,
String registrarEmailAddress) {
return new AutoValue_Subdomain(
domainName, domainRepoId, registrarId, registrarEmailAddress); domainName, domainRepoId, registrarId, registrarEmailAddress);
} }
} }

View file

@ -55,13 +55,14 @@ public class SafeBrowsingTransforms {
"https://safebrowsing.googleapis.com/v4/threatMatches:find"; "https://safebrowsing.googleapis.com/v4/threatMatches:find";
/** /**
* {@link DoFn} mapping a {@link Subdomain} to its evaluation report from SafeBrowsing. * {@link DoFn} mapping a {@link DomainNameInfo} to its evaluation report from SafeBrowsing.
* *
* <p>Refer to the Lookup API documentation for the request/response format and other details. * <p>Refer to the Lookup API documentation for the request/response format and other details.
* *
* @see <a href=https://developers.google.com/safe-browsing/v4/lookup-api>Lookup API</a> * @see <a href=https://developers.google.com/safe-browsing/v4/lookup-api>Lookup API</a>
*/ */
static class EvaluateSafeBrowsingFn extends DoFn<Subdomain, KV<Subdomain, ThreatMatch>> { static class EvaluateSafeBrowsingFn
extends DoFn<DomainNameInfo, KV<DomainNameInfo, ThreatMatch>> {
/** /**
* Max number of urls we can check in a single query. * Max number of urls we can check in a single query.
@ -74,10 +75,11 @@ public class SafeBrowsingTransforms {
private final String apiKey; private final String apiKey;
/** /**
* Maps a subdomain's {@code fullyQualifiedDomainName} to its corresponding {@link Subdomain} to * Maps a domain name's {@code fullyQualifiedDomainName} to its corresponding {@link
* facilitate batching SafeBrowsing API requests. * DomainNameInfo} to facilitate batching SafeBrowsing API requests.
*/ */
private final Map<String, Subdomain> subdomainBuffer = new LinkedHashMap<>(BATCH_SIZE); private final Map<String, DomainNameInfo> domainNameInfoBuffer =
new LinkedHashMap<>(BATCH_SIZE);
/** /**
* Provides the HTTP client we use to interact with the SafeBrowsing API. * Provides the HTTP client we use to interact with the SafeBrowsing API.
@ -119,37 +121,38 @@ public class SafeBrowsingTransforms {
closeableHttpClientSupplier = clientSupplier; closeableHttpClientSupplier = clientSupplier;
} }
/** Evaluates any buffered {@link Subdomain} objects upon completing the bundle. */ /** Evaluates any buffered {@link DomainNameInfo} objects upon completing the bundle. */
@FinishBundle @FinishBundle
public void finishBundle(FinishBundleContext context) { public void finishBundle(FinishBundleContext context) {
if (!subdomainBuffer.isEmpty()) { if (!domainNameInfoBuffer.isEmpty()) {
ImmutableSet<KV<Subdomain, ThreatMatch>> results = evaluateAndFlush(); ImmutableSet<KV<DomainNameInfo, ThreatMatch>> results = evaluateAndFlush();
results.forEach((kv) -> context.output(kv, Instant.now(), GlobalWindow.INSTANCE)); results.forEach((kv) -> context.output(kv, Instant.now(), GlobalWindow.INSTANCE));
} }
} }
/** /**
* Buffers {@link Subdomain} objects until we reach the batch size, then bulk-evaluate the URLs * Buffers {@link DomainNameInfo} objects until we reach the batch size, then bulk-evaluate the
* with the SafeBrowsing API. * URLs with the SafeBrowsing API.
*/ */
@ProcessElement @ProcessElement
public void processElement(ProcessContext context) { public void processElement(ProcessContext context) {
Subdomain subdomain = context.element(); DomainNameInfo domainNameInfo = context.element();
subdomainBuffer.put(subdomain.domainName(), subdomain); domainNameInfoBuffer.put(domainNameInfo.domainName(), domainNameInfo);
if (subdomainBuffer.size() >= BATCH_SIZE) { if (domainNameInfoBuffer.size() >= BATCH_SIZE) {
ImmutableSet<KV<Subdomain, ThreatMatch>> results = evaluateAndFlush(); ImmutableSet<KV<DomainNameInfo, ThreatMatch>> results = evaluateAndFlush();
results.forEach(context::output); results.forEach(context::output);
} }
} }
/** /**
* Evaluates all {@link Subdomain} objects in the buffer and returns a set of key-value pairs * Evaluates all {@link DomainNameInfo} objects in the buffer and returns a set of key-value
* from {@link Subdomain} to its SafeBrowsing report. * pairs from {@link DomainNameInfo} to its SafeBrowsing report.
* *
* <p>If a {@link Subdomain} is safe according to the API, it will not emit a report. * <p>If a {@link DomainNameInfo} is safe according to the API, it will not emit a report.
*/ */
private ImmutableSet<KV<Subdomain, ThreatMatch>> evaluateAndFlush() { private ImmutableSet<KV<DomainNameInfo, ThreatMatch>> evaluateAndFlush() {
ImmutableSet.Builder<KV<Subdomain, ThreatMatch>> resultBuilder = new ImmutableSet.Builder<>(); ImmutableSet.Builder<KV<DomainNameInfo, ThreatMatch>> resultBuilder =
new ImmutableSet.Builder<>();
try { try {
URIBuilder uriBuilder = new URIBuilder(SAFE_BROWSING_URL); URIBuilder uriBuilder = new URIBuilder(SAFE_BROWSING_URL);
// Add the API key param // Add the API key param
@ -174,7 +177,7 @@ public class SafeBrowsingTransforms {
throw new RuntimeException("Caught parsing exception, failing pipeline.", e); throw new RuntimeException("Caught parsing exception, failing pipeline.", e);
} finally { } finally {
// Flush the buffer // Flush the buffer
subdomainBuffer.clear(); domainNameInfoBuffer.clear();
} }
return resultBuilder.build(); return resultBuilder.build();
} }
@ -183,7 +186,7 @@ public class SafeBrowsingTransforms {
private JSONObject createRequestBody() throws JSONException { private JSONObject createRequestBody() throws JSONException {
// Accumulate all domain names to evaluate. // Accumulate all domain names to evaluate.
JSONArray threatArray = new JSONArray(); JSONArray threatArray = new JSONArray();
for (String fullyQualifiedDomainName : subdomainBuffer.keySet()) { for (String fullyQualifiedDomainName : domainNameInfoBuffer.keySet()) {
threatArray.put(new JSONObject().put("url", fullyQualifiedDomainName)); threatArray.put(new JSONObject().put("url", fullyQualifiedDomainName));
} }
// Construct the JSON request body // Construct the JSON request body
@ -211,7 +214,7 @@ public class SafeBrowsingTransforms {
*/ */
private void processResponse( private void processResponse(
CloseableHttpResponse response, CloseableHttpResponse response,
ImmutableSet.Builder<KV<Subdomain, ThreatMatch>> resultBuilder) ImmutableSet.Builder<KV<DomainNameInfo, ThreatMatch>> resultBuilder)
throws JSONException, IOException { throws JSONException, IOException {
int statusCode = response.getStatusLine().getStatusCode(); int statusCode = response.getStatusLine().getStatusCode();
if (statusCode != SC_OK) { if (statusCode != SC_OK) {
@ -226,16 +229,17 @@ public class SafeBrowsingTransforms {
if (responseBody.length() == 0) { if (responseBody.length() == 0) {
logger.atInfo().log("Response was empty, no threats detected"); logger.atInfo().log("Response was empty, no threats detected");
} else { } else {
// Emit all Subdomains with their API results. // Emit all DomainNameInfos with their API results.
JSONArray threatMatches = responseBody.getJSONArray("matches"); JSONArray threatMatches = responseBody.getJSONArray("matches");
for (int i = 0; i < threatMatches.length(); i++) { for (int i = 0; i < threatMatches.length(); i++) {
JSONObject match = threatMatches.getJSONObject(i); JSONObject match = threatMatches.getJSONObject(i);
String url = match.getJSONObject("threat").getString("url"); String url = match.getJSONObject("threat").getString("url");
Subdomain subdomain = subdomainBuffer.get(url); DomainNameInfo domainNameInfo = domainNameInfoBuffer.get(url);
resultBuilder.add( resultBuilder.add(
KV.of( KV.of(
subdomain, domainNameInfo,
ThreatMatch.create(match.getString("threatType"), subdomain.domainName()))); ThreatMatch.create(
match.getString("threatType"), domainNameInfo.domainName())));
} }
} }
} }

View file

@ -100,20 +100,20 @@ public class Spec11Pipeline implements Serializable {
void setupPipeline(Pipeline pipeline) { void setupPipeline(Pipeline pipeline) {
options.setIsolationOverride(TransactionIsolationLevel.TRANSACTION_READ_COMMITTED); options.setIsolationOverride(TransactionIsolationLevel.TRANSACTION_READ_COMMITTED);
PCollection<Subdomain> domains = PCollection<DomainNameInfo> domains =
options.getDatabase().equals("DATASTORE") options.getDatabase().equals("DATASTORE")
? readFromBigQuery(options, pipeline) ? readFromBigQuery(options, pipeline)
: readFromCloudSql(pipeline); : readFromCloudSql(pipeline);
PCollection<KV<Subdomain, ThreatMatch>> threatMatches = PCollection<KV<DomainNameInfo, ThreatMatch>> threatMatches =
domains.apply("Run through SafeBrowsing API", ParDo.of(safeBrowsingFn)); domains.apply("Run through SafeBrowsing API", ParDo.of(safeBrowsingFn));
saveToSql(threatMatches, options); saveToSql(threatMatches, options);
saveToGcs(threatMatches, options); saveToGcs(threatMatches, options);
} }
static PCollection<Subdomain> readFromCloudSql(Pipeline pipeline) { static PCollection<DomainNameInfo> readFromCloudSql(Pipeline pipeline) {
Read<Object[], Subdomain> read = Read<Object[], DomainNameInfo> read =
RegistryJpaIO.read( RegistryJpaIO.read(
"select d, r.emailAddress from Domain d join Registrar r on" "select d, r.emailAddress from Domain d join Registrar r on"
+ " d.currentSponsorClientId = r.clientIdentifier where r.type = 'REAL'" + " d.currentSponsorClientId = r.clientIdentifier where r.type = 'REAL'"
@ -124,30 +124,31 @@ public class Spec11Pipeline implements Serializable {
return pipeline.apply("Read active domains from Cloud SQL", read); return pipeline.apply("Read active domains from Cloud SQL", read);
} }
static PCollection<Subdomain> readFromBigQuery(Spec11PipelineOptions options, Pipeline pipeline) { static PCollection<DomainNameInfo> readFromBigQuery(
Spec11PipelineOptions options, Pipeline pipeline) {
return pipeline.apply( return pipeline.apply(
"Read active domains from BigQuery", "Read active domains from BigQuery",
BigQueryIO.read(Subdomain::parseFromRecord) BigQueryIO.read(DomainNameInfo::parseFromRecord)
.fromQuery( .fromQuery(
SqlTemplate.create(getQueryFromFile(Spec11Pipeline.class, "subdomains.sql")) SqlTemplate.create(getQueryFromFile(Spec11Pipeline.class, "domain_name_infos.sql"))
.put("PROJECT_ID", options.getProject()) .put("PROJECT_ID", options.getProject())
.put("DATASTORE_EXPORT_DATASET", "latest_datastore_export") .put("DATASTORE_EXPORT_DATASET", "latest_datastore_export")
.put("REGISTRAR_TABLE", "Registrar") .put("REGISTRAR_TABLE", "Registrar")
.put("DOMAIN_BASE_TABLE", "DomainBase") .put("DOMAIN_BASE_TABLE", "DomainBase")
.build()) .build())
.withCoder(SerializableCoder.of(Subdomain.class)) .withCoder(SerializableCoder.of(DomainNameInfo.class))
.usingStandardSql() .usingStandardSql()
.withoutValidation() .withoutValidation()
.withTemplateCompatibility()); .withTemplateCompatibility());
} }
private static Subdomain parseRow(Object[] row) { private static DomainNameInfo parseRow(Object[] row) {
DomainBase domainBase = (DomainBase) row[0]; DomainBase domainBase = (DomainBase) row[0];
String emailAddress = (String) row[1]; String emailAddress = (String) row[1];
if (emailAddress == null) { if (emailAddress == null) {
emailAddress = ""; emailAddress = "";
} }
return Subdomain.create( return DomainNameInfo.create(
domainBase.getDomainName(), domainBase.getDomainName(),
domainBase.getRepoId(), domainBase.getRepoId(),
domainBase.getCurrentSponsorClientId(), domainBase.getCurrentSponsorClientId(),
@ -155,31 +156,31 @@ public class Spec11Pipeline implements Serializable {
} }
static void saveToSql( static void saveToSql(
PCollection<KV<Subdomain, ThreatMatch>> threatMatches, Spec11PipelineOptions options) { PCollection<KV<DomainNameInfo, ThreatMatch>> threatMatches, Spec11PipelineOptions options) {
String transformId = "Spec11 Threat Matches"; String transformId = "Spec11 Threat Matches";
LocalDate date = LocalDate.parse(options.getDate(), ISODateTimeFormat.date()); LocalDate date = LocalDate.parse(options.getDate(), ISODateTimeFormat.date());
threatMatches.apply( threatMatches.apply(
"Write to Sql: " + transformId, "Write to Sql: " + transformId,
RegistryJpaIO.<KV<Subdomain, ThreatMatch>>write() RegistryJpaIO.<KV<DomainNameInfo, ThreatMatch>>write()
.withName(transformId) .withName(transformId)
.withBatchSize(options.getSqlWriteBatchSize()) .withBatchSize(options.getSqlWriteBatchSize())
.withShards(options.getSqlWriteShards()) .withShards(options.getSqlWriteShards())
.withJpaConverter( .withJpaConverter(
(kv) -> { (kv) -> {
Subdomain subdomain = kv.getKey(); DomainNameInfo domainNameInfo = kv.getKey();
return new Spec11ThreatMatch.Builder() return new Spec11ThreatMatch.Builder()
.setThreatTypes( .setThreatTypes(
ImmutableSet.of(ThreatType.valueOf(kv.getValue().threatType()))) ImmutableSet.of(ThreatType.valueOf(kv.getValue().threatType())))
.setCheckDate(date) .setCheckDate(date)
.setDomainName(subdomain.domainName()) .setDomainName(domainNameInfo.domainName())
.setDomainRepoId(subdomain.domainRepoId()) .setDomainRepoId(domainNameInfo.domainRepoId())
.setRegistrarId(subdomain.registrarId()) .setRegistrarId(domainNameInfo.registrarId())
.build(); .build();
})); }));
} }
static void saveToGcs( static void saveToGcs(
PCollection<KV<Subdomain, ThreatMatch>> threatMatches, Spec11PipelineOptions options) { PCollection<KV<DomainNameInfo, ThreatMatch>> threatMatches, Spec11PipelineOptions options) {
threatMatches threatMatches
.apply( .apply(
"Map registrar ID to email/ThreatMatch pair", "Map registrar ID to email/ThreatMatch pair",
@ -187,7 +188,7 @@ public class Spec11Pipeline implements Serializable {
TypeDescriptors.kvs( TypeDescriptors.kvs(
TypeDescriptors.strings(), TypeDescriptor.of(EmailAndThreatMatch.class))) TypeDescriptors.strings(), TypeDescriptor.of(EmailAndThreatMatch.class)))
.via( .via(
(KV<Subdomain, ThreatMatch> kv) -> (KV<DomainNameInfo, ThreatMatch> kv) ->
KV.of( KV.of(
kv.getKey().registrarId(), kv.getKey().registrarId(),
EmailAndThreatMatch.create( EmailAndThreatMatch.create(
@ -230,7 +231,7 @@ public class Spec11Pipeline implements Serializable {
options.getReportingBucketUrl(), options.getReportingBucketUrl(),
getSpec11ReportFilePath(LocalDate.parse(options.getDate())))) getSpec11ReportFilePath(LocalDate.parse(options.getDate()))))
.withoutSharding() .withoutSharding()
.withHeader("Map from registrar email / name to detected subdomain threats:")); .withHeader("Map from registrar email / name to detected domain name threats:"));
} }
public static void main(String[] args) { public static void main(String[] args) {

View file

@ -80,7 +80,7 @@ class SafeBrowsingTransformsTest {
private static final String REGISTRAR_ID = "registrarID"; private static final String REGISTRAR_ID = "registrarID";
private static final String REGISTRAR_EMAIL = "email@registrar.net"; private static final String REGISTRAR_EMAIL = "email@registrar.net";
private static ImmutableMap<Subdomain, ThreatMatch> THREAT_MATCH_MAP; private static ImmutableMap<DomainNameInfo, ThreatMatch> THREAT_MATCH_MAP;
private final CloseableHttpClient mockHttpClient = private final CloseableHttpClient mockHttpClient =
mock(CloseableHttpClient.class, withSettings().serializable()); mock(CloseableHttpClient.class, withSettings().serializable());
@ -95,24 +95,25 @@ class SafeBrowsingTransformsTest {
final TestPipelineExtension pipeline = final TestPipelineExtension pipeline =
TestPipelineExtension.create().enableAbandonedNodeEnforcement(true); TestPipelineExtension.create().enableAbandonedNodeEnforcement(true);
private static Subdomain createSubdomain(String url) { private static DomainNameInfo createDomainNameInfo(String url) {
return Subdomain.create(url, REPO_ID, REGISTRAR_ID, REGISTRAR_EMAIL); return DomainNameInfo.create(url, REPO_ID, REGISTRAR_ID, REGISTRAR_EMAIL);
} }
private KV<Subdomain, ThreatMatch> getKv(String url) { private KV<DomainNameInfo, ThreatMatch> getKv(String url) {
Subdomain subdomain = createSubdomain(url); DomainNameInfo domainNameInfo = createDomainNameInfo(url);
return KV.of(subdomain, THREAT_MATCH_MAP.get(subdomain)); return KV.of(domainNameInfo, THREAT_MATCH_MAP.get(domainNameInfo));
} }
@BeforeAll @BeforeAll
static void beforeAll() { static void beforeAll() {
ImmutableMap.Builder<Subdomain, ThreatMatch> builder = new ImmutableMap.Builder<>(); ImmutableMap.Builder<DomainNameInfo, ThreatMatch> builder = new ImmutableMap.Builder<>();
THREAT_MAP THREAT_MAP
.entrySet() .entrySet()
.forEach( .forEach(
kv -> kv ->
builder.put( builder.put(
createSubdomain(kv.getKey()), ThreatMatch.create(kv.getValue(), kv.getKey()))); createDomainNameInfo(kv.getKey()),
ThreatMatch.create(kv.getValue(), kv.getKey())));
THREAT_MATCH_MAP = builder.build(); THREAT_MATCH_MAP = builder.build();
} }
@ -123,16 +124,16 @@ class SafeBrowsingTransformsTest {
@Test @Test
void testSuccess_someBadDomains() throws Exception { void testSuccess_someBadDomains() throws Exception {
ImmutableList<Subdomain> subdomains = ImmutableList<DomainNameInfo> domainNameInfos =
ImmutableList.of( ImmutableList.of(
createSubdomain("111.com"), createDomainNameInfo("111.com"),
createSubdomain("hooli.com"), createDomainNameInfo("hooli.com"),
createSubdomain("party-night.net"), createDomainNameInfo("party-night.net"),
createSubdomain("anti-anti-anti-virus.dev"), createDomainNameInfo("anti-anti-anti-virus.dev"),
createSubdomain("no-email.com")); createDomainNameInfo("no-email.com"));
PCollection<KV<Subdomain, ThreatMatch>> threats = PCollection<KV<DomainNameInfo, ThreatMatch>> threats =
pipeline pipeline
.apply(Create.of(subdomains).withCoder(SerializableCoder.of(Subdomain.class))) .apply(Create.of(domainNameInfos).withCoder(SerializableCoder.of(DomainNameInfo.class)))
.apply(ParDo.of(safeBrowsingFn)); .apply(ParDo.of(safeBrowsingFn));
PAssert.that(threats) PAssert.that(threats)
@ -146,14 +147,14 @@ class SafeBrowsingTransformsTest {
@Test @Test
void testSuccess_noBadDomains() throws Exception { void testSuccess_noBadDomains() throws Exception {
ImmutableList<Subdomain> subdomains = ImmutableList<DomainNameInfo> domainNameInfos =
ImmutableList.of( ImmutableList.of(
createSubdomain("hello_kitty.dev"), createDomainNameInfo("hello_kitty.dev"),
createSubdomain("555.com"), createDomainNameInfo("555.com"),
createSubdomain("goodboy.net")); createDomainNameInfo("goodboy.net"));
PCollection<KV<Subdomain, ThreatMatch>> threats = PCollection<KV<DomainNameInfo, ThreatMatch>> threats =
pipeline pipeline
.apply(Create.of(subdomains).withCoder(SerializableCoder.of(Subdomain.class))) .apply(Create.of(domainNameInfos).withCoder(SerializableCoder.of(DomainNameInfo.class)))
.apply(ParDo.of(safeBrowsingFn)); .apply(ParDo.of(safeBrowsingFn));
PAssert.that(threats).empty(); PAssert.that(threats).empty();

View file

@ -94,13 +94,16 @@ class Spec11PipelineTest {
private final CloseableHttpClient mockHttpClient = private final CloseableHttpClient mockHttpClient =
mock(CloseableHttpClient.class, withSettings().serializable()); mock(CloseableHttpClient.class, withSettings().serializable());
private static final ImmutableList<Subdomain> SUBDOMAINS = private static final ImmutableList<DomainNameInfo> DOMAIN_NAME_INFOS =
ImmutableList.of( ImmutableList.of(
Subdomain.create("111.com", "123456789-COM", "hello-registrar", "email@hello.net"), DomainNameInfo.create("111.com", "123456789-COM", "hello-registrar", "email@hello.net"),
Subdomain.create("party-night.net", "2244AABBC-NET", "kitty-registrar", "contact@kit.ty"), DomainNameInfo.create(
Subdomain.create("bitcoin.bank", "1C3D5E7F9-BANK", "hello-registrar", "email@hello.net"), "party-night.net", "2244AABBC-NET", "kitty-registrar", "contact@kit.ty"),
Subdomain.create("no-email.com", "2A4BA9BBC-COM", "kitty-registrar", "contact@kit.ty"), DomainNameInfo.create(
Subdomain.create( "bitcoin.bank", "1C3D5E7F9-BANK", "hello-registrar", "email@hello.net"),
DomainNameInfo.create(
"no-email.com", "2A4BA9BBC-COM", "kitty-registrar", "contact@kit.ty"),
DomainNameInfo.create(
"anti-anti-anti-virus.dev", "555666888-DEV", "cool-registrar", "cool@aid.net")); "anti-anti-anti-virus.dev", "555666888-DEV", "cool-registrar", "cool@aid.net"));
private static final ImmutableList<ThreatMatch> THREAT_MATCHES = private static final ImmutableList<ThreatMatch> THREAT_MATCHES =
@ -129,7 +132,7 @@ class Spec11PipelineTest {
PipelineOptionsFactory.create().as(Spec11PipelineOptions.class); PipelineOptionsFactory.create().as(Spec11PipelineOptions.class);
private File reportingBucketUrl; private File reportingBucketUrl;
private PCollection<KV<Subdomain, ThreatMatch>> threatMatches; private PCollection<KV<DomainNameInfo, ThreatMatch>> threatMatches;
ImmutableSet<Spec11ThreatMatch> sqlThreatMatches; ImmutableSet<Spec11ThreatMatch> sqlThreatMatches;
@ -143,11 +146,11 @@ class Spec11PipelineTest {
threatMatches = threatMatches =
pipeline.apply( pipeline.apply(
Create.of( Create.of(
Streams.zip(SUBDOMAINS.stream(), THREAT_MATCHES.stream(), KV::of) Streams.zip(DOMAIN_NAME_INFOS.stream(), THREAT_MATCHES.stream(), KV::of)
.collect(toImmutableList())) .collect(toImmutableList()))
.withCoder( .withCoder(
KvCoder.of( KvCoder.of(
SerializableCoder.of(Subdomain.class), SerializableCoder.of(DomainNameInfo.class),
SerializableCoder.of(ThreatMatch.class)))); SerializableCoder.of(ThreatMatch.class))));
sqlThreatMatches = sqlThreatMatches =
@ -223,8 +226,8 @@ class Spec11PipelineTest {
@Test @Test
void testSuccess_readFromCloudSql() throws Exception { void testSuccess_readFromCloudSql() throws Exception {
setupCloudSql(); setupCloudSql();
PCollection<Subdomain> subdomains = Spec11Pipeline.readFromCloudSql(pipeline); PCollection<DomainNameInfo> domainNameInfos = Spec11Pipeline.readFromCloudSql(pipeline);
PAssert.that(subdomains).containsInAnyOrder(SUBDOMAINS); PAssert.that(domainNameInfos).containsInAnyOrder(DOMAIN_NAME_INFOS);
pipeline.run().waitUntilFinish(); pipeline.run().waitUntilFinish();
} }

View file

@ -1,4 +1,4 @@
Map from registrar email / name to detected subdomain threats: Map from registrar email / name to detected domain name threats:
{"threatMatches":[{"threatType":"UNWANTED_SOFTWARE","fullyQualifiedDomainName":"anti-anti-anti-virus.dev"}],"registrarClientId":"cool-registrar","registrarEmailAddress":"cool@aid.net"} {"threatMatches":[{"threatType":"UNWANTED_SOFTWARE","fullyQualifiedDomainName":"anti-anti-anti-virus.dev"}],"registrarClientId":"cool-registrar","registrarEmailAddress":"cool@aid.net"}
{"threatMatches":[{"threatType":"MALWARE","fullyQualifiedDomainName":"111.com"},{"threatType":"POTENTIALLY_HARMFUL_APPLICATION","fullyQualifiedDomainName":"bitcoin.bank"}],"registrarClientId":"hello-registrar","registrarEmailAddress":"email@hello.net"} {"threatMatches":[{"threatType":"MALWARE","fullyQualifiedDomainName":"111.com"},{"threatType":"POTENTIALLY_HARMFUL_APPLICATION","fullyQualifiedDomainName":"bitcoin.bank"}],"registrarClientId":"hello-registrar","registrarEmailAddress":"email@hello.net"}
{"threatMatches":[{"threatType":"THREAT_TYPE_UNSPECIFIED","fullyQualifiedDomainName":"no-eamil.com"},{"threatType":"SOCIAL_ENGINEERING","fullyQualifiedDomainName":"party-night.net"}],"registrarClientId":"kitty-registrar","registrarEmailAddress":"contact@kit.ty"} {"threatMatches":[{"threatType":"THREAT_TYPE_UNSPECIFIED","fullyQualifiedDomainName":"no-eamil.com"},{"threatType":"SOCIAL_ENGINEERING","fullyQualifiedDomainName":"party-night.net"}],"registrarClientId":"kitty-registrar","registrarEmailAddress":"contact@kit.ty"}