Coalesce null to the empty string in the Spec11 pipeline

A separate change will make sure we don't actually try to send email to registrars that have no email address; this change only coalesces a null registrar email address to the empty string so that the entire pipeline doesn't crash. The test verifies that the pipeline runs properly with these empty strings.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=240346954
This commit is contained in:
gbrodman 2019-03-26 07:54:35 -07:00 committed by jianglai
parent 1f3c419e20
commit 648656e002
2 changed files with 28 additions and 23 deletions

View file

@ -21,7 +21,7 @@
SELECT SELECT
domain.fullyQualifiedDomainName AS fullyQualifiedDomainName, domain.fullyQualifiedDomainName AS fullyQualifiedDomainName,
registrar.name AS registrarName, registrar.name AS registrarName,
registrar.emailAddress AS registrarEmailAddress COALESCE(registrar.emailAddress, '') AS registrarEmailAddress
FROM ( ( FROM ( (
SELECT SELECT
fullyQualifiedDomainName, fullyQualifiedDomainName,

View file

@ -35,7 +35,6 @@ import java.io.InputStreamReader;
import java.io.ObjectInputStream; import java.io.ObjectInputStream;
import java.io.ObjectOutputStream; import java.io.ObjectOutputStream;
import java.io.Serializable; import java.io.Serializable;
import java.util.Comparator;
import java.util.function.Supplier; import java.util.function.Supplier;
import org.apache.beam.runners.direct.DirectRunner; import org.apache.beam.runners.direct.DirectRunner;
import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.PipelineOptions;
@ -91,7 +90,7 @@ public class Spec11PipelineTest {
} }
private static final ImmutableList<String> BAD_DOMAINS = private static final ImmutableList<String> BAD_DOMAINS =
ImmutableList.of("111.com", "222.com", "444.com"); ImmutableList.of("111.com", "222.com", "444.com", "no-email.com");
private ImmutableList<Subdomain> getInputDomains() { private ImmutableList<Subdomain> getInputDomains() {
ImmutableList.Builder<Subdomain> subdomainsBuilder = new ImmutableList.Builder<>(); ImmutableList.Builder<Subdomain> subdomainsBuilder = new ImmutableList.Builder<>();
@ -105,6 +104,7 @@ public class Spec11PipelineTest {
subdomainsBuilder.add( subdomainsBuilder.add(
Subdomain.create(String.format("%s.com", i), "someRegistrar", "fake@someRegistrar.com")); Subdomain.create(String.format("%s.com", i), "someRegistrar", "fake@someRegistrar.com"));
} }
subdomainsBuilder.add(Subdomain.create("no-email.com", "noEmailRegistrar", ""));
return subdomainsBuilder.build(); return subdomainsBuilder.build();
} }
@ -135,33 +135,18 @@ public class Spec11PipelineTest {
spec11Pipeline.evaluateUrlHealth(input, evalFn, StaticValueProvider.of("2018-06-01")); spec11Pipeline.evaluateUrlHealth(input, evalFn, StaticValueProvider.of("2018-06-01"));
p.run(); p.run();
// Verify header and 3 threat matches for 2 registrars are found // Verify header and 4 threat matches for 3 registrars are found
ImmutableList<String> generatedReport = resultFileContents(); ImmutableList<String> generatedReport = resultFileContents();
assertThat(generatedReport).hasSize(3); assertThat(generatedReport).hasSize(4);
assertThat(generatedReport.get(0)) assertThat(generatedReport.get(0))
.isEqualTo("Map from registrar email to detected subdomain threats:"); .isEqualTo("Map from registrar email to detected subdomain threats:");
// The output file can put the registrar emails and bad URLs in any order. // The output file can put the registrar emails and bad URLs in any order.
// So we sort by length (sorry) to put the shorter JSON first. // Sort lexicographically to have a stable ordering
ImmutableList<String> sortedLines = ImmutableList<String> sortedLines = ImmutableList.sortedCopyOf(generatedReport.subList(1, 4));
generatedReport
.subList(1, 3)
.stream()
.sorted(Comparator.comparingInt(String::length))
.collect(ImmutableList.toImmutableList());
JSONObject someRegistrarJSON = new JSONObject(sortedLines.get(0));
assertThat(someRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@someRegistrar.com");
assertThat(someRegistrarJSON.has("threatMatches")).isTrue();
JSONArray someThreatMatch = someRegistrarJSON.getJSONArray("threatMatches");
assertThat(someThreatMatch.length()).isEqualTo(1);
assertThat(someThreatMatch.getJSONObject(0).get("fullyQualifiedDomainName"))
.isEqualTo("444.com");
assertThat(someThreatMatch.getJSONObject(0).get("threatType"))
.isEqualTo("MALWARE");
// theRegistrar has two ThreatMatches, we have to parse it explicitly // theRegistrar has two ThreatMatches, we have to parse it explicitly
JSONObject theRegistrarJSON = new JSONObject(sortedLines.get(1)); JSONObject theRegistrarJSON = new JSONObject(sortedLines.get(0));
assertThat(theRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@theRegistrar.com"); assertThat(theRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@theRegistrar.com");
assertThat(theRegistrarJSON.has("threatMatches")).isTrue(); assertThat(theRegistrarJSON.has("threatMatches")).isTrue();
JSONArray theThreatMatches = theRegistrarJSON.getJSONArray("threatMatches"); JSONArray theThreatMatches = theRegistrarJSON.getJSONArray("threatMatches");
@ -184,6 +169,26 @@ public class Spec11PipelineTest {
.put("threatEntryMetadata", "NONE") .put("threatEntryMetadata", "NONE")
.put("platformType", "WINDOWS") .put("platformType", "WINDOWS")
.toString()); .toString());
JSONObject someRegistrarJSON = new JSONObject(sortedLines.get(1));
assertThat(someRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@someRegistrar.com");
assertThat(someRegistrarJSON.has("threatMatches")).isTrue();
JSONArray someThreatMatch = someRegistrarJSON.getJSONArray("threatMatches");
assertThat(someThreatMatch.length()).isEqualTo(1);
assertThat(someThreatMatch.getJSONObject(0).get("fullyQualifiedDomainName"))
.isEqualTo("444.com");
assertThat(someThreatMatch.getJSONObject(0).get("threatType"))
.isEqualTo("MALWARE");
JSONObject noEmailRegistrarJSON = new JSONObject(sortedLines.get(2));
assertThat(noEmailRegistrarJSON.get("registrarEmailAddress")).isEqualTo("");
assertThat(noEmailRegistrarJSON.has("threatMatches")).isTrue();
JSONArray noEmailThreatMatch = noEmailRegistrarJSON.getJSONArray("threatMatches");
assertThat(noEmailThreatMatch.length()).isEqualTo(1);
assertThat(noEmailThreatMatch.getJSONObject(0).get("fullyQualifiedDomainName"))
.isEqualTo("no-email.com");
assertThat(noEmailThreatMatch.getJSONObject(0).get("threatType"))
.isEqualTo("MALWARE");
} }
/** /**