From f5bf6e4f3d8eb197e2036ca3427621b04660a942 Mon Sep 17 00:00:00 2001 From: gbrodman Date: Tue, 26 Mar 2019 07:54:35 -0700 Subject: [PATCH] Coalesce null to the empty string in the Spec11 pipeline We'll have a separate change to make sure we're not actually trying to email these folks, but this will make it so that the entire pipeline doesn't crash. The test makes sure that we can run the pipeline properly with these empty strings. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=240346954 --- .../registry/beam/spec11/sql/subdomains.sql | 2 +- .../beam/spec11/Spec11PipelineTest.java | 49 ++++++++++--------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/java/google/registry/beam/spec11/sql/subdomains.sql b/java/google/registry/beam/spec11/sql/subdomains.sql index 46b51e30a..2df1cf74c 100644 --- a/java/google/registry/beam/spec11/sql/subdomains.sql +++ b/java/google/registry/beam/spec11/sql/subdomains.sql @@ -21,7 +21,7 @@ SELECT domain.fullyQualifiedDomainName AS fullyQualifiedDomainName, registrar.name AS registrarName, - registrar.emailAddress AS registrarEmailAddress + COALESCE(registrar.emailAddress, '') AS registrarEmailAddress FROM ( ( SELECT fullyQualifiedDomainName, diff --git a/javatests/google/registry/beam/spec11/Spec11PipelineTest.java b/javatests/google/registry/beam/spec11/Spec11PipelineTest.java index 0f1e9a5b4..d1d21dcb3 100644 --- a/javatests/google/registry/beam/spec11/Spec11PipelineTest.java +++ b/javatests/google/registry/beam/spec11/Spec11PipelineTest.java @@ -35,7 +35,6 @@ import java.io.InputStreamReader; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.Serializable; -import java.util.Comparator; import java.util.function.Supplier; import org.apache.beam.runners.direct.DirectRunner; import org.apache.beam.sdk.options.PipelineOptions; @@ -91,7 +90,7 @@ public class Spec11PipelineTest { } private static final ImmutableList BAD_DOMAINS = - ImmutableList.of("111.com", "222.com", "444.com"); + ImmutableList.of("111.com", "222.com", "444.com", "no-email.com"); private ImmutableList getInputDomains() { ImmutableList.Builder subdomainsBuilder = new ImmutableList.Builder<>(); @@ -105,6 +104,7 @@ public class Spec11PipelineTest { subdomainsBuilder.add( Subdomain.create(String.format("%s.com", i), "someRegistrar", "fake@someRegistrar.com")); } + subdomainsBuilder.add(Subdomain.create("no-email.com", "noEmailRegistrar", "")); return subdomainsBuilder.build(); } @@ -135,33 +135,18 @@ public class Spec11PipelineTest { spec11Pipeline.evaluateUrlHealth(input, evalFn, StaticValueProvider.of("2018-06-01")); p.run(); - // Verify header and 3 threat matches for 2 registrars are found + // Verify header and 4 threat matches for 3 registrars are found ImmutableList generatedReport = resultFileContents(); - assertThat(generatedReport).hasSize(3); + assertThat(generatedReport).hasSize(4); assertThat(generatedReport.get(0)) .isEqualTo("Map from registrar email to detected subdomain threats:"); // The output file can put the registrar emails and bad URLs in any order. - // So we sort by length (sorry) to put the shorter JSON first. - ImmutableList sortedLines = - generatedReport - .subList(1, 3) - .stream() - .sorted(Comparator.comparingInt(String::length)) - .collect(ImmutableList.toImmutableList()); - - JSONObject someRegistrarJSON = new JSONObject(sortedLines.get(0)); - assertThat(someRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@someRegistrar.com"); - assertThat(someRegistrarJSON.has("threatMatches")).isTrue(); - JSONArray someThreatMatch = someRegistrarJSON.getJSONArray("threatMatches"); - assertThat(someThreatMatch.length()).isEqualTo(1); - assertThat(someThreatMatch.getJSONObject(0).get("fullyQualifiedDomainName")) - .isEqualTo("444.com"); - assertThat(someThreatMatch.getJSONObject(0).get("threatType")) - .isEqualTo("MALWARE"); + // Sort lexicographically to have a stable ordering + ImmutableList sortedLines = ImmutableList.sortedCopyOf(generatedReport.subList(1, 4)); // theRegistrar has two ThreatMatches, we have to parse it explicitly - JSONObject theRegistrarJSON = new JSONObject(sortedLines.get(1)); + JSONObject theRegistrarJSON = new JSONObject(sortedLines.get(0)); assertThat(theRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@theRegistrar.com"); assertThat(theRegistrarJSON.has("threatMatches")).isTrue(); JSONArray theThreatMatches = theRegistrarJSON.getJSONArray("threatMatches"); @@ -184,6 +169,26 @@ public class Spec11PipelineTest { .put("threatEntryMetadata", "NONE") .put("platformType", "WINDOWS") .toString()); + + JSONObject someRegistrarJSON = new JSONObject(sortedLines.get(1)); + assertThat(someRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@someRegistrar.com"); + assertThat(someRegistrarJSON.has("threatMatches")).isTrue(); + JSONArray someThreatMatch = someRegistrarJSON.getJSONArray("threatMatches"); + assertThat(someThreatMatch.length()).isEqualTo(1); + assertThat(someThreatMatch.getJSONObject(0).get("fullyQualifiedDomainName")) + .isEqualTo("444.com"); + assertThat(someThreatMatch.getJSONObject(0).get("threatType")) + .isEqualTo("MALWARE"); + + JSONObject noEmailRegistrarJSON = new JSONObject(sortedLines.get(2)); + assertThat(noEmailRegistrarJSON.get("registrarEmailAddress")).isEqualTo(""); + assertThat(noEmailRegistrarJSON.has("threatMatches")).isTrue(); + JSONArray noEmailThreatMatch = noEmailRegistrarJSON.getJSONArray("threatMatches"); + assertThat(noEmailThreatMatch.length()).isEqualTo(1); + assertThat(noEmailThreatMatch.getJSONObject(0).get("fullyQualifiedDomainName")) + .isEqualTo("no-email.com"); + assertThat(noEmailThreatMatch.getJSONObject(0).get("threatType")) + .isEqualTo("MALWARE"); } /**