diff --git a/java/google/registry/beam/spec11/Spec11Pipeline.java b/java/google/registry/beam/spec11/Spec11Pipeline.java index ee39720e6..de58ab8b6 100644 --- a/java/google/registry/beam/spec11/Spec11Pipeline.java +++ b/java/google/registry/beam/spec11/Spec11Pipeline.java @@ -14,8 +14,10 @@ package google.registry.beam.spec11; +import static com.google.common.base.Preconditions.checkArgument; import static google.registry.beam.BeamUtils.getQueryFromFile; +import com.google.auto.value.AutoValue; import google.registry.beam.spec11.SafeBrowsingTransforms.EvaluateSafeBrowsingFn; import google.registry.config.RegistryConfig.Config; import google.registry.util.Retrier; @@ -68,8 +70,10 @@ public class Spec11Pipeline implements Serializable { return String.format("icann/spec11/%s/SPEC11_MONTHLY_REPORT_%s", yearMonth, localDate); } - /** The JSON object field we put the registrar's e-mail address for Spec11 reports. */ + /** The JSON object field into which we put the registrar's e-mail address for Spec11 reports. */ public static final String REGISTRAR_EMAIL_FIELD = "registrarEmailAddress"; + /** The JSON object field into which we put the registrar's name for Spec11 reports. */ + public static final String REGISTRAR_NAME_FIELD = "registrarName"; /** The JSON object field we put the threat match array for Spec11 reports. */ public static final String THREAT_MATCHES_FIELD = "threatMatches"; @@ -164,30 +168,41 @@ public class Spec11Pipeline implements Serializable { PCollection domains, EvaluateSafeBrowsingFn evaluateSafeBrowsingFn, ValueProvider dateProvider) { - domains - .apply("Run through SafeBrowsingAPI", ParDo.of(evaluateSafeBrowsingFn)) + PCollection> subdomains = + domains.apply("Run through SafeBrowsingAPI", ParDo.of(evaluateSafeBrowsingFn)); + subdomains .apply( - "Map registrar e-mail to ThreatMatch", + "Map registrar name to email/ThreatMatch pair", MapElements.into( TypeDescriptors.kvs( - TypeDescriptors.strings(), TypeDescriptor.of(ThreatMatch.class))) + TypeDescriptors.strings(), TypeDescriptor.of(EmailAndThreatMatch.class))) .via( (KV kv) -> - KV.of(kv.getKey().registrarEmailAddress(), kv.getValue()))) - .apply("Group by registrar email address", GroupByKey.create()) + KV.of( + kv.getKey().registrarName(), + EmailAndThreatMatch.create( + kv.getKey().registrarEmailAddress(), kv.getValue())))) + .apply("Group by registrar name", GroupByKey.create()) .apply( "Convert results to JSON format", MapElements.into(TypeDescriptors.strings()) .via( - (KV> kv) -> { + (KV> kv) -> { + String registrarName = kv.getKey(); + checkArgument( + kv.getValue().iterator().hasNext(), + String.format( + "Registrar named %s had no corresponding threats", registrarName)); + String email = kv.getValue().iterator().next().email(); JSONObject output = new JSONObject(); try { - output.put(REGISTRAR_EMAIL_FIELD, kv.getKey()); - JSONArray threatMatches = new JSONArray(); - for (ThreatMatch match : kv.getValue()) { - threatMatches.put(match.toJSON()); + output.put(REGISTRAR_NAME_FIELD, registrarName); + output.put(REGISTRAR_EMAIL_FIELD, email); + JSONArray threatMatchArray = new JSONArray(); + for (EmailAndThreatMatch emailAndThreatMatch : kv.getValue()) { + threatMatchArray.put(emailAndThreatMatch.threatMatch().toJSON()); } - output.put(THREAT_MATCHES_FIELD, threatMatches); + output.put(THREAT_MATCHES_FIELD, threatMatchArray); return output.toString(); } catch (JSONException e) { throw new RuntimeException( @@ -208,6 +223,18 @@ public class Spec11Pipeline implements Serializable { reportingBucketUrl, getSpec11ReportFilePath(LocalDate.parse(date))))) .withoutSharding() - .withHeader("Map from registrar email to detected subdomain threats:")); + .withHeader("Map from registrar email / name to detected subdomain threats:")); + } + + @AutoValue + abstract static class EmailAndThreatMatch implements Serializable { + + abstract String email(); + + abstract ThreatMatch threatMatch(); + + static EmailAndThreatMatch create(String email, ThreatMatch threatMatch) { + return new AutoValue_Spec11Pipeline_EmailAndThreatMatch(email, threatMatch); + } } } diff --git a/javatests/google/registry/beam/spec11/Spec11PipelineTest.java b/javatests/google/registry/beam/spec11/Spec11PipelineTest.java index ac0d6c3ed..9382e8d2a 100644 --- a/javatests/google/registry/beam/spec11/Spec11PipelineTest.java +++ b/javatests/google/registry/beam/spec11/Spec11PipelineTest.java @@ -140,7 +140,7 @@ public class Spec11PipelineTest { ImmutableList generatedReport = resultFileContents(); assertThat(generatedReport).hasSize(4); assertThat(generatedReport.get(0)) - .isEqualTo("Map from registrar email to detected subdomain threats:"); + .isEqualTo("Map from registrar email / name to detected subdomain threats:"); // The output file can put the registrar emails and bad URLs in any order. // We cannot rely on the JSON toString to sort because the keys are not always in the same @@ -151,6 +151,7 @@ public class Spec11PipelineTest { JSONObject noEmailRegistrarJSON = new JSONObject(sortedLines.get(0)); assertThat(noEmailRegistrarJSON.get("registrarEmailAddress")).isEqualTo(""); + assertThat(noEmailRegistrarJSON.get("registrarName")).isEqualTo("noEmailRegistrar"); assertThat(noEmailRegistrarJSON.has("threatMatches")).isTrue(); JSONArray noEmailThreatMatch = noEmailRegistrarJSON.getJSONArray("threatMatches"); assertThat(noEmailThreatMatch.length()).isEqualTo(1); @@ -161,6 +162,7 @@ public class Spec11PipelineTest { JSONObject someRegistrarJSON = new JSONObject(sortedLines.get(1)); assertThat(someRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@someRegistrar.com"); + assertThat(someRegistrarJSON.get("registrarName")).isEqualTo("someRegistrar"); assertThat(someRegistrarJSON.has("threatMatches")).isTrue(); JSONArray someThreatMatch = someRegistrarJSON.getJSONArray("threatMatches"); assertThat(someThreatMatch.length()).isEqualTo(1); @@ -172,6 +174,7 @@ public class Spec11PipelineTest { // theRegistrar has two ThreatMatches, we have to parse it explicitly JSONObject theRegistrarJSON = new JSONObject(sortedLines.get(2)); assertThat(theRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@theRegistrar.com"); + assertThat(theRegistrarJSON.get("registrarName")).isEqualTo("theRegistrar"); assertThat(theRegistrarJSON.has("threatMatches")).isTrue(); JSONArray theThreatMatches = theRegistrarJSON.getJSONArray("threatMatches"); assertThat(theThreatMatches.length()).isEqualTo(2);