Include the registrar name in the Spec11 threats report

Because we make use of previous reports, it's easier to add this in and then use it later as two steps, rather than trying to add this and use it in one change. This way we don't really need to deal with backcompat issues.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=241378853
This commit is contained in:
gbrodman 2019-04-01 12:39:50 -07:00 committed by jianglai
parent 25f1d58969
commit 9c1739d562
2 changed files with 45 additions and 15 deletions

View file

@ -14,8 +14,10 @@
package google.registry.beam.spec11; package google.registry.beam.spec11;
import static com.google.common.base.Preconditions.checkArgument;
import static google.registry.beam.BeamUtils.getQueryFromFile; import static google.registry.beam.BeamUtils.getQueryFromFile;
import com.google.auto.value.AutoValue;
import google.registry.beam.spec11.SafeBrowsingTransforms.EvaluateSafeBrowsingFn; import google.registry.beam.spec11.SafeBrowsingTransforms.EvaluateSafeBrowsingFn;
import google.registry.config.RegistryConfig.Config; import google.registry.config.RegistryConfig.Config;
import google.registry.util.Retrier; import google.registry.util.Retrier;
@ -68,8 +70,10 @@ public class Spec11Pipeline implements Serializable {
return String.format("icann/spec11/%s/SPEC11_MONTHLY_REPORT_%s", yearMonth, localDate); return String.format("icann/spec11/%s/SPEC11_MONTHLY_REPORT_%s", yearMonth, localDate);
} }
/** The JSON object field we put the registrar's e-mail address for Spec11 reports. */ /** The JSON object field into which we put the registrar's e-mail address for Spec11 reports. */
public static final String REGISTRAR_EMAIL_FIELD = "registrarEmailAddress"; public static final String REGISTRAR_EMAIL_FIELD = "registrarEmailAddress";
/** The JSON object field into which we put the registrar's name for Spec11 reports. */
public static final String REGISTRAR_NAME_FIELD = "registrarName";
/** The JSON object field we put the threat match array for Spec11 reports. */ /** The JSON object field we put the threat match array for Spec11 reports. */
public static final String THREAT_MATCHES_FIELD = "threatMatches"; public static final String THREAT_MATCHES_FIELD = "threatMatches";
@ -164,30 +168,41 @@ public class Spec11Pipeline implements Serializable {
PCollection<Subdomain> domains, PCollection<Subdomain> domains,
EvaluateSafeBrowsingFn evaluateSafeBrowsingFn, EvaluateSafeBrowsingFn evaluateSafeBrowsingFn,
ValueProvider<String> dateProvider) { ValueProvider<String> dateProvider) {
domains PCollection<KV<Subdomain, ThreatMatch>> subdomains =
.apply("Run through SafeBrowsingAPI", ParDo.of(evaluateSafeBrowsingFn)) domains.apply("Run through SafeBrowsingAPI", ParDo.of(evaluateSafeBrowsingFn));
subdomains
.apply( .apply(
"Map registrar e-mail to ThreatMatch", "Map registrar name to email/ThreatMatch pair",
MapElements.into( MapElements.into(
TypeDescriptors.kvs( TypeDescriptors.kvs(
TypeDescriptors.strings(), TypeDescriptor.of(ThreatMatch.class))) TypeDescriptors.strings(), TypeDescriptor.of(EmailAndThreatMatch.class)))
.via( .via(
(KV<Subdomain, ThreatMatch> kv) -> (KV<Subdomain, ThreatMatch> kv) ->
KV.of(kv.getKey().registrarEmailAddress(), kv.getValue()))) KV.of(
.apply("Group by registrar email address", GroupByKey.create()) kv.getKey().registrarName(),
EmailAndThreatMatch.create(
kv.getKey().registrarEmailAddress(), kv.getValue()))))
.apply("Group by registrar name", GroupByKey.create())
.apply( .apply(
"Convert results to JSON format", "Convert results to JSON format",
MapElements.into(TypeDescriptors.strings()) MapElements.into(TypeDescriptors.strings())
.via( .via(
(KV<String, Iterable<ThreatMatch>> kv) -> { (KV<String, Iterable<EmailAndThreatMatch>> kv) -> {
String registrarName = kv.getKey();
checkArgument(
kv.getValue().iterator().hasNext(),
String.format(
"Registrar named %s had no corresponding threats", registrarName));
String email = kv.getValue().iterator().next().email();
JSONObject output = new JSONObject(); JSONObject output = new JSONObject();
try { try {
output.put(REGISTRAR_EMAIL_FIELD, kv.getKey()); output.put(REGISTRAR_NAME_FIELD, registrarName);
JSONArray threatMatches = new JSONArray(); output.put(REGISTRAR_EMAIL_FIELD, email);
for (ThreatMatch match : kv.getValue()) { JSONArray threatMatchArray = new JSONArray();
threatMatches.put(match.toJSON()); for (EmailAndThreatMatch emailAndThreatMatch : kv.getValue()) {
threatMatchArray.put(emailAndThreatMatch.threatMatch().toJSON());
} }
output.put(THREAT_MATCHES_FIELD, threatMatches); output.put(THREAT_MATCHES_FIELD, threatMatchArray);
return output.toString(); return output.toString();
} catch (JSONException e) { } catch (JSONException e) {
throw new RuntimeException( throw new RuntimeException(
@ -208,6 +223,18 @@ public class Spec11Pipeline implements Serializable {
reportingBucketUrl, reportingBucketUrl,
getSpec11ReportFilePath(LocalDate.parse(date))))) getSpec11ReportFilePath(LocalDate.parse(date)))))
.withoutSharding() .withoutSharding()
.withHeader("Map from registrar email to detected subdomain threats:")); .withHeader("Map from registrar email / name to detected subdomain threats:"));
}
@AutoValue
abstract static class EmailAndThreatMatch implements Serializable {
abstract String email();
abstract ThreatMatch threatMatch();
static EmailAndThreatMatch create(String email, ThreatMatch threatMatch) {
return new AutoValue_Spec11Pipeline_EmailAndThreatMatch(email, threatMatch);
}
} }
} }

View file

@ -140,7 +140,7 @@ public class Spec11PipelineTest {
ImmutableList<String> generatedReport = resultFileContents(); ImmutableList<String> generatedReport = resultFileContents();
assertThat(generatedReport).hasSize(4); assertThat(generatedReport).hasSize(4);
assertThat(generatedReport.get(0)) assertThat(generatedReport.get(0))
.isEqualTo("Map from registrar email to detected subdomain threats:"); .isEqualTo("Map from registrar email / name to detected subdomain threats:");
// The output file can put the registrar emails and bad URLs in any order. // The output file can put the registrar emails and bad URLs in any order.
// We cannot rely on the JSON toString to sort because the keys are not always in the same // We cannot rely on the JSON toString to sort because the keys are not always in the same
@ -151,6 +151,7 @@ public class Spec11PipelineTest {
JSONObject noEmailRegistrarJSON = new JSONObject(sortedLines.get(0)); JSONObject noEmailRegistrarJSON = new JSONObject(sortedLines.get(0));
assertThat(noEmailRegistrarJSON.get("registrarEmailAddress")).isEqualTo(""); assertThat(noEmailRegistrarJSON.get("registrarEmailAddress")).isEqualTo("");
assertThat(noEmailRegistrarJSON.get("registrarName")).isEqualTo("noEmailRegistrar");
assertThat(noEmailRegistrarJSON.has("threatMatches")).isTrue(); assertThat(noEmailRegistrarJSON.has("threatMatches")).isTrue();
JSONArray noEmailThreatMatch = noEmailRegistrarJSON.getJSONArray("threatMatches"); JSONArray noEmailThreatMatch = noEmailRegistrarJSON.getJSONArray("threatMatches");
assertThat(noEmailThreatMatch.length()).isEqualTo(1); assertThat(noEmailThreatMatch.length()).isEqualTo(1);
@ -161,6 +162,7 @@ public class Spec11PipelineTest {
JSONObject someRegistrarJSON = new JSONObject(sortedLines.get(1)); JSONObject someRegistrarJSON = new JSONObject(sortedLines.get(1));
assertThat(someRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@someRegistrar.com"); assertThat(someRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@someRegistrar.com");
assertThat(someRegistrarJSON.get("registrarName")).isEqualTo("someRegistrar");
assertThat(someRegistrarJSON.has("threatMatches")).isTrue(); assertThat(someRegistrarJSON.has("threatMatches")).isTrue();
JSONArray someThreatMatch = someRegistrarJSON.getJSONArray("threatMatches"); JSONArray someThreatMatch = someRegistrarJSON.getJSONArray("threatMatches");
assertThat(someThreatMatch.length()).isEqualTo(1); assertThat(someThreatMatch.length()).isEqualTo(1);
@ -172,6 +174,7 @@ public class Spec11PipelineTest {
// theRegistrar has two ThreatMatches, we have to parse it explicitly // theRegistrar has two ThreatMatches, we have to parse it explicitly
JSONObject theRegistrarJSON = new JSONObject(sortedLines.get(2)); JSONObject theRegistrarJSON = new JSONObject(sortedLines.get(2));
assertThat(theRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@theRegistrar.com"); assertThat(theRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@theRegistrar.com");
assertThat(theRegistrarJSON.get("registrarName")).isEqualTo("theRegistrar");
assertThat(theRegistrarJSON.has("threatMatches")).isTrue(); assertThat(theRegistrarJSON.has("threatMatches")).isTrue();
JSONArray theThreatMatches = theRegistrarJSON.getJSONArray("threatMatches"); JSONArray theThreatMatches = theRegistrarJSON.getJSONArray("threatMatches");
assertThat(theThreatMatches.length()).isEqualTo(2); assertThat(theThreatMatches.length()).isEqualTo(2);