From 565c4bcf5017712a08211518563889da8a1c0770 Mon Sep 17 00:00:00 2001 From: gbrodman Date: Wed, 10 Apr 2019 06:36:51 -0700 Subject: [PATCH] Use a multimap to index the Spec11 threat matches Collecting by key leads to exceptions if there are multiple client IDs with the same email address (if we group by client ID in the pipeline). Using Multimaps::index means that if we're grouping by email, all matches with the same email get concatenated together ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=242858112 --- .../spec11/PublishSpec11ReportAction.java | 18 ++++++--- .../spec11/PublishSpec11ReportActionTest.java | 37 +++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/java/google/registry/reporting/spec11/PublishSpec11ReportAction.java b/java/google/registry/reporting/spec11/PublishSpec11ReportAction.java index be1a8f11e..fae06e863 100644 --- a/java/google/registry/reporting/spec11/PublishSpec11ReportAction.java +++ b/java/google/registry/reporting/spec11/PublishSpec11ReportAction.java @@ -14,6 +14,7 @@ package google.registry.reporting.spec11; +import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static google.registry.reporting.ReportingModule.PARAM_DATE; import static google.registry.request.Action.Method.POST; @@ -24,7 +25,10 @@ import static javax.servlet.http.HttpServletResponse.SC_OK; import com.google.api.services.dataflow.Dataflow; import com.google.api.services.dataflow.model.Job; +import com.google.common.collect.ImmutableListMultimap; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimaps; import com.google.common.flogger.FluentLogger; import com.google.common.net.MediaType; import com.google.template.soy.parseinfo.SoyTemplateInfo; @@ -168,12 +172,16 @@ public class PublishSpec11ReportAction implements Runnable { private ImmutableSet getNewMatches( Set previousMatchesSet, Set currentMatchesSet) { + // Group by email address then flat-map all of the ThreatMatch objects together + ImmutableListMultimap currentMatchesByEmail = + Multimaps.index(currentMatchesSet, RegistrarThreatMatches::registrarEmailAddress); Map> currentMatchMap = - currentMatchesSet.stream() - .collect( - Collectors.toMap( - RegistrarThreatMatches::registrarEmailAddress, - RegistrarThreatMatches::threatMatches)); + Maps.transformValues( + currentMatchesByEmail.asMap(), + registrarThreatMatchesCollection -> + registrarThreatMatchesCollection.stream() + .flatMap(matches -> matches.threatMatches().stream()) + .collect(toImmutableList())); previousMatchesSet.forEach( previousMatches -> currentMatchMap.computeIfPresent( diff --git a/javatests/google/registry/reporting/spec11/PublishSpec11ReportActionTest.java b/javatests/google/registry/reporting/spec11/PublishSpec11ReportActionTest.java index 282460a7b..1567879d2 100644 --- a/javatests/google/registry/reporting/spec11/PublishSpec11ReportActionTest.java +++ b/javatests/google/registry/reporting/spec11/PublishSpec11ReportActionTest.java @@ -15,6 +15,8 @@ package google.registry.reporting.spec11; import static com.google.common.truth.Truth.assertThat; +import static google.registry.reporting.spec11.Spec11RegistrarThreatMatchesParserTest.getMatchA; +import static google.registry.reporting.spec11.Spec11RegistrarThreatMatchesParserTest.getMatchB; import static google.registry.reporting.spec11.Spec11RegistrarThreatMatchesParserTest.sampleThreatMatches; import static javax.servlet.http.HttpServletResponse.SC_INTERNAL_SERVER_ERROR; import static javax.servlet.http.HttpServletResponse.SC_NOT_MODIFIED; @@ -30,8 +32,10 @@ import com.google.api.services.dataflow.Dataflow.Projects; import com.google.api.services.dataflow.Dataflow.Projects.Jobs; import com.google.api.services.dataflow.Dataflow.Projects.Jobs.Get; import com.google.api.services.dataflow.model.Job; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.net.MediaType; +import google.registry.beam.spec11.ThreatMatch; import google.registry.reporting.spec11.soy.Spec11EmailSoyInfo; import google.registry.testing.FakeResponse; import java.io.IOException; @@ -162,6 +166,39 @@ public class PublishSpec11ReportActionTest { verifyNoMoreInteractions(emailUtils); } + @Test + public void testJobDone_multipleEntriesWithSameEmail() throws Exception { + LocalDate yesterday = date.minusDays(1); + when(parser.getPreviousDateWithMatches(date)).thenReturn(Optional.of(yesterday)); + when(parser.getRegistrarThreatMatches(yesterday)).thenReturn(ImmutableSet.of()); + + // if the input is [{email: a, matches: list1}, {email: a, matches: list2}] then we should + // concatenate list1 and list2 in the resulting grouping + RegistrarThreatMatches firstMatches = getMatchA(); + ImmutableList secondMatchList = getMatchB().threatMatches(); + RegistrarThreatMatches secondMatches = + RegistrarThreatMatches.create("a@fake.com", secondMatchList); + when(parser.getRegistrarThreatMatches(date)) + .thenReturn(ImmutableSet.of(firstMatches, secondMatches)); + expectedJob.setCurrentState("JOB_STATE_DONE"); + publishAction.run(); + ImmutableSet expectedMatchSet = + ImmutableSet.of( + RegistrarThreatMatches.create( + "a@fake.com", + ImmutableList.builder() + .addAll(firstMatches.threatMatches()) + .addAll(secondMatchList) + .build())); + verify(emailUtils) + .emailSpec11Reports( + date, + Spec11EmailSoyInfo.DAILY_SPEC_11_EMAIL, + "Super Cool Registry Daily Threat Detector [2018-06-05]", + expectedMatchSet); + verifyNoMoreInteractions(emailUtils); + } + @Test public void testJobDone_failsDueToNoPreviousResults() { when(parser.getPreviousDateWithMatches(date)).thenReturn(Optional.empty());