google-nomulus/javatests/google/registry/beam/spec11/Spec11PipelineTest.java
jianglai ec26e3a96a Run the Spec11 pipeline daily without sending emails
Add a sendSpec11Email parameter that allows us to only send the email on
one run per month. Next, we will compute the diffs between the daily runs
and send daily emails with those diffs.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=224404653
2018-12-12 13:22:34 -05:00

286 lines
12 KiB
Java

// Copyright 2018 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.beam.spec11;
import static com.google.common.truth.Truth.assertThat;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.mockito.Mockito.withSettings;
import com.google.common.collect.ImmutableList;
import com.google.common.io.CharStreams;
import google.registry.beam.spec11.SafeBrowsingTransforms.EvaluateSafeBrowsingFn;
import google.registry.testing.FakeClock;
import google.registry.testing.FakeSleeper;
import google.registry.util.ResourceUtils;
import google.registry.util.Retrier;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Comparator;
import java.util.function.Supplier;
import org.apache.beam.runners.direct.DirectRunner;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
import org.apache.http.ProtocolVersion;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.BasicHttpEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicStatusLine;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
/** Unit tests for {@link Spec11Pipeline}. */
@RunWith(JUnit4.class)
public class Spec11PipelineTest {
private static PipelineOptions pipelineOptions;
@BeforeClass
public static void initializePipelineOptions() {
pipelineOptions = PipelineOptionsFactory.create();
pipelineOptions.setRunner(DirectRunner.class);
}
@Rule public final transient TestPipeline p = TestPipeline.fromOptions(pipelineOptions);
@Rule public final TemporaryFolder tempFolder = new TemporaryFolder();
private Spec11Pipeline spec11Pipeline;
@Before
public void initializePipeline() throws IOException {
spec11Pipeline = new Spec11Pipeline();
spec11Pipeline.projectId = "test-project";
spec11Pipeline.reportingBucketUrl = tempFolder.getRoot().getAbsolutePath();
File beamTempFolder = tempFolder.newFolder();
spec11Pipeline.beamStagingUrl = beamTempFolder.getAbsolutePath() + "/staging";
spec11Pipeline.spec11TemplateUrl = beamTempFolder.getAbsolutePath() + "/templates/invoicing";
}
private static final ImmutableList<String> BAD_DOMAINS =
ImmutableList.of("111.com", "222.com", "444.com");
private ImmutableList<Subdomain> getInputDomains() {
ImmutableList.Builder<Subdomain> subdomainsBuilder = new ImmutableList.Builder<>();
// Put in at least 2 batches worth (x > 490) to guarantee multiple executions.
// Put in half for theRegistrar and half for someRegistrar
for (int i = 0; i < 255; i++) {
subdomainsBuilder.add(
Subdomain.create(String.format("%s.com", i), "theRegistrar", "fake@theRegistrar.com"));
}
for (int i = 255; i < 510; i++) {
subdomainsBuilder.add(
Subdomain.create(String.format("%s.com", i), "someRegistrar", "fake@someRegistrar.com"));
}
return subdomainsBuilder.build();
}
/**
* Tests the end-to-end Spec11 pipeline with mocked out API calls.
*
* <p>We suppress the (Serializable & Supplier) dual-casted lambda warnings because the supplier
* produces an explicitly serializable mock, which is safe to cast.
*/
@Test
@SuppressWarnings("unchecked")
public void testEndToEndPipeline_generatesExpectedFiles() throws Exception {
// Establish mocks for testing
ImmutableList<Subdomain> inputRows = getInputDomains();
CloseableHttpClient httpClient = mock(CloseableHttpClient.class, withSettings().serializable());
// Return a mock HttpResponse that returns a JSON response based on the request.
when(httpClient.execute(any(HttpPost.class))).thenAnswer(new HttpResponder());
EvaluateSafeBrowsingFn evalFn =
new EvaluateSafeBrowsingFn(
StaticValueProvider.of("apikey"),
new Retrier(new FakeSleeper(new FakeClock()), 3),
(Serializable & Supplier) () -> httpClient);
// Apply input and evaluation transforms
PCollection<Subdomain> input = p.apply(Create.of(inputRows));
spec11Pipeline.evaluateUrlHealth(input, evalFn, StaticValueProvider.of("2018-06-01"));
p.run();
// Verify header and 3 threat matches for 2 registrars are found
ImmutableList<String> generatedReport = resultFileContents();
assertThat(generatedReport).hasSize(3);
assertThat(generatedReport.get(0))
.isEqualTo("Map from registrar email to detected subdomain threats:");
// The output file can put the registrar emails and bad URLs in any order.
// So we sort by length (sorry) to put the shorter JSON first.
ImmutableList<String> sortedLines =
generatedReport
.subList(1, 3)
.stream()
.sorted(Comparator.comparingInt(String::length))
.collect(ImmutableList.toImmutableList());
JSONObject someRegistrarJSON = new JSONObject(sortedLines.get(0));
assertThat(someRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@someRegistrar.com");
assertThat(someRegistrarJSON.has("threatMatches")).isTrue();
JSONArray someThreatMatch = someRegistrarJSON.getJSONArray("threatMatches");
assertThat(someThreatMatch.length()).isEqualTo(1);
assertThat(someThreatMatch.getJSONObject(0).get("fullyQualifiedDomainName"))
.isEqualTo("444.com");
assertThat(someThreatMatch.getJSONObject(0).get("threatType"))
.isEqualTo("MALWARE");
// theRegistrar has two ThreatMatches, we have to parse it explicitly
JSONObject theRegistrarJSON = new JSONObject(sortedLines.get(1));
assertThat(theRegistrarJSON.get("registrarEmailAddress")).isEqualTo("fake@theRegistrar.com");
assertThat(theRegistrarJSON.has("threatMatches")).isTrue();
JSONArray theThreatMatches = theRegistrarJSON.getJSONArray("threatMatches");
assertThat(theThreatMatches.length()).isEqualTo(2);
ImmutableList<String> threatMatchStrings =
ImmutableList.of(
theThreatMatches.getJSONObject(0).toString(),
theThreatMatches.getJSONObject(1).toString());
assertThat(threatMatchStrings)
.containsExactly(
new JSONObject()
.put("fullyQualifiedDomainName", "111.com")
.put("threatType", "MALWARE")
.put("threatEntryMetadata", "NONE")
.put("platformType", "WINDOWS")
.toString(),
new JSONObject()
.put("fullyQualifiedDomainName", "222.com")
.put("threatType", "MALWARE")
.put("threatEntryMetadata", "NONE")
.put("platformType", "WINDOWS")
.toString());
}
/**
* A serializable {@link Answer} that returns a mock HTTP response based on the HTTP request's
* content.
*/
private static class HttpResponder implements Answer<CloseableHttpResponse>, Serializable {
@Override
public CloseableHttpResponse answer(InvocationOnMock invocation) throws Throwable {
return getMockResponse(
CharStreams.toString(
new InputStreamReader(
((HttpPost) invocation.getArguments()[0]).getEntity().getContent(), UTF_8)));
}
}
/**
* Returns a {@link CloseableHttpResponse} containing either positive (threat found) or negative
* (no threat) API examples based on the request data.
*/
private static CloseableHttpResponse getMockResponse(String request) throws JSONException {
// Determine which bad URLs are in the request (if any)
ImmutableList<String> badUrls =
BAD_DOMAINS.stream().filter(request::contains).collect(ImmutableList.toImmutableList());
CloseableHttpResponse httpResponse =
mock(CloseableHttpResponse.class, withSettings().serializable());
when(httpResponse.getStatusLine())
.thenReturn(
new BasicStatusLine(new ProtocolVersion("HTTP", 1, 1), 200, "Done"));
when(httpResponse.getEntity())
.thenReturn(new FakeHttpEntity(getAPIResponse(badUrls)));
return httpResponse;
}
/**
* Returns the expected API response for a list of bad URLs.
*
* <p>If there are no badUrls in the list, this returns the empty JSON string "{}".
*/
private static String getAPIResponse(ImmutableList<String> badUrls) throws JSONException {
JSONObject response = new JSONObject();
if (badUrls.isEmpty()) {
return response.toString();
}
// Create a threatMatch for each badUrl
JSONArray matches = new JSONArray();
for (String badUrl : badUrls) {
matches.put(
new JSONObject()
.put("threatType", "MALWARE")
.put("platformType", "WINDOWS")
.put("threatEntryType", "URL")
.put("threat", new JSONObject().put("url", badUrl))
.put("cacheDuration", "300.000s"));
}
response.put("matches", matches);
return response.toString();
}
/** A serializable HttpEntity fake that returns {@link String} content. */
private static class FakeHttpEntity extends BasicHttpEntity implements Serializable {
private static final long serialVersionUID = 105738294571L;
private String content;
private void writeObject(ObjectOutputStream oos) throws IOException {
oos.defaultWriteObject();
}
/**
* Sets the {@link FakeHttpEntity} content upon deserialization.
*
* <p>This allows us to use {@link #getContent()} as-is, fully emulating the behavior of {@link
* BasicHttpEntity} regardless of serialization.
*/
private void readObject(ObjectInputStream ois) throws IOException, ClassNotFoundException {
ois.defaultReadObject();
super.setContent(new ByteArrayInputStream(this.content.getBytes(UTF_8)));
}
FakeHttpEntity(String content) {
this.content = content;
super.setContent(new ByteArrayInputStream(this.content.getBytes(UTF_8)));
}
}
/** Returns the text contents of a file under the beamBucket/results directory. */
private ImmutableList<String> resultFileContents() throws Exception {
File resultFile =
new File(
String.format(
"%s/icann/spec11/2018-06/SPEC11_MONTHLY_REPORT_2018-06-01",
tempFolder.getRoot().getAbsolutePath()));
return ImmutableList.copyOf(
ResourceUtils.readResourceUtf8(resultFile.toURI().toURL()).split("\n"));
}
}