Add GenerateSpec11Action and SafeBrowsing evaluation

This adds actual subdomain verification via the SafeBrowsing API to the Spec11
pipeline, as well as on-the-fly KMS decryption via the GenerateSpec11Action to
securely store our API key in source code.

Testing the interaction is difficult due to serialization requirements; test coverage will be significantly expanded in the next CL. For now, the test verifies basic end-to-end pipeline behavior.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=208092942
This commit is contained in:
larryruili 2018-08-09 12:32:30 -07:00 committed by jianglai
parent f554ace51b
commit 33ee7de457
29 changed files with 767 additions and 26 deletions

View file

@ -24,6 +24,8 @@ java_library(
"@org_apache_beam_runners_google_cloud_dataflow_java",
"@org_apache_beam_sdks_java_core",
"@org_apache_beam_sdks_java_io_google_cloud_platform",
"@org_apache_httpcomponents_httpclient",
"@org_apache_httpcomponents_httpcore",
"@org_mockito_all",
],
)

View file

@ -15,19 +15,39 @@
package google.registry.beam.spec11;
import static com.google.common.truth.Truth.assertThat;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.mockito.Mockito.withSettings;
import com.google.common.collect.ImmutableList;
import com.google.common.io.CharStreams;
import google.registry.beam.spec11.SafeBrowsingTransforms.EvaluateSafeBrowsingFn;
import google.registry.util.ResourceUtils;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.function.Supplier;
import org.apache.beam.runners.direct.DirectRunner;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
import org.apache.http.ProtocolVersion;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.BasicHttpEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicStatusLine;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
@ -35,6 +55,7 @@ import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.mockito.stubbing.Answer;
/** Unit tests for {@link Spec11Pipeline}. */
@RunWith(JUnit4.class)
@ -64,25 +85,72 @@ public class Spec11PipelineTest {
}
private ImmutableList<Subdomain> getInputDomains() {
return ImmutableList.of(
Subdomain.create(
"a.com", ZonedDateTime.of(2017, 9, 29, 0, 0, 0, 0, ZoneId.of("UTC")), "OK"),
Subdomain.create(
"b.com", ZonedDateTime.of(2017, 9, 29, 0, 0, 0, 0, ZoneId.of("UTC")), "OK"),
Subdomain.create(
"c.com", ZonedDateTime.of(2017, 9, 29, 0, 0, 0, 0, ZoneId.of("UTC")), "OK"));
ImmutableList.Builder<Subdomain> subdomainsBuilder = new ImmutableList.Builder<>();
// Put in 2 batches worth (490 < max < 490*2) to get one positive and one negative example.
for (int i = 0; i < 510; i++) {
subdomainsBuilder.add(
Subdomain.create(
String.format("%s.com", i),
ZonedDateTime.of(2017, 9, 29, 0, 0, 0, 0, ZoneId.of("UTC")),
"OK"));
}
return subdomainsBuilder.build();
}
/**
* Tests the end-to-end Spec11 pipeline with mocked out API calls.
*
* <p>We suppress the (Serializable & Supplier) dual-casted lambda warnings because the supplier
* produces an explicitly serializable mock, which is safe to cast.
*/
@Test
@SuppressWarnings("unchecked")
public void testEndToEndPipeline_generatesExpectedFiles() throws Exception {
// Establish mocks for testing
ImmutableList<Subdomain> inputRows = getInputDomains();
CloseableHttpClient httpClient = mock(CloseableHttpClient.class, withSettings().serializable());
CloseableHttpResponse negativeResponse =
mock(CloseableHttpResponse.class, withSettings().serializable());
CloseableHttpResponse positiveResponse =
mock(CloseableHttpResponse.class, withSettings().serializable());
// Tailor the fake API's response based on whether or not it contains the "bad url" 111.com
when(httpClient.execute(any(HttpPost.class)))
.thenAnswer(
(Answer & Serializable)
(i) -> {
String request =
CharStreams.toString(
new InputStreamReader(
((HttpPost) i.getArguments()[0]).getEntity().getContent(), UTF_8));
if (request.contains("http://111.com")) {
return positiveResponse;
} else {
return negativeResponse;
}
});
when(negativeResponse.getStatusLine())
.thenReturn(new BasicStatusLine(new ProtocolVersion("HTTP", 1, 1), 200, "Done"));
when(negativeResponse.getEntity()).thenReturn(new FakeHttpEntity("{}"));
when(positiveResponse.getStatusLine())
.thenReturn(new BasicStatusLine(new ProtocolVersion("HTTP", 1, 1), 200, "Done"));
when(positiveResponse.getEntity())
.thenReturn(new FakeHttpEntity(getBadUrlMatch("http://111.com")));
EvaluateSafeBrowsingFn evalFn =
new EvaluateSafeBrowsingFn(
StaticValueProvider.of("apikey"), (Serializable & Supplier) () -> httpClient);
// Apply input and evaluation transforms
PCollection<Subdomain> input = p.apply(Create.of(inputRows));
spec11Pipeline.countDomainsAndOutputResults(input);
spec11Pipeline.evaluateUrlHealth(input, evalFn);
p.run();
// Verify output of text file
ImmutableList<String> generatedReport = resultFileContents();
assertThat(generatedReport.get(0)).isEqualTo("HELLO WORLD");
assertThat(generatedReport.get(1)).isEqualTo("3");
// TODO(b/80524726): Rigorously test this output once the pipeline output is finalized.
assertThat(generatedReport).hasSize(2);
assertThat(generatedReport.get(1)).contains("http://111.com");
}
/** Returns the text contents of a file under the beamBucket/results directory. */
@ -91,4 +159,45 @@ public class Spec11PipelineTest {
return ImmutableList.copyOf(
ResourceUtils.readResourceUtf8(resultFile.toURI().toURL()).split("\n"));
}
/**
 * Returns a filled-in JSON template describing a single MALWARE threat match at a given url.
 *
 * <p>The url is substituted verbatim (it is not JSON-escaped), so it must be safe to embed inside
 * a JSON string literal.
 *
 * @param url the url to report as the matched threat
 * @return a JSON document whose {@code matches} array contains exactly one entry for {@code url}
 */
private static String getBadUrlMatch(String url) {
  // The single element of "matches" must not be followed by a comma: "},]" is rejected by
  // strict JSON parsers.
  return "{\n"
      + " \"matches\": [{\n"
      + " \"threatType\": \"MALWARE\",\n"
      + " \"platformType\": \"WINDOWS\",\n"
      + " \"threatEntryType\": \"URL\",\n"
      + String.format(" \"threat\": {\"url\": \"%s\"},\n", url)
      + " \"threatEntryMetadata\": {\n"
      + " \"entries\": [{\n"
      + " \"key\": \"malware_threat_type\",\n"
      + " \"value\": \"landing\"\n"
      + " }]\n"
      + " },\n"
      + " \"cacheDuration\": \"300.000s\"\n"
      + " }]\n"
      + "}";
}
/**
 * A serializable HttpEntity fake that returns {@link String} content.
 *
 * <p>Only the {@code content} string is written by this class's serialized form; the entity's
 * content stream is rebuilt from that string both at construction time and after
 * deserialization, so the fake is readable whether or not it has been serialized.
 */
private static class FakeHttpEntity extends BasicHttpEntity implements Serializable {

  private static final long serialVersionUID = 105738294571L;

  /** The response body served by this entity. */
  private String content;

  /**
   * Creates an entity whose body is {@code content}.
   *
   * @param content the response body, encoded as UTF-8 when streamed
   */
  FakeHttpEntity(String content) {
    this.content = content;
    // Populate the stream up front; previously it was only set in readObject, leaving a freshly
    // constructed (never serialized) instance without any content.
    resetContentStream();
  }

  private void writeObject(ObjectOutputStream oos) throws IOException {
    oos.defaultWriteObject();
  }

  private void readObject(ObjectInputStream ois) throws IOException, ClassNotFoundException {
    ois.defaultReadObject();
    // The stream itself is not part of the serialized fields of this class, so recreate it.
    resetContentStream();
  }

  /** Points the entity's content stream at a fresh UTF-8 encoding of {@code content}. */
  private void resetContentStream() {
    super.setContent(new ByteArrayInputStream(this.content.getBytes(UTF_8)));
  }
}
}

View file

@ -17,6 +17,7 @@ PATH CLASS METHOD
/_dr/task/exportReservedTerms ExportReservedTermsAction POST n INTERNAL APP IGNORED
/_dr/task/exportSnapshot ExportSnapshotAction POST y INTERNAL APP IGNORED
/_dr/task/generateInvoices GenerateInvoicesAction POST n INTERNAL APP IGNORED
/_dr/task/generateSpec11 GenerateSpec11ReportAction POST n INTERNAL APP IGNORED
/_dr/task/icannReportingStaging IcannReportingStagingAction POST n INTERNAL APP IGNORED
/_dr/task/icannReportingUpload IcannReportingUploadAction POST n INTERNAL,API APP ADMIN
/_dr/task/importRdeContacts RdeContactImportAction GET n INTERNAL APP IGNORED

View file

@ -83,6 +83,7 @@ public class GenerateInvoicesActionTest {
"test-project",
"gs://test-project-beam",
"gs://test-project-beam/templates/invoicing",
"us-east1-c",
true,
new YearMonth(2017, 10),
dataflow,
@ -118,6 +119,7 @@ public class GenerateInvoicesActionTest {
"test-project",
"gs://test-project-beam",
"gs://test-project-beam/templates/invoicing",
"us-east1-c",
false,
new YearMonth(2017, 10),
dataflow,
@ -147,6 +149,7 @@ public class GenerateInvoicesActionTest {
"test-project",
"gs://test-project-beam",
"gs://test-project-beam/templates/invoicing",
"us-east1-c",
true,
new YearMonth(2017, 10),
dataflow,

View file

@ -0,0 +1,39 @@
# Build rules for the tests in this directory.
# Every target here is test-only by default (default_testonly = 1).
package(
default_testonly = 1,
default_visibility = ["//java/google/registry:registry_project"],
)
licenses(["notice"]) # Apache 2.0
load("//java/com/google/testing/builddefs:GenTestRules.bzl", "GenTestRules")
# All Java sources in this directory, compiled as a single library against the
# production //java/google/registry/reporting/spec11 package and shared test helpers.
java_library(
name = "spec11",
srcs = glob(["*.java"]),
deps = [
"//java/google/registry/reporting/spec11",
"//javatests/google/registry/testing",
"@com_google_apis_google_api_services_dataflow",
"@com_google_appengine_api_1_0_sdk",
"@com_google_appengine_tools_appengine_gcs_client",
"@com_google_dagger",
"@com_google_guava",
"@com_google_truth",
"@com_google_truth_extensions_truth_java8_extension",
"@javax_servlet_api",
"@joda_time",
"@junit",
"@org_apache_beam_runners_direct_java",
"@org_apache_beam_runners_google_cloud_dataflow_java",
"@org_apache_beam_sdks_java_core",
"@org_apache_beam_sdks_java_io_google_cloud_platform",
"@org_mockito_all",
],
)
# NOTE(review): GenTestRules presumably generates one "small" test target per
# *Test.java file, each depending on :spec11 - confirm against GenTestRules.bzl.
GenTestRules(
name = "GeneratedTestRules",
default_test_size = "small",
test_files = glob(["*Test.java"]),
deps = [":spec11"],
)

View file

@ -0,0 +1,100 @@
// Copyright 2018 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.reporting.spec11;
import static com.google.common.truth.Truth.assertThat;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import com.google.api.services.dataflow.Dataflow;
import com.google.api.services.dataflow.Dataflow.Projects;
import com.google.api.services.dataflow.Dataflow.Projects.Templates;
import com.google.api.services.dataflow.Dataflow.Projects.Templates.Launch;
import com.google.api.services.dataflow.model.Job;
import com.google.api.services.dataflow.model.LaunchTemplateParameters;
import com.google.api.services.dataflow.model.LaunchTemplateResponse;
import com.google.api.services.dataflow.model.RuntimeEnvironment;
import com.google.common.collect.ImmutableMap;
import com.google.common.net.MediaType;
import google.registry.testing.FakeResponse;
import java.io.IOException;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/**
 * Unit tests for {@link google.registry.reporting.spec11.GenerateSpec11ReportAction}.
 *
 * <p>The Dataflow client is fully mocked: {@code dataflow.projects().templates().launch(...)}
 * yields a mocked {@link Launch} whose {@code execute()} returns a canned
 * {@link LaunchTemplateResponse}.
 */
@RunWith(JUnit4.class)
public class GenerateSpec11ReportActionTest {

  private FakeResponse response;
  private Dataflow dataflow;
  private Projects dataflowProjects;
  private Templates dataflowTemplates;
  private Launch dataflowLaunch;

  private GenerateSpec11ReportAction action;

  /** Creates a fresh fake response and wires up the mocked Dataflow call chain. */
  @Before
  public void setUp() throws IOException {
    response = new FakeResponse();
    // Establish the Dataflow API call chain
    dataflow = mock(Dataflow.class);
    dataflowProjects = mock(Dataflow.Projects.class);
    dataflowTemplates = mock(Templates.class);
    dataflowLaunch = mock(Launch.class);
    LaunchTemplateResponse launchTemplateResponse = new LaunchTemplateResponse();
    // Ultimately we get back this job response with a given id.
    launchTemplateResponse.setJob(new Job().setReplaceJobId("jobid"));
    when(dataflow.projects()).thenReturn(dataflowProjects);
    when(dataflowProjects.templates()).thenReturn(dataflowTemplates);
    when(dataflowTemplates.launch(any(String.class), any(LaunchTemplateParameters.class)))
        .thenReturn(dataflowLaunch);
    // setGcsPath returns the same mock so a chained execute() call reaches the stub below.
    when(dataflowLaunch.setGcsPath(any(String.class))).thenReturn(dataflowLaunch);
    when(dataflowLaunch.execute()).thenReturn(launchTemplateResponse);
  }

  /**
   * Verifies that running the action launches the template with the expected parameters and
   * reports success in the HTTP response.
   */
  @Test
  public void testLaunch_success() throws IOException {
    action =
        new GenerateSpec11ReportAction(
            "test",
            "gs://my-bucket-beam",
            "gs://template",
            "us-east1-c",
            "api_key/a",
            response,
            dataflow);
    action.run();

    LaunchTemplateParameters expectedLaunchTemplateParameters =
        new LaunchTemplateParameters()
            .setJobName("spec11_action")
            .setEnvironment(
                new RuntimeEnvironment()
                    .setZone("us-east1-c")
                    .setTempLocation("gs://my-bucket-beam/temporary"))
            .setParameters(ImmutableMap.of("safeBrowsingApiKey", "api_key/a"));
    verify(dataflowTemplates).launch("test", expectedLaunchTemplateParameters);
    verify(dataflowLaunch).setGcsPath("gs://template");
    assertThat(response.getStatus()).isEqualTo(200);
    assertThat(response.getContentType()).isEqualTo(MediaType.PLAIN_TEXT_UTF_8);
    assertThat(response.getPayload()).isEqualTo("Launched Spec11 dataflow template.");
  }
}

View file

@ -51,6 +51,7 @@ public final class FakeKeyringModule {
private static final ByteSource PGP_PRIVATE_KEYRING =
loadBytes(FakeKeyringModule.class, "pgp-private-keyring-registry.asc");
private static final String ICANN_REPORTING_PASSWORD = "yolo";
private static final String SAFE_BROWSING_API_KEY = "a/b_c";
private static final String MARKSDB_DNL_LOGIN = "dnl:yolo";
private static final String MARKSDB_LORDN_PASSWORD = "yolo";
private static final String MARKSDB_SMDRL_LOGIN = "smdrl:yolo";
@ -134,6 +135,11 @@ public final class FakeKeyringModule {
return ICANN_REPORTING_PASSWORD;
}
/** Returns the fixed fake API key held in {@code SAFE_BROWSING_API_KEY}. */
@Override
public String getSafeBrowsingAPIKey() {
return SAFE_BROWSING_API_KEY;
}
@Override
public PGPKeyPair getBrdaSigningKey() {
return rdeSigningKey;