From dd633c9e7200e46f4d67edbd8777e6a7d231a7bc Mon Sep 17 00:00:00 2001 From: mcilwain Date: Wed, 9 Mar 2016 08:48:06 -0800 Subject: [PATCH] Add a mapreduce to export domain lists to GCS The ExportDomainListsAction mapreduce has a cron entry that runs it twice per day. It exports one flat text file per real (non-test) TLD to the "{project-id}-domain-lists" bucket in Google Cloud Storage, overwriting the existing ones in place. Each file is a newline-delimited list of active (non-deleted) domains in that TLD. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=116767987 --- .../domain/registry/config/ConfigModule.java | 6 + .../registry/config/RegistryConfig.java | 7 + .../registry/config/TestRegistryConfig.java | 5 + .../env/common/backend/WEB-INF/web.xml | 9 ++ .../env/production/default/WEB-INF/cron.xml | 9 ++ java/com/google/domain/registry/export/BUILD | 4 +- .../export/ExportDomainListsAction.java | 135 +++++++++++++++++ .../registry/model/domain/DomainBase.java | 2 +- .../backend/BackendRequestComponent.java | 2 + .../com/google/domain/registry/export/BUILD | 2 + .../export/ExportDomainListsActionTest.java | 138 ++++++++++++++++++ .../registry/testing/DatastoreHelper.java | 3 +- 12 files changed, 317 insertions(+), 5 deletions(-) create mode 100644 java/com/google/domain/registry/export/ExportDomainListsAction.java create mode 100644 javatests/com/google/domain/registry/export/ExportDomainListsActionTest.java diff --git a/java/com/google/domain/registry/config/ConfigModule.java b/java/com/google/domain/registry/config/ConfigModule.java index cb51a573e..361d34d49 100644 --- a/java/com/google/domain/registry/config/ConfigModule.java +++ b/java/com/google/domain/registry/config/ConfigModule.java @@ -82,6 +82,12 @@ public final class ConfigModule { return config.getCommitLogDatastoreRetention(); } + @Provides + @Config("domainListsGcsBucket") + public static String provideDomainListsGcsBucket(RegistryConfig config) { + return
config.getDomainListsBucket(); + } + /** * Maximum number of commit logs to delete per transaction. * diff --git a/java/com/google/domain/registry/config/RegistryConfig.java b/java/com/google/domain/registry/config/RegistryConfig.java index ef8a65119..93c25f00e 100644 --- a/java/com/google/domain/registry/config/RegistryConfig.java +++ b/java/com/google/domain/registry/config/RegistryConfig.java @@ -44,6 +44,13 @@ public interface RegistryConfig { */ public String getSnapshotsBucket(); + /** + * Returns the Google Cloud Storage bucket for storing exported domain lists. + * + * @see com.google.domain.registry.export.ExportDomainListsAction + */ + public String getDomainListsBucket(); + /** * Returns the BigQuery dataset for storing directly imported datastore snapshots. * diff --git a/java/com/google/domain/registry/config/TestRegistryConfig.java b/java/com/google/domain/registry/config/TestRegistryConfig.java index ec4576c70..e958a7f6d 100644 --- a/java/com/google/domain/registry/config/TestRegistryConfig.java +++ b/java/com/google/domain/registry/config/TestRegistryConfig.java @@ -52,6 +52,11 @@ public class TestRegistryConfig implements RegistryConfig { return getProjectId() + "-snapshots"; } + @Override + public String getDomainListsBucket() { + return getProjectId() + "-domain-lists"; + } + @Override public String getSnapshotsDataset() { return "snapshots"; diff --git a/java/com/google/domain/registry/env/common/backend/WEB-INF/web.xml b/java/com/google/domain/registry/env/common/backend/WEB-INF/web.xml index 2af431f57..f5a9bcd1b 100644 --- a/java/com/google/domain/registry/env/common/backend/WEB-INF/web.xml +++ b/java/com/google/domain/registry/env/common/backend/WEB-INF/web.xml @@ -364,6 +364,15 @@ /_dr/task/syncGroupMembers + + export-domain-lists + com.google.domain.registry.module.backend.BackendServlet + + + export-domain-lists + /_dr/task/exportDomainLists + + diff --git a/java/com/google/domain/registry/env/production/default/WEB-INF/cron.xml 
b/java/com/google/domain/registry/env/production/default/WEB-INF/cron.xml index 82b9208ad..df9a62c6f 100644 --- a/java/com/google/domain/registry/env/production/default/WEB-INF/cron.xml +++ b/java/com/google/domain/registry/env/production/default/WEB-INF/cron.xml @@ -124,6 +124,15 @@ backend + + + + This job exports lists of all active domain names to Google Cloud Storage. + + every 12 hours synchronized + backend + + diff --git a/java/com/google/domain/registry/export/BUILD b/java/com/google/domain/registry/export/BUILD index 2f7dea5c6..51ee6983d 100644 --- a/java/com/google/domain/registry/export/BUILD +++ b/java/com/google/domain/registry/export/BUILD @@ -23,16 +23,16 @@ java_library( "//java/com/google/common/net", "//java/com/google/domain/registry/bigquery", "//java/com/google/domain/registry/config", - "//java/com/google/domain/registry/flows", "//java/com/google/domain/registry/gcs", "//java/com/google/domain/registry/groups", + "//java/com/google/domain/registry/mapreduce", "//java/com/google/domain/registry/model", "//java/com/google/domain/registry/request", - "//java/com/google/domain/registry/security:servlets", "//java/com/google/domain/registry/storage/drive", "//java/com/google/domain/registry/util", "//third_party/java/appengine:appengine-api", "//third_party/java/appengine_gcs_client", + "//third_party/java/appengine_mapreduce2:appengine_mapreduce", "//third_party/java/dagger", "//third_party/java/joda_time", "//third_party/java/json_simple", diff --git a/java/com/google/domain/registry/export/ExportDomainListsAction.java b/java/com/google/domain/registry/export/ExportDomainListsAction.java new file mode 100644 index 000000000..be201926c --- /dev/null +++ b/java/com/google/domain/registry/export/ExportDomainListsAction.java @@ -0,0 +1,135 @@ +// Copyright 2016 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.domain.registry.export; + +import static com.google.appengine.tools.cloudstorage.GcsServiceFactory.createGcsService; +import static com.google.domain.registry.mapreduce.EppResourceInputs.createEntityInput; +import static com.google.domain.registry.model.EppResourceUtils.isActive; +import static com.google.domain.registry.model.registry.Registries.getTldsOfType; +import static com.google.domain.registry.util.PipelineUtils.createJobPath; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.joda.time.DateTimeZone.UTC; + +import com.google.appengine.tools.cloudstorage.GcsFilename; +import com.google.appengine.tools.cloudstorage.RetryParams; +import com.google.appengine.tools.mapreduce.Mapper; +import com.google.appengine.tools.mapreduce.Reducer; +import com.google.appengine.tools.mapreduce.ReducerInput; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.domain.registry.config.ConfigModule.Config; +import com.google.domain.registry.gcs.GcsUtils; +import com.google.domain.registry.mapreduce.MapreduceAction; +import com.google.domain.registry.mapreduce.MapreduceRunner; +import com.google.domain.registry.model.domain.DomainResource; +import com.google.domain.registry.model.registry.Registry.TldType; +import com.google.domain.registry.request.Action; +import com.google.domain.registry.request.Response; +import com.google.domain.registry.util.FormattingLogger; + +import org.joda.time.DateTime; + +import java.io.IOException; +import 
java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.Writer; + +import javax.inject.Inject; + +/** + * A mapreduce that exports the list of active domains on all real TLDs to Google Cloud Storage. + * + * Each TLD's active domain names are exported as a newline-delimited flat text file with the name + * TLD.txt into the domain-lists bucket. Note that this overwrites the files in place. + */ +@Action(path = "/_dr/task/exportDomainLists") +public class ExportDomainListsAction implements MapreduceAction { + + private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass(); + + @Inject MapreduceRunner mrRunner; + @Inject Response response; + @Inject @Config("domainListsGcsBucket") String gcsBucket; + @Inject @Config("gcsBufferSize") int gcsBufferSize; + @Inject ExportDomainListsAction() {} + + @Override + public void run() { + ImmutableSet realTlds = getTldsOfType(TldType.REAL); + logger.infofmt("Exporting domain lists for tlds %s", realTlds); + response.sendJavaScriptRedirect(createJobPath(mrRunner + .setJobName("Export domain lists") + .setModuleName("backend") + .runMapreduce( + new ExportDomainListsMapper(DateTime.now(UTC), realTlds), + new ExportDomainListsReducer(gcsBucket, gcsBufferSize), + ImmutableList.of(createEntityInput(DomainResource.class))))); + } + + static class ExportDomainListsMapper extends Mapper { + + private static final long serialVersionUID = -7312206212434039854L; + + private final DateTime exportTime; + private final ImmutableSet realTlds; + + ExportDomainListsMapper(DateTime exportTime, ImmutableSet realTlds) { + this.exportTime = exportTime; + this.realTlds = realTlds; + } + + @Override + public void map(DomainResource domain) { + if (realTlds.contains(domain.getTld()) && isActive(domain, exportTime)) { + emit(domain.getTld(), domain.getFullyQualifiedDomainName()); + getContext().incrementCounter(String.format("domains in tld %s", domain.getTld())); + } + } + } + + 
static class ExportDomainListsReducer extends Reducer { + + private static final long serialVersionUID = 7035260977259119087L; + + private final String gcsBucket; + private final int gcsBufferSize; + + public ExportDomainListsReducer(String gcsBucket, int gcsBufferSize) { + this.gcsBucket = gcsBucket; + this.gcsBufferSize = gcsBufferSize; + } + + @Override + public void reduce(String tld, ReducerInput fqdns) { + GcsFilename filename = new GcsFilename(gcsBucket, tld + ".txt"); + GcsUtils cloudStorage = + new GcsUtils(createGcsService(RetryParams.getDefaultInstance()), gcsBufferSize); + try (OutputStream gcsOutput = cloudStorage.openOutputStream(filename); + Writer osWriter = new OutputStreamWriter(gcsOutput, UTF_8); + PrintWriter writer = new PrintWriter(osWriter)) { + long count; + for (count = 0; fqdns.hasNext(); count++) { + writer.println(fqdns.next()); + } + writer.flush(); + getContext().incrementCounter("tld domain lists written out"); + logger.infofmt("Wrote out %d domains for tld %s.", count, tld); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } +} diff --git a/java/com/google/domain/registry/model/domain/DomainBase.java b/java/com/google/domain/registry/model/domain/DomainBase.java index 9f690a051..65e9d9c85 100644 --- a/java/com/google/domain/registry/model/domain/DomainBase.java +++ b/java/com/google/domain/registry/model/domain/DomainBase.java @@ -52,7 +52,7 @@ import javax.xml.bind.annotation.XmlTransient; public abstract class DomainBase extends EppResource { /** - * Fully qualified domain name, which serves as the foreign key for this domain. + * Fully qualified domain name (puny-coded), which serves as the foreign key for this domain. *

* This is only unique in the sense that for any given lifetime specified as the time range from * (creationTime, deletionTime) there can only be one domain in the datastore with this name. diff --git a/java/com/google/domain/registry/module/backend/BackendRequestComponent.java b/java/com/google/domain/registry/module/backend/BackendRequestComponent.java index dd46903d2..c5f1ef856 100644 --- a/java/com/google/domain/registry/module/backend/BackendRequestComponent.java +++ b/java/com/google/domain/registry/module/backend/BackendRequestComponent.java @@ -28,6 +28,7 @@ import com.google.domain.registry.dns.ReadDnsQueueAction; import com.google.domain.registry.dns.RefreshDns; import com.google.domain.registry.dns.WriteDnsTask; import com.google.domain.registry.export.BigqueryPollJobAction; +import com.google.domain.registry.export.ExportDomainListsAction; import com.google.domain.registry.export.ExportRequestModule; import com.google.domain.registry.export.ExportReservedTermsTask; import com.google.domain.registry.export.SyncGroupMembersTask; @@ -81,6 +82,7 @@ interface BackendRequestComponent { DeleteOldCommitLogsAction deleteOldCommitLogsAction(); DnsRefreshForHostRenameAction dnsRefreshForHostRenameAction(); ExportCommitLogDiffAction exportCommitLogDiffAction(); + ExportDomainListsAction exportDomainListsAction(); ExportReservedTermsTask exportReservedTermsTask(); NordnUploadAction nordnUploadAction(); NordnVerifyAction nordnVerifyAction(); diff --git a/javatests/com/google/domain/registry/export/BUILD b/javatests/com/google/domain/registry/export/BUILD index 52aec5d9a..edc036570 100644 --- a/javatests/com/google/domain/registry/export/BUILD +++ b/javatests/com/google/domain/registry/export/BUILD @@ -26,11 +26,13 @@ java_library( "//java/com/google/domain/registry/export", "//java/com/google/domain/registry/gcs", "//java/com/google/domain/registry/groups", + "//java/com/google/domain/registry/mapreduce", "//java/com/google/domain/registry/model", 
"//java/com/google/domain/registry/request", "//java/com/google/domain/registry/storage/drive", "//java/com/google/domain/registry/util", "//javatests/com/google/domain/registry/testing", + "//javatests/com/google/domain/registry/testing/mapreduce", "//third_party/java/appengine:appengine-api-testonly", "//third_party/java/appengine:appengine-stubs", "//third_party/java/appengine_gcs_client", diff --git a/javatests/com/google/domain/registry/export/ExportDomainListsActionTest.java b/javatests/com/google/domain/registry/export/ExportDomainListsActionTest.java new file mode 100644 index 000000000..af24b59fa --- /dev/null +++ b/javatests/com/google/domain/registry/export/ExportDomainListsActionTest.java @@ -0,0 +1,138 @@ +// Copyright 2016 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.google.domain.registry.export; + +import static com.google.appengine.tools.cloudstorage.GcsServiceFactory.createGcsService; +import static com.google.common.truth.Truth.assertThat; +import static com.google.domain.registry.testing.DatastoreHelper.createTld; +import static com.google.domain.registry.testing.DatastoreHelper.persistActiveDomain; +import static com.google.domain.registry.testing.DatastoreHelper.persistActiveDomainApplication; +import static com.google.domain.registry.testing.DatastoreHelper.persistDeletedDomain; +import static com.google.domain.registry.testing.DatastoreHelper.persistResource; +import static com.google.domain.registry.testing.GcsTestingUtils.readGcsFile; +import static java.nio.charset.StandardCharsets.UTF_8; + +import com.google.appengine.tools.cloudstorage.GcsFilename; +import com.google.appengine.tools.cloudstorage.GcsService; +import com.google.appengine.tools.cloudstorage.ListOptions; +import com.google.appengine.tools.cloudstorage.ListResult; +import com.google.common.base.Optional; +import com.google.common.base.Splitter; +import com.google.domain.registry.mapreduce.MapreduceRunner; +import com.google.domain.registry.model.registry.Registry; +import com.google.domain.registry.model.registry.Registry.TldType; +import com.google.domain.registry.testing.ExceptionRule; +import com.google.domain.registry.testing.FakeResponse; +import com.google.domain.registry.testing.mapreduce.MapreduceTestCase; + +import org.joda.time.DateTime; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.FileNotFoundException; + +/** Unit tests for {@link ExportDomainListsAction}. 
*/ +@RunWith(JUnit4.class) +public class ExportDomainListsActionTest extends MapreduceTestCase { + + private GcsService gcsService; + + @Rule + public final ExceptionRule thrown = new ExceptionRule(); + + @Before + public void init() { + createTld("tld"); + createTld("testtld"); + persistResource(Registry.get("testtld").asBuilder().setTldType(TldType.TEST).build()); + + action = new ExportDomainListsAction(); + action.mrRunner = new MapreduceRunner(Optional.absent(), Optional.absent()); + action.response = new FakeResponse(); + action.gcsBucket = "outputbucket"; + action.gcsBufferSize = 500; + gcsService = createGcsService(); + } + + private void runMapreduce() throws Exception { + action.run(); + executeTasksUntilEmpty("mapreduce"); + } + + @Test + public void test_outputsOnlyActiveDomains() throws Exception { + persistActiveDomain("onetwo.tld"); + persistActiveDomain("rudnitzky.tld"); + persistDeletedDomain("mortuary.tld", DateTime.parse("2001-03-14T10:11:12Z")); + runMapreduce(); + GcsFilename existingFile = new GcsFilename("outputbucket", "tld.txt"); + String tlds = new String(readGcsFile(gcsService, existingFile), UTF_8).trim(); + // Check that it only contains the active domains, not the dead one. + assertThat(Splitter.on('\n').splitToList(tlds)).containsExactly("onetwo.tld", "rudnitzky.tld"); + } + + @Test + public void test_outputsOnlyDomainsOnRealTlds() throws Exception { + persistActiveDomain("onetwo.tld"); + persistActiveDomain("rudnitzky.tld"); + persistActiveDomain("wontgo.testtld"); + runMapreduce(); + GcsFilename existingFile = new GcsFilename("outputbucket", "tld.txt"); + String tlds = new String(readGcsFile(gcsService, existingFile), UTF_8).trim(); + // Check that it only contains the domains on the real TLD, and not the test one. + assertThat(Splitter.on('\n').splitToList(tlds)).containsExactly("onetwo.tld", "rudnitzky.tld"); + // Make sure that the test TLD file wasn't written out. 
+ GcsFilename nonexistentFile = new GcsFilename("outputbucket", "testtld.txt"); + thrown.expect(FileNotFoundException.class); + readGcsFile(gcsService, nonexistentFile); + ListResult ls = gcsService.list("outputbucket", ListOptions.DEFAULT); + assertThat(ls.next().getName()).isEqualTo("tld.txt"); + // Make sure that no other files were written out. + assertThat(ls.hasNext()).isFalse(); + } + + @Test + public void test_outputsDomainsFromDifferentTldsToMultipleFiles() throws Exception { + createTld("tldtwo"); + // You'd think this test was written around Christmas, but it wasn't. + persistActiveDomain("dasher.tld"); + persistActiveDomain("prancer.tld"); + persistActiveDomain("rudolph.tldtwo"); + persistActiveDomain("santa.tldtwo"); + persistActiveDomain("buddy.tldtwo"); + runMapreduce(); + GcsFilename firstTldFile = new GcsFilename("outputbucket", "tld.txt"); + String tlds = new String(readGcsFile(gcsService, firstTldFile), UTF_8).trim(); + assertThat(Splitter.on('\n').splitToList(tlds)).containsExactly("dasher.tld", "prancer.tld"); + GcsFilename secondTldFile = new GcsFilename("outputbucket", "tldtwo.txt"); + String moreTlds = new String(readGcsFile(gcsService, secondTldFile), UTF_8).trim(); + assertThat(Splitter.on('\n').splitToList(moreTlds)) + .containsExactly("rudolph.tldtwo", "santa.tldtwo", "buddy.tldtwo"); + } + + @Test + public void test_doesntOutputDomainApplications() throws Exception { + persistActiveDomain("chilipepper.tld"); + persistActiveDomainApplication("nagajolokia.tld"); + runMapreduce(); + GcsFilename firstTldFile = new GcsFilename("outputbucket", "tld.txt"); + String tlds = new String(readGcsFile(gcsService, firstTldFile), UTF_8).trim(); + // Check that it didn't output nagajolokia.tld. 
+ assertThat(Splitter.on('\n').splitToList(tlds)).containsExactly("chilipepper.tld"); + } +} diff --git a/javatests/com/google/domain/registry/testing/DatastoreHelper.java b/javatests/com/google/domain/registry/testing/DatastoreHelper.java index d30da8c40..23a348eb0 100644 --- a/javatests/com/google/domain/registry/testing/DatastoreHelper.java +++ b/javatests/com/google/domain/registry/testing/DatastoreHelper.java @@ -286,8 +286,7 @@ public class DatastoreHelper { } public static DomainResource persistDomainAsDeleted(DomainResource domain, DateTime now) { - return persistResource( - domain.asBuilder().setDeletionTime(now.minusDays(1)).build()); + return persistResource(domain.asBuilder().setDeletionTime(now.minusDays(1)).build()); } /** Persists a domain and enqueues a LORDN task of the appropriate type for it. */