mirror of
https://github.com/google/nomulus.git
synced 2025-04-29 19:47:51 +02:00
Add mapreduce to export domain lists to GCS
The ExportDomainListsAction mapreduce has a cron entry that runs it twice per day. It exports one flat text file per real (non-test) TLD to the "{project-id}-domain-lists" bucket in Google Cloud Storage, overwriting the existing ones in place. Each file is a newline-delimited list of active (non-deleted) domains in that TLD. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=116767987
This commit is contained in:
parent
d6815fb55a
commit
dd633c9e72
12 changed files with 317 additions and 5 deletions
|
@ -82,6 +82,12 @@ public final class ConfigModule {
|
|||
return config.getCommitLogDatastoreRetention();
|
||||
}
|
||||
|
||||
@Provides
|
||||
@Config("domainListsGcsBucket")
|
||||
public static String provideDomainListsGcsBucket(RegistryConfig config) {
|
||||
return config.getDomainListsBucket();
|
||||
}
|
||||
|
||||
/**
|
||||
* Maximum number of commit logs to delete per transaction.
|
||||
*
|
||||
|
|
|
@ -44,6 +44,13 @@ public interface RegistryConfig {
|
|||
*/
|
||||
public String getSnapshotsBucket();
|
||||
|
||||
/**
 * Returns the Google Cloud Storage bucket for storing exported domain lists.
 *
 * <p>The export action referenced below writes one {@code TLD.txt} file per TLD into this
 * bucket, overwriting any existing file of the same name.
 *
 * @see com.google.domain.registry.export.ExportDomainListsAction
 */
public String getDomainListsBucket();
|
||||
|
||||
/**
|
||||
* Returns the BigQuery dataset for storing directly imported datastore snapshots.
|
||||
*
|
||||
|
|
|
@ -52,6 +52,11 @@ public class TestRegistryConfig implements RegistryConfig {
|
|||
return getProjectId() + "-snapshots";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomainListsBucket() {
|
||||
return getProjectId() + "-domain-lists";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSnapshotsDataset() {
|
||||
return "snapshots";
|
||||
|
|
|
@ -364,6 +364,15 @@
|
|||
<url-pattern>/_dr/task/syncGroupMembers</url-pattern>
|
||||
</servlet-mapping>
|
||||
|
||||
<servlet>
|
||||
<servlet-name>export-domain-lists</servlet-name>
|
||||
<servlet-class>com.google.domain.registry.module.backend.BackendServlet</servlet-class>
|
||||
</servlet>
|
||||
<servlet-mapping>
|
||||
<servlet-name>export-domain-lists</servlet-name>
|
||||
<url-pattern>/_dr/task/exportDomainLists</url-pattern>
|
||||
</servlet-mapping>
|
||||
|
||||
<!-- Mapreduce to delete the specified contact resource if it is not referenced by any domains. -->
|
||||
<servlet>
|
||||
<description>
|
||||
|
|
|
@ -124,6 +124,15 @@
|
|||
<target>backend</target>
|
||||
</cron>
|
||||
|
||||
<cron>
|
||||
<url><![CDATA[/_dr/task/exportDomainLists]]></url>
|
||||
<description>
|
||||
This job exports lists of all active domain names to Google Cloud Storage.
|
||||
</description>
|
||||
<schedule>every 12 hours synchronized</schedule>
|
||||
<target>backend</target>
|
||||
</cron>
|
||||
|
||||
<cron>
|
||||
<url><![CDATA[/_dr/cron/fanout?queue=export-snapshot&endpoint=/_dr/task/exportSnapshot&runInEmpty]]></url>
|
||||
<description>
|
||||
|
|
|
@ -23,16 +23,16 @@ java_library(
|
|||
"//java/com/google/common/net",
|
||||
"//java/com/google/domain/registry/bigquery",
|
||||
"//java/com/google/domain/registry/config",
|
||||
"//java/com/google/domain/registry/flows",
|
||||
"//java/com/google/domain/registry/gcs",
|
||||
"//java/com/google/domain/registry/groups",
|
||||
"//java/com/google/domain/registry/mapreduce",
|
||||
"//java/com/google/domain/registry/model",
|
||||
"//java/com/google/domain/registry/request",
|
||||
"//java/com/google/domain/registry/security:servlets",
|
||||
"//java/com/google/domain/registry/storage/drive",
|
||||
"//java/com/google/domain/registry/util",
|
||||
"//third_party/java/appengine:appengine-api",
|
||||
"//third_party/java/appengine_gcs_client",
|
||||
"//third_party/java/appengine_mapreduce2:appengine_mapreduce",
|
||||
"//third_party/java/dagger",
|
||||
"//third_party/java/joda_time",
|
||||
"//third_party/java/json_simple",
|
||||
|
|
|
@ -0,0 +1,135 @@
|
|||
// Copyright 2016 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.domain.registry.export;
|
||||
|
||||
import static com.google.appengine.tools.cloudstorage.GcsServiceFactory.createGcsService;
|
||||
import static com.google.domain.registry.mapreduce.EppResourceInputs.createEntityInput;
|
||||
import static com.google.domain.registry.model.EppResourceUtils.isActive;
|
||||
import static com.google.domain.registry.model.registry.Registries.getTldsOfType;
|
||||
import static com.google.domain.registry.util.PipelineUtils.createJobPath;
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
import static org.joda.time.DateTimeZone.UTC;
|
||||
|
||||
import com.google.appengine.tools.cloudstorage.GcsFilename;
|
||||
import com.google.appengine.tools.cloudstorage.RetryParams;
|
||||
import com.google.appengine.tools.mapreduce.Mapper;
|
||||
import com.google.appengine.tools.mapreduce.Reducer;
|
||||
import com.google.appengine.tools.mapreduce.ReducerInput;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.domain.registry.config.ConfigModule.Config;
|
||||
import com.google.domain.registry.gcs.GcsUtils;
|
||||
import com.google.domain.registry.mapreduce.MapreduceAction;
|
||||
import com.google.domain.registry.mapreduce.MapreduceRunner;
|
||||
import com.google.domain.registry.model.domain.DomainResource;
|
||||
import com.google.domain.registry.model.registry.Registry.TldType;
|
||||
import com.google.domain.registry.request.Action;
|
||||
import com.google.domain.registry.request.Response;
|
||||
import com.google.domain.registry.util.FormattingLogger;
|
||||
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.Writer;
|
||||
|
||||
import javax.inject.Inject;
|
||||
|
||||
/**
|
||||
* A mapreduce that exports the list of active domains on all real TLDs to Google Cloud Storage.
|
||||
*
|
||||
* Each TLD's active domain names are exported as a newline-delimited flat text file with the name
|
||||
* TLD.txt into the domain-lists bucket. Note that this overwrites the files in place.
|
||||
*/
|
||||
@Action(path = "/_dr/task/exportDomainLists")
|
||||
public class ExportDomainListsAction implements MapreduceAction {
|
||||
|
||||
private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();
|
||||
|
||||
@Inject MapreduceRunner mrRunner;
|
||||
@Inject Response response;
|
||||
@Inject @Config("domainListsGcsBucket") String gcsBucket;
|
||||
@Inject @Config("gcsBufferSize") int gcsBufferSize;
|
||||
@Inject ExportDomainListsAction() {}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
ImmutableSet<String> realTlds = getTldsOfType(TldType.REAL);
|
||||
logger.infofmt("Exporting domain lists for tlds %s", realTlds);
|
||||
response.sendJavaScriptRedirect(createJobPath(mrRunner
|
||||
.setJobName("Export domain lists")
|
||||
.setModuleName("backend")
|
||||
.runMapreduce(
|
||||
new ExportDomainListsMapper(DateTime.now(UTC), realTlds),
|
||||
new ExportDomainListsReducer(gcsBucket, gcsBufferSize),
|
||||
ImmutableList.of(createEntityInput(DomainResource.class)))));
|
||||
}
|
||||
|
||||
static class ExportDomainListsMapper extends Mapper<DomainResource, String, String> {
|
||||
|
||||
private static final long serialVersionUID = -7312206212434039854L;
|
||||
|
||||
private final DateTime exportTime;
|
||||
private final ImmutableSet<String> realTlds;
|
||||
|
||||
ExportDomainListsMapper(DateTime exportTime, ImmutableSet<String> realTlds) {
|
||||
this.exportTime = exportTime;
|
||||
this.realTlds = realTlds;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void map(DomainResource domain) {
|
||||
if (realTlds.contains(domain.getTld()) && isActive(domain, exportTime)) {
|
||||
emit(domain.getTld(), domain.getFullyQualifiedDomainName());
|
||||
getContext().incrementCounter(String.format("domains in tld %s", domain.getTld()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class ExportDomainListsReducer extends Reducer<String, String, Void> {
|
||||
|
||||
private static final long serialVersionUID = 7035260977259119087L;
|
||||
|
||||
private final String gcsBucket;
|
||||
private final int gcsBufferSize;
|
||||
|
||||
public ExportDomainListsReducer(String gcsBucket, int gcsBufferSize) {
|
||||
this.gcsBucket = gcsBucket;
|
||||
this.gcsBufferSize = gcsBufferSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reduce(String tld, ReducerInput<String> fqdns) {
|
||||
GcsFilename filename = new GcsFilename(gcsBucket, tld + ".txt");
|
||||
GcsUtils cloudStorage =
|
||||
new GcsUtils(createGcsService(RetryParams.getDefaultInstance()), gcsBufferSize);
|
||||
try (OutputStream gcsOutput = cloudStorage.openOutputStream(filename);
|
||||
Writer osWriter = new OutputStreamWriter(gcsOutput, UTF_8);
|
||||
PrintWriter writer = new PrintWriter(osWriter)) {
|
||||
long count;
|
||||
for (count = 0; fqdns.hasNext(); count++) {
|
||||
writer.println(fqdns.next());
|
||||
}
|
||||
writer.flush();
|
||||
getContext().incrementCounter("tld domain lists written out");
|
||||
logger.infofmt("Wrote out %d domains for tld %s.", count, tld);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -52,7 +52,7 @@ import javax.xml.bind.annotation.XmlTransient;
|
|||
public abstract class DomainBase extends EppResource {
|
||||
|
||||
/**
|
||||
* Fully qualified domain name, which serves as the foreign key for this domain.
|
||||
* Fully qualified domain name (puny-coded), which serves as the foreign key for this domain.
|
||||
* <p>
|
||||
* This is only unique in the sense that for any given lifetime specified as the time range from
|
||||
* (creationTime, deletionTime) there can only be one domain in the datastore with this name.
|
||||
|
|
|
@ -28,6 +28,7 @@ import com.google.domain.registry.dns.ReadDnsQueueAction;
|
|||
import com.google.domain.registry.dns.RefreshDns;
|
||||
import com.google.domain.registry.dns.WriteDnsTask;
|
||||
import com.google.domain.registry.export.BigqueryPollJobAction;
|
||||
import com.google.domain.registry.export.ExportDomainListsAction;
|
||||
import com.google.domain.registry.export.ExportRequestModule;
|
||||
import com.google.domain.registry.export.ExportReservedTermsTask;
|
||||
import com.google.domain.registry.export.SyncGroupMembersTask;
|
||||
|
@ -81,6 +82,7 @@ interface BackendRequestComponent {
|
|||
DeleteOldCommitLogsAction deleteOldCommitLogsAction();
|
||||
DnsRefreshForHostRenameAction dnsRefreshForHostRenameAction();
|
||||
ExportCommitLogDiffAction exportCommitLogDiffAction();
|
||||
ExportDomainListsAction exportDomainListsAction();
|
||||
ExportReservedTermsTask exportReservedTermsTask();
|
||||
NordnUploadAction nordnUploadAction();
|
||||
NordnVerifyAction nordnVerifyAction();
|
||||
|
|
|
@ -26,11 +26,13 @@ java_library(
|
|||
"//java/com/google/domain/registry/export",
|
||||
"//java/com/google/domain/registry/gcs",
|
||||
"//java/com/google/domain/registry/groups",
|
||||
"//java/com/google/domain/registry/mapreduce",
|
||||
"//java/com/google/domain/registry/model",
|
||||
"//java/com/google/domain/registry/request",
|
||||
"//java/com/google/domain/registry/storage/drive",
|
||||
"//java/com/google/domain/registry/util",
|
||||
"//javatests/com/google/domain/registry/testing",
|
||||
"//javatests/com/google/domain/registry/testing/mapreduce",
|
||||
"//third_party/java/appengine:appengine-api-testonly",
|
||||
"//third_party/java/appengine:appengine-stubs",
|
||||
"//third_party/java/appengine_gcs_client",
|
||||
|
|
|
@ -0,0 +1,138 @@
|
|||
// Copyright 2016 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.domain.registry.export;
|
||||
|
||||
import static com.google.appengine.tools.cloudstorage.GcsServiceFactory.createGcsService;
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
import static com.google.domain.registry.testing.DatastoreHelper.createTld;
|
||||
import static com.google.domain.registry.testing.DatastoreHelper.persistActiveDomain;
|
||||
import static com.google.domain.registry.testing.DatastoreHelper.persistActiveDomainApplication;
|
||||
import static com.google.domain.registry.testing.DatastoreHelper.persistDeletedDomain;
|
||||
import static com.google.domain.registry.testing.DatastoreHelper.persistResource;
|
||||
import static com.google.domain.registry.testing.GcsTestingUtils.readGcsFile;
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
import com.google.appengine.tools.cloudstorage.GcsFilename;
|
||||
import com.google.appengine.tools.cloudstorage.GcsService;
|
||||
import com.google.appengine.tools.cloudstorage.ListOptions;
|
||||
import com.google.appengine.tools.cloudstorage.ListResult;
|
||||
import com.google.common.base.Optional;
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.domain.registry.mapreduce.MapreduceRunner;
|
||||
import com.google.domain.registry.model.registry.Registry;
|
||||
import com.google.domain.registry.model.registry.Registry.TldType;
|
||||
import com.google.domain.registry.testing.ExceptionRule;
|
||||
import com.google.domain.registry.testing.FakeResponse;
|
||||
import com.google.domain.registry.testing.mapreduce.MapreduceTestCase;
|
||||
|
||||
import org.joda.time.DateTime;
|
||||
import org.junit.Before;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
/** Unit tests for {@link ExportDomainListsAction}. */
|
||||
@RunWith(JUnit4.class)
|
||||
public class ExportDomainListsActionTest extends MapreduceTestCase<ExportDomainListsAction> {
|
||||
|
||||
private GcsService gcsService;
|
||||
|
||||
@Rule
|
||||
public final ExceptionRule thrown = new ExceptionRule();
|
||||
|
||||
@Before
|
||||
public void init() {
|
||||
createTld("tld");
|
||||
createTld("testtld");
|
||||
persistResource(Registry.get("testtld").asBuilder().setTldType(TldType.TEST).build());
|
||||
|
||||
action = new ExportDomainListsAction();
|
||||
action.mrRunner = new MapreduceRunner(Optional.<Integer>absent(), Optional.<Integer>absent());
|
||||
action.response = new FakeResponse();
|
||||
action.gcsBucket = "outputbucket";
|
||||
action.gcsBufferSize = 500;
|
||||
gcsService = createGcsService();
|
||||
}
|
||||
|
||||
private void runMapreduce() throws Exception {
|
||||
action.run();
|
||||
executeTasksUntilEmpty("mapreduce");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_outputsOnlyActiveDomains() throws Exception {
|
||||
persistActiveDomain("onetwo.tld");
|
||||
persistActiveDomain("rudnitzky.tld");
|
||||
persistDeletedDomain("mortuary.tld", DateTime.parse("2001-03-14T10:11:12Z"));
|
||||
runMapreduce();
|
||||
GcsFilename existingFile = new GcsFilename("outputbucket", "tld.txt");
|
||||
String tlds = new String(readGcsFile(gcsService, existingFile), UTF_8).trim();
|
||||
// Check that it only contains the active domains, not the dead one.
|
||||
assertThat(Splitter.on('\n').splitToList(tlds)).containsExactly("onetwo.tld", "rudnitzky.tld");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_outputsOnlyDomainsOnRealTlds() throws Exception {
|
||||
persistActiveDomain("onetwo.tld");
|
||||
persistActiveDomain("rudnitzky.tld");
|
||||
persistActiveDomain("wontgo.testtld");
|
||||
runMapreduce();
|
||||
GcsFilename existingFile = new GcsFilename("outputbucket", "tld.txt");
|
||||
String tlds = new String(readGcsFile(gcsService, existingFile), UTF_8).trim();
|
||||
// Check that it only contains the domains on the real TLD, and not the test one.
|
||||
assertThat(Splitter.on('\n').splitToList(tlds)).containsExactly("onetwo.tld", "rudnitzky.tld");
|
||||
// Make sure that the test TLD file wasn't written out.
|
||||
GcsFilename nonexistentFile = new GcsFilename("outputbucket", "testtld.txt");
|
||||
thrown.expect(FileNotFoundException.class);
|
||||
readGcsFile(gcsService, nonexistentFile);
|
||||
ListResult ls = gcsService.list("outputbucket", ListOptions.DEFAULT);
|
||||
assertThat(ls.next().getName()).isEqualTo("tld.txt");
|
||||
// Make sure that no other files were written out.
|
||||
assertThat(ls.hasNext()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_outputsDomainsFromDifferentTldsToMultipleFiles() throws Exception {
|
||||
createTld("tldtwo");
|
||||
// You'd think this test was written around Christmas, but it wasn't.
|
||||
persistActiveDomain("dasher.tld");
|
||||
persistActiveDomain("prancer.tld");
|
||||
persistActiveDomain("rudolph.tldtwo");
|
||||
persistActiveDomain("santa.tldtwo");
|
||||
persistActiveDomain("buddy.tldtwo");
|
||||
runMapreduce();
|
||||
GcsFilename firstTldFile = new GcsFilename("outputbucket", "tld.txt");
|
||||
String tlds = new String(readGcsFile(gcsService, firstTldFile), UTF_8).trim();
|
||||
assertThat(Splitter.on('\n').splitToList(tlds)).containsExactly("dasher.tld", "prancer.tld");
|
||||
GcsFilename secondTldFile = new GcsFilename("outputbucket", "tldtwo.txt");
|
||||
String moreTlds = new String(readGcsFile(gcsService, secondTldFile), UTF_8).trim();
|
||||
assertThat(Splitter.on('\n').splitToList(moreTlds))
|
||||
.containsExactly("rudolph.tldtwo", "santa.tldtwo", "buddy.tldtwo");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_doesntOutputDomainApplications() throws Exception {
|
||||
persistActiveDomain("chilipepper.tld");
|
||||
persistActiveDomainApplication("nagajolokia.tld");
|
||||
runMapreduce();
|
||||
GcsFilename firstTldFile = new GcsFilename("outputbucket", "tld.txt");
|
||||
String tlds = new String(readGcsFile(gcsService, firstTldFile), UTF_8).trim();
|
||||
// Check that it didn't output nagajolokia.tld.
|
||||
assertThat(Splitter.on('\n').splitToList(tlds)).containsExactly("chilipepper.tld");
|
||||
}
|
||||
}
|
|
@ -286,8 +286,7 @@ public class DatastoreHelper {
|
|||
}
|
||||
|
||||
public static DomainResource persistDomainAsDeleted(DomainResource domain, DateTime now) {
|
||||
return persistResource(
|
||||
domain.asBuilder().setDeletionTime(now.minusDays(1)).build());
|
||||
return persistResource(domain.asBuilder().setDeletionTime(now.minusDays(1)).build());
|
||||
}
|
||||
|
||||
/** Persists a domain and enqueues a LORDN task of the appropriate type for it. */
|
||||
|
|
Loading…
Add table
Reference in a new issue