Add mapreduce for RDE hosts import

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=136766682
This commit is contained in:
Wolfgang Meyers 2016-07-22 15:13:40 -04:00 committed by Ben McIlwain
parent 975f574256
commit 4cfe107475
22 changed files with 3654 additions and 0 deletions

View file

@ -279,6 +279,12 @@
<url-pattern>/_dr/task/importRdeContacts</url-pattern>
</servlet-mapping>
<!-- Mapreduce to import hosts from escrow file -->
<servlet-mapping>
<servlet-name>backend-servlet</servlet-name>
<url-pattern>/_dr/task/importRdeHosts</url-pattern>
</servlet-mapping>
<!-- Security config -->
<security-constraint>
<web-resource-collection>

View file

@ -50,6 +50,7 @@ import google.registry.monitoring.whitebox.VerifyEntityIntegrityAction;
import google.registry.monitoring.whitebox.WhiteboxModule;
import google.registry.rde.BrdaCopyAction;
import google.registry.rde.RdeContactImportAction;
import google.registry.rde.RdeHostImportAction;
import google.registry.rde.RdeModule;
import google.registry.rde.RdeReportAction;
import google.registry.rde.RdeReporter;
@ -103,6 +104,7 @@ interface BackendRequestComponent {
PublishDnsUpdatesAction publishDnsUpdatesAction();
ReadDnsQueueAction readDnsQueueAction();
RdeContactImportAction rdeContactImportAction();
RdeHostImportAction rdeHostImportAction();
RdeReportAction rdeReportAction();
RdeStagingAction rdeStagingAction();
RdeUploadAction rdeUploadAction();

View file

@ -0,0 +1,114 @@
// Copyright 2016 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.rde;
import static google.registry.mapreduce.MapreduceRunner.PARAM_MAP_SHARDS;
import static google.registry.model.ofy.ObjectifyService.ofy;
import static google.registry.rde.RdeModule.PATH;
import static google.registry.util.PipelineUtils.createJobPath;
import com.google.appengine.tools.cloudstorage.GcsService;
import com.google.appengine.tools.cloudstorage.GcsServiceFactory;
import com.google.appengine.tools.cloudstorage.RetryParams;
import com.google.appengine.tools.mapreduce.Mapper;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import google.registry.config.ConfigModule;
import google.registry.config.ConfigModule.Config;
import google.registry.gcs.GcsUtils;
import google.registry.mapreduce.MapreduceRunner;
import google.registry.model.host.HostResource;
import google.registry.request.Action;
import google.registry.request.Parameter;
import google.registry.request.Response;
import google.registry.util.SystemClock;
import javax.inject.Inject;
/**
 * Backend action that launches a map-only mapreduce importing hosts from an escrow file.
 *
 * <p>Specify the escrow file to import with the "path" parameter.
 */
@Action(path = "/_dr/task/importRdeHosts")
public class RdeHostImportAction implements Runnable {

  /** GCS client shared by every mapper instance running in this JVM. */
  private static final GcsService GCS_SERVICE =
      GcsServiceFactory.createGcsService(RetryParams.getDefaultInstance());

  private final MapreduceRunner mrRunner;
  private final Response response;
  private final String importBucketName;
  private final String importFileName;
  private final Optional<Integer> mapShards;

  /**
   * Creates the action from its injected collaborators and request parameters.
   *
   * @param mrRunner runner used to start the mapreduce
   * @param response response that receives the redirect to the job status page
   * @param importBucketName name of the GCS bucket configured for escrow file imports
   * @param importFileName name of the escrow file, taken from the {@code PATH} parameter
   * @param mapShards optional number of map shards requested by the caller
   */
  @Inject
  public RdeHostImportAction(
      MapreduceRunner mrRunner,
      Response response,
      @Config("rdeImportBucket") String importBucketName,
      @Parameter(PATH) String importFileName,
      @Parameter(PARAM_MAP_SHARDS) Optional<Integer> mapShards) {
    this.mrRunner = mrRunner;
    this.response = response;
    this.importBucketName = importBucketName;
    this.importFileName = importFileName;
    this.mapShards = mapShards;
  }

  /** Starts the map-only job and redirects the caller to the job's status page. */
  @Override
  public void run() {
    RdeHostImportMapper hostMapper = new RdeHostImportMapper(importBucketName);
    RdeHostInput hostInput = new RdeHostInput(mapShards, importBucketName, importFileName);
    response.sendJavaScriptRedirect(
        createJobPath(
            mrRunner
                .setJobName("Import hosts from escrow file")
                .setModuleName("backend")
                .runMapOnly(hostMapper, ImmutableList.of(hostInput))));
  }

  /** Mapper to import hosts from an escrow file. */
  public static class RdeHostImportMapper extends Mapper<HostResource, Void, Void> {

    private static final long serialVersionUID = -2898753709127134419L;

    private final String importBucketName;

    /** Built on first use; transient, so it is not carried across serialization. */
    private transient RdeImportUtils importUtils;

    /**
     * Creates a mapper bound to the given import bucket.
     *
     * @param importBucketName name of the GCS bucket for escrow file imports
     */
    public RdeHostImportMapper(String importBucketName) {
      this.importBucketName = importBucketName;
    }

    /** Returns the import helper, constructing it the first time it is requested. */
    private RdeImportUtils getImportUtils() {
      if (importUtils != null) {
        return importUtils;
      }
      importUtils =
          new RdeImportUtils(
              ofy(),
              new SystemClock(),
              importBucketName,
              new GcsUtils(GCS_SERVICE, ConfigModule.provideGcsBufferSize()));
      return importUtils;
    }

    /** Imports a single host read from the escrow file. */
    @Override
    public void map(HostResource host) {
      getImportUtils().importHost(host);
    }
  }
}

View file

@ -0,0 +1,132 @@
// Copyright 2016 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.rde;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.appengine.tools.cloudstorage.GcsFilename;
import com.google.appengine.tools.cloudstorage.GcsService;
import com.google.appengine.tools.cloudstorage.GcsServiceFactory;
import com.google.appengine.tools.cloudstorage.RetryParams;
import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import google.registry.config.ConfigModule;
import google.registry.gcs.GcsUtils;
import google.registry.model.host.HostResource;
import google.registry.rde.RdeParser.RdeHeader;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
/**
 * A MapReduce {@link Input} that imports {@link HostResource} objects from an escrow file.
 *
 * <p>If a mapShards parameter has been specified, up to that many readers will be created
 * so that each map shard has one reader. If a mapShards parameter has not been specified, a
 * default number of readers will be created. In both cases fewer readers are created when the
 * file holds too few hosts to give every reader {@code MINIMUM_RECORDS_PER_READER} records.
 */
public class RdeHostInput extends Input<HostResource> {

  private static final long serialVersionUID = 9218225041307602452L;

  private static final GcsService GCS_SERVICE =
      GcsServiceFactory.createGcsService(RetryParams.getDefaultInstance());

  /** Default number of readers if map shards are not specified. */
  private static final int DEFAULT_READERS = 50;

  /** Minimum number of records per reader. */
  private static final int MINIMUM_RECORDS_PER_READER = 100;

  /** Requested number of readers: the mapShards value when present, else the default. */
  private final int numReaders;

  private final String importBucketName;
  private final String importFileName;

  /**
   * Creates a new {@link RdeHostInput}
   *
   * @param mapShards Number of readers that should be created
   * @param importBucketName Name of GCS bucket for escrow file imports
   * @param importFileName Name of escrow file in GCS
   * @throws IllegalArgumentException if the resulting number of readers is not positive
   */
  public RdeHostInput(Optional<Integer> mapShards, String importBucketName,
      String importFileName) {
    this.numReaders = mapShards.or(DEFAULT_READERS);
    checkArgument(numReaders > 0, "Number of shards must be greater than zero");
    this.importBucketName = importBucketName;
    this.importFileName = importFileName;
  }

  /**
   * Splits the escrow file into consecutive, equally sized windows of hosts and creates one
   * reader per window.
   *
   * @return the readers, ordered by their offset into the file
   * @throws IOException if reading the header or closing the escrow file fails
   */
  @Override
  public List<? extends InputReader<HostResource>> createReaders() throws IOException {
    int numberOfHosts = readHostCount();
    int numReaders = this.numReaders;
    if (numberOfHosts / numReaders < MINIMUM_RECORDS_PER_READER) {
      // Too few hosts to keep every requested reader busy; shrink, but use at least one reader.
      numReaders = Math.max(numberOfHosts / MINIMUM_RECORDS_PER_READER, 1);
    }
    int hostsPerReader =
        Math.max(MINIMUM_RECORDS_PER_READER, (int) Math.ceil((double) numberOfHosts / numReaders));
    ImmutableList.Builder<RdeHostReader> builder = new ImmutableList.Builder<>();
    int offset = 0;
    for (int i = 0; i < numReaders; i++) {
      builder.add(createReader(offset, hostsPerReader));
      offset += hostsPerReader;
    }
    return builder.build();
  }

  /**
   * Reads the host count from the escrow file header.
   *
   * <p>The parser is only needed for the header, so it is closed before returning instead of
   * being left open.
   */
  private int readHostCount() throws IOException {
    RdeParser parser = createParser();
    try {
      RdeHeader header = parser.getHeader();
      return header.getHostCount().intValue();
    } finally {
      parser.close();
    }
  }

  /**
   * Creates a new instance of {@link RdeHostReader}
   */
  private RdeHostReader createReader(int offset, int maxResults) {
    return new RdeHostReader(importBucketName, importFileName, offset, maxResults);
  }

  /**
   * Creates a new instance of {@link RdeParser}
   *
   * <p>The caller owns the returned parser and must close it.
   *
   * @throws InitializationException if the parser cannot be created from the escrow file
   */
  private RdeParser createParser() {
    GcsUtils utils = new GcsUtils(GCS_SERVICE, ConfigModule.provideGcsBufferSize());
    GcsFilename filename = new GcsFilename(importBucketName, importFileName);
    InputStream xmlInput = utils.openInputStream(filename);
    try {
      return new RdeParser(xmlInput);
    } catch (Exception e) {
      // No parser took ownership of the stream, so release it here.
      try {
        xmlInput.close();
      } catch (IOException | RuntimeException closeFailure) {
        e.addSuppressed(closeFailure);
      }
      throw new InitializationException(
          String.format("Error opening rde file %s/%s", importBucketName, importFileName), e);
    }
  }

  /**
   * Thrown when the input cannot initialize properly.
   */
  private static class InitializationException extends RuntimeException {
    public InitializationException(String message, Throwable cause) {
      super(message, cause);
    }
  }
}

View file

@ -0,0 +1,107 @@
// Copyright 2016 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.rde;
import com.google.appengine.tools.cloudstorage.GcsFilename;
import com.google.appengine.tools.cloudstorage.GcsService;
import com.google.appengine.tools.cloudstorage.GcsServiceFactory;
import com.google.appengine.tools.cloudstorage.RetryParams;
import com.google.appengine.tools.mapreduce.InputReader;
import google.registry.config.ConfigModule;
import google.registry.gcs.GcsUtils;
import google.registry.model.host.HostResource;
import google.registry.util.FormattingLogger;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.NoSuchElementException;
import javax.annotation.concurrent.NotThreadSafe;
/**
 * Mapreduce {@link InputReader} for reading hosts from escrow files.
 *
 * <p>Each reader returns at most {@code maxResults} hosts, starting {@code offset} hosts into the
 * file. The parser is transient and opened lazily, so a reader that has been deserialized, or
 * reused after {@link #endSlice}, reopens the file and skips the hosts it has already returned.
 */
@NotThreadSafe
public class RdeHostReader extends InputReader<HostResource> implements Serializable {

  private static final long serialVersionUID = 3037264959150412846L;

  private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();

  private static final GcsService GCS_SERVICE =
      GcsServiceFactory.createGcsService(RetryParams.getDefaultInstance());

  /** Name of the GCS bucket that holds the escrow file. */
  final String importBucketName;

  /** Name of the escrow file within the bucket. */
  final String importFileName;

  /** Number of hosts in the file that precede the first host of this reader. */
  final int offset;

  /** Maximum number of hosts this reader returns. */
  final int maxResults;

  /** Number of hosts returned so far; serialized with the reader so work can resume. */
  private int count = 0;

  /** Lazily opened parser; null whenever no file is currently open. */
  transient RdeParser parser;

  /**
   * Creates a reader over one window of hosts in an escrow file.
   *
   * @param importBucketName name of the GCS bucket for escrow file imports
   * @param importFileName name of the escrow file in GCS
   * @param offset number of hosts to skip before the first host of this reader
   * @param maxResults maximum number of hosts this reader returns
   */
  public RdeHostReader(
      String importBucketName,
      String importFileName,
      int offset,
      int maxResults) {
    this.importBucketName = importBucketName;
    this.importFileName = importFileName;
    this.offset = offset;
    this.maxResults = maxResults;
  }

  /**
   * Creates a new instance of {@link RdeParser}
   *
   * <p>The returned parser has already skipped the hosts before this reader's window plus any
   * hosts this reader has returned previously.
   */
  private RdeParser newParser() {
    GcsUtils utils = new GcsUtils(GCS_SERVICE, ConfigModule.provideGcsBufferSize());
    GcsFilename filename = new GcsFilename(importBucketName, importFileName);
    InputStream xmlInput = utils.openInputStream(filename);
    try {
      RdeParser opened = new RdeParser(xmlInput);
      // skip the file offset and count
      // if count is greater than 0, the reader has been rehydrated after doing some work.
      // skip any already processed records.
      opened.skipHosts(offset + count);
      return opened;
    } catch (Exception e) {
      logger.severefmt(e, "Error opening rde file %s/%s", importBucketName, importFileName);
      // Nothing will ever close the stream for us on this path, so release it here.
      try {
        xmlInput.close();
      } catch (IOException | RuntimeException closeFailure) {
        e.addSuppressed(closeFailure);
      }
      throw new RuntimeException(e);
    }
  }

  /**
   * Returns the next host in this reader's window.
   *
   * @throws NoSuchElementException once {@code maxResults} hosts have been returned or the file
   *     contains no further hosts
   */
  @Override
  public HostResource next() throws IOException {
    if (count >= maxResults) {
      throw new NoSuchElementException();
    }
    boolean hostAvailable;
    if (parser == null) {
      parser = newParser();
      // A freshly positioned parser may already sit on the first unread host.
      hostAvailable = parser.isAtHost() || parser.nextHost();
    } else {
      hostAvailable = parser.nextHost();
    }
    if (!hostAvailable) {
      throw new NoSuchElementException();
    }
    count++;
    return XjcToHostResourceConverter.convert(parser.getHost());
  }

  /**
   * Closes the parser at the end of a slice.
   *
   * <p>The reference is cleared as well, so that a reader instance that is reused without a
   * serialization round trip reopens the file instead of reading from a closed parser.
   */
  @Override
  public void endSlice() throws IOException {
    super.endSlice();
    if (parser != null) {
      try {
        parser.close();
      } finally {
        parser = null;
      }
    }
  }
}

View file

@ -24,6 +24,7 @@ import google.registry.config.ConfigModule.Config;
import google.registry.gcs.GcsUtils;
import google.registry.model.EppResource;
import google.registry.model.contact.ContactResource;
import google.registry.model.host.HostResource;
import google.registry.model.index.EppResourceIndex;
import google.registry.model.index.ForeignKeyIndex;
import google.registry.model.ofy.Ofy;
@ -99,6 +100,19 @@ public class RdeImportUtils {
}
/**
 * Imports a single host read from an escrow file.
 *
 * <p>The work is delegated to {@code importEppResource}, which is told that the resource is a
 * "host". A host that has been imported before is not imported again.
 *
 * <p>Whenever the host is imported, its {@link ForeignKeyIndex} and {@link EppResourceIndex} are
 * created as well.
 *
 * @param resource the host to import
 * @return true if the host was created or updated, false otherwise.
 */
public boolean importHost(final HostResource resource) {
  final boolean imported = importEppResource(resource, "host");
  return imported;
}
/**
* Imports a contact from an escrow file.
*

View file

@ -0,0 +1,76 @@
// Copyright 2016 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.rde;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.net.InetAddresses;
import google.registry.model.contact.ContactResource;
import google.registry.model.eppcommon.StatusValue;
import google.registry.model.host.HostResource;
import google.registry.xjc.host.XjcHostAddrType;
import google.registry.xjc.host.XjcHostStatusType;
import google.registry.xjc.rdecontact.XjcRdeContact;
import google.registry.xjc.rdehost.XjcRdeHost;
import java.net.InetAddress;
/** Utility class that converts an {@link XjcRdeHost} into a {@link HostResource}. */
public class XjcToHostResourceConverter {

  /** Maps an XML host status to the {@link StatusValue} with the same XML name. */
  private static final Function<XjcHostStatusType, StatusValue> TO_STATUS_VALUE =
      new Function<XjcHostStatusType, StatusValue>() {
        @Override
        public StatusValue apply(XjcHostStatusType status) {
          return StatusValue.fromXmlName(status.getS().value());
        }
      };

  /** Parses the textual IP address carried by an XML host address element. */
  private static final Function<XjcHostAddrType, InetAddress> TO_INET_ADDRESS =
      new Function<XjcHostAddrType, InetAddress>() {
        @Override
        public InetAddress apply(XjcHostAddrType addr) {
          return InetAddresses.forString(addr.getValue());
        }
      };

  /**
   * Builds a {@link HostResource} from the fields of an escrow file host element.
   *
   * <p>The client id of the last update is set to null when the element has no {@code upRr}.
   *
   * @param host the host element read from the escrow file
   * @return the equivalent host resource
   */
  static HostResource convert(XjcRdeHost host) {
    return new HostResource.Builder()
        .setFullyQualifiedHostName(host.getName())
        .setRepoId(host.getRoid())
        .setCurrentSponsorClientId(host.getClID())
        .setLastTransferTime(host.getTrDate())
        .setCreationTime(host.getCrDate())
        .setLastEppUpdateTime(host.getUpDate())
        .setCreationClientId(host.getCrRr().getValue())
        .setLastEppUpdateClientId(host.getUpRr() == null ? null : host.getUpRr().getValue())
        .setStatusValues(statusValuesOf(host))
        .setInetAddresses(inetAddressesOf(host))
        .build();
  }

  /** Collects the statuses of the host element as a set of {@link StatusValue}. */
  private static ImmutableSet<StatusValue> statusValuesOf(XjcRdeHost host) {
    return ImmutableSet.copyOf(Lists.transform(host.getStatuses(), TO_STATUS_VALUE));
  }

  /** Collects the addresses of the host element as a set of {@link InetAddress}. */
  private static ImmutableSet<InetAddress> inetAddressesOf(XjcRdeHost host) {
    return ImmutableSet.copyOf(Lists.transform(host.getAddrs(), TO_INET_ADDRESS));
  }

  private XjcToHostResourceConverter() {}
}