mirror of https://github.com/google/nomulus.git
synced 2025-06-28 15:23:34 +02:00

Import code from internal repository to git
commit 0ef0c933d2
2490 changed files with 281594 additions and 0 deletions
java/com/google/domain/registry/mapreduce/BUILD (Normal file, 26 lines)
@@ -0,0 +1,26 @@
package(
    default_visibility = ["//java/com/google/domain/registry:registry_project"],
)

java_library(
    name = "mapreduce",
    srcs = glob(["*.java"]),
    deps = [
        "//java/com/google/common/annotations",
        "//java/com/google/common/base",
        "//java/com/google/common/collect",
        "//java/com/google/domain/registry/config",
        "//java/com/google/domain/registry/model",
        "//java/com/google/domain/registry/request",
        "//java/com/google/domain/registry/util",
        "//third_party/java/appengine:appengine-api",
        "//third_party/java/appengine_mapreduce2:appengine_mapreduce",
        "//third_party/java/appengine_pipeline",
        "//third_party/java/dagger",
        "//third_party/java/joda_time",
        "//third_party/java/jsr330_inject",
        "//third_party/java/objectify:objectify-v4_1",
        "//third_party/java/servlet/servlet_api",
    ],
)
java/com/google/domain/registry/mapreduce/ChunkingKeyInput.java (Normal file, 113 lines)
@@ -0,0 +1,113 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.domain.registry.mapreduce;

import com.google.appengine.api.datastore.Key;
import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.common.collect.ImmutableList;

import java.io.IOException;
import java.util.List;
import java.util.NoSuchElementException;

/** A MapReduce {@link Input} adapter that chunks an input of keys into sublists of keys. */
public class ChunkingKeyInput extends Input<List<Key>> {

  private static final long serialVersionUID = 1670202385246824694L;

  private final Input<Key> input;
  private final int chunkSize;

  public ChunkingKeyInput(Input<Key> input, int chunkSize) {
    this.input = input;
    this.chunkSize = chunkSize;
  }

  /**
   * An input reader that wraps around another input reader and returns its contents in chunks of
   * a given size.
   */
  private static class ChunkingKeyInputReader extends InputReader<List<Key>> {

    private static final long serialVersionUID = 53502324675703263L;

    private final InputReader<Key> reader;
    private final int chunkSize;

    ChunkingKeyInputReader(InputReader<Key> reader, int chunkSize) {
      this.reader = reader;
      this.chunkSize = chunkSize;
    }

    @Override
    public List<Key> next() throws IOException {
      ImmutableList.Builder<Key> chunk = new ImmutableList.Builder<>();
      try {
        for (int i = 0; i < chunkSize; i++) {
          chunk.add(reader.next());
        }
      } catch (NoSuchElementException e) {
        // Amazingly this is the recommended (and only) way to test for hasNext().
      }
      ImmutableList<Key> builtChunk = chunk.build();
      if (builtChunk.isEmpty()) {
        throw new NoSuchElementException();  // Maintain the contract.
      }
      return builtChunk;
    }

    @Override
    public Double getProgress() {
      return reader.getProgress();
    }

    @Override
    public void beginShard() throws IOException {
      reader.beginShard();
    }

    @Override
    public void beginSlice() throws IOException {
      reader.beginSlice();
    }

    @Override
    public void endSlice() throws IOException {
      reader.endSlice();
    }

    @Override
    public void endShard() throws IOException {
      reader.endShard();
    }

    @Override
    public long estimateMemoryRequirement() {
      // The reader's memory requirement plus the memory for this chunk's worth of buffered keys.
      // 256 comes from DatastoreKeyInputReader.AVERAGE_KEY_SIZE.
      return reader.estimateMemoryRequirement() + chunkSize * 256;
    }
  }

  @Override
  public List<InputReader<List<Key>>> createReaders() throws IOException {
    ImmutableList.Builder<InputReader<List<Key>>> readers = new ImmutableList.Builder<>();
    for (InputReader<Key> reader : input.createReaders()) {
      readers.add(new ChunkingKeyInputReader(reader, chunkSize));
    }
    return readers.build();
  }
}
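The next() logic above is the crux of this file: it drains up to chunkSize keys from the wrapped reader and turns the wrapped reader's end-of-input exception into either a final short chunk or a NoSuchElementException of its own. Below is a minimal, dependency-free sketch of the same chunking pattern, using a plain Iterator in place of an App Engine InputReader; all names here are illustrative, not part of the commit.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

public class ChunkingDemo {
  /** Drains up to chunkSize elements, mirroring ChunkingKeyInputReader.next(). */
  static <T> List<T> nextChunk(Iterator<T> it, int chunkSize) {
    List<T> chunk = new ArrayList<>();
    for (int i = 0; i < chunkSize && it.hasNext(); i++) {
      chunk.add(it.next());
    }
    if (chunk.isEmpty()) {
      throw new NoSuchElementException();  // Maintain the reader contract.
    }
    return chunk;
  }

  public static void main(String[] args) {
    Iterator<Integer> keys = Arrays.asList(1, 2, 3, 4, 5).iterator();
    System.out.println(nextChunk(keys, 2));  // [1, 2]
    System.out.println(nextChunk(keys, 2));  // [3, 4]
    System.out.println(nextChunk(keys, 2));  // [5] -- a final short chunk
  }
}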
java/com/google/domain/registry/mapreduce/ConcatenatingInput.java (Normal file, 66 lines)
@@ -0,0 +1,66 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.domain.registry.mapreduce;

import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.appengine.tools.mapreduce.inputs.ConcatenatingInputReader;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ListMultimap;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Set;

/**
 * A MapReduce {@link Input} adapter that joins multiple inputs.
 *
 * @param <T> input type
 */
public class ConcatenatingInput<T> extends Input<T> {

  private static final long serialVersionUID = 1225981408139437077L;

  private final Set<? extends Input<? extends T>> inputs;
  private final int numShards;

  public ConcatenatingInput(Iterable<? extends Input<? extends T>> inputs, int numShards) {
    this.inputs = ImmutableSet.copyOf(inputs);
    this.numShards = numShards;
  }

  @Override
  public List<InputReader<T>> createReaders() throws IOException {
    ListMultimap<Integer, InputReader<T>> shards = ArrayListMultimap.create();
    int i = 0;
    for (Input<? extends T> input : inputs) {
      for (InputReader<? extends T> reader : input.createReaders()) {
        // Covariant cast is safe because an InputReader<I> only outputs I and never consumes it.
        @SuppressWarnings("unchecked")
        InputReader<T> typedReader = (InputReader<T>) reader;
        shards.put(i % numShards, typedReader);
        i++;
      }
    }
    ImmutableList.Builder<InputReader<T>> concatenatingReaders = new ImmutableList.Builder<>();
    for (Collection<InputReader<T>> shard : shards.asMap().values()) {
      concatenatingReaders.add(new ConcatenatingInputReader<>(ImmutableList.copyOf(shard)));
    }
    return concatenatingReaders.build();
  }
}
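The round-robin assignment (shards.put(i % numShards, typedReader)) is what caps the shard count while still consuming every underlying reader. A standalone sketch of that distribution step, with plain lists standing in for readers; names are illustrative and, like the multimap in the real code, no empty shards are produced.

import java.util.ArrayList;
import java.util.List;

public class RoundRobinShardingDemo {
  /** Mirrors ConcatenatingInput.createReaders(): item i goes to shard (i % numShards). */
  static <T> List<List<T>> shard(List<T> readers, int numShards) {
    List<List<T>> shards = new ArrayList<>();
    // Only as many shards as can be non-empty, matching the multimap behavior.
    for (int s = 0; s < Math.min(numShards, readers.size()); s++) {
      shards.add(new ArrayList<T>());
    }
    for (int i = 0; i < readers.size(); i++) {
      shards.get(i % numShards).add(readers.get(i));
    }
    return shards;
  }

  public static void main(String[] args) {
    // Seven readers over three shards: [r0, r3, r6], [r1, r4], [r2, r5].
    List<String> readers = List.of("r0", "r1", "r2", "r3", "r4", "r5", "r6");
    System.out.println(shard(readers, 3));
  }
}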
java/com/google/domain/registry/mapreduce/EppResourceInputs.java (Normal file, 396 lines)
@@ -0,0 +1,396 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.domain.registry.mapreduce;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Predicates.not;
import static com.google.common.collect.Iterables.all;
import static com.google.common.collect.Lists.asList;
import static com.google.domain.registry.model.EntityClasses.CLASS_TO_KIND_FUNCTION;
import static com.google.domain.registry.model.ofy.ObjectifyService.ofy;
import static com.google.domain.registry.util.CollectionUtils.difference;
import static com.google.domain.registry.util.TypeUtils.hasAnnotation;

import com.google.appengine.api.datastore.Cursor;
import com.google.appengine.api.datastore.QueryResultIterator;
import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.domain.registry.model.EppResource;
import com.google.domain.registry.model.index.EppResourceIndex;
import com.google.domain.registry.model.index.EppResourceIndexBucket;
import com.google.domain.registry.util.FormattingLogger;

import com.googlecode.objectify.Key;
import com.googlecode.objectify.Ref;
import com.googlecode.objectify.annotation.EntitySubclass;
import com.googlecode.objectify.cmd.Query;

import java.util.List;
import java.util.NoSuchElementException;

/**
 * Mapreduce {@link Input} types (and related helpers) for {@link EppResource} keys and objects.
 *
 * <p>The inputs provided by this class are not deletion-aware and do not project the resources
 * forward in time. That is the responsibility of mappers that use these inputs.
 */
public class EppResourceInputs {

  private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();

  /** Number of bytes in 1MB of memory, used for memory estimates. */
  private static final long ONE_MB = 1024 * 1024;

  /** Returns a MapReduce {@link Input} that loads all {@link EppResourceIndex} objects. */
  public static <R extends EppResource> Input<EppResourceIndex> createIndexInput() {
    return new IndexInput();
  }

  /**
   * Returns a MapReduce {@link Input} that loads all {@link EppResource} objects of a given type,
   * including deleted resources.
   *
   * <p>Note: Do not concatenate multiple EntityInputs together (this is inefficient as it iterates
   * through all buckets multiple times). Specify the types in a single input, or load all types by
   * specifying {@link EppResource} as the class.
   */
  @SafeVarargs
  public static <R extends EppResource> Input<R> createEntityInput(
      Class<? extends R> resourceClass,
      Class<? extends R>... moreResourceClasses) {
    return new EntityInput<R>(ImmutableSet.copyOf(asList(resourceClass, moreResourceClasses)));
  }

  /**
   * Returns a MapReduce {@link Input} that loads keys to all {@link EppResource} objects of a given
   * type, including deleted resources.
   *
   * <p>Note: Do not concatenate multiple KeyInputs together (this is inefficient as it iterates
   * through all buckets multiple times). Specify the types in a single input, or load all types by
   * specifying {@link EppResource} as the class.
   */
  @SafeVarargs
  public static <R extends EppResource> Input<Key<R>> createKeyInput(
      Class<? extends R> resourceClass,
      Class<? extends R>... moreResourceClasses) {
    ImmutableSet<Class<? extends R>> resourceClasses =
        ImmutableSet.copyOf(asList(resourceClass, moreResourceClasses));
    checkArgument(
        all(resourceClasses, not(hasAnnotation(EntitySubclass.class))),
        "Mapping over keys requires a non-polymorphic Entity");
    return new KeyInput<>(resourceClasses);
  }

  /** Base class for {@link Input} classes that map over {@link EppResourceIndex}. */
  private abstract static class BaseInput<R, I> extends Input<I> {

    private static final long serialVersionUID = -6681886718929462122L;

    @Override
    public List<InputReader<I>> createReaders() {
      ImmutableList.Builder<InputReader<I>> readers = new ImmutableList.Builder<>();
      for (Key<EppResourceIndexBucket> bucketKey : EppResourceIndexBucket.getAllBuckets()) {
        readers.add(bucketToReader(bucketKey));
      }
      return readers.build();
    }

    /** Creates a reader that returns the resources under a bucket. */
    protected abstract InputReader<I> bucketToReader(Key<EppResourceIndexBucket> bucketKey);
  }

  /**
   * A MapReduce {@link Input} that loads all {@link EppResourceIndex} entities.
   */
  private static class IndexInput extends BaseInput<EppResourceIndex, EppResourceIndex> {

    private static final long serialVersionUID = -1231269296567279059L;

    @Override
    protected InputReader<EppResourceIndex> bucketToReader(Key<EppResourceIndexBucket> bucketKey) {
      return new IndexReader(bucketKey);
    }
  }

  /** A MapReduce {@link Input} that loads all {@link EppResource} objects of a given type. */
  private static class EntityInput<R extends EppResource> extends BaseInput<R, R> {

    private static final long serialVersionUID = 8162607479124406226L;

    private final ImmutableSet<Class<? extends R>> resourceClasses;

    public EntityInput(ImmutableSet<Class<? extends R>> resourceClasses) {
      this.resourceClasses = resourceClasses;
      checkResourceClassesForInheritance(resourceClasses);
    }

    @Override
    protected InputReader<R> bucketToReader(Key<EppResourceIndexBucket> bucketKey) {
      return new EntityReader<R>(bucketKey, resourceClasses);
    }
  }

  /**
   * A MapReduce {@link Input} that loads keys to all {@link EppResource} objects of a given type.
   *
   * <p>When mapping over keys we can't distinguish between Objectify polymorphic types.
   */
  private static class KeyInput<R extends EppResource> extends BaseInput<R, Key<R>> {

    private static final long serialVersionUID = -5426821384707653743L;

    private final ImmutableSet<Class<? extends R>> resourceClasses;

    public KeyInput(ImmutableSet<Class<? extends R>> resourceClasses) {
      this.resourceClasses = resourceClasses;
      checkResourceClassesForInheritance(resourceClasses);
    }

    @Override
    protected InputReader<Key<R>> bucketToReader(Key<EppResourceIndexBucket> bucketKey) {
      return new KeyReader<>(bucketKey, resourceClasses);
    }
  }

  /** Base class for {@link InputReader} classes that map over {@link EppResourceIndex}. */
  private abstract static class BaseReader<T> extends InputReader<T> {

    private static final long serialVersionUID = -2970253037856017147L;

    /**
     * The resource kinds to filter for.
     *
     * <p>This can be empty, or any of {"ContactResource", "HostResource", "DomainBase"}. It will
     * never contain "EppResource", "DomainResource" or "DomainApplication" since these aren't
     * actual kinds in Datastore.
     */
    private final ImmutableSet<String> filterKinds;

    private final Key<EppResourceIndexBucket> bucketKey;
    private final long memoryEstimate;

    private Cursor cursor;
    private int total;
    private int loaded;

    private transient QueryResultIterator<EppResourceIndex> queryIterator;

    BaseReader(
        Key<EppResourceIndexBucket> bucketKey,
        long memoryEstimate,
        ImmutableSet<String> filterKinds) {
      this.bucketKey = bucketKey;
      this.memoryEstimate = memoryEstimate;
      this.filterKinds = filterKinds;
    }

    /** Called once at start. Cache the expected size. */
    @Override
    public void beginShard() {
      total = query().count();
    }

    /** Called every time we are deserialized. Create a new query or resume an existing one. */
    @Override
    public void beginSlice() {
      Query<EppResourceIndex> query = query();
      if (cursor != null) {
        // The underlying query is strongly consistent, and according to the documentation at
        // https://cloud.google.com/appengine/docs/java/datastore/queries#Java_Data_consistency
        // "strongly consistent queries are always transactionally consistent". However, each time
        // we restart the query at a cursor we have a new effective query, and "if the results for a
        // query change between uses of a cursor, the query notices only changes that occur in
        // results after the cursor. If a new result appears before the cursor's position for the
        // query, it will not be returned when the results after the cursor are fetched."
        // What this means in practice is that entities that are created after the initial query
        // begins may or may not be seen by this reader, depending on whether the query was
        // paused and restarted with a cursor before it would have reached the new entity.
        query = query.startAt(cursor);
      }
      queryIterator = query.iterator();
    }

    /** Called occasionally alongside {@link #next}. */
    @Override
    public Double getProgress() {
      // Cap progress at 1.0, since the query's count() can increase during the run of the mapreduce
      // if more entities are written, but we've cached the value once in "total".
      return Math.min(1.0, ((double) loaded) / total);
    }

    /** Called before we are serialized. Save a serializable cursor for this query. */
    @Override
    public void endSlice() {
      cursor = queryIterator.getCursor();
    }

    /** Query for children of this bucket. */
    Query<EppResourceIndex> query() {
      Query<EppResourceIndex> query = ofy().load().type(EppResourceIndex.class).ancestor(bucketKey);
      return filterKinds.isEmpty() ? query : query.filter("kind in", filterKinds);
    }

    /** Returns the estimated memory that will be used by this reader in bytes. */
    @Override
    public long estimateMemoryRequirement() {
      return memoryEstimate;
    }

    /**
     * Get the next {@link EppResourceIndex} from the query.
     *
     * @throws NoSuchElementException if there are no more elements.
     */
    EppResourceIndex nextEri() {
      loaded++;
      try {
        return queryIterator.next();
      } finally {
        ofy().clearSessionCache();  // Try not to leak memory.
      }
    }
  }

  /** Reader that maps over {@link EppResourceIndex} and returns the index objects themselves. */
  private static class IndexReader extends BaseReader<EppResourceIndex> {

    private static final long serialVersionUID = -4816383426796766911L;

    public IndexReader(Key<EppResourceIndexBucket> bucketKey) {
      // Estimate 1MB of memory for this reader, which is massive overkill.
      // Use an empty set for the filter kinds, which disables filtering.
      super(bucketKey, ONE_MB, ImmutableSet.<String>of());
    }

    /**
     * Called for each map invocation.
     *
     * @throws NoSuchElementException if there are no more elements, as specified in the
     *     {@link InputReader#next} Javadoc.
     */
    @Override
    public EppResourceIndex next() throws NoSuchElementException {
      return nextEri();
    }
  }

  /**
   * Reader that maps over {@link EppResourceIndex} and returns resource keys.
   *
   * <p>When mapping over keys we can't distinguish between Objectify polymorphic types.
   */
  private static class KeyReader<R extends EppResource> extends BaseReader<Key<R>> {

    private static final long serialVersionUID = -428232054739189774L;

    public KeyReader(
        Key<EppResourceIndexBucket> bucketKey, ImmutableSet<Class<? extends R>> resourceClasses) {
      super(
          bucketKey,
          ONE_MB,  // Estimate 1MB of memory for this reader, which is massive overkill.
          varargsToKinds(resourceClasses));
    }

    /**
     * Called for each map invocation.
     *
     * @throws NoSuchElementException if there are no more elements, as specified in the
     *     {@link InputReader#next} Javadoc.
     */
    @Override
    @SuppressWarnings("unchecked")
    public Key<R> next() throws NoSuchElementException {
      // This is a safe cast because we filtered on kind inside the query.
      return (Key<R>) nextEri().getReference().getKey();
    }
  }

  /** Reader that maps over {@link EppResourceIndex} and returns resources. */
  private static class EntityReader<R extends EppResource> extends BaseReader<R> {

    private static final long serialVersionUID = -8042933349899971801L;

    /**
     * The resource classes to postfilter for.
     *
     * <p>This can be {@link EppResource} or any descendant classes, regardless of whether those
     * classes map directly to a kind in datastore, with the restriction that none of the classes
     * is a supertype of any of the others.
     */
    private final ImmutableSet<Class<? extends R>> resourceClasses;

    public EntityReader(
        Key<EppResourceIndexBucket> bucketKey,
        ImmutableSet<Class<? extends R>> resourceClasses) {
      super(
          bucketKey,
          ONE_MB * 2,  // Estimate 2MB of memory for this reader, since it loads a (max 1MB) entity.
          varargsToKinds(resourceClasses));
      this.resourceClasses = resourceClasses;
    }

    /**
     * Called for each map invocation.
     *
     * @throws NoSuchElementException if there are no more elements, as specified in the
     *     {@link InputReader#next} Javadoc.
     */
    @Override
    public R next() throws NoSuchElementException {
      // Loop until we find a value, or nextEri() throws a NoSuchElementException.
      while (true) {
        Ref<? extends EppResource> reference = nextEri().getReference();
        EppResource resource = reference.get();
        if (resource == null) {
          logger.severefmt("Broken ERI reference: %s", reference.getKey());
          continue;
        }
        // Postfilter to distinguish polymorphic types (e.g. DomainBase and DomainResource).
        for (Class<? extends R> resourceClass : resourceClasses) {
          if (resourceClass.isAssignableFrom(resource.getClass())) {
            @SuppressWarnings("unchecked")
            R r = (R) resource;
            return r;
          }
        }
      }
    }
  }

  private static <R extends EppResource> ImmutableSet<String> varargsToKinds(
      ImmutableSet<Class<? extends R>> resourceClasses) {
    // Ignore EppResource when finding kinds, since it doesn't have one and doesn't imply filtering.
    return resourceClasses.contains(EppResource.class)
        ? ImmutableSet.<String>of()
        : FluentIterable.from(resourceClasses).transform(CLASS_TO_KIND_FUNCTION).toSet();
  }

  private static <R extends EppResource> void checkResourceClassesForInheritance(
      ImmutableSet<Class<? extends R>> resourceClasses) {
    for (Class<? extends R> resourceClass : resourceClasses) {
      for (Class<? extends R> potentialSuperclass : difference(resourceClasses, resourceClass)) {
        checkArgument(
            !potentialSuperclass.isAssignableFrom(resourceClass),
            "Cannot specify resource classes with inheritance relationship: %s extends %s",
            resourceClass,
            potentialSuperclass);
      }
    }
  }
}
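Two behaviors here are worth calling out: kind filtering is disabled whenever EppResource itself is requested, and classes with an inheritance relationship are rejected up front so that the postfilter can't double-match. A self-contained sketch of that pairwise inheritance check, using plain java.lang.Class values rather than registry model types (names are illustrative):

import java.util.Set;

public class InheritanceCheckDemo {
  // Mirrors checkResourceClassesForInheritance(): reject any pair where one class
  // is assignable from (i.e. a supertype of) another class in the set.
  static void checkNoInheritance(Set<Class<?>> classes) {
    for (Class<?> clazz : classes) {
      for (Class<?> other : classes) {
        if (!clazz.equals(other) && other.isAssignableFrom(clazz)) {
          throw new IllegalArgumentException(String.format(
              "Cannot specify resource classes with inheritance relationship: %s extends %s",
              clazz, other));
        }
      }
    }
  }

  public static void main(String[] args) {
    checkNoInheritance(Set.of(String.class, Integer.class));  // OK: unrelated classes.
    checkNoInheritance(Set.of(Number.class, Integer.class));  // Throws: Integer extends Number.
  }
}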
java/com/google/domain/registry/mapreduce/MapreduceAction.java (Normal file, 24 lines)
@@ -0,0 +1,24 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.domain.registry.mapreduce;

import com.google.domain.registry.request.Action;

/**
 * Marker interface to denote an action intended for the Mapreduce queue.
 *
 * <p>Classes that implement this interface are expected to have {@link Action} set.
 */
public interface MapreduceAction extends Runnable {}
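A hedged sketch of a conforming implementation follows. The exact elements of @Action (such as a path) are assumptions about the request package, not shown in this commit, and the class and path names are purely illustrative.

// Hypothetical action class; the @Action element name "path" is assumed for illustration.
@Action(path = "/_dr/task/myMapreduce")
public class MyMapreduceAction implements MapreduceAction {
  @Override
  public void run() {
    // Kick off the mapreduce here (e.g. via an injected MapreduceRunner; see below).
  }
}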
java/com/google/domain/registry/mapreduce/MapreduceModule.java (Normal file, 52 lines)
@@ -0,0 +1,52 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.domain.registry.mapreduce;

import static com.google.domain.registry.mapreduce.MapreduceRunner.PARAM_DRY_RUN;
import static com.google.domain.registry.mapreduce.MapreduceRunner.PARAM_MAP_SHARDS;
import static com.google.domain.registry.mapreduce.MapreduceRunner.PARAM_REDUCE_SHARDS;
import static com.google.domain.registry.request.RequestParameters.extractBooleanParameter;
import static com.google.domain.registry.request.RequestParameters.extractOptionalIntParameter;

import com.google.common.base.Optional;
import com.google.domain.registry.request.Parameter;

import dagger.Module;
import dagger.Provides;

import javax.servlet.http.HttpServletRequest;

/** Dagger module for the mapreduce package. */
@Module
public final class MapreduceModule {

  @Provides
  @Parameter(PARAM_DRY_RUN)
  static boolean provideIsDryRun(HttpServletRequest req) {
    return extractBooleanParameter(req, PARAM_DRY_RUN);
  }

  @Provides
  @Parameter(PARAM_MAP_SHARDS)
  static Optional<Integer> provideMapShards(HttpServletRequest req) {
    return extractOptionalIntParameter(req, PARAM_MAP_SHARDS);
  }

  @Provides
  @Parameter(PARAM_REDUCE_SHARDS)
  static Optional<Integer> provideReduceShards(HttpServletRequest req) {
    return extractOptionalIntParameter(req, PARAM_REDUCE_SHARDS);
  }
}
java/com/google/domain/registry/mapreduce/MapreduceRunner.java (Normal file, 246 lines)
@@ -0,0 +1,246 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.domain.registry.mapreduce;

import static com.google.appengine.api.search.checkers.Preconditions.checkNotNull;
import static com.google.appengine.tools.pipeline.PipelineServiceFactory.newPipelineService;
import static com.google.domain.registry.util.PreconditionsUtils.checkArgumentNotNull;

import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.MapJob;
import com.google.appengine.tools.mapreduce.MapReduceJob;
import com.google.appengine.tools.mapreduce.MapReduceSettings;
import com.google.appengine.tools.mapreduce.MapReduceSpecification;
import com.google.appengine.tools.mapreduce.MapSettings;
import com.google.appengine.tools.mapreduce.MapSpecification;
import com.google.appengine.tools.mapreduce.Mapper;
import com.google.appengine.tools.mapreduce.Marshallers;
import com.google.appengine.tools.mapreduce.Output;
import com.google.appengine.tools.mapreduce.Reducer;
import com.google.appengine.tools.mapreduce.outputs.NoOutput;
import com.google.appengine.tools.pipeline.Job0;
import com.google.appengine.tools.pipeline.JobSetting;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.domain.registry.request.Parameter;
import com.google.domain.registry.util.FormattingLogger;
import com.google.domain.registry.util.PipelineUtils;

import org.joda.time.Duration;

import java.io.Serializable;

import javax.inject.Inject;

/**
 * Runner for map-only or full map and reduce mapreduces.
 *
 * <p>We use hardcoded serialization marshallers for moving data between steps, so all types used as
 * keys or values must implement {@link Serializable}.
 */
public class MapreduceRunner {

  private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();

  public static final String PARAM_DRY_RUN = "dryRun";
  public static final String PARAM_MAP_SHARDS = "mapShards";
  public static final String PARAM_REDUCE_SHARDS = "reduceShards";

  private static final String BASE_URL = "/_dr/mapreduce/";
  private static final String QUEUE_NAME = "mapreduce";

  private final Optional<Integer> httpParamMapShards;
  private final Optional<Integer> httpParamReduceShards;

  // Default to 3 minutes since many slices will contain datastore queries that time out at 4:30.
  private Duration sliceDuration = Duration.standardMinutes(3);
  private String jobName;
  private String moduleName;

  // If no reduce shards are set via http params, use this many shards.
  private int defaultReduceShards = 1;

  /**
   * @param mapShards number of map shards; if omitted, the {@link Input} objects will choose
   * @param reduceShards number of reduce shards; if omitted, uses {@link #defaultReduceShards}
   */
  @Inject
  @VisibleForTesting
  public MapreduceRunner(
      @Parameter(PARAM_MAP_SHARDS) Optional<Integer> mapShards,
      @Parameter(PARAM_REDUCE_SHARDS) Optional<Integer> reduceShards) {
    this.httpParamMapShards = mapShards;
    this.httpParamReduceShards = reduceShards;
  }

  /** Set the max time to run a slice before serializing; defaults to 3 minutes. */
  public MapreduceRunner setSliceDuration(Duration sliceDuration) {
    this.sliceDuration = checkArgumentNotNull(sliceDuration, "sliceDuration");
    return this;
  }

  /** Set the human-readable job name for display purposes. */
  public MapreduceRunner setJobName(String jobName) {
    this.jobName = checkArgumentNotNull(jobName, "jobName");
    return this;
  }

  /** Set the module to run in. */
  public MapreduceRunner setModuleName(String moduleName) {
    this.moduleName = checkArgumentNotNull(moduleName, "moduleName");
    return this;
  }

  /** Set the default number of reducers, if not overridden by the http param. */
  public MapreduceRunner setDefaultReduceShards(int defaultReduceShards) {
    this.defaultReduceShards = checkNotNull(defaultReduceShards, "defaultReduceShards");
    return this;
  }

  /**
   * Create a map-only mapreduce to be run as part of a pipeline.
   *
   * @see #runMapOnly for creating and running an independent map-only mapreduce
   *
   * @param mapper instance of a mapper class
   * @param output output sink for the mapped data
   * @param inputs input sources for the mapper
   * @param <I> mapper input type
   * @param <O> individual output record type sent to the {@link Output}
   * @param <R> overall output result type
   */
  public <I, O, R> MapJob<I, O, R> createMapOnlyJob(
      Mapper<I, Void, O> mapper,
      Output<O, R> output,
      Iterable<? extends Input<? extends I>> inputs) {
    checkCommonRequiredFields(inputs, mapper);
    return new MapJob<>(
        new MapSpecification.Builder<I, O, R>()
            .setJobName(jobName)
            .setInput(new ConcatenatingInput<>(inputs, httpParamMapShards.or(Integer.MAX_VALUE)))
            .setMapper(mapper)
            .setOutput(output)
            .build(),
        new MapSettings.Builder()
            .setWorkerQueueName(QUEUE_NAME)
            .setBaseUrl(BASE_URL)
            .setModule(moduleName)
            .setMillisPerSlice((int) sliceDuration.getMillis())
            .build());
  }

  /**
   * Kick off a map-only mapreduce.
   *
   * <p>For simplicity, the mapreduce is hard-coded with {@link NoOutput}, on the assumption that
   * all work will be accomplished via side effects during the map phase.
   *
   * @see #createMapOnlyJob for creating and running a map-only mapreduce as part of a pipeline
   *
   * @param mapper instance of a mapper class
   * @param inputs input sources for the mapper
   * @param <I> mapper input type
   * @return the job id
   */
  public <I> String runMapOnly(
      Mapper<I, Void, Void> mapper,
      Iterable<? extends Input<? extends I>> inputs) {
    return runAsPipeline(createMapOnlyJob(mapper, new NoOutput<Void, Void>(), inputs));
  }

  /**
   * Create a mapreduce job to be run as part of a pipeline.
   *
   * @see #runMapreduce for creating and running an independent mapreduce
   *
   * @param mapper instance of a mapper class
   * @param reducer instance of a reducer class
   * @param output output sink for the reduced data
   * @param inputs input sources for the mapper
   * @param <I> mapper input type
   * @param <K> emitted key type
   * @param <V> emitted value type
   * @param <O> individual output record type sent to the {@link Output}
   * @param <R> overall output result type
   */
  public final <I, K extends Serializable, V extends Serializable, O, R> MapReduceJob<I, K, V, O, R>
      createMapreduceJob(
          Mapper<I, K, V> mapper,
          Reducer<K, V, O> reducer,
          Output<O, R> output,
          Iterable<? extends Input<? extends I>> inputs) {
    checkCommonRequiredFields(inputs, mapper);
    checkArgumentNotNull(reducer, "reducer");
    return new MapReduceJob<>(
        new MapReduceSpecification.Builder<I, K, V, O, R>()
            .setJobName(jobName)
            .setInput(new ConcatenatingInput<>(inputs, httpParamMapShards.or(Integer.MAX_VALUE)))
            .setMapper(mapper)
            .setReducer(reducer)
            .setOutput(output)
            .setKeyMarshaller(Marshallers.<K>getSerializationMarshaller())
            .setValueMarshaller(Marshallers.<V>getSerializationMarshaller())
            .setNumReducers(httpParamReduceShards.or(defaultReduceShards))
            .build(),
        new MapReduceSettings.Builder()
            .setWorkerQueueName(QUEUE_NAME)
            .setBaseUrl(BASE_URL)
            .setModule(moduleName)
            .setMillisPerSlice((int) sliceDuration.getMillis())
            .build());
  }

  /**
   * Kick off a mapreduce job.
   *
   * <p>For simplicity, the mapreduce is hard-coded with {@link NoOutput}, on the assumption that
   * all work will be accomplished via side effects during the map or reduce phases.
   *
   * @see #createMapreduceJob for creating and running a mapreduce as part of a pipeline
   *
   * @param mapper instance of a mapper class
   * @param reducer instance of a reducer class
   * @param inputs input sources for the mapper
   * @param <I> mapper input type
   * @param <K> emitted key type
   * @param <V> emitted value type
   * @return the job id
   */
  public final <I, K extends Serializable, V extends Serializable> String runMapreduce(
      Mapper<I, K, V> mapper,
      Reducer<K, V, Void> reducer,
      Iterable<? extends Input<? extends I>> inputs) {
    return runAsPipeline(
        createMapreduceJob(mapper, reducer, new NoOutput<Void, Void>(), inputs));
  }

  private void checkCommonRequiredFields(Iterable<?> inputs, Mapper<?, ?, ?> mapper) {
    checkNotNull(jobName, "jobName");
    checkNotNull(moduleName, "moduleName");
    checkArgumentNotNull(inputs, "inputs");
    checkArgumentNotNull(mapper, "mapper");
  }

  private String runAsPipeline(Job0<?> job) {
    String jobId = newPipelineService().startNewPipeline(
        job,
        new JobSetting.OnModule(moduleName),
        new JobSetting.OnQueue(QUEUE_NAME));
    logger.infofmt(
        "Started '%s' %s job: %s",
        jobName,
        job instanceof MapJob ? "map" : "mapreduce",
        PipelineUtils.createJobPath(jobId));
    return jobId;
  }
}
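Putting the pieces together, here is a hedged usage sketch. The fluent setters and runMapreduce() are exactly the methods defined above; the injected runner, MyMapper, and MyReducer are hypothetical stand-ins, and ContactResource is one of the model kinds referenced earlier in EppResourceInputs.

// Hypothetical call site; assumes Dagger has provided a MapreduceRunner as mrRunner
// and that "backend" is a valid module name in this deployment.
String jobId =
    mrRunner
        .setJobName("Re-save all contacts")
        .setModuleName("backend")
        .setDefaultReduceShards(2)
        .runMapreduce(
            new MyMapper(),
            new MyReducer(),
            ImmutableList.of(EppResourceInputs.createEntityInput(ContactResource.class)));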
java/com/google/domain/registry/mapreduce/NullInput.java (Normal file, 54 lines)
@@ -0,0 +1,54 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.domain.registry.mapreduce;

import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.common.collect.ImmutableList;

import java.util.List;
import java.util.NoSuchElementException;

/** An input that returns a single {@code null} value. */
public class NullInput<T> extends Input<T> {

  private static final long serialVersionUID = 1816836937031979851L;

  private static final class NullReader<T> extends InputReader<T> {

    private static final long serialVersionUID = -8176201363578913125L;

    boolean read = false;

    @Override
    public T next() throws NoSuchElementException {
      if (read) {
        throw new NoSuchElementException();
      }
      read = true;
      return null;
    }

    @Override
    public Double getProgress() {
      return read ? 1.0 : 0.0;
    }
  }

  @Override
  public List<? extends InputReader<T>> createReaders() {
    return ImmutableList.of(new NullReader<T>());
  }
}
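Since the single NullReader yields exactly one null before signaling exhaustion, a NullInput drives exactly one map call, which is useful when the mapper's work is pure side effect and needs no real input. A hedged sketch (MyOneShotMapper and the job/module names are stand-ins; runMapOnly() is defined in MapreduceRunner above):

// Hypothetical one-shot invocation: the single null input triggers one map() call.
mrRunner
    .setJobName("one-shot cleanup")
    .setModuleName("backend")
    .runMapOnly(new MyOneShotMapper(), ImmutableList.of(new NullInput<Void>()));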