Implement input for MRs over child entities

This also adds a proof-of-concept MR that I'd like to run in production and then scrap once the meaty MR (e.g. exploding Recurring billing events into OneTimes) is finished.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=119881471
This commit is contained in:
ctingue 2016-04-14 12:59:35 -07:00 committed by Justine Tunney
parent 27c034c080
commit fbe076b5da
13 changed files with 671 additions and 24 deletions

View file

@ -88,6 +88,12 @@
<url-pattern>/_dr/task/resaveAllEppResources</url-pattern>
</servlet-mapping>
<!-- Mapreduce to count recurring billing events (to test the child entity reader). -->
<servlet-mapping>
<servlet-name>tools-servlet</servlet-name>
<url-pattern>/_dr/task/countRecurringBillingEvents</url-pattern>
</servlet-mapping>
<!-- This path serves up the App Engine results page for mapreduce runs. -->
<servlet>
<servlet-name>mapreduce</servlet-name>

View file

@ -0,0 +1,53 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.mapreduce.inputs;
import static com.google.domain.registry.util.TypeUtils.checkNoInheritanceRelationships;
import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.common.collect.ImmutableSet;
import com.google.domain.registry.model.EppResource;
import com.google.domain.registry.model.ImmutableObject;
import com.google.domain.registry.model.index.EppResourceIndexBucket;
import com.googlecode.objectify.Key;
/**
 * MapReduce {@link Input} whose readers emit child entities of the requested child types that
 * live underneath {@link EppResource} entities of the requested parent types.
 *
 * <p>One {@link ChildEntityReader} is created per {@link EppResourceIndexBucket} key handed to
 * {@link #bucketToReader}.
 */
class ChildEntityInput<R extends EppResource, I extends ImmutableObject>
    extends EppResourceBaseInput<I> {

  private static final long serialVersionUID = -3888034213150865008L;

  /** Parent (EPP resource) types whose children are read. */
  private final ImmutableSet<Class<? extends R>> resourceClasses;

  /** Child types to return. */
  private final ImmutableSet<Class<? extends I>> childResourceClasses;

  /**
   * Creates the input after verifying that neither set mixes a class with one of its own
   * supertypes.
   *
   * @param resourceClasses parent EPP resource types
   * @param childResourceClasses child entity types to load
   */
  public ChildEntityInput(
      ImmutableSet<Class<? extends R>> resourceClasses,
      ImmutableSet<Class<? extends I>> childResourceClasses) {
    // Parents are validated before children, so a failure reports the parent set first.
    ImmutableSet<Class<?>> parentTypes = ImmutableSet.<Class<?>>copyOf(resourceClasses);
    checkNoInheritanceRelationships(parentTypes);
    ImmutableSet<Class<?>> childTypes = ImmutableSet.<Class<?>>copyOf(childResourceClasses);
    checkNoInheritanceRelationships(childTypes);
    this.childResourceClasses = childResourceClasses;
    this.resourceClasses = resourceClasses;
  }

  /** Builds the reader responsible for the children found under a single index bucket. */
  @Override
  protected InputReader<I> bucketToReader(Key<EppResourceIndexBucket> bucketKey) {
    InputReader<I> reader =
        new ChildEntityReader<R, I>(bucketKey, resourceClasses, childResourceClasses);
    return reader;
  }
}

View file

@ -0,0 +1,192 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.mapreduce.inputs;
import static com.google.domain.registry.model.EntityClasses.ALL_CLASSES;
import static com.google.domain.registry.model.ofy.ObjectifyService.ofy;
import com.google.appengine.api.datastore.Cursor;
import com.google.appengine.api.datastore.QueryResultIterator;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.appengine.tools.mapreduce.ShardContext;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.domain.registry.model.EppResource;
import com.google.domain.registry.model.ImmutableObject;
import com.google.domain.registry.model.index.EppResourceIndex;
import com.google.domain.registry.model.index.EppResourceIndexBucket;
import com.google.domain.registry.util.FormattingLogger;
import com.googlecode.objectify.Key;
import com.googlecode.objectify.annotation.Entity;
import com.googlecode.objectify.cmd.Query;
import java.io.IOException;
import java.util.NoSuchElementException;
/**
* Reader that maps over {@link EppResourceIndex} and returns resources that are children of
* {@link EppResource} objects.
*
* <p>Parent resources are obtained one at a time from a wrapped {@link EppResourceEntityReader}.
* For each parent, one ancestor query is issued per entry of {@code childResourceClasses}, in
* list order, and the results of those queries are returned one entity at a time.
*
* <p>The current parent key, the index of the current child class and the last saved query cursor
* are ordinary (non-transient) fields. The live query iterator is transient and is recreated in
* {@link #beginSlice} from the saved cursor.
*
* @param <R> the type of parent EPP resource iterated over
* @param <I> the type of child entity returned
*/
class ChildEntityReader<R extends EppResource, I extends ImmutableObject> extends InputReader<I> {
private static final long serialVersionUID = -7430731417793849164L;
/** Logger for this class; it is not referenced anywhere else in this class. */
static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();
/** This reader uses an EppResourceEntityReader under the covers to iterate over EPP resources. */
private final EppResourceEntityReader<? extends R> eppResourceEntityReader;
/**
* The current EPP resource being referenced for child entity queries, or null when the next
* parent still has to be fetched from the wrapped reader.
*/
private Key<? extends R> currentEppResource;
/** The child resource classes to query for and to postfilter on, after polymorphic expansion. */
private final ImmutableList<Class<? extends I>> childResourceClasses;
/** The index within {@code childResourceClasses} of the class used by the current ofy query. */
private int childResourceClassIndex;
/**
* An iterator over queries for child entities of EppResources. Transient; null whenever a new
* query has to be started.
*/
private transient QueryResultIterator<I> childQueryIterator;
/** A cursor for queries for child entities of EppResources, captured in {@link #endSlice}. */
private Cursor childCursor;
/**
* Creates a reader over the children of the resources found under the given bucket.
*
* @param bucketKey key of the index bucket passed on to the wrapped resource reader
* @param resourceClasses parent resource types passed on to the wrapped resource reader
* @param childResourceClasses child types to return; non-entity types are expanded first
*/
public ChildEntityReader(
Key<EppResourceIndexBucket> bucketKey,
ImmutableSet<Class<? extends R>> resourceClasses,
ImmutableSet<Class<? extends I>> childResourceClasses) {
this.childResourceClasses = expandPolymorphicClasses(childResourceClasses);
this.eppResourceEntityReader = new EppResourceEntityReader<>(bucketKey, resourceClasses);
}
/**
* Expands non-entity polymorphic classes into their child types.
*
* <p>A class carrying the {@link Entity} annotation is kept as is. Any other class is replaced
* by every member of {@code ALL_CLASSES} that is assignable to it; if no member matches, that
* class contributes nothing to the result.
*
* @param resourceClasses the requested child classes
* @return the classes to query, in iteration order of the input
*/
@SuppressWarnings("unchecked")
private ImmutableList<Class<? extends I>> expandPolymorphicClasses(
ImmutableSet<Class<? extends I>> resourceClasses) {
ImmutableList.Builder<Class<? extends I>> builder = ImmutableList.builder();
for (Class<? extends I> clazz : resourceClasses) {
if (clazz.isAnnotationPresent(Entity.class)) {
builder.add(clazz);
} else {
for (Class<? extends ImmutableObject> entityClass : ALL_CLASSES) {
if (clazz.isAssignableFrom(entityClass)) {
// The cast is unchecked; the isAssignableFrom test above is what justifies it.
builder.add((Class<? extends I>) entityClass);
}
}
}
}
return builder.build();
}
/**
* Get the next {@link ImmutableObject} (i.e. child element) from the query.
*
* <p>Loops over parents and child classes until a child query yields an element. The Objectify
* session cache is cleared on every exit from this method, whether normal or exceptional.
*
* @throws NoSuchElementException if there are no more EPP resources to iterate over.
*/
I nextChild() throws NoSuchElementException {
try {
while (true) {
// No parent in progress: advance the wrapped reader (any NoSuchElementException it throws
// propagates to the caller) and reset the per-parent query state.
if (currentEppResource == null) {
currentEppResource = Key.create(eppResourceEntityReader.next());
childResourceClassIndex = 0;
childQueryIterator = null;
}
// Lazily start the query for the current parent and the current child class.
if (childQueryIterator == null) {
childQueryIterator = childQuery().iterator();
}
try {
return childQueryIterator.next();
} catch (NoSuchElementException e) {
// The query for the current child class is drained: move on to the next class, and once
// every class has been tried, drop the parent so the next pass fetches a new one.
childQueryIterator = null;
childResourceClassIndex++;
if (childResourceClassIndex >= childResourceClasses.size()) {
currentEppResource = null;
}
}
}
} finally {
ofy().clearSessionCache(); // Try not to leak memory.
}
}
/**
* Returns the next child entity that is an instance of one of the expanded child classes.
*
* <p>Entities from {@link #nextChild} that are null or match none of the classes are skipped.
*
* @throws NoSuchElementException propagated from {@link #nextChild}
*/
@Override
public I next() throws NoSuchElementException {
while (true) {
I entity = nextChild();
if (entity != null) {
// Postfilter to distinguish polymorphic types.
for (Class<? extends I> resourceClass : childResourceClasses) {
if (resourceClass.isInstance(entity)) {
return entity;
}
}
}
}
}
/** Query for children of the current resource and of the current child class. */
private Query<I> childQuery() {
// Unchecked cast from a query over the concrete child class to a query over I.
@SuppressWarnings("unchecked")
Query<I> query = (Query<I>) ofy().load()
.type(childResourceClasses.get(childResourceClassIndex))
.ancestor(currentEppResource);
return query;
}
/**
* Begins a slice on the wrapped reader and, if a cursor was saved by {@link #endSlice}, recreates
* the child query iterator starting at that cursor.
*/
@Override
public void beginSlice() {
eppResourceEntityReader.beginSlice();
// The iterator is transient, so a child query in progress is resumed from the saved cursor.
if (childCursor != null) {
Query<I> query = childQuery().startAt(childCursor);
childQueryIterator = query.iterator();
}
}
/**
* Saves the cursor of the child query iterator, if there is one, and then ends the slice on the
* wrapped reader. When there is no iterator the previously saved cursor is left unchanged.
*/
@Override
public void endSlice() {
if (childQueryIterator != null) {
childCursor = childQueryIterator.getCursor();
}
eppResourceEntityReader.endSlice();
}
/** Returns the progress reported by the wrapped resource reader. */
@Override
public Double getProgress() {
return eppResourceEntityReader.getProgress();
}
/** Returns the memory estimate reported by the wrapped resource reader. */
@Override
public long estimateMemoryRequirement() {
return eppResourceEntityReader.estimateMemoryRequirement();
}
/** Returns the shard context held by the wrapped resource reader. */
@Override
public ShardContext getContext() {
return eppResourceEntityReader.getContext();
}
/** Passes the shard context on to the wrapped resource reader. */
@Override
public void setContext(ShardContext context) {
eppResourceEntityReader.setContext(context);
}
/** Delegates to the wrapped resource reader. */
@Override
public void beginShard() throws IOException {
eppResourceEntityReader.beginShard();
}
/** Delegates to the wrapped resource reader. */
@Override
public void endShard() throws IOException {
eppResourceEntityReader.endShard();
}
}

View file

@ -14,14 +14,9 @@
package com.google.domain.registry.mapreduce.inputs;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.domain.registry.util.CollectionUtils.difference;
import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.domain.registry.model.EppResource;
import com.google.domain.registry.model.index.EppResourceIndex;
import com.google.domain.registry.model.index.EppResourceIndexBucket;
@ -45,18 +40,5 @@ abstract class EppResourceBaseInput<I> extends Input<I> {
/** Creates a reader that returns the resources under a bucket. */
protected abstract InputReader<I> bucketToReader(Key<EppResourceIndexBucket> bucketKey);
/**
 * Rejects a set of resource classes in which one class is a subtype of another.
 *
 * <p>Each class is compared against the result of {@code difference(resourceClasses, thatClass)},
 * which is presumably the set without that class (confirm against CollectionUtils).
 *
 * @param resourceClasses the resource classes to compare pairwise
 */
static <R extends EppResource> void checkResourceClassesForInheritance(
    ImmutableSet<Class<? extends R>> resourceClasses) {
  for (Class<? extends R> subCandidate : resourceClasses) {
    for (Class<? extends R> superCandidate : difference(resourceClasses, subCandidate)) {
      boolean related = superCandidate.isAssignableFrom(subCandidate);
      checkArgument(
          !related,
          "Cannot specify resource classes with inheritance relationship: %s extends %s",
          subCandidate,
          superCandidate);
    }
  }
}
}

View file

@ -61,8 +61,7 @@ abstract class EppResourceBaseReader<T> extends InputReader<T> {
private transient QueryResultIterator<EppResourceIndex> queryIterator;
EppResourceBaseReader(
Key<EppResourceIndexBucket>
bucketKey,
Key<EppResourceIndexBucket> bucketKey,
long memoryEstimate,
ImmutableSet<String> filterKinds) {
this.bucketKey = bucketKey;

View file

@ -14,6 +14,8 @@
package com.google.domain.registry.mapreduce.inputs;
import static com.google.domain.registry.util.TypeUtils.checkNoInheritanceRelationships;
import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.common.collect.ImmutableSet;
@ -31,7 +33,7 @@ class EppResourceEntityInput<R extends EppResource> extends EppResourceBaseInput
public EppResourceEntityInput(ImmutableSet<Class<? extends R>> resourceClasses) {
this.resourceClasses = resourceClasses;
checkResourceClassesForInheritance(resourceClasses);
checkNoInheritanceRelationships(ImmutableSet.<Class<?>>copyOf(resourceClasses));
}
@Override
@ -39,5 +41,3 @@ class EppResourceEntityInput<R extends EppResource> extends EppResourceBaseInput
return new EppResourceEntityReader<R>(bucketKey, resourceClasses);
}
}

View file

@ -23,6 +23,7 @@ import static com.google.domain.registry.util.TypeUtils.hasAnnotation;
import com.google.appengine.tools.mapreduce.Input;
import com.google.common.collect.ImmutableSet;
import com.google.domain.registry.model.EppResource;
import com.google.domain.registry.model.ImmutableObject;
import com.google.domain.registry.model.index.EppResourceIndex;
import com.googlecode.objectify.Key;
@ -59,6 +60,24 @@ public final class EppResourceInputs {
ImmutableSet.copyOf(asList(resourceClass, moreResourceClasses)));
}
/**
 * Builds a MapReduce {@link Input} over every {@link ImmutableObject} of the requested child
 * types that is a child entity of an {@link EppResource} of the requested parent types. Deleted
 * resources are included.
 *
 * <p>Note: list all desired types in one call rather than concatenating several EntityInputs,
 * since concatenation walks every bucket once per input. To load everything, pass
 * {@link EppResource} and/or {@link ImmutableObject} as the class.
 *
 * @param parentClasses parent resource types; must not be empty
 * @param childClasses child entity types; must not be empty
 * @return an input over the matching child entities
 */
public static <R extends EppResource, I extends ImmutableObject> Input<I> createChildEntityInput(
    ImmutableSet<Class<? extends R>> parentClasses,
    ImmutableSet<Class<? extends I>> childClasses) {
  boolean hasParentTypes = !parentClasses.isEmpty();
  checkArgument(hasParentTypes, "Must provide at least one parent type.");
  boolean hasChildTypes = !childClasses.isEmpty();
  checkArgument(hasChildTypes, "Must provide at least one child type.");
  Input<I> childInput = new ChildEntityInput<R, I>(parentClasses, childClasses);
  return childInput;
}
/**
* Returns a MapReduce {@link Input} that loads keys to all {@link EppResource} objects of a given
* type, including deleted resources.

View file

@ -14,6 +14,8 @@
package com.google.domain.registry.mapreduce.inputs;
import static com.google.domain.registry.util.TypeUtils.checkNoInheritanceRelationships;
import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.common.collect.ImmutableSet;
@ -35,7 +37,7 @@ class EppResourceKeyInput<R extends EppResource> extends EppResourceBaseInput<Ke
public EppResourceKeyInput(ImmutableSet<Class<? extends R>> resourceClasses) {
this.resourceClasses = resourceClasses;
checkResourceClassesForInheritance(resourceClasses);
checkNoInheritanceRelationships(ImmutableSet.<Class<?>>copyOf(resourceClasses));
}
@Override

View file

@ -19,6 +19,7 @@ java_library(
"//java/com/google/domain/registry/request",
"//java/com/google/domain/registry/request:modules",
"//java/com/google/domain/registry/tools/server",
"//java/com/google/domain/registry/tools/server/javascrap",
"//java/com/google/domain/registry/util",
"//third_party/java/bouncycastle",
"//third_party/java/dagger",

View file

@ -37,6 +37,7 @@ import com.google.domain.registry.tools.server.ResaveAllEppResourcesAction;
import com.google.domain.registry.tools.server.ToolsServerModule;
import com.google.domain.registry.tools.server.UpdatePremiumListAction;
import com.google.domain.registry.tools.server.VerifyOteAction;
import com.google.domain.registry.tools.server.javascrap.CountRecurringBillingEventsAction;
import dagger.Subcomponent;
@ -50,6 +51,7 @@ import dagger.Subcomponent;
ToolsServerModule.class,
})
interface ToolsRequestComponent {
CountRecurringBillingEventsAction countRecurringBillingEventsAction();
CreateGroupsAction createGroupsAction();
CreatePremiumListAction createPremiumListAction();
DeleteEntityAction deleteEntityAction();

View file

@ -15,11 +15,13 @@
package com.google.domain.registry.util;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.domain.registry.util.CollectionUtils.difference;
import static java.lang.reflect.Modifier.isFinal;
import static java.lang.reflect.Modifier.isStatic;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.reflect.TypeToken;
import java.lang.annotation.Annotation;
@ -88,4 +90,16 @@ public class TypeUtils {
}
};
}
/**
 * Verifies that no class in the given set is assignable to another class in the same set.
 *
 * <p>Each class is compared against the result of {@code difference(resourceClasses, thatClass)},
 * which is presumably the set without that class (confirm against CollectionUtils).
 *
 * @param resourceClasses the classes to compare pairwise
 */
public static void checkNoInheritanceRelationships(ImmutableSet<Class<?>> resourceClasses) {
  for (Class<?> subCandidate : resourceClasses) {
    for (Class<?> superCandidate : difference(resourceClasses, subCandidate)) {
      boolean related = superCandidate.isAssignableFrom(subCandidate);
      checkArgument(
          !related,
          "Cannot specify resource classes with inheritance relationship: %s extends %s",
          subCandidate,
          superCandidate);
    }
  }
}
}