Add real batching to KillAllXXXActions, and fix nits

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=120272836
This commit is contained in:
cgoldfeder 2016-04-19 14:50:25 -07:00 committed by Justine Tunney
parent 67a604654c
commit 5d88962258
5 changed files with 107 additions and 64 deletions

View file

@ -197,12 +197,6 @@
<url-pattern>/_dr/task/exportCommitLogDiff</url-pattern> <url-pattern>/_dr/task/exportCommitLogDiff</url-pattern>
</servlet-mapping> </servlet-mapping>
<!-- Deletes EppResources, children, and indices. -->
<servlet-mapping>
<servlet-name>backend-servlet</servlet-name>
<url-pattern>/_dr/task/killAllEppResources</url-pattern>
</servlet-mapping>
<!-- Restores commit logs. --> <!-- Restores commit logs. -->
<servlet-mapping> <servlet-mapping>
<servlet-name>backend-servlet</servlet-name> <servlet-name>backend-servlet</servlet-name>

View file

@ -100,6 +100,18 @@
<url-pattern>/_dr/task/backfillAutorenewBillingFlag</url-pattern> <url-pattern>/_dr/task/backfillAutorenewBillingFlag</url-pattern>
</servlet-mapping> </servlet-mapping>
<!-- Mapreduce to delete EppResources, children, and indices. -->
<servlet-mapping>
<servlet-name>tools-servlet</servlet-name>
<url-pattern>/_dr/task/killAllEppResources</url-pattern>
</servlet-mapping>
<!-- Mapreduce to delete all commit logs. -->
<servlet-mapping>
<servlet-name>tools-servlet</servlet-name>
<url-pattern>/_dr/task/killAllCommitLogs</url-pattern>
</servlet-mapping>
<!-- This path serves up the App Engine results page for mapreduce runs. --> <!-- This path serves up the App Engine results page for mapreduce runs. -->
<servlet> <servlet>
<servlet-name>mapreduce</servlet-name> <servlet-name>mapreduce</servlet-name>

View file

@ -15,7 +15,6 @@
package com.google.domain.registry.tools.server; package com.google.domain.registry.tools.server;
import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.Iterables.partition;
import static com.google.common.collect.Lists.partition; import static com.google.common.collect.Lists.partition;
import static com.google.domain.registry.model.ofy.ObjectifyService.ofy; import static com.google.domain.registry.model.ofy.ObjectifyService.ofy;
import static com.google.domain.registry.request.Action.Method.POST; import static com.google.domain.registry.request.Action.Method.POST;
@ -33,26 +32,13 @@ import com.google.domain.registry.request.Action;
import com.google.domain.registry.request.Response; import com.google.domain.registry.request.Response;
import com.googlecode.objectify.Key; import com.googlecode.objectify.Key;
import com.googlecode.objectify.VoidWork;
import java.util.List;
import javax.inject.Inject; import javax.inject.Inject;
/** /** Deletes all commit logs in datastore. */
* Deletes all commit logs in datastore.
*
* <p>Before running this, use the datastore admin page to delete all {@code CommitLogManifest} and
* {@code CommitLogMutation} entities. That will take care of most (likely all) commit log entities
* (except perhaps for very recently created entities that are missed by the eventually consistent
* query driving that deletion) and it will be much faster than this mapreduce. After that, run this
* to get a guarantee that everything was deleted.
*/
@Action(path = "/_dr/task/killAllCommitLogs", method = POST) @Action(path = "/_dr/task/killAllCommitLogs", method = POST)
public class KillAllCommitLogsAction implements MapreduceAction { public class KillAllCommitLogsAction implements MapreduceAction {
private static final int BATCH_SIZE = 100;
@Inject MapreduceRunner mrRunner; @Inject MapreduceRunner mrRunner;
@Inject Response response; @Inject Response response;
@Inject KillAllCommitLogsAction() {} @Inject KillAllCommitLogsAction() {}
@ -60,16 +46,19 @@ public class KillAllCommitLogsAction implements MapreduceAction {
@Override @Override
public void run() { public void run() {
checkArgument( // safety checkArgument( // safety
RegistryEnvironment.get() == RegistryEnvironment.ALPHA RegistryEnvironment.get() == RegistryEnvironment.CRASH
|| RegistryEnvironment.get() == RegistryEnvironment.UNITTEST, || RegistryEnvironment.get() == RegistryEnvironment.UNITTEST,
"DO NOT RUN ANYWHERE ELSE EXCEPT ALPHA OR TESTS."); "DO NOT RUN ANYWHERE ELSE EXCEPT CRASH OR TESTS.");
// Create an in-memory input, assigning each bucket to its own shard for maximum parallelization. // Create an in-memory input, assigning each bucket to its own shard for maximum parallelization.
Input<Key<CommitLogBucket>> input = Input<Key<CommitLogBucket>> input =
new InMemoryInput<>(partition(CommitLogBucket.getAllBucketKeys().asList(), 1)); new InMemoryInput<>(partition(CommitLogBucket.getAllBucketKeys().asList(), 1));
response.sendJavaScriptRedirect(createJobPath(mrRunner response.sendJavaScriptRedirect(createJobPath(mrRunner
.setJobName("Delete all commit logs") .setJobName("Delete all commit logs")
.setModuleName("tools") .setModuleName("tools")
.runMapOnly(new KillAllCommitLogsMapper(), ImmutableList.of(input)))); .runMapreduce(
new KillAllCommitLogsMapper(),
new KillAllEntitiesReducer(),
ImmutableList.of(input))));
} }
/** /**
@ -82,24 +71,17 @@ public class KillAllCommitLogsAction implements MapreduceAction {
* <li>{@code CommitLogMutation} * <li>{@code CommitLogMutation}
* </ul> * </ul>
*/ */
static class KillAllCommitLogsMapper extends Mapper<Key<CommitLogBucket>, Void, Void> { static class KillAllCommitLogsMapper extends Mapper<Key<CommitLogBucket>, Key<?>, Key<?>> {
private static final long serialVersionUID = 1504266335352952033L; private static final long serialVersionUID = 1504266335352952033L;
@Override @Override
public void map(Key<CommitLogBucket> bucket) { public void map(Key<CommitLogBucket> bucket) {
// The query on the bucket could time out, but we are not worried about that because of the for (Key<Object> key : ofy().load().ancestor(bucket).keys()) {
// procedure outlined above. emit(bucket, key);
for (final List<Key<Object>> batch getContext().incrementCounter("entities emitted");
: partition(ofy().load().ancestor(bucket).keys(), BATCH_SIZE)) { getContext().incrementCounter(String.format("%s emitted", key.getKind()));
ofy().transact(new VoidWork() {
@Override
public void vrun() {
ofy().deleteWithoutBackup().entities(batch);
}});
getContext().incrementCounter("deleted entities", batch.size());
} }
getContext().incrementCounter("completed buckets");
} }
} }
} }

View file

@ -0,0 +1,53 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.tools.server;
import static com.google.common.collect.Iterators.partition;
import static com.google.domain.registry.model.ofy.ObjectifyService.ofy;
import com.google.appengine.tools.mapreduce.Reducer;
import com.google.appengine.tools.mapreduce.ReducerInput;
import com.googlecode.objectify.Key;
import com.googlecode.objectify.VoidWork;
import java.util.Iterator;
import java.util.List;
/** Reducer that deletes a group of keys, identified by a shared ancestor key. */
public class KillAllEntitiesReducer extends Reducer<Key<?>, Key<?>, Void> {

  private static final long serialVersionUID = 7939357855356876000L;

  /** Number of keys removed per datastore transaction. */
  private static final int BATCH_SIZE = 100;

  @Override
  public void reduce(Key<?> ancestor, final ReducerInput<Key<?>> keysToDelete) {
    // Consume the incoming key stream in fixed-size chunks; each chunk is deleted inside its
    // own transaction so Objectify's automatic transaction retry covers transient failures.
    for (Iterator<List<Key<?>>> chunks = partition(keysToDelete, BATCH_SIZE); chunks.hasNext(); ) {
      final List<Key<?>> chunk = chunks.next();
      // Use a transaction to get retrying for free.
      ofy().transact(new VoidWork() {
        @Override
        public void vrun() {
          ofy().deleteWithoutBackup().keys(chunk);
        }});
      // Aggregate counter plus a per-kind breakdown for the mapreduce status page.
      getContext().incrementCounter("entities deleted", chunk.size());
      for (Key<?> deleted : chunk) {
        getContext().incrementCounter(String.format("%s deleted", deleted.getKind()));
      }
    }
  }
}

View file

@ -15,7 +15,6 @@
package com.google.domain.registry.tools.server; package com.google.domain.registry.tools.server;
import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.Iterables.partition;
import static com.google.domain.registry.model.ofy.ObjectifyService.ofy; import static com.google.domain.registry.model.ofy.ObjectifyService.ofy;
import static com.google.domain.registry.request.Action.Method.POST; import static com.google.domain.registry.request.Action.Method.POST;
import static com.google.domain.registry.util.PipelineUtils.createJobPath; import static com.google.domain.registry.util.PipelineUtils.createJobPath;
@ -35,9 +34,7 @@ import com.google.domain.registry.request.Action;
import com.google.domain.registry.request.Response; import com.google.domain.registry.request.Response;
import com.googlecode.objectify.Key; import com.googlecode.objectify.Key;
import com.googlecode.objectify.VoidWork; import com.googlecode.objectify.Work;
import java.util.List;
import javax.inject.Inject; import javax.inject.Inject;
@ -45,8 +42,6 @@ import javax.inject.Inject;
@Action(path = "/_dr/task/killAllEppResources", method = POST) @Action(path = "/_dr/task/killAllEppResources", method = POST)
public class KillAllEppResourcesAction implements MapreduceAction { public class KillAllEppResourcesAction implements MapreduceAction {
private static final int BATCH_SIZE = 100;
@Inject MapreduceRunner mrRunner; @Inject MapreduceRunner mrRunner;
@Inject Response response; @Inject Response response;
@Inject KillAllEppResourcesAction() {} @Inject KillAllEppResourcesAction() {}
@ -54,20 +49,21 @@ public class KillAllEppResourcesAction implements MapreduceAction {
@Override @Override
public void run() { public void run() {
checkArgument( // safety checkArgument( // safety
RegistryEnvironment.get() == RegistryEnvironment.ALPHA RegistryEnvironment.get() == RegistryEnvironment.CRASH
|| RegistryEnvironment.get() == RegistryEnvironment.UNITTEST, || RegistryEnvironment.get() == RegistryEnvironment.UNITTEST,
"DO NOT RUN ANYWHERE ELSE EXCEPT ALPHA OR TESTS."); "DO NOT RUN ANYWHERE ELSE EXCEPT CRASH OR TESTS.");
response.sendJavaScriptRedirect(createJobPath(mrRunner response.sendJavaScriptRedirect(createJobPath(mrRunner
.setJobName("Delete all EppResources, children, and indices") .setJobName("Delete all EppResources, children, and indices")
.setModuleName("tools") .setModuleName("tools")
.runMapOnly( .runMapreduce(
new KillAllEppResourcesMapper(), new KillAllEppResourcesMapper(),
new KillAllEntitiesReducer(),
ImmutableList.of(EppResourceInputs.createIndexInput())))); ImmutableList.of(EppResourceInputs.createIndexInput()))));
} }
static class KillAllEppResourcesMapper extends Mapper<EppResourceIndex, Void, Void> { static class KillAllEppResourcesMapper extends Mapper<EppResourceIndex, Key<?>, Key<?>> {
private static final long serialVersionUID = 103826288518612669L; private static final long serialVersionUID = 8205309000002507407L;
/** /**
* Delete an {@link EppResourceIndex}, its referent, all descendants of each referent, and the * Delete an {@link EppResourceIndex}, its referent, all descendants of each referent, and the
@ -86,25 +82,31 @@ public class KillAllEppResourcesAction implements MapreduceAction {
*/ */
@Override @Override
public void map(final EppResourceIndex eri) { public void map(final EppResourceIndex eri) {
EppResource resource = eri.getReference().get(); Key<EppResourceIndex> eriKey = Key.create(eri);
for (final List<Key<Object>> batch emitAndIncrementCounter(eriKey, eriKey);
: partition(ofy().load().ancestor(resource).keys(), BATCH_SIZE)) { Key<?> resourceKey = eri.getReference().getKey();
ofy().transact(new VoidWork() { for (Key<Object> key : ofy().load().ancestor(resourceKey).keys()) {
@Override emitAndIncrementCounter(resourceKey, key);
public void vrun() {
ofy().deleteWithoutBackup().entities(batch);
}});
getContext().incrementCounter("deleted descendants", batch.size());
} }
final Key<?> foreignKey = resource instanceof DomainApplication // Load in a transaction to make sure we don't get stale data (in case of host renames).
// TODO(b/27424173): A transaction is overkill. When we have memcache-skipping, use that.
EppResource resource = ofy().transactNewReadOnly(
new Work<EppResource>() {
@Override
public EppResource run() {
return eri.getReference().get();
}});
// TODO(b/28247733): What about FKI's for renamed hosts?
Key<?> indexKey = resource instanceof DomainApplication
? DomainApplicationIndex.createKey((DomainApplication) resource) ? DomainApplicationIndex.createKey((DomainApplication) resource)
: ForeignKeyIndex.createKey(resource); : ForeignKeyIndex.createKey(resource);
ofy().transact(new VoidWork() { emitAndIncrementCounter(indexKey, indexKey);
@Override }
public void vrun() {
ofy().deleteWithoutBackup().keys(Key.create(eri), foreignKey).now(); private void emitAndIncrementCounter(Key<?> ancestor, Key<?> child) {
}}); emit(ancestor, child);
getContext().incrementCounter("deleted eris"); getContext().incrementCounter("entities emitted");
getContext().incrementCounter(String.format("%s emitted", child.getKind()));
} }
} }
} }