Add a fast mode to the ResaveAllEppResourcesAction mapreduce (#912)

* Add a fast mode to the ResaveAllEppResourcesAction mapreduce

This new mode avoids writing no-op mutations for entities that don't actually
have any changes to write. The cronjobs use fast mode by default, but manual
invocations do not, as manual invocations are often used to trigger @OnLoad
migrations, and fast mode won't pick up on those changes.
This commit is contained in:
Ben McIlwain 2020-12-21 18:07:59 -05:00 committed by GitHub
parent e550c94cbc
commit 2cf190e448
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 69 additions and 22 deletions

View file

@ -14,6 +14,7 @@
package google.registry.batch;
import static google.registry.mapreduce.MapreduceRunner.PARAM_FAST;
import static google.registry.model.ofy.ObjectifyService.ofy;
import static google.registry.persistence.transaction.TransactionManagerFactory.tm;
@ -24,6 +25,7 @@ import google.registry.mapreduce.MapreduceRunner;
import google.registry.mapreduce.inputs.EppResourceInputs;
import google.registry.model.EppResource;
import google.registry.request.Action;
import google.registry.request.Parameter;
import google.registry.request.Response;
import google.registry.request.auth.Auth;
import javax.inject.Inject;
@ -39,6 +41,14 @@ import javax.inject.Inject;
* <p>Because there are no auth settings in the {@link Action} annotation, this command can only be
* run internally, or by pretending to be internal by setting the X-AppEngine-QueueName header,
* which only admin users can do.
*
* <p>If the <code>?fast=true</code> querystring parameter is passed, then entities that are not
* changed by {@link EppResource#cloneProjectedAtTime} will not be re-saved. This helps prevent
* mutation load on the DB and has the beneficial side effect of writing out smaller commit logs.
* Note that this does NOT pick up mutations caused by migrations using the {@link
* com.googlecode.objectify.annotation.OnLoad} annotation, so if you are running a one-off schema
* migration, do not use fast mode. Fast mode defaults to false for this reason, but is used by the
* monthly invocation of the mapreduce.
*/
@Action(
service = Action.Service.BACKEND,
@ -48,7 +58,13 @@ public class ResaveAllEppResourcesAction implements Runnable {
@Inject MapreduceRunner mrRunner;
@Inject Response response;
@Inject ResaveAllEppResourcesAction() {}
@Inject
@Parameter(PARAM_FAST)
boolean isFast;
@Inject
ResaveAllEppResourcesAction() {}
/**
* The number of shards to run the map-only mapreduce on.
@ -66,7 +82,7 @@ public class ResaveAllEppResourcesAction implements Runnable {
.setModuleName("backend")
.setDefaultMapShards(NUM_SHARDS)
.runMapOnly(
new ResaveAllEppResourcesActionMapper(),
new ResaveAllEppResourcesActionMapper(isFast),
ImmutableList.of(EppResourceInputs.createKeyInput(EppResource.class)))
.sendLinkToMapreduceConsole(response);
}
@ -76,23 +92,33 @@ public class ResaveAllEppResourcesAction implements Runnable {
extends Mapper<Key<EppResource>, Void, Void> {
private static final long serialVersionUID = -7721628665138087001L;
public ResaveAllEppResourcesActionMapper() {}
private final boolean isFast;
ResaveAllEppResourcesActionMapper(boolean isFast) {
this.isFast = isFast;
}
@Override
public final void map(final Key<EppResource> resourceKey) {
tm()
.transact(
boolean resaved =
tm().transact(
() -> {
EppResource originalResource = ofy().load().key(resourceKey).now();
EppResource projectedResource =
ofy()
.load()
.key(resourceKey)
.now()
.cloneProjectedAtTime(tm().getTransactionTime());
originalResource.cloneProjectedAtTime(tm().getTransactionTime());
if (isFast && originalResource.equals(projectedResource)) {
return false;
} else {
ofy().save().entity(projectedResource).now();
return true;
}
});
getContext().incrementCounter(String.format("%s entities re-saved", resourceKey.getKind()));
getContext()
.incrementCounter(
String.format(
"%s entities %s",
resourceKey.getKind(), resaved ? "re-saved" : "with no changes skipped"));
}
}
}

View file

@ -80,7 +80,7 @@
</cron>
<cron>
<url><![CDATA[/_dr/task/resaveAllEppResources]]></url>
<url><![CDATA[/_dr/task/resaveAllEppResources?fast=true]]></url>
<description>
This job resaves all our resources, projected in time to "now".
It is needed for "deleteOldCommitLogs" to work correctly.

View file

@ -103,7 +103,7 @@
</cron>
<cron>
<url><![CDATA[/_dr/task/resaveAllEppResources]]></url>
<url><![CDATA[/_dr/task/resaveAllEppResources?fast=true]]></url>
<description>
This job resaves all our resources, projected in time to "now".
It is needed for "deleteOldCommitLogs" to work correctly.

View file

@ -21,7 +21,7 @@
</cron>
<cron>
<url><![CDATA[/_dr/task/resaveAllEppResources]]></url>
<url><![CDATA[/_dr/task/resaveAllEppResources?fast=true]]></url>
<description>
This job resaves all our resources, projected in time to "now".
It is needed for "deleteOldCommitLogs" to work correctly.

View file

@ -87,7 +87,7 @@
</cron>
<cron>
<url><![CDATA[/_dr/task/resaveAllEppResources]]></url>
<url><![CDATA[/_dr/task/resaveAllEppResources?fast=true]]></url>
<description>
This job resaves all our resources, projected in time to "now".
It is needed for "deleteOldCommitLogs" to work correctly.

View file

@ -15,6 +15,7 @@
package google.registry.mapreduce;
import static google.registry.mapreduce.MapreduceRunner.PARAM_DRY_RUN;
import static google.registry.mapreduce.MapreduceRunner.PARAM_FAST;
import static google.registry.mapreduce.MapreduceRunner.PARAM_MAP_SHARDS;
import static google.registry.mapreduce.MapreduceRunner.PARAM_REDUCE_SHARDS;
import static google.registry.request.RequestParameters.extractBooleanParameter;
@ -36,6 +37,12 @@ public final class MapreduceModule {
return extractBooleanParameter(req, PARAM_DRY_RUN);
}
@Provides
@Parameter(PARAM_FAST)
static boolean provideIsFast(HttpServletRequest req) {
return extractBooleanParameter(req, PARAM_FAST);
}
@Provides
@Parameter(PARAM_MAP_SHARDS)
static Optional<Integer> provideMapShards(HttpServletRequest req) {

View file

@ -55,6 +55,7 @@ public class MapreduceRunner {
public static final String PARAM_DRY_RUN = "dryRun";
public static final String PARAM_MAP_SHARDS = "mapShards";
public static final String PARAM_REDUCE_SHARDS = "reduceShards";
public static final String PARAM_FAST = "fast";
private static final String BASE_URL = "/_dr/mapreduce/";
private static final String QUEUE_NAME = "mapreduce";

View file

@ -22,7 +22,7 @@ import javax.annotation.Nullable;
import org.joda.time.DateTime;
/**
* A timestamp that auto-updates on each save to Datastore.
* A timestamp that auto-updates on each save to Datastore/Cloud SQL.
*
* @see UpdateAutoTimestampTranslatorFactory
*/
@ -31,7 +31,7 @@ public class UpdateAutoTimestamp extends ImmutableObject {
// When set to true, database converters/translators should do tha auto update. When set to
// false, auto update should be suspended (this exists to allow us to preserve the original value
// during a replay).
static ThreadLocal<Boolean> autoUpdateEnabled = ThreadLocal.withInitial(() -> true);
private static ThreadLocal<Boolean> autoUpdateEnabled = ThreadLocal.withInitial(() -> true);
DateTime timestamp;

View file

@ -55,6 +55,19 @@ class ResaveAllEppResourcesActionTest extends MapreduceTestCase<ResaveAllEppReso
.isGreaterThan(creationTime);
}
@Test
void test_fastMode_doesNotResaveEntityWithNoChanges() throws Exception {
ContactResource contact = persistActiveContact("test123");
DateTime creationTime = contact.getUpdateTimestamp().getTimestamp();
assertThat(ofy().load().entity(contact).now().getUpdateTimestamp().getTimestamp())
.isEqualTo(creationTime);
ofy().clearSessionCache();
action.isFast = true;
runMapreduce();
assertThat(ofy().load().entity(contact).now().getUpdateTimestamp().getTimestamp())
.isEqualTo(creationTime);
}
@Test
void test_mapreduceResolvesPendingTransfer() throws Exception {
DateTime now = DateTime.now(UTC);