Retry any Datastore reads in EppResource map-reduce input

Datastore has a non-zero chance of failing on any given read. A map-reduce with
too many failures will eventually give up, so any map-reduce that runs over a
large number of Datastore entities is almost guaranteed to fail.

Since we expect to have a large number of EppResources, we make sure to wrap
all Datastore reads in a retrying mechanism, reducing the number of transient
failures that propagate to the map-reduce framework.
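
Concretely, every read is routed through google.registry.util.Retrier, the same
way the old CommitLogManifestReader.next() in the diff below already did. A
minimal sketch of the pattern, assuming Retrier's two-argument callWithRetry
overload (the RetryExample class and readEntity method are illustrative, not
part of this commit):

import com.google.appengine.api.datastore.DatastoreTimeoutException;
import google.registry.util.Retrier;
import google.registry.util.SystemSleeper;

class RetryExample {

  // Up to three attempts, sleeping between them, matching the reader below.
  private static final Retrier retrier = new Retrier(new SystemSleeper(), 3);

  // Stand-in for a real Datastore read, e.g. queryIterator.next().
  String readEntity() {
    return "entity";
  }

  String readWithRetry() {
    // Only DatastoreTimeoutException is treated as transient and retried;
    // any other exception propagates to the map-reduce framework as before.
    return retrier.callWithRetry(this::readEntity, DatastoreTimeoutException.class);
  }
}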

This retrying mechanism already existed in CommitLogManifestReader; we factor
it out into a shared RetryingInputReader base class and use it in the
EppResource readers as well.
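
RetryingInputReader itself is one of the new files in this commit and doesn't
appear in the diff below. Its rough shape can be reconstructed from the members
this diff deletes from CommitLogManifestReader and from the overrides the
subclass now provides; the following is a sketch under that assumption, not the
actual source:

import static com.google.common.base.Preconditions.checkNotNull;
import static google.registry.model.ofy.ObjectifyService.ofy;

import com.google.appengine.api.datastore.Cursor;
import com.google.appengine.api.datastore.DatastoreTimeoutException;
import com.google.appengine.api.datastore.QueryResultIterator;
import com.google.appengine.tools.mapreduce.InputReader;
import com.googlecode.objectify.cmd.Query;
import google.registry.util.Retrier;
import google.registry.util.SystemSleeper;
import javax.annotation.Nullable;

abstract class RetryingInputReader<I, T> extends InputReader<T> {

  private static final Retrier retrier = new Retrier(new SystemSleeper(), 3);

  @Nullable private Cursor cursor;
  private int total;
  private int loaded;
  private transient QueryResultIterator<I> queryIterator;

  /** Return a fresh iterator over the query, resuming at the cursor if it is non-null. */
  public abstract QueryResultIterator<I> getQueryIterator(@Nullable Cursor cursor);

  /** Return the expected total number of results, cached once for progress reporting. */
  public abstract int getTotal();

  /** Utility for subclasses: resume a query at a cursor, if one was saved. */
  protected static <U> Query<U> startQueryAt(Query<U> query, @Nullable Cursor cursor) {
    return (cursor == null) ? query : query.startAt(cursor);
  }

  /** Called once at start. Cache the expected size. */
  @Override
  public void beginShard() {
    total = getTotal();
  }

  /** Called every time we are deserialized. Create a new query or resume an existing one. */
  @Override
  public void beginSlice() {
    queryIterator = getQueryIterator(cursor);
  }

  /** Called before we are serialized. Save a serializable cursor for this query. */
  @Override
  public void endSlice() {
    cursor = queryIterator.getCursor();
  }

  @Override
  public Double getProgress() {
    // Cap progress at 1.0, since the query's count() can increase while the
    // mapreduce runs, but we cached the value once in "total".
    return Math.min(1.0, ((double) loaded) / total);
  }

  /** Fetch the next result, restarting the query from the last good cursor on timeouts. */
  protected I nextQueryResult() {
    cursor = queryIterator.getCursor();
    loaded++;
    try {
      return retrier.callWithRetry(
          () -> queryIterator.next(),
          (thrown, failures, maxAttempts) -> {
            checkNotNull(cursor, "Can't retry because cursor is null. Giving up.");
            queryIterator = getQueryIterator(cursor);
          },
          DatastoreTimeoutException.class);
    } finally {
      ofy().clearSessionCache(); // Try not to leak memory.
    }
  }
}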

Also removed the transactNew around the reads: looking at the source, it
doesn't actually do anything we need, since it only retries on concurrency
failures and not on the transient failures this change is about.
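
For illustration, the shape of that change at a single read call site
(hypothetical code; the real call sites are in the EppResource readers
elsewhere in this commit):

import static google.registry.model.ofy.ObjectifyService.ofy;

import com.google.appengine.api.datastore.DatastoreTimeoutException;
import com.googlecode.objectify.Key;
import google.registry.model.EppResource;
import google.registry.util.Retrier;
import google.registry.util.SystemSleeper;

class TransactNewExample {

  private static final Retrier retrier = new Retrier(new SystemSleeper(), 3);

  // Before: the read ran inside a fresh transaction. transactNew retries only
  // on concurrency failures, which never helps a read-only operation.
  EppResource loadBefore(Key<EppResource> key) {
    return ofy().transactNew(() -> ofy().load().key(key).now());
  }

  // After: a plain read, with transient Datastore timeouts retried explicitly.
  EppResource loadAfter(Key<EppResource> key) {
    return retrier.callWithRetry(
        () -> ofy().load().key(key).now(), DatastoreTimeoutException.class);
  }
}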

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=190633281
Author: guyben (2018-03-27 10:21:21 -07:00), committed by jianglai
Parent: 422ec9b97a
Commit: 2bbde9d9a9
8 changed files with 293 additions and 167 deletions

java/google/registry/mapreduce/inputs/CommitLogManifestReader.java

@@ -14,11 +14,9 @@
 package google.registry.mapreduce.inputs;
 
-import static com.google.common.base.Preconditions.checkNotNull;
-import static google.registry.model.ofy.ObjectifyService.ofy;
-
 import com.google.appengine.api.datastore.Cursor;
-import com.google.appengine.api.datastore.DatastoreTimeoutException;
 import com.google.appengine.api.datastore.QueryResultIterator;
 import com.google.appengine.tools.mapreduce.InputReader;
 import com.googlecode.objectify.Key;
@@ -26,14 +24,13 @@ import com.googlecode.objectify.cmd.Query;
 import google.registry.model.ofy.CommitLogBucket;
 import google.registry.model.ofy.CommitLogManifest;
 import google.registry.util.FormattingLogger;
-import google.registry.util.Retrier;
-import google.registry.util.SystemSleeper;
 import java.util.NoSuchElementException;
 import javax.annotation.Nullable;
 import org.joda.time.DateTime;
 
 /** {@link InputReader} that maps over {@link CommitLogManifest}. */
-class CommitLogManifestReader extends InputReader<Key<CommitLogManifest>> {
+class CommitLogManifestReader
+    extends RetryingInputReader<Key<CommitLogManifest>, Key<CommitLogManifest>> {
 
   static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();
@@ -45,8 +42,7 @@ class CommitLogManifestReader extends InputReader<Key<CommitLogManifest>> {
    */
   private static final long MEMORY_ESTIMATE = 100 * 1024;
 
-  private static final Retrier retrier = new Retrier(new SystemSleeper(), 3);
-
-  private static final long serialVersionUID = 2553537421598284748L;
+  private static final long serialVersionUID = 6215490573108252100L;
 
   private final Key<CommitLogBucket> bucketKey;
@@ -58,55 +54,19 @@ class CommitLogManifestReader extends InputReader<Key<CommitLogManifest>> {
   @Nullable
   private final DateTime olderThan;
 
-  private Cursor cursor;
-  private int total;
-  private int loaded;
-
-  private transient QueryResultIterator<Key<CommitLogManifest>> queryIterator;
-
   CommitLogManifestReader(Key<CommitLogBucket> bucketKey, @Nullable DateTime olderThan) {
     this.bucketKey = bucketKey;
     this.olderThan = olderThan;
   }
 
-  /** Called once at start. Cache the expected size. */
   @Override
-  public void beginShard() {
-    total = query().count();
+  public QueryResultIterator<Key<CommitLogManifest>> getQueryIterator(@Nullable Cursor cursor) {
+    return startQueryAt(query(), cursor).keys().iterator();
   }
 
-  /** Called every time we are deserialized. Create a new query or resume an existing one. */
   @Override
-  public void beginSlice() {
-    Query<CommitLogManifest> query = query();
-    if (cursor != null) {
-      // The underlying query is strongly consistent, and according to the documentation at
-      // https://cloud.google.com/appengine/docs/java/datastore/queries#Java_Data_consistency
-      // "strongly consistent queries are always transactionally consistent". However, each time
-      // we restart the query at a cursor we have a new effective query, and "if the results for a
-      // query change between uses of a cursor, the query notices only changes that occur in
-      // results after the cursor. If a new result appears before the cursor's position for the
-      // query, it will not be returned when the results after the cursor are fetched."
-      // What this means in practice is that entities that are created after the initial query
-      // begins may or may not be seen by this reader, depending on whether the query was
-      // paused and restarted with a cursor before it would have reached the new entity.
-      query = query.startAt(cursor);
-    }
-    queryIterator = query.keys().iterator();
-  }
-
-  /** Called occasionally alongside {@link #next}. */
-  @Override
-  public Double getProgress() {
-    // Cap progress at 1.0, since the query's count() can increase during the run of the mapreduce
-    // if more entities are written, but we've cached the value once in "total".
-    return Math.min(1.0, ((double) loaded) / total);
-  }
-
-  /** Called before we are serialized. Save a serializable cursor for this query. */
-  @Override
-  public void endSlice() {
-    cursor = queryIterator.getCursor();
+  public int getTotal() {
+    return query().count();
   }
 
   /** Query for children of this bucket. */
@@ -133,19 +93,6 @@ class CommitLogManifestReader extends InputReader<Key<CommitLogManifest>> {
    */
   @Override
   public Key<CommitLogManifest> next() {
-    loaded++;
-    final Cursor currentCursor = queryIterator.getCursor();
-    try {
-      return retrier.callWithRetry(
-          () -> queryIterator.next(),
-          (thrown, failures, maxAttempts) -> {
-            checkNotNull(currentCursor, "Can't retry because cursor is null. Giving up.");
-            queryIterator = query().startAt(currentCursor).keys().iterator();
-          },
-          DatastoreTimeoutException.class);
-    } finally {
-      ofy().clearSessionCache(); // Try not to leak memory.
-    }
+    return nextQueryResult();
   }
 }
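
The EppResource readers mentioned in the commit message live in other files of
this commit and aren't shown above. To show how the pieces fit together, here
is a hypothetical minimal subclass in the same style, written against the base
class sketched earlier; the class name and the polymorphic EppResource query
are assumptions, not the commit's actual code:

import static google.registry.model.ofy.ObjectifyService.ofy;

import com.google.appengine.api.datastore.Cursor;
import com.google.appengine.api.datastore.QueryResultIterator;
import com.googlecode.objectify.Key;
import com.googlecode.objectify.cmd.Query;
import google.registry.model.EppResource;
import javax.annotation.Nullable;

/** Hypothetical keys-only reader over all EppResources, shown only to illustrate the pattern. */
class EppResourceKeyReader extends RetryingInputReader<Key<EppResource>, Key<EppResource>> {

  private static final long serialVersionUID = 1L; // Illustrative only.

  private Query<EppResource> query() {
    return ofy().load().type(EppResource.class);
  }

  @Override
  public QueryResultIterator<Key<EppResource>> getQueryIterator(@Nullable Cursor cursor) {
    return startQueryAt(query(), cursor).keys().iterator();
  }

  @Override
  public int getTotal() {
    return query().count();
  }

  /** Every read in next() now goes through the base class's retrying nextQueryResult(). */
  @Override
  public Key<EppResource> next() {
    return nextQueryResult();
  }
}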