Retry Datastore errors in CommitLogManifestReader.next()

When trying to run the MapReduce for DeleteOldCommitLogsAction, we run into a
lot of DatastoreTimeoutExceptions during CommitLogManifestReader.next().

This causes the entire shard to fail. Since we have a lot of keys (tens of
millions), this is almost guaranteed to happen, dooming the entire MapReduce.

Here is an attempt to recover from the Timeout Exception by saving the state
before the read, then on failure restoring that state and trying again.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=165172222
This commit is contained in:
guyben 2017-08-14 06:43:39 -07:00 committed by Ben McIlwain
parent 8b0b54e997
commit 00f2662f33
3 changed files with 172 additions and 9 deletions

View file

@ -14,9 +14,12 @@
package google.registry.util;
import static com.google.common.truth.Truth.assertThat;
import google.registry.testing.ExceptionRule;
import google.registry.testing.FakeClock;
import google.registry.testing.FakeSleeper;
import google.registry.util.Retrier.FailureReporter;
import java.util.concurrent.Callable;
import org.junit.Rule;
import org.junit.Test;
@ -33,33 +36,97 @@ public class RetrierTest {
// Retrier under test, driven by a FakeSleeper wrapping a FakeClock. The second constructor
// argument is 3 -- presumably the maximum number of attempts; confirm against Retrier.
Retrier retrier = new Retrier(new FakeSleeper(new FakeClock()), 3);
/**
 * An exception to throw from {@link CountingThrower}.
 *
 * <p>Its message is the decimal string form of the {@code count} passed to the constructor.
 */
class CountingException extends RuntimeException {
static class CountingException extends RuntimeException {
CountingException(int count) {
// String concatenation turns the int into the exception message.
super("" + count);
}
}
/**
 * Test callable that fails a configurable number of times before succeeding.
 *
 * <p>While {@code count} differs from {@code numThrows}, each {@code call()} increments
 * {@code count} and throws a {@link CountingException} carrying the new count. Once
 * {@code count} equals {@code numThrows}, {@code call()} returns {@code numThrows}.
 */
class CountingThrower implements Callable<Object> {
static class CountingThrower implements Callable<Integer> {
// Number of exceptions thrown so far.
int count = 0;
// Value of count at which call() stops throwing and returns instead.
final int numThrows;
CountingThrower(int numThrows) {
this.numThrows = numThrows;
}
@Override
public Object call() {
public Integer call() {
if (count == numThrows) {
return numThrows;
}
// Increment first so the exception message reflects the 1-based number of this failure.
count++;
throw new CountingException(count);
}
}
/**
 * {@link FailureReporter} that tallies how many times each callback was invoked, so tests can
 * verify the tallies afterwards via {@link #assertNumbers}.
 */
static class TestReporter implements FailureReporter {

  /** Number of {@code beforeRetry} invocations seen so far. */
  int numBeforeRetry;

  /** Number of {@code afterFinalFailure} invocations seen so far. */
  int numOnFinalFailure;

  /** Records the call and checks that the reported failure count equals the running tally. */
  @Override
  public void beforeRetry(Throwable e, int failures, int maxAttempts) {
    numBeforeRetry += 1;
    assertThat(failures).isEqualTo(numBeforeRetry);
  }

  /** Records the call; the arguments are not inspected. */
  @Override
  public void afterFinalFailure(Throwable e, int failures) {
    numOnFinalFailure += 1;
  }

  /** Asserts that both tallies match the expected values. */
  void assertNumbers(int expectedBeforeRetry, int expectedOnFinalFailure) {
    assertThat(numBeforeRetry).isEqualTo(expectedBeforeRetry);
    assertThat(numOnFinalFailure).isEqualTo(expectedOnFinalFailure);
  }
}
/**
 * Expects a {@link CountingException} with message "3" to propagate out of
 * {@code callWithRetry} when {@code CountingException} is passed as the retryable type.
 */
@Test
public void testRetryableException() throws Exception {
thrown.expect(CountingException.class, "3");
retrier.callWithRetry(new CountingThrower(), CountingException.class);
retrier.callWithRetry(new CountingThrower(3), CountingException.class);
}
/**
 * Expects the {@link CountingException} with message "1" (the one thrown by the first call)
 * to propagate when only {@code IllegalArgumentException} is passed as the retryable type.
 */
@Test
public void testUnretryableException() throws Exception {
thrown.expect(CountingException.class, "1");
retrier.callWithRetry(new CountingThrower(), IllegalArgumentException.class);
retrier.callWithRetry(new CountingThrower(5), IllegalArgumentException.class);
}
/** Checks that a callable which throws twice and then returns yields its value, 2. */
@Test
public void testRetrySucceeded() throws Exception {
  CountingThrower failsTwice = new CountingThrower(2);
  assertThat(retrier.callWithRetry(failsTwice, CountingException.class)).isEqualTo(2);
}
/**
 * Checks that when the call ultimately fails with the exception whose message is "3", the
 * reporter has seen two {@code beforeRetry} calls and one {@code afterFinalFailure} call.
 */
@Test
public void testRetryFailed_withReporter() throws Exception {
  thrown.expect(CountingException.class, "3");
  TestReporter failureCounts = new TestReporter();
  CountingThrower failsThrice = new CountingThrower(3);
  try {
    retrier.callWithRetry(failsThrice, failureCounts, CountingException.class);
  } catch (CountingException finalFailure) {
    // Verify the callback tallies, then rethrow so the expectation set on `thrown` still applies.
    failureCounts.assertNumbers(2, 1);
    throw finalFailure;
  }
}
/**
 * Checks that a callable which throws twice and then returns yields 2, with two
 * {@code beforeRetry} calls and no {@code afterFinalFailure} call recorded.
 */
@Test
public void testRetrySucceeded_withReporter() throws Exception {
  TestReporter failureCounts = new TestReporter();
  CountingThrower failsTwice = new CountingThrower(2);
  assertThat(retrier.callWithRetry(failsTwice, failureCounts, CountingException.class))
      .isEqualTo(2);
  failureCounts.assertNumbers(2, 0);
}
/**
 * Checks that a callable which never throws yields 0 and that neither reporter callback is
 * recorded.
 */
@Test
public void testFirstTrySucceeded_withReporter() throws Exception {
  TestReporter failureCounts = new TestReporter();
  CountingThrower neverThrows = new CountingThrower(0);
  assertThat(retrier.callWithRetry(neverThrows, failureCounts, CountingException.class))
      .isEqualTo(0);
  failureCounts.assertNumbers(0, 0);
}
}