Implement retry for transient errors in WHOIS server

We now attempt to retry Whois queries in the event of a short-lived error. Currently, we consider 'DatastoreTimeoutException' and 'DatastoreFailureException' as transient.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=152044934
This commit is contained in:
larryruili 2017-04-03 12:49:40 -07:00 committed by Ben McIlwain
parent c3df4e26a3
commit 7359cc13b8
6 changed files with 77 additions and 13 deletions

View file

@ -52,11 +52,12 @@ public class Retrier implements Serializable {
* <p>Retrying is done a fixed number of times, with exponential backoff, if the exception that is * <p>Retrying is done a fixed number of times, with exponential backoff, if the exception that is
* thrown is deemed retryable by the predicate. If the error is not considered retryable, or if * thrown is deemed retryable by the predicate. If the error is not considered retryable, or if
* the thread is interrupted, or if the allowable number of attempts has been exhausted, the * the thread is interrupted, or if the allowable number of attempts has been exhausted, the
* original exception is propagated through to the caller. * original exception is propagated through to the caller. Checked exceptions are wrapped in a
* RuntimeException, while unchecked exceptions are propagated as-is.
* *
* @return <V> the value returned by the {@link Callable}. * @return <V> the value returned by the {@link Callable}.
*/ */
public final <V> V callWithRetry(Callable<V> callable, Predicate<Throwable> isRetryable) { private <V> V callWithRetry(Callable<V> callable, Predicate<Throwable> isRetryable) {
int failures = 0; int failures = 0;
while (true) { while (true) {
try { try {
@ -87,7 +88,8 @@ public class Retrier implements Serializable {
* <p>Retrying is done a fixed number of times, with exponential backoff, if the exception that is * <p>Retrying is done a fixed number of times, with exponential backoff, if the exception that is
* thrown is on a whitelist of retryable errors. If the error is not on the whitelist, or if the * thrown is on a whitelist of retryable errors. If the error is not on the whitelist, or if the
* thread is interrupted, or if the allowable number of attempts has been exhausted, the original * thread is interrupted, or if the allowable number of attempts has been exhausted, the original
* exception is propagated through to the caller. * exception is propagated through to the caller. Checked exceptions are wrapped in a
* RuntimeException, while unchecked exceptions are propagated as-is.
* *
* @return <V> the value returned by the {@link Callable}. * @return <V> the value returned by the {@link Callable}.
*/ */

View file

@ -68,4 +68,11 @@ public final class WhoisException extends Exception implements WhoisResponse {
.toString(); .toString();
return WhoisResponseResults.create(plaintext, 0); return WhoisResponseResults.create(plaintext, 0);
} }
/** Exception that wraps WhoisExceptions returned from Retrier. */
public static final class UncheckedWhoisException extends RuntimeException {
UncheckedWhoisException(WhoisException whoisException) {
super(whoisException);
}
}
} }

View file

@ -35,7 +35,7 @@ public interface WhoisResponse {
/** Returns the time at which this response was created. */ /** Returns the time at which this response was created. */
DateTime getTimestamp(); DateTime getTimestamp();
/** A wraper class for the plaintext response of a WHOIS command and its number of results. */ /** A wrapper class for the plaintext response of a WHOIS command and its number of results. */
@AutoValue @AutoValue
abstract static class WhoisResponseResults { abstract static class WhoisResponseResults {
public abstract String plainTextOutput(); public abstract String plainTextOutput();

View file

@ -18,6 +18,8 @@ import static google.registry.request.Action.Method.POST;
import static javax.servlet.http.HttpServletResponse.SC_INTERNAL_SERVER_ERROR; import static javax.servlet.http.HttpServletResponse.SC_INTERNAL_SERVER_ERROR;
import static javax.servlet.http.HttpServletResponse.SC_OK; import static javax.servlet.http.HttpServletResponse.SC_OK;
import com.google.appengine.api.datastore.DatastoreFailureException;
import com.google.appengine.api.datastore.DatastoreTimeoutException;
import com.google.common.net.MediaType; import com.google.common.net.MediaType;
import google.registry.config.RegistryConfig.Config; import google.registry.config.RegistryConfig.Config;
import google.registry.request.Action; import google.registry.request.Action;
@ -26,9 +28,12 @@ import google.registry.request.auth.Auth;
import google.registry.request.auth.AuthLevel; import google.registry.request.auth.AuthLevel;
import google.registry.util.Clock; import google.registry.util.Clock;
import google.registry.util.FormattingLogger; import google.registry.util.FormattingLogger;
import google.registry.util.Retrier;
import google.registry.whois.WhoisException.UncheckedWhoisException;
import google.registry.whois.WhoisMetrics.WhoisMetric; import google.registry.whois.WhoisMetrics.WhoisMetric;
import google.registry.whois.WhoisResponse.WhoisResponseResults; import google.registry.whois.WhoisResponse.WhoisResponseResults;
import java.io.Reader; import java.io.Reader;
import java.util.concurrent.Callable;
import javax.inject.Inject; import javax.inject.Inject;
import org.joda.time.DateTime; import org.joda.time.DateTime;
@ -72,6 +77,7 @@ public class WhoisServer implements Runnable {
@Inject Clock clock; @Inject Clock clock;
@Inject Reader input; @Inject Reader input;
@Inject Response response; @Inject Response response;
@Inject Retrier retrier;
@Inject WhoisReader whoisReader; @Inject WhoisReader whoisReader;
@Inject @Config("whoisDisclaimer") String disclaimer; @Inject @Config("whoisDisclaimer") String disclaimer;
@Inject WhoisMetric.Builder metricBuilder; @Inject WhoisMetric.Builder metricBuilder;
@ -81,25 +87,41 @@ public class WhoisServer implements Runnable {
@Override @Override
public void run() { public void run() {
String responseText; String responseText;
DateTime now = clock.nowUtc(); final DateTime now = clock.nowUtc();
try { try {
WhoisCommand command = whoisReader.readCommand(input, now); final WhoisCommand command = whoisReader.readCommand(input, now);
metricBuilder.setCommand(command); metricBuilder.setCommand(command);
WhoisResponseResults results = WhoisResponseResults results =
command.executeQuery(now).getResponse(PREFER_UNICODE, disclaimer); retrier.callWithRetry(
new Callable<WhoisResponseResults>() {
@Override
public WhoisResponseResults call() {
WhoisResponseResults results;
try {
results = command.executeQuery(now).getResponse(PREFER_UNICODE, disclaimer);
} catch (WhoisException e) {
throw new UncheckedWhoisException(e);
}
return results;
}
},
DatastoreTimeoutException.class,
DatastoreFailureException.class);
responseText = results.plainTextOutput(); responseText = results.plainTextOutput();
metricBuilder.setNumResults(results.numResults()); setWhoisMetrics(metricBuilder, results.numResults(), SC_OK);
metricBuilder.setStatus(SC_OK); } catch (UncheckedWhoisException u) {
WhoisException e = (WhoisException) u.getCause();
WhoisResponseResults results = e.getResponse(PREFER_UNICODE, disclaimer);
responseText = results.plainTextOutput();
setWhoisMetrics(metricBuilder, 0, e.getStatus());
} catch (WhoisException e) { } catch (WhoisException e) {
WhoisResponseResults results = e.getResponse(PREFER_UNICODE, disclaimer); WhoisResponseResults results = e.getResponse(PREFER_UNICODE, disclaimer);
responseText = results.plainTextOutput(); responseText = results.plainTextOutput();
metricBuilder.setNumResults(0); setWhoisMetrics(metricBuilder, 0, e.getStatus());
metricBuilder.setStatus(e.getStatus());
} catch (Throwable t) { } catch (Throwable t) {
logger.severe(t, "WHOIS request crashed"); logger.severe(t, "WHOIS request crashed");
responseText = "Internal Server Error"; responseText = "Internal Server Error";
metricBuilder.setNumResults(0); setWhoisMetrics(metricBuilder, 0, SC_INTERNAL_SERVER_ERROR);
metricBuilder.setStatus(SC_INTERNAL_SERVER_ERROR);
} }
// Note that we always return 200 (OK) even if an error was hit. This is because returning an // Note that we always return 200 (OK) even if an error was hit. This is because returning an
// non-OK HTTP status code will cause the proxy server to silently close the connection. Since // non-OK HTTP status code will cause the proxy server to silently close the connection. Since
@ -110,4 +132,10 @@ public class WhoisServer implements Runnable {
response.setPayload(responseText); response.setPayload(responseText);
whoisMetrics.recordWhoisMetric(metricBuilder.build()); whoisMetrics.recordWhoisMetric(metricBuilder.build());
} }
private static void setWhoisMetrics(
WhoisMetric.Builder metricBuilder, int numResults, int status) {
metricBuilder.setNumResults(numResults);
metricBuilder.setStatus(status);
}
} }

View file

@ -36,6 +36,8 @@ import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
import com.google.appengine.api.datastore.DatastoreFailureException;
import com.google.appengine.api.datastore.DatastoreTimeoutException;
import google.registry.model.domain.DomainResource; import google.registry.model.domain.DomainResource;
import google.registry.model.ofy.Ofy; import google.registry.model.ofy.Ofy;
import google.registry.model.registrar.Registrar; import google.registry.model.registrar.Registrar;
@ -43,7 +45,9 @@ import google.registry.model.registry.Registry;
import google.registry.testing.AppEngineRule; import google.registry.testing.AppEngineRule;
import google.registry.testing.FakeClock; import google.registry.testing.FakeClock;
import google.registry.testing.FakeResponse; import google.registry.testing.FakeResponse;
import google.registry.testing.FakeSleeper;
import google.registry.testing.InjectRule; import google.registry.testing.InjectRule;
import google.registry.util.Retrier;
import google.registry.whois.WhoisMetrics.WhoisMetric; import google.registry.whois.WhoisMetrics.WhoisMetric;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
@ -75,6 +79,7 @@ public class WhoisServerTest {
whoisServer.whoisMetrics = new WhoisMetrics(); whoisServer.whoisMetrics = new WhoisMetrics();
whoisServer.metricBuilder = WhoisMetric.builderForRequest(clock); whoisServer.metricBuilder = WhoisMetric.builderForRequest(clock);
whoisServer.disclaimer = "Doodle Disclaimer"; whoisServer.disclaimer = "Doodle Disclaimer";
whoisServer.retrier = new Retrier(new FakeSleeper(clock), 3);
return whoisServer; return whoisServer;
} }
@ -490,4 +495,24 @@ public class WhoisServerTest {
verify(server.whoisMetrics).recordWhoisMetric(eq(expected)); verify(server.whoisMetrics).recordWhoisMetric(eq(expected));
assertThat(response.getPayload()).isEqualTo("Internal Server Error"); assertThat(response.getPayload()).isEqualTo("Internal Server Error");
} }
@Test
public void testRun_retryOnTransientFailure() throws Exception {
persistResource(makeHostResource("ns1.cat.lol", "1.2.3.4"));
WhoisServer server = newWhoisServer("ns1.cat.lol");
WhoisResponse expectedResponse =
server.whoisReader.readCommand(server.input, clock.nowUtc()).executeQuery(clock.nowUtc());
WhoisReader mockReader = mock(WhoisReader.class);
WhoisCommand mockCommand = mock(WhoisCommand.class);
when(mockReader.readCommand(any(Reader.class), any(DateTime.class))).thenReturn(mockCommand);
when(mockCommand.executeQuery(any(DateTime.class)))
.thenThrow(new DatastoreFailureException("Expected transient exception #1"))
.thenThrow(new DatastoreTimeoutException("Expected transient exception #2"))
.thenReturn(expectedResponse);
server.whoisReader = mockReader;
server.run();
assertThat(response.getPayload()).isEqualTo(loadWhoisTestFile("whois_server_nameserver.txt"));
}
} }

View file

@ -18,6 +18,7 @@ import dagger.Component;
import google.registry.config.RegistryConfig.ConfigModule; import google.registry.config.RegistryConfig.ConfigModule;
import google.registry.request.RequestModule; import google.registry.request.RequestModule;
import google.registry.util.SystemClock.SystemClockModule; import google.registry.util.SystemClock.SystemClockModule;
import google.registry.util.SystemSleeper.SystemSleeperModule;
import javax.inject.Singleton; import javax.inject.Singleton;
@Singleton @Singleton
@ -25,6 +26,7 @@ import javax.inject.Singleton;
ConfigModule.class, ConfigModule.class,
RequestModule.class, RequestModule.class,
SystemClockModule.class, SystemClockModule.class,
SystemSleeperModule.class,
WhoisModule.class, WhoisModule.class,
}) })
interface WhoisTestComponent { interface WhoisTestComponent {