Improve logs in the GCP proxy

Tweaked a few logging levels to not spam error level logs. Also make it easy to debug issues in case relay retry fails.

[1] Put non-fatal exceptions that should be logged at warning in their explicit sets. Also always use the root cause to determine if an exception is non-fatal, because sometimes the actual causes are wrapped inside other exceptions.

[2] Record the cause of a relay failure, and record if a relay retry is successful. This way we can look at the log and figure out if a relay is eventually successful.

[3] Add a log when the frontend connection from the client is terminated.

[4] Alway close the relay channel when a relay has failed, which, depend on if the channel is frontend or backend, will reconnect and trigger a retry.

[5] Lastly changed failure test to use assertThrows instead of fail.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=208649916
This commit is contained in:
jianglai 2018-08-14 08:22:31 -07:00
parent b552c1d115
commit 0e64015cdf
10 changed files with 154 additions and 95 deletions

View file

@ -14,13 +14,14 @@
package google.registry.proxy.handler;
import static com.google.common.base.Preconditions.checkArgument;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableSet;
import com.google.common.flogger.FluentLogger;
import google.registry.proxy.metric.FrontendMetrics;
import io.netty.buffer.ByteBuf;
import io.netty.channel.Channel;
import io.netty.channel.ChannelFuture;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelPromise;
@ -58,10 +59,16 @@ import javax.net.ssl.SSLHandshakeException;
* <p>This handler is session aware and will store all the session cookies that the are contained in
* the HTTP response headers, which are added back to headers of subsequent HTTP requests.
*/
abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpResponse> {
public abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpResponse> {
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
protected static final ImmutableSet<Class<? extends Exception>> NON_FATAL_INBOUND_EXCEPTIONS =
ImmutableSet.of(ReadTimeoutException.class, SSLHandshakeException.class);
protected static final ImmutableSet<Class<? extends Exception>> NON_FATAL_OUTBOUND_EXCEPTIONS =
ImmutableSet.of(NonOkHttpResponseException.class);
private final Map<String, Cookie> cookieStore = new LinkedHashMap<>();
private final String relayHost;
private final String relayPath;
@ -153,12 +160,9 @@ abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpRespo
@Override
protected void encode(ChannelHandlerContext ctx, FullHttpResponse response, ByteBuf byteBuf)
throws Exception {
checkArgument(
response.status().equals(HttpResponseStatus.OK),
"Cannot relay HTTP response status \"%s\" in channel %s:\n%s",
response.status(),
ctx.channel(),
response.content().toString(UTF_8));
if (!response.status().equals(HttpResponseStatus.OK)) {
throw new NonOkHttpResponseException(response, ctx.channel());
}
saveCookies(response);
byteBuf.writeBytes(encodeFullHttpResponse(response));
}
@ -166,10 +170,7 @@ abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpRespo
/** Terminates connection upon inbound exception. */
@Override
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
// ReadTimeoutException is non fatal as the client times out due to inactivity.
// SslHandshakeException is caused by the client not able to complete the handshake, we should
// not log it at error as we do not control client behavior.
if (cause instanceof ReadTimeoutException || cause instanceof SSLHandshakeException) {
if (NON_FATAL_INBOUND_EXCEPTIONS.contains(Throwables.getRootCause(cause).getClass())) {
logger.atWarning().withCause(cause).log(
"Inbound exception caught for channel %s", ctx.channel());
} else {
@ -187,10 +188,7 @@ abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpRespo
(ChannelFuture channelFuture) -> {
if (!channelFuture.isSuccess()) {
Throwable cause = channelFuture.cause();
// If the failure is caused by IllegalArgumentException, we know that it is because we
// got a non 200 response. This is an expected error from the backend and should not be
// logged at severe.
if (Throwables.getRootCause(cause) instanceof IllegalArgumentException) {
if (NON_FATAL_OUTBOUND_EXCEPTIONS.contains(Throwables.getRootCause(cause).getClass())) {
logger.atWarning().withCause(channelFuture.cause()).log(
"Outbound exception caught for channel %s", channelFuture.channel());
} else {
@ -202,4 +200,14 @@ abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpRespo
});
super.write(ctx, msg, promise);
}
/** Exception thrown when the response status from GAE is not 200. */
public static class NonOkHttpResponseException extends Exception {
NonOkHttpResponseException(FullHttpResponse response, Channel channel) {
super(
String.format(
"Cannot relay HTTP response status \"%s\" in channel %s:\n%s",
response.status(), channel, response.content().toString(UTF_8)));
}
}
}

View file

@ -66,39 +66,51 @@ public class RelayHandler<I> extends SimpleChannelInboundHandler<I> {
logger.atSevere().log("Relay channel not specified for channel: %s", channel);
ChannelFuture unusedFuture = channel.close();
} else {
writeToRelayChannel(channel, relayChannel, msg);
writeToRelayChannel(channel, relayChannel, msg, false);
}
}
public static void writeToRelayChannel(Channel channel, Channel relayChannel, Object msg) {
public static void writeToRelayChannel(
Channel channel, Channel relayChannel, Object msg, boolean retry) {
ChannelFuture unusedFuture =
relayChannel
.writeAndFlush(msg)
.addListener(
future -> {
if (!future.isSuccess()) {
logger.atWarning().log(
"Relay failed: %s --> %s\nINBOUND: %s\nOUTBOUND: %s",
// TODO (jianglai): do not log the message once retry behavior is confirmed.
logger.atWarning().withCause(future.cause()).log(
"Relay failed: %s --> %s\nINBOUND: %s\nOUTBOUND: %s\nMESSAGE: %s",
channel.attr(PROTOCOL_KEY).get().name(),
relayChannel.attr(PROTOCOL_KEY).get().name(),
channel,
relayChannel);
relayChannel,
msg);
// If we cannot write to the relay channel and the originating channel has
// a relay buffer (i. e. we tried to relay the frontend to the backend), store
// the message in the buffer for retry later. Otherwise, we are relaying from
// the backend to the frontend, and this relay failure cannot be recovered
// from, we should just kill the relay (frontend) channel, which in turn will
// kill the backend channel. We should not kill any backend channel while the
// the frontend channel is open, because that will just trigger a reconnect.
// It is fine to just save the message object itself, not a clone of it,
// because if the relay is not successful, its content is not read, therefore
// its buffer is not cleared.
// the message in the buffer for retry later. The relay channel (backend) should
// be killed (if it is not already dead, usually the relay is unsuccessful
// because the connection is closed), and a new backend channel will re-connect
// as long as the frontend channel is open. Otherwise, we are relaying from the
// backend to the frontend, and this relay failure cannot be recovered from: we
// should just kill the relay (frontend) channel, which in turn will kill the
// backend channel. It is fine to just save the message object itself, not a
// clone of it, because if the relay is not successful, its content is not read,
// therefore its buffer is not cleared.
Queue<Object> relayBuffer = channel.attr(RELAY_BUFFER_KEY).get();
if (relayBuffer != null) {
channel.attr(RELAY_BUFFER_KEY).get().add(msg);
} else {
ChannelFuture unusedFuture2 = relayChannel.close();
}
ChannelFuture unusedFuture2 = relayChannel.close();
} else if (retry) {
// TODO (jianglai): do not log the message once retry behavior is confirmed.
logger.atInfo().log(
"Relay retry succeeded: %s --> %s\nINBOUND: %s\nOUTBOUND: %s\nsMESSAGE: %s",
channel.attr(PROTOCOL_KEY).get().name(),
relayChannel.attr(PROTOCOL_KEY).get().name(),
channel,
relayChannel,
msg);
}
});
}