mirror of
https://github.com/google/nomulus.git
synced 2025-05-16 17:37:13 +02:00
Improve logs in the GCP proxy
Tweaked a few logging levels to not spam error level logs. Also make it easy to debug issues in case relay retry fails. [1] Put non-fatal exceptions that should be logged at warning in their explicit sets. Also always use the root cause to determine if an exception is non-fatal, because sometimes the actual causes are wrapped inside other exceptions. [2] Record the cause of a relay failure, and record if a relay retry is successful. This way we can look at the log and figure out if a relay is eventually successful. [3] Add a log when the frontend connection from the client is terminated. [4] Always close the relay channel when a relay has failed, which, depending on whether the channel is frontend or backend, will reconnect and trigger a retry. [5] Lastly, changed the failure test to use assertThrows instead of fail. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=208649916
This commit is contained in:
parent
b552c1d115
commit
0e64015cdf
10 changed files with 154 additions and 95 deletions
|
@ -14,13 +14,14 @@
|
|||
|
||||
package google.registry.proxy.handler;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.flogger.FluentLogger;
|
||||
import google.registry.proxy.metric.FrontendMetrics;
|
||||
import io.netty.buffer.ByteBuf;
|
||||
import io.netty.channel.Channel;
|
||||
import io.netty.channel.ChannelFuture;
|
||||
import io.netty.channel.ChannelHandlerContext;
|
||||
import io.netty.channel.ChannelPromise;
|
||||
|
@ -58,10 +59,16 @@ import javax.net.ssl.SSLHandshakeException;
|
|||
* <p>This handler is session aware and will store all the session cookies that the are contained in
|
||||
* the HTTP response headers, which are added back to headers of subsequent HTTP requests.
|
||||
*/
|
||||
abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpResponse> {
|
||||
public abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpResponse> {
|
||||
|
||||
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
|
||||
|
||||
protected static final ImmutableSet<Class<? extends Exception>> NON_FATAL_INBOUND_EXCEPTIONS =
|
||||
ImmutableSet.of(ReadTimeoutException.class, SSLHandshakeException.class);
|
||||
|
||||
protected static final ImmutableSet<Class<? extends Exception>> NON_FATAL_OUTBOUND_EXCEPTIONS =
|
||||
ImmutableSet.of(NonOkHttpResponseException.class);
|
||||
|
||||
private final Map<String, Cookie> cookieStore = new LinkedHashMap<>();
|
||||
private final String relayHost;
|
||||
private final String relayPath;
|
||||
|
@ -153,12 +160,9 @@ abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpRespo
|
|||
@Override
|
||||
protected void encode(ChannelHandlerContext ctx, FullHttpResponse response, ByteBuf byteBuf)
|
||||
throws Exception {
|
||||
checkArgument(
|
||||
response.status().equals(HttpResponseStatus.OK),
|
||||
"Cannot relay HTTP response status \"%s\" in channel %s:\n%s",
|
||||
response.status(),
|
||||
ctx.channel(),
|
||||
response.content().toString(UTF_8));
|
||||
if (!response.status().equals(HttpResponseStatus.OK)) {
|
||||
throw new NonOkHttpResponseException(response, ctx.channel());
|
||||
}
|
||||
saveCookies(response);
|
||||
byteBuf.writeBytes(encodeFullHttpResponse(response));
|
||||
}
|
||||
|
@ -166,10 +170,7 @@ abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpRespo
|
|||
/** Terminates connection upon inbound exception. */
|
||||
@Override
|
||||
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
|
||||
// ReadTimeoutException is non fatal as the client times out due to inactivity.
|
||||
// SslHandshakeException is caused by the client not able to complete the handshake, we should
|
||||
// not log it at error as we do not control client behavior.
|
||||
if (cause instanceof ReadTimeoutException || cause instanceof SSLHandshakeException) {
|
||||
if (NON_FATAL_INBOUND_EXCEPTIONS.contains(Throwables.getRootCause(cause).getClass())) {
|
||||
logger.atWarning().withCause(cause).log(
|
||||
"Inbound exception caught for channel %s", ctx.channel());
|
||||
} else {
|
||||
|
@ -187,10 +188,7 @@ abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpRespo
|
|||
(ChannelFuture channelFuture) -> {
|
||||
if (!channelFuture.isSuccess()) {
|
||||
Throwable cause = channelFuture.cause();
|
||||
// If the failure is caused by IllegalArgumentException, we know that it is because we
|
||||
// got a non 200 response. This is an expected error from the backend and should not be
|
||||
// logged at severe.
|
||||
if (Throwables.getRootCause(cause) instanceof IllegalArgumentException) {
|
||||
if (NON_FATAL_OUTBOUND_EXCEPTIONS.contains(Throwables.getRootCause(cause).getClass())) {
|
||||
logger.atWarning().withCause(channelFuture.cause()).log(
|
||||
"Outbound exception caught for channel %s", channelFuture.channel());
|
||||
} else {
|
||||
|
@ -202,4 +200,14 @@ abstract class HttpsRelayServiceHandler extends ByteToMessageCodec<FullHttpRespo
|
|||
});
|
||||
super.write(ctx, msg, promise);
|
||||
}
|
||||
|
||||
/** Exception thrown when the response status from GAE is not 200. */
|
||||
public static class NonOkHttpResponseException extends Exception {
|
||||
NonOkHttpResponseException(FullHttpResponse response, Channel channel) {
|
||||
super(
|
||||
String.format(
|
||||
"Cannot relay HTTP response status \"%s\" in channel %s:\n%s",
|
||||
response.status(), channel, response.content().toString(UTF_8)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -66,39 +66,51 @@ public class RelayHandler<I> extends SimpleChannelInboundHandler<I> {
|
|||
logger.atSevere().log("Relay channel not specified for channel: %s", channel);
|
||||
ChannelFuture unusedFuture = channel.close();
|
||||
} else {
|
||||
writeToRelayChannel(channel, relayChannel, msg);
|
||||
writeToRelayChannel(channel, relayChannel, msg, false);
|
||||
}
|
||||
}
|
||||
|
||||
public static void writeToRelayChannel(Channel channel, Channel relayChannel, Object msg) {
|
||||
public static void writeToRelayChannel(
|
||||
Channel channel, Channel relayChannel, Object msg, boolean retry) {
|
||||
ChannelFuture unusedFuture =
|
||||
relayChannel
|
||||
.writeAndFlush(msg)
|
||||
.addListener(
|
||||
future -> {
|
||||
if (!future.isSuccess()) {
|
||||
logger.atWarning().log(
|
||||
"Relay failed: %s --> %s\nINBOUND: %s\nOUTBOUND: %s",
|
||||
// TODO (jianglai): do not log the message once retry behavior is confirmed.
|
||||
logger.atWarning().withCause(future.cause()).log(
|
||||
"Relay failed: %s --> %s\nINBOUND: %s\nOUTBOUND: %s\nMESSAGE: %s",
|
||||
channel.attr(PROTOCOL_KEY).get().name(),
|
||||
relayChannel.attr(PROTOCOL_KEY).get().name(),
|
||||
channel,
|
||||
relayChannel);
|
||||
relayChannel,
|
||||
msg);
|
||||
// If we cannot write to the relay channel and the originating channel has
|
||||
// a relay buffer (i. e. we tried to relay the frontend to the backend), store
|
||||
// the message in the buffer for retry later. Otherwise, we are relaying from
|
||||
// the backend to the frontend, and this relay failure cannot be recovered
|
||||
// from, we should just kill the relay (frontend) channel, which in turn will
|
||||
// kill the backend channel. We should not kill any backend channel while the
|
||||
// the frontend channel is open, because that will just trigger a reconnect.
|
||||
// It is fine to just save the message object itself, not a clone of it,
|
||||
// because if the relay is not successful, its content is not read, therefore
|
||||
// its buffer is not cleared.
|
||||
// the message in the buffer for retry later. The relay channel (backend) should
|
||||
// be killed (if it is not already dead, usually the relay is unsuccessful
|
||||
// because the connection is closed), and a new backend channel will re-connect
|
||||
// as long as the frontend channel is open. Otherwise, we are relaying from the
|
||||
// backend to the frontend, and this relay failure cannot be recovered from: we
|
||||
// should just kill the relay (frontend) channel, which in turn will kill the
|
||||
// backend channel. It is fine to just save the message object itself, not a
|
||||
// clone of it, because if the relay is not successful, its content is not read,
|
||||
// therefore its buffer is not cleared.
|
||||
Queue<Object> relayBuffer = channel.attr(RELAY_BUFFER_KEY).get();
|
||||
if (relayBuffer != null) {
|
||||
channel.attr(RELAY_BUFFER_KEY).get().add(msg);
|
||||
} else {
|
||||
ChannelFuture unusedFuture2 = relayChannel.close();
|
||||
}
|
||||
ChannelFuture unusedFuture2 = relayChannel.close();
|
||||
} else if (retry) {
|
||||
// TODO (jianglai): do not log the message once retry behavior is confirmed.
|
||||
logger.atInfo().log(
|
||||
"Relay retry succeeded: %s --> %s\nINBOUND: %s\nOUTBOUND: %s\nsMESSAGE: %s",
|
||||
channel.attr(PROTOCOL_KEY).get().name(),
|
||||
relayChannel.attr(PROTOCOL_KEY).get().name(),
|
||||
channel,
|
||||
relayChannel,
|
||||
msg);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue