From 5315752bc060a257c9f84bca1cc40b912398fd9a Mon Sep 17 00:00:00 2001 From: Pavlo Tkach <3469726+ptkach@users.noreply.github.com> Date: Thu, 4 Jan 2024 18:35:21 -0500 Subject: [PATCH] Add ICANN csv response GZIP decoding (#2269) --- .../rdap/UpdateRegistrarRdapBaseUrlsAction.java | 10 +++++++++- .../registry/request/UrlConnectionUtils.java | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/google/registry/rdap/UpdateRegistrarRdapBaseUrlsAction.java b/core/src/main/java/google/registry/rdap/UpdateRegistrarRdapBaseUrlsAction.java index da5220888..15a14b5d7 100644 --- a/core/src/main/java/google/registry/rdap/UpdateRegistrarRdapBaseUrlsAction.java +++ b/core/src/main/java/google/registry/rdap/UpdateRegistrarRdapBaseUrlsAction.java @@ -17,6 +17,8 @@ package google.registry.rdap; import static com.google.api.client.http.HttpStatusCodes.STATUS_CODE_OK; import static com.google.common.net.HttpHeaders.ACCEPT_ENCODING; import static google.registry.persistence.transaction.TransactionManagerFactory.tm; +import static google.registry.request.UrlConnectionUtils.gUnzipBytes; +import static google.registry.request.UrlConnectionUtils.isGZipped; import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.ImmutableMap; @@ -115,7 +117,13 @@ public final class UpdateRegistrarRdapBaseUrlsAction implements Runnable { if (connection.getResponseCode() != STATUS_CODE_OK) { throw new UrlConnectionException("Failed to load RDAP base URLs from ICANN", connection); } - csvString = new String(UrlConnectionUtils.getResponseBytes(connection), UTF_8); + // With GZIP encoding header in the request (see above) ICANN still sent the response in plain + // text until at some point they started sending the response encoded in gzip, which broke our + // parsing of the response. Because of that it was decided to check the response encoding, + // just in case they ever start sending plain text again. 
+ byte[] responseBytes = UrlConnectionUtils.getResponseBytes(connection); + csvString = + new String(isGZipped(responseBytes) ? gUnzipBytes(responseBytes) : responseBytes, UTF_8); } finally { connection.disconnect(); } diff --git a/core/src/main/java/google/registry/request/UrlConnectionUtils.java b/core/src/main/java/google/registry/request/UrlConnectionUtils.java index 143fdb235..fa14dd342 100644 --- a/core/src/main/java/google/registry/request/UrlConnectionUtils.java +++ b/core/src/main/java/google/registry/request/UrlConnectionUtils.java @@ -25,12 +25,15 @@ import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.base.Strings; import com.google.common.io.ByteStreams; import com.google.common.net.MediaType; +import java.io.ByteArrayInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URLConnection; import java.util.Random; +import java.util.zip.GZIPInputStream; +import org.apache.commons.compress.utils.IOUtils; /** Utilities for common functionality relating to {@link URLConnection}s. */ public final class UrlConnectionUtils { @@ -55,6 +58,20 @@ public final class UrlConnectionUtils { } } + /** Decodes compressed data in GZIP format. */ + public static byte[] gUnzipBytes(byte[] bytes) throws IOException { + try (GZIPInputStream inputStream = new GZIPInputStream(new ByteArrayInputStream(bytes))) { + return IOUtils.toByteArray(inputStream); + } + } + + /** Checks whether {@code bytes} are GZIP encoded. */ + public static boolean isGZipped(byte[] bytes) { + // See GZIPOutputStream.writeHeader() + return (bytes.length > 2 && bytes[0] == (byte) (GZIPInputStream.GZIP_MAGIC)) + && (bytes[1] == (byte) (GZIPInputStream.GZIP_MAGIC >> 8)); + } + /** Sets auth on the given connection with the given username/password. 
*/ public static void setBasicAuth(HttpURLConnection connection, String username, String password) { setBasicAuth(connection, String.format("%s:%s", username, password));