diff --git a/java/google/registry/flows/EppXmlSanitizer.java b/java/google/registry/flows/EppXmlSanitizer.java index 2113dc5dc..ee20afe71 100644 --- a/java/google/registry/flows/EppXmlSanitizer.java +++ b/java/google/registry/flows/EppXmlSanitizer.java @@ -14,14 +14,18 @@ package google.registry.flows; +import static com.google.common.base.Preconditions.checkState; + import com.google.common.base.CharMatcher; import com.google.common.collect.ImmutableSet; import com.google.common.flogger.FluentLogger; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; import java.util.Base64; import java.util.Locale; +import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.xml.namespace.QName; @@ -32,6 +36,7 @@ import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLOutputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.Characters; +import javax.xml.stream.events.StartDocument; import javax.xml.stream.events.XMLEvent; /** @@ -71,31 +76,47 @@ public class EppXmlSanitizer { private static final XMLEventFactory XML_EVENT_FACTORY = XMLEventFactory.newFactory(); /** - * Returns sanitized and pretty-printed EPP XML message. For malformed XML messages, - * base64-encoded raw bytes will be returned. + * Returns sanitized EPP XML message. For malformed XML messages, base64-encoded raw bytes will be + * returned. * - *

The xml version header {@code } is added to the result if the input - * does not already contain it. Also, an empty element will be formatted as {@code } - * instead of {@code }. + *

The output always begins with version and encoding declarations no matter if the input + * includes them. If encoding is not declared by input, UTF-8 will be used according to XML + * standard. + * + *

Also, an empty element will be formatted as {@code } instead of {@code }. */ public static String sanitizeEppXml(byte[] inputXmlBytes) { try { // Keep exactly one newline at end of sanitized string. - return CharMatcher.whitespace() - .trimTrailingFrom(new String(sanitize(inputXmlBytes), StandardCharsets.UTF_8)) - + "\n"; - } catch (XMLStreamException e) { + return CharMatcher.whitespace().trimTrailingFrom(sanitizeAndEncode(inputXmlBytes)) + "\n"; + } catch (XMLStreamException | UnsupportedEncodingException e) { logger.atWarning().withCause(e).log("Failed to sanitize EPP XML message."); return Base64.getMimeEncoder().encodeToString(inputXmlBytes); } } - private static byte[] sanitize(byte[] inputXmlBytes) throws XMLStreamException { + private static String sanitizeAndEncode(byte[] inputXmlBytes) + throws XMLStreamException, UnsupportedEncodingException { XMLEventReader xmlEventReader = XML_INPUT_FACTORY.createXMLEventReader(new ByteArrayInputStream(inputXmlBytes)); + if (!xmlEventReader.hasNext()) { + return ""; + } + + XMLEvent firstEvent = xmlEventReader.nextEvent(); + checkState(firstEvent.isStartDocument(), "Missing StartDocument"); + // Get input encoding for use in XMLEventWriter creation, so that sanitized XML preserves the + // encoding declaration. According to XML spec, UTF-8 is to be used unless input declares + // otherwise. Epp officially allows UTF-8 and UTF-16. + String inputEncoding = + Optional.ofNullable(((StartDocument) firstEvent).getCharacterEncodingScheme()) + .orElse(StandardCharsets.UTF_8.name()); + ByteArrayOutputStream outputXmlBytes = new ByteArrayOutputStream(); - XMLEventWriter xmlEventWriter = XML_OUTPUT_FACTORY.createXMLEventWriter(outputXmlBytes); + XMLEventWriter xmlEventWriter = + XML_OUTPUT_FACTORY.createXMLEventWriter(outputXmlBytes, inputEncoding); + xmlEventWriter.add(firstEvent); while (xmlEventReader.hasNext()) { XMLEvent xmlEvent = xmlEventReader.nextEvent(); @@ -118,7 +139,7 @@ public class EppXmlSanitizer { } } xmlEventWriter.flush(); - return outputXmlBytes.toByteArray(); + return outputXmlBytes.toString(inputEncoding); } private static String maskSensitiveData(String original) { diff --git a/javatests/google/registry/flows/EppXmlSanitizerTest.java b/javatests/google/registry/flows/EppXmlSanitizerTest.java index 3cc098608..1bdad99f6 100644 --- a/javatests/google/registry/flows/EppXmlSanitizerTest.java +++ b/javatests/google/registry/flows/EppXmlSanitizerTest.java @@ -17,6 +17,7 @@ package google.registry.flows; import static com.google.common.truth.Truth.assertThat; import static google.registry.flows.EppXmlSanitizer.sanitizeEppXml; import static google.registry.testing.TestDataHelper.loadBytes; +import static java.nio.charset.StandardCharsets.UTF_16LE; import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.ImmutableMap; @@ -30,12 +31,12 @@ import org.junit.runners.JUnit4; @RunWith(JUnit4.class) public class EppXmlSanitizerTest { - private static final String XML_HEADER = ""; + private static final String UTF8_HEADER = ""; @Test public void testSanitize_noSensitiveData_noop() throws Exception { byte[] inputXmlBytes = loadBytes(getClass(), "host_create.xml").read(); - String expectedXml = XML_HEADER + new String(inputXmlBytes, UTF_8); + String expectedXml = UTF8_HEADER + new String(inputXmlBytes, UTF_8); String sanitizedXml = sanitizeEppXml(inputXmlBytes); assertThat(sanitizedXml).isEqualTo(expectedXml); @@ -50,7 +51,7 @@ public class EppXmlSanitizerTest { ImmutableMap.of("PW", "oldpass", "NEWPW", "newPw")) .getEppXml(); String expectedXml = - XML_HEADER + UTF8_HEADER + new EppLoader( this, "login_update_password.xml", @@ -68,7 +69,7 @@ public class EppXmlSanitizerTest { this, "login_wrong_case.xml", ImmutableMap.of("PW", "oldpass", "NEWPW", "newPw")) .getEppXml(); String expectedXml = - XML_HEADER + UTF8_HEADER + new EppLoader( this, "login_wrong_case.xml", @@ -83,7 +84,7 @@ public class EppXmlSanitizerTest { public void testSanitize_contactAuthInfo_sanitized() throws Exception { byte[] inputXmlBytes = loadBytes(getClass(), "contact_info.xml").read(); String expectedXml = - XML_HEADER + UTF8_HEADER + new EppLoader(this, "contact_info_sanitized.xml", ImmutableMap.of()).getEppXml(); String sanitizedXml = sanitizeEppXml(inputXmlBytes); @@ -94,7 +95,7 @@ public class EppXmlSanitizerTest { public void testSanitize_contactCreateResponseAuthInfo_sanitized() throws Exception { byte[] inputXmlBytes = loadBytes(getClass(), "contact_info_from_create_response.xml").read(); String expectedXml = - XML_HEADER + UTF8_HEADER + new EppLoader( this, "contact_info_from_create_response_sanitized.xml", ImmutableMap.of()) .getEppXml(); @@ -106,7 +107,7 @@ public class EppXmlSanitizerTest { @Test public void testSanitize_emptyElement_transformedToLongForm() { byte[] inputXmlBytes = "".getBytes(UTF_8); - assertThat(sanitizeEppXml(inputXmlBytes)).isEqualTo(XML_HEADER + "\n"); + assertThat(sanitizeEppXml(inputXmlBytes)).isEqualTo(UTF8_HEADER + "\n"); } @Test @@ -119,7 +120,22 @@ public class EppXmlSanitizerTest { @Test public void testSanitize_unicode_hasCorrectCharCount() { byte[] inputXmlBytes = "\u007F\u4E43x".getBytes(UTF_8); - String expectedXml = XML_HEADER + "C**\n"; + String expectedXml = UTF8_HEADER + "C**\n"; assertThat(sanitizeEppXml(inputXmlBytes)).isEqualTo(expectedXml); } + + @Test + public void testSanitize_emptyString_encodedToBase64() { + byte[] inputXmlBytes = "".getBytes(UTF_8); + assertThat(sanitizeEppXml(inputXmlBytes)).isEqualTo(""); + } + + @Test + public void testSanitize_utf16_encodingPreserved() { + // Test data should specify an endian-specific UTF-16 scheme for easy assertion. If 'UTF-16' is + // used, the XMLEventReader in sanitizer may resolve it to an endian-specific one. + String inputXml = "

\u03bc

\n"; + String sanitizedXml = sanitizeEppXml(inputXml.getBytes(UTF_16LE)); + assertThat(sanitizedXml).isEqualTo(inputXml); + } } diff --git a/javatests/google/registry/testing/GpgSystemCommandRule.java b/javatests/google/registry/testing/GpgSystemCommandRule.java index b073a46b5..522295f32 100644 --- a/javatests/google/registry/testing/GpgSystemCommandRule.java +++ b/javatests/google/registry/testing/GpgSystemCommandRule.java @@ -17,6 +17,7 @@ package google.registry.testing; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Strings.isNullOrEmpty; import static com.google.common.truth.Truth.assertWithMessage; import static java.nio.charset.StandardCharsets.UTF_8; @@ -84,9 +85,14 @@ public final class GpgSystemCommandRule extends ExternalResource { @Override protected void before() throws IOException, InterruptedException { checkState(Objects.equals(cwd, DEV_NULL)); - File tmpRoot = null; - tmpRoot = new File(System.getenv("TMPDIR")); - cwd = File.createTempFile(TEMP_FILE_PREFIX, "", tmpRoot); + String tmpRootDirString = System.getenv("TMPDIR"); + // Create the working directory for the forked process on Temp file system. Create under the + // path specified by 'TMPDIR' envrionment variable if defined, otherwise create under the + // runtime's default (typically /tmp). + cwd = + isNullOrEmpty(tmpRootDirString) + ? File.createTempFile(TEMP_FILE_PREFIX, "") + : File.createTempFile(TEMP_FILE_PREFIX, "", new File(tmpRootDirString)); cwd.delete(); cwd.mkdir(); conf = new File(cwd, ".gnupg"); @@ -118,6 +124,7 @@ public final class GpgSystemCommandRule extends ExternalResource { @Override protected void after() { + // TODO(weiminyu): we should delete the cwd tree. cwd = DEV_NULL; conf = DEV_NULL; } diff --git a/javatests/google/registry/testing/TestDataHelper.java b/javatests/google/registry/testing/TestDataHelper.java index a7cf34415..49dcd83c7 100644 --- a/javatests/google/registry/testing/TestDataHelper.java +++ b/javatests/google/registry/testing/TestDataHelper.java @@ -23,7 +23,9 @@ import com.google.common.collect.ImmutableMap; import com.google.common.io.ByteSource; import com.google.common.io.MoreFiles; import com.google.common.io.Resources; +import java.io.IOException; import java.net.URI; +import java.nio.file.FileSystemAlreadyExistsException; import java.nio.file.FileSystems; import java.nio.file.Path; import java.nio.file.Paths; @@ -93,7 +95,21 @@ public final class TestDataHelper { /** Returns a recursive iterable of all files in the given directory. */ public static Iterable listFiles(Class context, String directory) throws Exception { URI dir = Resources.getResource(context, directory).toURI(); - FileSystems.newFileSystem(dir, ImmutableMap.of("create", "true")); + ensureFileSystemPresentForUri(dir); return MoreFiles.fileTraverser().breadthFirst(Paths.get(dir)); } + + private static void ensureFileSystemPresentForUri(URI uri) throws IOException { + if (uri.getScheme().equals(FileSystems.getDefault().provider().getScheme())) { + // URI maps to default file system (file://...), which must be present. Besides, calling + // FileSystem.newFileSystem on this URI may trigger FileSystemAlreadyExistsException. + return; + } + try { + // URI maps to a special file system, e.g., jar:... + FileSystems.newFileSystem(uri, ImmutableMap.of("create", "true")); + } catch (FileSystemAlreadyExistsException e) { + // ignore. + } + } }