diff --git a/java/google/registry/flows/EppXmlSanitizer.java b/java/google/registry/flows/EppXmlSanitizer.java
index 2113dc5dc..ee20afe71 100644
--- a/java/google/registry/flows/EppXmlSanitizer.java
+++ b/java/google/registry/flows/EppXmlSanitizer.java
@@ -14,14 +14,18 @@
package google.registry.flows;
+import static com.google.common.base.Preconditions.checkState;
+
import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableSet;
import com.google.common.flogger.FluentLogger;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Locale;
+import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.namespace.QName;
@@ -32,6 +36,7 @@ import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Characters;
+import javax.xml.stream.events.StartDocument;
import javax.xml.stream.events.XMLEvent;
/**
@@ -71,31 +76,47 @@ public class EppXmlSanitizer {
private static final XMLEventFactory XML_EVENT_FACTORY = XMLEventFactory.newFactory();
/**
- * Returns sanitized and pretty-printed EPP XML message. For malformed XML messages,
- * base64-encoded raw bytes will be returned.
+ * Returns sanitized EPP XML message. For malformed XML messages, base64-encoded raw bytes will be
+ * returned.
*
- *
The xml version header {@code } is added to the result if the input
- * does not already contain it. Also, an empty element will be formatted as {@code }
- * instead of {@code }.
+ *
The output always begins with version and encoding declarations no matter if the input
+ * includes them. If encoding is not declared by input, UTF-8 will be used according to XML
+ * standard.
+ *
+ *
Also, an empty element will be formatted as {@code } instead of {@code }.
*/
public static String sanitizeEppXml(byte[] inputXmlBytes) {
try {
// Keep exactly one newline at end of sanitized string.
- return CharMatcher.whitespace()
- .trimTrailingFrom(new String(sanitize(inputXmlBytes), StandardCharsets.UTF_8))
- + "\n";
- } catch (XMLStreamException e) {
+ return CharMatcher.whitespace().trimTrailingFrom(sanitizeAndEncode(inputXmlBytes)) + "\n";
+ } catch (XMLStreamException | UnsupportedEncodingException e) {
logger.atWarning().withCause(e).log("Failed to sanitize EPP XML message.");
return Base64.getMimeEncoder().encodeToString(inputXmlBytes);
}
}
- private static byte[] sanitize(byte[] inputXmlBytes) throws XMLStreamException {
+ private static String sanitizeAndEncode(byte[] inputXmlBytes)
+ throws XMLStreamException, UnsupportedEncodingException {
XMLEventReader xmlEventReader =
XML_INPUT_FACTORY.createXMLEventReader(new ByteArrayInputStream(inputXmlBytes));
+ if (!xmlEventReader.hasNext()) {
+ return "";
+ }
+
+ XMLEvent firstEvent = xmlEventReader.nextEvent();
+ checkState(firstEvent.isStartDocument(), "Missing StartDocument");
+ // Get input encoding for use in XMLEventWriter creation, so that sanitized XML preserves the
+ // encoding declaration. According to XML spec, UTF-8 is to be used unless input declares
+ // otherwise. Epp officially allows UTF-8 and UTF-16.
+ String inputEncoding =
+ Optional.ofNullable(((StartDocument) firstEvent).getCharacterEncodingScheme())
+ .orElse(StandardCharsets.UTF_8.name());
+
ByteArrayOutputStream outputXmlBytes = new ByteArrayOutputStream();
- XMLEventWriter xmlEventWriter = XML_OUTPUT_FACTORY.createXMLEventWriter(outputXmlBytes);
+ XMLEventWriter xmlEventWriter =
+ XML_OUTPUT_FACTORY.createXMLEventWriter(outputXmlBytes, inputEncoding);
+ xmlEventWriter.add(firstEvent);
while (xmlEventReader.hasNext()) {
XMLEvent xmlEvent = xmlEventReader.nextEvent();
@@ -118,7 +139,7 @@ public class EppXmlSanitizer {
}
}
xmlEventWriter.flush();
- return outputXmlBytes.toByteArray();
+ return outputXmlBytes.toString(inputEncoding);
}
private static String maskSensitiveData(String original) {
diff --git a/javatests/google/registry/flows/EppXmlSanitizerTest.java b/javatests/google/registry/flows/EppXmlSanitizerTest.java
index 3cc098608..1bdad99f6 100644
--- a/javatests/google/registry/flows/EppXmlSanitizerTest.java
+++ b/javatests/google/registry/flows/EppXmlSanitizerTest.java
@@ -17,6 +17,7 @@ package google.registry.flows;
import static com.google.common.truth.Truth.assertThat;
import static google.registry.flows.EppXmlSanitizer.sanitizeEppXml;
import static google.registry.testing.TestDataHelper.loadBytes;
+import static java.nio.charset.StandardCharsets.UTF_16LE;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.collect.ImmutableMap;
@@ -30,12 +31,12 @@ import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
public class EppXmlSanitizerTest {
- private static final String XML_HEADER = "";
+ private static final String UTF8_HEADER = "";
@Test
public void testSanitize_noSensitiveData_noop() throws Exception {
byte[] inputXmlBytes = loadBytes(getClass(), "host_create.xml").read();
- String expectedXml = XML_HEADER + new String(inputXmlBytes, UTF_8);
+ String expectedXml = UTF8_HEADER + new String(inputXmlBytes, UTF_8);
String sanitizedXml = sanitizeEppXml(inputXmlBytes);
assertThat(sanitizedXml).isEqualTo(expectedXml);
@@ -50,7 +51,7 @@ public class EppXmlSanitizerTest {
ImmutableMap.of("PW", "oldpass", "NEWPW", "newPw"))
.getEppXml();
String expectedXml =
- XML_HEADER
+ UTF8_HEADER
+ new EppLoader(
this,
"login_update_password.xml",
@@ -68,7 +69,7 @@ public class EppXmlSanitizerTest {
this, "login_wrong_case.xml", ImmutableMap.of("PW", "oldpass", "NEWPW", "newPw"))
.getEppXml();
String expectedXml =
- XML_HEADER
+ UTF8_HEADER
+ new EppLoader(
this,
"login_wrong_case.xml",
@@ -83,7 +84,7 @@ public class EppXmlSanitizerTest {
public void testSanitize_contactAuthInfo_sanitized() throws Exception {
byte[] inputXmlBytes = loadBytes(getClass(), "contact_info.xml").read();
String expectedXml =
- XML_HEADER
+ UTF8_HEADER
+ new EppLoader(this, "contact_info_sanitized.xml", ImmutableMap.of()).getEppXml();
String sanitizedXml = sanitizeEppXml(inputXmlBytes);
@@ -94,7 +95,7 @@ public class EppXmlSanitizerTest {
public void testSanitize_contactCreateResponseAuthInfo_sanitized() throws Exception {
byte[] inputXmlBytes = loadBytes(getClass(), "contact_info_from_create_response.xml").read();
String expectedXml =
- XML_HEADER
+ UTF8_HEADER
+ new EppLoader(
this, "contact_info_from_create_response_sanitized.xml", ImmutableMap.of())
.getEppXml();
@@ -106,7 +107,7 @@ public class EppXmlSanitizerTest {
@Test
public void testSanitize_emptyElement_transformedToLongForm() {
byte[] inputXmlBytes = "".getBytes(UTF_8);
- assertThat(sanitizeEppXml(inputXmlBytes)).isEqualTo(XML_HEADER + "\n");
+ assertThat(sanitizeEppXml(inputXmlBytes)).isEqualTo(UTF8_HEADER + "\n");
}
@Test
@@ -119,7 +120,22 @@ public class EppXmlSanitizerTest {
@Test
public void testSanitize_unicode_hasCorrectCharCount() {
byte[] inputXmlBytes = "\u007F\u4E43x".getBytes(UTF_8);
- String expectedXml = XML_HEADER + "C**\n";
+ String expectedXml = UTF8_HEADER + "C**\n";
assertThat(sanitizeEppXml(inputXmlBytes)).isEqualTo(expectedXml);
}
+
+ @Test
+ public void testSanitize_emptyString_encodedToBase64() {
+ byte[] inputXmlBytes = "".getBytes(UTF_8);
+ assertThat(sanitizeEppXml(inputXmlBytes)).isEqualTo("");
+ }
+
+ @Test
+ public void testSanitize_utf16_encodingPreserved() {
+ // Test data should specify an endian-specific UTF-16 scheme for easy assertion. If 'UTF-16' is
+ // used, the XMLEventReader in sanitizer may resolve it to an endian-specific one.
+ String inputXml = "
\u03bc
\n";
+ String sanitizedXml = sanitizeEppXml(inputXml.getBytes(UTF_16LE));
+ assertThat(sanitizedXml).isEqualTo(inputXml);
+ }
}
diff --git a/javatests/google/registry/testing/GpgSystemCommandRule.java b/javatests/google/registry/testing/GpgSystemCommandRule.java
index b073a46b5..522295f32 100644
--- a/javatests/google/registry/testing/GpgSystemCommandRule.java
+++ b/javatests/google/registry/testing/GpgSystemCommandRule.java
@@ -17,6 +17,7 @@ package google.registry.testing;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
+import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.truth.Truth.assertWithMessage;
import static java.nio.charset.StandardCharsets.UTF_8;
@@ -84,9 +85,14 @@ public final class GpgSystemCommandRule extends ExternalResource {
@Override
protected void before() throws IOException, InterruptedException {
checkState(Objects.equals(cwd, DEV_NULL));
- File tmpRoot = null;
- tmpRoot = new File(System.getenv("TMPDIR"));
- cwd = File.createTempFile(TEMP_FILE_PREFIX, "", tmpRoot);
+ String tmpRootDirString = System.getenv("TMPDIR");
+ // Create the working directory for the forked process on Temp file system. Create under the
+ // path specified by 'TMPDIR' envrionment variable if defined, otherwise create under the
+ // runtime's default (typically /tmp).
+ cwd =
+ isNullOrEmpty(tmpRootDirString)
+ ? File.createTempFile(TEMP_FILE_PREFIX, "")
+ : File.createTempFile(TEMP_FILE_PREFIX, "", new File(tmpRootDirString));
cwd.delete();
cwd.mkdir();
conf = new File(cwd, ".gnupg");
@@ -118,6 +124,7 @@ public final class GpgSystemCommandRule extends ExternalResource {
@Override
protected void after() {
+ // TODO(weiminyu): we should delete the cwd tree.
cwd = DEV_NULL;
conf = DEV_NULL;
}
diff --git a/javatests/google/registry/testing/TestDataHelper.java b/javatests/google/registry/testing/TestDataHelper.java
index a7cf34415..49dcd83c7 100644
--- a/javatests/google/registry/testing/TestDataHelper.java
+++ b/javatests/google/registry/testing/TestDataHelper.java
@@ -23,7 +23,9 @@ import com.google.common.collect.ImmutableMap;
import com.google.common.io.ByteSource;
import com.google.common.io.MoreFiles;
import com.google.common.io.Resources;
+import java.io.IOException;
import java.net.URI;
+import java.nio.file.FileSystemAlreadyExistsException;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.nio.file.Paths;
@@ -93,7 +95,21 @@ public final class TestDataHelper {
/** Returns a recursive iterable of all files in the given directory. */
public static Iterable listFiles(Class> context, String directory) throws Exception {
URI dir = Resources.getResource(context, directory).toURI();
- FileSystems.newFileSystem(dir, ImmutableMap.of("create", "true"));
+ ensureFileSystemPresentForUri(dir);
return MoreFiles.fileTraverser().breadthFirst(Paths.get(dir));
}
+
+ private static void ensureFileSystemPresentForUri(URI uri) throws IOException {
+ if (uri.getScheme().equals(FileSystems.getDefault().provider().getScheme())) {
+ // URI maps to default file system (file://...), which must be present. Besides, calling
+ // FileSystem.newFileSystem on this URI may trigger FileSystemAlreadyExistsException.
+ return;
+ }
+ try {
+ // URI maps to a special file system, e.g., jar:...
+ FileSystems.newFileSystem(uri, ImmutableMap.of("create", "true"));
+ } catch (FileSystemAlreadyExistsException e) {
+ // ignore.
+ }
+ }
}