google-nomulus/java/google/registry/tools/CanonicalizeLabelsCommand.java
mmuller b70f57b7c7 Update copyright year on all license headers
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=146111211
2017-02-02 16:27:22 -05:00

88 lines
3.1 KiB
Java

// Copyright 2017 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.tools;
import static google.registry.util.DomainNameUtils.canonicalizeDomainName;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.google.common.base.Joiner;
import com.google.common.io.CharStreams;
import com.google.common.io.Files;
import google.registry.util.DomainNameUtils;
import google.registry.util.Idn;
import google.registry.util.NonFinalForTesting;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
/** Command to clean up a set of labels and turn them into punycode. */
@Parameters(commandDescription = "Canonicalize domain labels")
final class CanonicalizeLabelsCommand implements Command {
@Parameter(
description = "Filename of file containing domain labels, one per line",
required = true)
private List<String> mainParameters;
@NonFinalForTesting
private static InputStream stdin = System.in;
@Override
public void run() throws IOException {
Set<String> labels = new TreeSet<>();
for (String label : mainParameters.isEmpty()
? CharStreams.readLines(new InputStreamReader(stdin))
: Files.readLines(new File(mainParameters.get(0)), UTF_8)) {
label = label.trim();
if (label.startsWith("-")) {
label = label.substring(1);
}
if (label.endsWith("-")) {
label = label.substring(0, label.length() - 1);
}
String canonical = canonicalize(label);
if (canonical.startsWith(DomainNameUtils.ACE_PREFIX)
&& Idn.toUnicode(canonical).equals(canonical)) {
System.err.println("Bad IDN: " + label);
continue; // Bad IDN code points.
}
labels.add(canonical);
if (!canonical.startsWith("xn--")) {
// Using both "" and "-" to canonicalize labels.
labels.add(canonicalize(label.replaceAll(" ", "")));
labels.add(canonicalize(label.replaceAll(" ", "-")));
labels.add(canonicalize(label.replaceAll("_", "")));
labels.add(canonicalize(label.replaceAll("_", "-")));
}
}
labels.remove(""); // We used "" for invalid labels.
System.out.println(Joiner.on('\n').join(labels));
}
private String canonicalize(String rawLabel) {
try {
return canonicalizeDomainName(rawLabel.replaceAll(" ", ""));
} catch (Exception e) {
System.err.printf("Error canonicalizing %s: %s\n", rawLabel, e.getMessage());
return "";
}
}
}