// Copyright 2017 The Nomulus Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package google.registry.tldconfig.idn; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.base.Optional; import com.google.common.collect.ImmutableRangeSet; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.re2j.Matcher; import com.google.re2j.Pattern; import java.net.URI; /** An IDN table for a particular TLD. */ public final class IdnTable { /** Regular expression to match a line of an IDN table. */ private static final Pattern LINE_PATTERN = Pattern.compile("^U\\+([0-9a-fA-F]{4,6})"); private static final String URL_LINE_PREFIX = "# URL: "; private static final String POLICY_LINE_PREFIX = "# Policy: "; /** Language name for this table (corresponds to filename.) */ private final String name; /** * Public URL of this IDN table, which is needed by RDE. * * @see * DNRD Objects Mapping - <rdeIDN:idnTableRef> object */ private final URI url; /** Public URL of policy for this IDN table, which is needed by RDE. */ private final URI policy; /** {@link RangeSet} containing the valid codepoints in this table. */ private final RangeSet validCodepoints; /** Validates the language rules associated with this IDN table. */ private final Optional languageValidator; private IdnTable( String name, URI url, URI policy, RangeSet validCodepoints, Optional languageValidator) { this.name = name; this.url = checkNotNull(url, "%s missing '# URL: http://foo.example/page' line", name); this.policy = checkNotNull(policy, "%s missing '# Policy: http://foo.example/page' line", name); this.validCodepoints = checkNotNull(validCodepoints); this.languageValidator = languageValidator; } public String getName() { return name; } public URI getUrl() { return url; } public URI getPolicy() { return policy; } /** * Returns true if the given label is valid for this IDN table. A label is considered valid if all * of its codepoints are in the IDN table. */ boolean isValidLabel(String label) { final int length = label.length(); for (int i = 0; i < length; ) { int codepoint = label.codePointAt(i); if (!validCodepoints.contains(codepoint)) { return false; } // Some codepoints take up more than one character in Java strings (e.g. high and low // surrogates). i += Character.charCount(codepoint); } return !(languageValidator.isPresent() && !languageValidator.get().isValidLabelForLanguage(label)); } /** Creates an IDN table given the lines from text file. */ static IdnTable createFrom( String language, Iterable data, Optional languageValidator) { ImmutableRangeSet.Builder rangeSet = new ImmutableRangeSet.Builder<>(); URI url = null; URI policy = null; for (String line : data) { // Remove leading and trailing whitespace. line = line.trim(); // Handle special comment lines. if (line.startsWith(URL_LINE_PREFIX)) { url = URI.create(line.substring(URL_LINE_PREFIX.length())); } else if (line.startsWith(POLICY_LINE_PREFIX)) { policy = URI.create(line.substring(POLICY_LINE_PREFIX.length())); } // Skip empty and comment lines. if (line.isEmpty() || line.startsWith("#")) { continue; } int codepoint = readCodepoint(line); rangeSet.add(Range.singleton(codepoint)); } return new IdnTable(language, url, policy, rangeSet.build(), languageValidator); } /** * Read the codepoint from a single line. The expected format of each line is: * {@code U+XXXX} * Where {@code XXXX} holds the hex value of the codepoint. */ private static int readCodepoint(String line) { Matcher matcher = LINE_PATTERN.matcher(line); checkArgument(matcher.lookingAt(), "Can't parse line: %s", line); String hexString = matcher.group(1); return Integer.valueOf(hexString, 16); } }