mv com/google/domain/registry google/registry

This change renames directories in preparation for the great package
rename. The repository is now in a broken state because the code
itself hasn't been updated. However, this should ensure that git
correctly preserves history for each file.
This commit is contained in:
Justine Tunney 2016-05-13 18:55:08 -04:00
parent a41677aea1
commit 5012893c1d
2396 changed files with 0 additions and 0 deletions

View file

@ -0,0 +1,21 @@
# Package-wide default: targets here are visible to the registry_project
# package group unless a target sets its own visibility.
# NOTE(review): the label still uses the com/google/domain/registry path;
# confirm it is updated as part of the package rename.
package(
default_visibility = ["//java/com/google/domain/registry:registry_project"],
)
# IDN library: compiles every .java file in this directory and bundles every
# .txt file in it as a classpath resource.
java_library(
name = "idn",
srcs = glob(["*.java"]),
# The .txt files are packaged as resources alongside the compiled classes.
resources = glob(["*.txt"]),
deps = [
"//java/com/google/common/annotations",
"//java/com/google/common/base",
"//java/com/google/common/collect",
"//java/com/google/common/io",
"//java/com/google/domain/registry/util",
"//third_party/java/joda_time",
"//third_party/java/jsr305_annotations",
"//third_party/java/objectify:objectify-v4_1",
"//third_party/java/servlet/servlet_api",
],
)

View file

@ -0,0 +1,56 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.tldconfig.idn;
import static com.google.domain.registry.tldconfig.idn.IdnTableEnum.EXTENDED_LATIN;
import static com.google.domain.registry.tldconfig.idn.IdnTableEnum.JA;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.domain.registry.util.Idn;
import com.google.domain.registry.util.NonFinalForTesting;
/** Decides whether an IDN label may be provisioned under a particular TLD. */
public final class IdnLabelValidator {

  /** The generic list of IDN tables, used by any TLD that has no entry of its own. */
  private static final ImmutableList<IdnTableEnum> DEFAULT_IDN_TABLES =
      ImmutableList.of(EXTENDED_LATIN, JA);

  /** IDN table lists for the TLDs that are configured individually, keyed by TLD. */
  @NonFinalForTesting
  private static ImmutableMap<String, ImmutableList<IdnTableEnum>> idnTableListsPerTld =
      ImmutableMap.of("xn--q9jyb4c", ImmutableList.of(EXTENDED_LATIN, JA));

  private IdnLabelValidator() {}

  /**
   * Looks up the first {@link IdnTable} configured for {@code tld} that accepts {@code label}.
   *
   * <p>The label is converted to its Unicode form before it is checked. Tables are tried in the
   * order in which they are configured for the TLD; a TLD without its own configuration falls
   * back to the default table list.
   *
   * @param label the domain label to validate
   * @param tld the TLD whose IDN tables should be consulted
   * @return the name of the first table that considers the label valid, or an absent value when
   *     no configured table accepts it
   */
  public static Optional<String> findValidIdnTableForTld(String label, String tld) {
    String unicodeLabel = Idn.toUnicode(label);
    ImmutableList<IdnTableEnum> candidates = idnTableListsPerTld.get(tld);
    if (candidates == null) {
      candidates = DEFAULT_IDN_TABLES;
    }
    for (IdnTableEnum candidate : candidates) {
      IdnTable table = candidate.getTable();
      if (table.isValidLabel(unicodeLabel)) {
        return Optional.of(table.getName());
      }
    }
    return Optional.absent();
  }
}

View file

@ -0,0 +1,142 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.tldconfig.idn;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import java.net.URI;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** An IDN table for a particular TLD. */
public final class IdnTable {

  /** Matches the {@code U+XXXX} codepoint token (4 to 6 hex digits) at the start of a line. */
  private static final Pattern LINE_PATTERN = Pattern.compile("^U\\+([0-9a-fA-F]{4,6})");

  /** Prefix of the comment line that carries the table's public URL. */
  private static final String URL_LINE_PREFIX = "# URL: ";

  /** Prefix of the comment line that carries the table's policy URL. */
  private static final String POLICY_LINE_PREFIX = "# Policy: ";

  /** Language name for this table (corresponds to filename.) */
  private final String name;

  /**
   * Public URL of this IDN table, which is needed by RDE.
   *
   * @see "https://tools.ietf.org/html/draft-arias-noguchi-dnrd-objects-mapping-05#section-5.5.1.1"
   */
  private final URI url;

  /** Public URL of policy for this IDN table, which is needed by RDE. */
  private final URI policy;

  /** The set of codepoints that this table permits. */
  private final RangeSet<Integer> validCodepoints;

  /** Optional extra validator holding the language rules that go with this table. */
  private final Optional<LanguageValidator> languageValidator;

  /**
   * Builds a table from already-parsed parts.
   *
   * @throws NullPointerException if {@code url}, {@code policy} or {@code validCodepoints} is null
   */
  private IdnTable(
      String name,
      URI url,
      URI policy,
      RangeSet<Integer> validCodepoints,
      Optional<LanguageValidator> languageValidator) {
    this.name = name;
    this.url = checkNotNull(url, "%s missing '# URL: http://foo.example/page' line", name);
    this.policy = checkNotNull(policy, "%s missing '# Policy: http://foo.example/page' line", name);
    this.validCodepoints = checkNotNull(validCodepoints);
    this.languageValidator = languageValidator;
  }

  public String getName() {
    return name;
  }

  public URI getUrl() {
    return url;
  }

  public URI getPolicy() {
    return policy;
  }

  /**
   * Returns true if the given label is valid for this IDN table.
   *
   * <p>Every codepoint of the label must be in the table, and if a language validator is present
   * the label must also pass it.
   */
  boolean isValidLabel(String label) {
    int offset = 0;
    while (offset < label.length()) {
      int codepoint = label.codePointAt(offset);
      if (!validCodepoints.contains(codepoint)) {
        return false;
      }
      // Supplementary codepoints occupy two chars (a surrogate pair), so step by char count.
      offset += Character.charCount(codepoint);
    }
    if (!languageValidator.isPresent()) {
      return true;
    }
    return languageValidator.get().isValidLabelForLanguage(label);
  }

  /**
   * Creates an IDN table from the lines of its text file.
   *
   * <p>Lines are trimmed first. The {@code # URL: } and {@code # Policy: } comment lines supply
   * the table's URL and policy URL; blank lines and all other comment lines are ignored; every
   * remaining line must start with a {@code U+XXXX} codepoint.
   *
   * @param language name given to the resulting table
   * @param data the lines of the table file
   * @param languageValidator optional validator to attach to the table
   */
  static IdnTable createFrom(
      String language, Iterable<String> data, Optional<LanguageValidator> languageValidator) {
    ImmutableRangeSet.Builder<Integer> codepoints = new ImmutableRangeSet.Builder<>();
    URI url = null;
    URI policy = null;
    for (String rawLine : data) {
      String line = rawLine.trim();
      if (line.isEmpty()) {
        continue;
      }
      if (line.startsWith("#")) {
        // Both special prefixes begin with '#', so they are only looked for among comment lines.
        if (line.startsWith(URL_LINE_PREFIX)) {
          url = URI.create(line.substring(URL_LINE_PREFIX.length()));
        } else if (line.startsWith(POLICY_LINE_PREFIX)) {
          policy = URI.create(line.substring(POLICY_LINE_PREFIX.length()));
        }
        continue;
      }
      codepoints.add(Range.<Integer>singleton(readCodepoint(line)));
    }
    return new IdnTable(language, url, policy, codepoints.build(), languageValidator);
  }

  /**
   * Parses the codepoint at the start of a single table line.
   *
   * <p>The line is expected to begin with {@code U+XXXX}, where {@code XXXX} is the hex value of
   * the codepoint; anything after that token is ignored.
   *
   * @throws IllegalArgumentException if the line does not start with such a token
   */
  private static int readCodepoint(String line) {
    Matcher matcher = LINE_PATTERN.matcher(line);
    checkArgument(matcher.lookingAt(), "Can't parse line: %s", line);
    return Integer.parseInt(matcher.group(1), 16);
  }
}

View file

@ -0,0 +1,49 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.tldconfig.idn;
import static com.google.common.io.Resources.readLines;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.base.Ascii;
import com.google.common.io.Resources;
import java.io.IOException;
import java.net.URL;
/** Wrapper enum whose constants each load one {@link IdnTable} resource into memory. */
public enum IdnTableEnum {
  EXTENDED_LATIN,
  JA;

  /** The table loaded for this constant when the enum is initialized. */
  private final IdnTable table;

  /** Loads the table whose resource name is this constant's name in lower case. */
  IdnTableEnum() {
    this.table = load(Ascii.toLowerCase(name()));
  }

  public IdnTable getTable() {
    return table;
  }

  /**
   * Reads {@code <name>.txt}, resolved relative to this class, and parses it into a table.
   *
   * @param name the table name, which is also passed to {@link LanguageValidator#get} to find an
   *     optional language validator
   */
  private static IdnTable load(String name) {
    URL resource = Resources.getResource(IdnTableEnum.class, name + ".txt");
    try {
      Iterable<String> lines = Resources.readLines(resource, UTF_8);
      return IdnTable.createFrom(name, lines, LanguageValidator.get(name));
    } catch (IOException e) {
      throw new RuntimeException(e); // should never happen
    }
  }
}

View file

@ -0,0 +1,122 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.tldconfig.idn;
import static java.lang.Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION;
import static java.lang.Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS;
import static java.lang.Character.UnicodeBlock.HIRAGANA;
import static java.lang.Character.UnicodeBlock.KATAKANA;
import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Range;
import java.lang.Character.UnicodeBlock;
/**
 * Validates Japanese language domain labels. This class should only be used with a Japanese
 * language IDN table.
 */
class JapaneseLanguageValidator extends LanguageValidator {

  /** Maximum number of codepoints in any label that contains a Japanese codepoint. */
  private static final int MAX_LENGTH_JAPANESE_STRING = 15;

  /** Equals the codepoint for the character '〆'. */
  private static final int IDEOGRAPHIC_CLOSING_MARK = 0x3006;

  /** Equals the codepoint for the character '・'. */
  private static final int KATAKANA_MIDDLE_DOT = 0x30FB;

  /** Equals the codepoint for the character 'ー'. */
  private static final int KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK = 0x30FC;

  /** The set of {@link UnicodeBlock} objects containing valid Japanese codepoints. */
  private static final ImmutableSet<UnicodeBlock> JAPANESE_UNICODE_BLOCKS = ImmutableSet.of(
      CJK_SYMBOLS_AND_PUNCTUATION, HIRAGANA, KATAKANA, CJK_UNIFIED_IDEOGRAPHS);

  /**
   * Codepoints which are technically considered to be in the Japanese language, but are
   * "exceptions" in that they can not appear in a label with a KATAKANA MIDDLE DOT or
   * IDEOGRAPHIC_CLOSING_MARK unless other Japanese non-exception codepoints are also present.
   */
  private static final ImmutableRangeSet<Integer> JAPANESE_EXCEPTION_CODEPOINTS =
      new ImmutableRangeSet.Builder<Integer>()
          .add(Range.<Integer>singleton(IDEOGRAPHIC_CLOSING_MARK))
          .add(Range.<Integer>singleton(KATAKANA_MIDDLE_DOT))
          .add(Range.<Integer>singleton(KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK))
          .build();

  /**
   * Applies the Japanese context rules to a label.
   *
   * <p>The label is rejected when any of the following holds:
   *
   * <ul>
   *   <li>a KATAKANA-HIRAGANA PROLONGED SOUND MARK is not directly preceded by a codepoint from
   *       the HIRAGANA or KATAKANA block;
   *   <li>it contains a KATAKANA MIDDLE DOT or IDEOGRAPHIC CLOSING MARK but no Japanese codepoint
   *       other than the "exception" codepoints;
   *   <li>it contains any Japanese codepoint and is longer than 15 codepoints.
   * </ul>
   */
  @Override
  boolean isValidLabelForLanguage(String label) {
    boolean sawDotOrClosingMark = false;
    boolean sawJapanese = false;
    boolean sawOrdinaryJapanese = false;
    int codepointCount = 0;
    UnicodeBlock previousBlock = null;

    for (int offset = 0; offset < label.length(); codepointCount++) {
      int codepoint = label.codePointAt(offset);
      UnicodeBlock block = UnicodeBlock.of(codepoint);

      switch (codepoint) {
        case KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK:
          // The prolonged sound mark may only follow a HIRAGANA or KATAKANA character.
          if (previousBlock != HIRAGANA && previousBlock != KATAKANA) {
            return false;
          }
          break;
        case KATAKANA_MIDDLE_DOT:
        case IDEOGRAPHIC_CLOSING_MARK:
          // These two marks oblige the label to contain a non-exception Japanese codepoint too.
          sawDotOrClosingMark = true;
          break;
        default:
          break;
      }

      // This validator should only be paired with a Japanese IDN table, so any non-ASCII
      // codepoint ought to be Japanese already; the block is checked anyway, just in case.
      if (JAPANESE_UNICODE_BLOCKS.contains(block)) {
        sawJapanese = true;
        // A Japanese codepoint that is not an "exception" codepoint is a non-exception one.
        if (!JAPANESE_EXCEPTION_CODEPOINTS.contains(codepoint)) {
          sawOrdinaryJapanese = true;
        }
      }

      // Some codepoints take up more than one char in a Java string (surrogate pairs).
      offset += Character.charCount(codepoint);
      previousBlock = block;
    }

    // The "exception" codepoints do not satisfy the requirement raised by the middle dot or the
    // closing mark.
    if (sawDotOrClosingMark && !sawOrdinaryJapanese) {
      return false;
    }

    // The length cap applies only to labels holding a Japanese codepoint ("exception" codepoints
    // included).
    return !sawJapanese || codepointCount <= MAX_LENGTH_JAPANESE_STRING;
  }
}

View file

@ -0,0 +1,33 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.tldconfig.idn;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableMap;
/** Base class for validators that check a label against the context rules of one language. */
abstract class LanguageValidator {

  /** Every known language validator, keyed by its language code. */
  private static final ImmutableMap<String, LanguageValidator> LANGUAGE_VALIDATORS =
      ImmutableMap.<String, LanguageValidator>of("ja", new JapaneseLanguageValidator());

  /** Returns true if the label meets the context rules for this language. */
  abstract boolean isValidLabelForLanguage(String label);

  /**
   * Looks up the validator registered for a language code.
   *
   * @param language the language code to look up
   * @return the matching validator, or an absent value if none is registered for that code
   */
  static Optional<LanguageValidator> get(String language) {
    LanguageValidator validator = LANGUAGE_VALIDATORS.get(language);
    if (validator == null) {
      return Optional.absent();
    }
    return Optional.of(validator);
  }
}

View file

@ -0,0 +1,133 @@
# URL: https://www.iana.org/domains/idn-tables/tables/google_latn_1.0.txt
# Policy: https://www.registry.google/about/policies/domainabuse/
U+002D # HYPHEN-MINUS
U+0030 # DIGIT ZERO
U+0031 # DIGIT ONE
U+0032 # DIGIT TWO
U+0033 # DIGIT THREE
U+0034 # DIGIT FOUR
U+0035 # DIGIT FIVE
U+0036 # DIGIT SIX
U+0037 # DIGIT SEVEN
U+0038 # DIGIT EIGHT
U+0039 # DIGIT NINE
U+0061 # LATIN SMALL LETTER A
U+00E1 # LATIN SMALL LETTER A WITH ACUTE
U+00E0 # LATIN SMALL LETTER A WITH GRAVE
U+0103 # LATIN SMALL LETTER A WITH BREVE
U+00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
U+00E5 # LATIN SMALL LETTER A WITH RING ABOVE
U+00E4 # LATIN SMALL LETTER A WITH DIAERESIS
U+00E3 # LATIN SMALL LETTER A WITH TILDE
U+0105 # LATIN SMALL LETTER A WITH OGONEK
U+0101 # LATIN SMALL LETTER A WITH MACRON
U+01CE # LATIN SMALL LETTER A WITH CARON
U+00E6 # LATIN SMALL LETTER AE
U+0062 # LATIN SMALL LETTER B
U+0063 # LATIN SMALL LETTER C
U+0107 # LATIN SMALL LETTER C WITH ACUTE
U+010D # LATIN SMALL LETTER C WITH CARON
U+010B # LATIN SMALL LETTER C WITH DOT ABOVE
U+00E7 # LATIN SMALL LETTER C WITH CEDILLA
U+0064 # LATIN SMALL LETTER D
U+010F # LATIN SMALL LETTER D WITH CARON
U+0111 # LATIN SMALL LETTER D WITH STROKE
U+00F0 # LATIN SMALL LETTER ETH
U+0065 # LATIN SMALL LETTER E
U+00E9 # LATIN SMALL LETTER E WITH ACUTE
U+00E8 # LATIN SMALL LETTER E WITH GRAVE
U+00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
U+011B # LATIN SMALL LETTER E WITH CARON
U+00EB # LATIN SMALL LETTER E WITH DIAERESIS
U+0119 # LATIN SMALL LETTER E WITH OGONEK
U+0113 # LATIN SMALL LETTER E WITH MACRON
U+0117 # LATIN SMALL LETTER E WITH DOT ABOVE
U+0259 # LATIN SMALL LETTER SCHWA
U+0066 # LATIN SMALL LETTER F
U+0067 # LATIN SMALL LETTER G
U+011F # LATIN SMALL LETTER G WITH BREVE
U+01E7 # LATIN SMALL LETTER G WITH CARON
U+0121 # LATIN SMALL LETTER G WITH DOT ABOVE
U+0123 # LATIN SMALL LETTER G WITH CEDILLA
U+01E5 # LATIN SMALL LETTER G WITH STROKE
U+0068 # LATIN SMALL LETTER H
U+0127 # LATIN SMALL LETTER H WITH STROKE
U+0069 # LATIN SMALL LETTER I
U+0131 # LATIN SMALL LETTER DOTLESS I
U+00ED # LATIN SMALL LETTER I WITH ACUTE
U+00EC # LATIN SMALL LETTER I WITH GRAVE
U+00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
U+00EF # LATIN SMALL LETTER I WITH DIAERESIS
U+012F # LATIN SMALL LETTER I WITH OGONEK
U+012B # LATIN SMALL LETTER I WITH MACRON
U+01D0 # LATIN SMALL LETTER I WITH CARON
U+006A # LATIN SMALL LETTER J
U+006B # LATIN SMALL LETTER K
U+01E9 # LATIN SMALL LETTER K WITH CARON
U+0137 # LATIN SMALL LETTER K WITH CEDILLA
U+006C # LATIN SMALL LETTER L
U+013A # LATIN SMALL LETTER L WITH ACUTE
U+013E # LATIN SMALL LETTER L WITH CARON
U+013C # LATIN SMALL LETTER L WITH CEDILLA
U+0142 # LATIN SMALL LETTER L WITH STROKE
U+006D # LATIN SMALL LETTER M
U+006E # LATIN SMALL LETTER N
U+0144 # LATIN SMALL LETTER N WITH ACUTE
U+0148 # LATIN SMALL LETTER N WITH CARON
U+00F1 # LATIN SMALL LETTER N WITH TILDE
U+0146 # LATIN SMALL LETTER N WITH CEDILLA
U+014B # LATIN SMALL LETTER ENG
U+006F # LATIN SMALL LETTER O
U+00F3 # LATIN SMALL LETTER O WITH ACUTE
U+00F2 # LATIN SMALL LETTER O WITH GRAVE
U+00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
U+00F6 # LATIN SMALL LETTER O WITH DIAERESIS
U+0151 # LATIN SMALL LETTER O WITH DOUBLE ACUTE
U+00F5 # LATIN SMALL LETTER O WITH TILDE
U+014D # LATIN SMALL LETTER O WITH MACRON
U+01D2 # LATIN SMALL LETTER O WITH CARON
U+00F8 # LATIN SMALL LETTER O WITH STROKE
U+0153 # LATIN SMALL LIGATURE OE
U+0070 # LATIN SMALL LETTER P
U+0071 # LATIN SMALL LETTER Q
U+0072 # LATIN SMALL LETTER R
U+0155 # LATIN SMALL LETTER R WITH ACUTE
U+0159 # LATIN SMALL LETTER R WITH CARON
U+0157 # LATIN SMALL LETTER R WITH CEDILLA
U+0073 # LATIN SMALL LETTER S
U+015B # LATIN SMALL LETTER S WITH ACUTE
U+0161 # LATIN SMALL LETTER S WITH CARON
U+015F # LATIN SMALL LETTER S WITH CEDILLA
U+0074 # LATIN SMALL LETTER T
U+0165 # LATIN SMALL LETTER T WITH CARON
U+0163 # LATIN SMALL LETTER T WITH CEDILLA
U+0167 # LATIN SMALL LETTER T WITH STROKE
U+0075 # LATIN SMALL LETTER U
U+00FA # LATIN SMALL LETTER U WITH ACUTE
U+00F9 # LATIN SMALL LETTER U WITH GRAVE
U+00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
U+016F # LATIN SMALL LETTER U WITH RING ABOVE
U+00FC # LATIN SMALL LETTER U WITH DIAERESIS
U+0171 # LATIN SMALL LETTER U WITH DOUBLE ACUTE
U+0173 # LATIN SMALL LETTER U WITH OGONEK
U+016B # LATIN SMALL LETTER U WITH MACRON
U+01D4 # LATIN SMALL LETTER U WITH CARON
U+0076 # LATIN SMALL LETTER V
U+0077 # LATIN SMALL LETTER W
U+1E83 # LATIN SMALL LETTER W WITH ACUTE
U+1E81 # LATIN SMALL LETTER W WITH GRAVE
U+0175 # LATIN SMALL LETTER W WITH CIRCUMFLEX
U+1E85 # LATIN SMALL LETTER W WITH DIAERESIS
U+0078 # LATIN SMALL LETTER X
U+0079 # LATIN SMALL LETTER Y
U+00FD # LATIN SMALL LETTER Y WITH ACUTE
U+1EF3 # LATIN SMALL LETTER Y WITH GRAVE
U+0177 # LATIN SMALL LETTER Y WITH CIRCUMFLEX
U+00FF # LATIN SMALL LETTER Y WITH DIAERESIS
U+007A # LATIN SMALL LETTER Z
U+017A # LATIN SMALL LETTER Z WITH ACUTE
U+017E # LATIN SMALL LETTER Z WITH CARON
U+017C # LATIN SMALL LETTER Z WITH DOT ABOVE
U+0292 # LATIN SMALL LETTER EZH
U+01EF # LATIN SMALL LETTER EZH WITH CARON
U+00FE # LATIN SMALL LETTER THORN

File diff suppressed because it is too large Load diff