mirror of
https://github.com/google/nomulus.git
synced 2025-05-06 23:17:51 +02:00
The dark lord Gosling designed the Java package naming system so that ownership flows from the DNS system. Since we own the domain name registry.google, it seems only appropriate that we should use google.registry as our package name.
122 lines
5.1 KiB
Java
122 lines
5.1 KiB
Java
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package google.registry.tldconfig.idn;
|
|
|
|
import static java.lang.Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION;
|
|
import static java.lang.Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS;
|
|
import static java.lang.Character.UnicodeBlock.HIRAGANA;
|
|
import static java.lang.Character.UnicodeBlock.KATAKANA;
|
|
|
|
import com.google.common.collect.ImmutableRangeSet;
|
|
import com.google.common.collect.ImmutableSet;
|
|
import com.google.common.collect.Range;
|
|
|
|
import java.lang.Character.UnicodeBlock;
|
|
|
|
/**
|
|
* Validates Japanese language domain labels. This class should only be used with a Japanese
|
|
* language IDN table.
|
|
*/
|
|
class JapaneseLanguageValidator extends LanguageValidator {
|
|
|
|
/** Any string with Japanese characters can have at most 15 characters. */
|
|
private static final int MAX_LENGTH_JAPANESE_STRING = 15;
|
|
|
|
/** Equals the codepoint for the character '〆'. */
|
|
private static final int IDEOGRAPHIC_CLOSING_MARK = 0x3006;
|
|
|
|
/** Equals the codepoint for the character '・'. */
|
|
private static final int KATAKANA_MIDDLE_DOT = 0x30FB;
|
|
|
|
/** Equals the codepoint for the character 'ー'. */
|
|
private static final int KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK = 0x30FC;
|
|
|
|
/** The set of {@link UnicodeBlock} objects containing valid Japanese codepoints. */
|
|
private static final ImmutableSet<UnicodeBlock> JAPANESE_UNICODE_BLOCKS = ImmutableSet.of(
|
|
CJK_SYMBOLS_AND_PUNCTUATION, HIRAGANA, KATAKANA, CJK_UNIFIED_IDEOGRAPHS);
|
|
|
|
/**
|
|
* Codepoints which are technically considered to be in the Japanese language, but are
|
|
* "exceptions" in that they can not appear in a label with a KATAKANA MIDDLE DOT or
|
|
* IDEOGRAPHIC_CLOSING_MARK unless other Japanese non-exception codepoints are also present.
|
|
*/
|
|
private static final ImmutableRangeSet<Integer> JAPANESE_EXCEPTION_CODEPOINTS =
|
|
new ImmutableRangeSet.Builder<Integer>()
|
|
.add(Range.<Integer>singleton(IDEOGRAPHIC_CLOSING_MARK))
|
|
.add(Range.<Integer>singleton(KATAKANA_MIDDLE_DOT))
|
|
.add(Range.<Integer>singleton(KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK))
|
|
.build();
|
|
|
|
@Override
|
|
boolean isValidLabelForLanguage(String label) {
|
|
boolean requiresJapaneseNonExceptionCodepoint = false;
|
|
boolean hasJapaneseCodepoint = false;
|
|
boolean hasJapaneseNonExceptionCodepoint = false;
|
|
|
|
final int length = label.length();
|
|
int codepoints = 0;
|
|
UnicodeBlock precedingUnicodeBlock = null;
|
|
for (int i = 0; i < length; ) {
|
|
int codepoint = label.codePointAt(i);
|
|
UnicodeBlock unicodeBlock = UnicodeBlock.of(codepoint);
|
|
boolean isException = JAPANESE_EXCEPTION_CODEPOINTS.contains(codepoint);
|
|
boolean isJapanese = JAPANESE_UNICODE_BLOCKS.contains(unicodeBlock);
|
|
|
|
// A label containing KATAKANA_MIDDLE_DOT or IDEOGRAPHIC_CLOSING_MARK requires a Japanese
|
|
// language codepoint to also appear in the label.
|
|
if (codepoint == KATAKANA_MIDDLE_DOT || codepoint == IDEOGRAPHIC_CLOSING_MARK) {
|
|
requiresJapaneseNonExceptionCodepoint = true;
|
|
}
|
|
|
|
// The KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK can only occur after a HIRAGANA or KATAKANA
|
|
// character.
|
|
if (codepoint == KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK
|
|
&& precedingUnicodeBlock != HIRAGANA && precedingUnicodeBlock != KATAKANA) {
|
|
return false;
|
|
}
|
|
|
|
// If a codepoint is Japanese but not an "exception" codepoint, then it must a non-exception
|
|
// Japanese codepoint.
|
|
if (isJapanese && !isException) {
|
|
hasJapaneseNonExceptionCodepoint = true;
|
|
}
|
|
|
|
// Make a note if we've seen any Japanese codepoint. Note that this object should really only
|
|
// be used on a Japanese IDN table, and thus any non-ASCII codepoint should really be
|
|
// Japanese. But we do the additional check again the characters UnicodeBlock just in case.
|
|
if (isJapanese) {
|
|
hasJapaneseCodepoint = true;
|
|
}
|
|
|
|
// Some codepoints take up more than one character in Java strings (e.g. high and low
|
|
// surrogates).
|
|
i += Character.charCount(codepoint);
|
|
++codepoints;
|
|
precedingUnicodeBlock = unicodeBlock;
|
|
}
|
|
|
|
// A label with the KATAKANA MIDDLE DOT or IDEOGRAPHIC_CLOSING_MARK codepoint must also have
|
|
// some Japanese character in the label. The Japanese "exception" characters do not count in
|
|
// this regard.
|
|
if (requiresJapaneseNonExceptionCodepoint && !hasJapaneseNonExceptionCodepoint) {
|
|
return false;
|
|
}
|
|
|
|
// Any label with Japanese characters (including "exception" characters) can only be 15
|
|
// codepoints long.
|
|
return !(hasJapaneseCodepoint && (codepoints > MAX_LENGTH_JAPANESE_STRING));
|
|
|
|
}
|
|
}
|