mirror of
https://github.com/google/nomulus.git
synced 2025-07-22 02:36:03 +02:00
mv com/google/domain/registry google/registry
This change renames directories in preparation for the great package rename. The repository is now in a broken state because the code itself hasn't been updated. However this should ensure that git correctly preserves history for each file.
This commit is contained in:
parent
a41677aea1
commit
5012893c1d
2396 changed files with 0 additions and 0 deletions
|
@ -0,0 +1,122 @@
|
|||
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.domain.registry.tldconfig.idn;
|
||||
|
||||
import static java.lang.Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION;
|
||||
import static java.lang.Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS;
|
||||
import static java.lang.Character.UnicodeBlock.HIRAGANA;
|
||||
import static java.lang.Character.UnicodeBlock.KATAKANA;
|
||||
|
||||
import com.google.common.collect.ImmutableRangeSet;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.Range;
|
||||
|
||||
import java.lang.Character.UnicodeBlock;
|
||||
|
||||
/**
|
||||
* Validates Japanese language domain labels. This class should only be used with a Japanese
|
||||
* language IDN table.
|
||||
*/
|
||||
class JapaneseLanguageValidator extends LanguageValidator {
|
||||
|
||||
/** Any string with Japanese characters can have at most 15 characters. */
|
||||
private static final int MAX_LENGTH_JAPANESE_STRING = 15;
|
||||
|
||||
/** Equals the codepoint for the character '〆'. */
|
||||
private static final int IDEOGRAPHIC_CLOSING_MARK = 0x3006;
|
||||
|
||||
/** Equals the codepoint for the character '・'. */
|
||||
private static final int KATAKANA_MIDDLE_DOT = 0x30FB;
|
||||
|
||||
/** Equals the codepoint for the character 'ー'. */
|
||||
private static final int KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK = 0x30FC;
|
||||
|
||||
/** The set of {@link UnicodeBlock} objects containing valid Japanese codepoints. */
|
||||
private static final ImmutableSet<UnicodeBlock> JAPANESE_UNICODE_BLOCKS = ImmutableSet.of(
|
||||
CJK_SYMBOLS_AND_PUNCTUATION, HIRAGANA, KATAKANA, CJK_UNIFIED_IDEOGRAPHS);
|
||||
|
||||
/**
|
||||
* Codepoints which are technically considered to be in the Japanese language, but are
|
||||
* "exceptions" in that they can not appear in a label with a KATAKANA MIDDLE DOT or
|
||||
* IDEOGRAPHIC_CLOSING_MARK unless other Japanese non-exception codepoints are also present.
|
||||
*/
|
||||
private static final ImmutableRangeSet<Integer> JAPANESE_EXCEPTION_CODEPOINTS =
|
||||
new ImmutableRangeSet.Builder<Integer>()
|
||||
.add(Range.<Integer>singleton(IDEOGRAPHIC_CLOSING_MARK))
|
||||
.add(Range.<Integer>singleton(KATAKANA_MIDDLE_DOT))
|
||||
.add(Range.<Integer>singleton(KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK))
|
||||
.build();
|
||||
|
||||
@Override
|
||||
boolean isValidLabelForLanguage(String label) {
|
||||
boolean requiresJapaneseNonExceptionCodepoint = false;
|
||||
boolean hasJapaneseCodepoint = false;
|
||||
boolean hasJapaneseNonExceptionCodepoint = false;
|
||||
|
||||
final int length = label.length();
|
||||
int codepoints = 0;
|
||||
UnicodeBlock precedingUnicodeBlock = null;
|
||||
for (int i = 0; i < length; ) {
|
||||
int codepoint = label.codePointAt(i);
|
||||
UnicodeBlock unicodeBlock = UnicodeBlock.of(codepoint);
|
||||
boolean isException = JAPANESE_EXCEPTION_CODEPOINTS.contains(codepoint);
|
||||
boolean isJapanese = JAPANESE_UNICODE_BLOCKS.contains(unicodeBlock);
|
||||
|
||||
// A label containing KATAKANA_MIDDLE_DOT or IDEOGRAPHIC_CLOSING_MARK requires a Japanese
|
||||
// language codepoint to also appear in the label.
|
||||
if (codepoint == KATAKANA_MIDDLE_DOT || codepoint == IDEOGRAPHIC_CLOSING_MARK) {
|
||||
requiresJapaneseNonExceptionCodepoint = true;
|
||||
}
|
||||
|
||||
// The KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK can only occur after a HIRAGANA or KATAKANA
|
||||
// character.
|
||||
if (codepoint == KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK
|
||||
&& precedingUnicodeBlock != HIRAGANA && precedingUnicodeBlock != KATAKANA) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If a codepoint is Japanese but not an "exception" codepoint, then it must a non-exception
|
||||
// Japanese codepoint.
|
||||
if (isJapanese && !isException) {
|
||||
hasJapaneseNonExceptionCodepoint = true;
|
||||
}
|
||||
|
||||
// Make a note if we've seen any Japanese codepoint. Note that this object should really only
|
||||
// be used on a Japanese IDN table, and thus any non-ASCII codepoint should really be
|
||||
// Japanese. But we do the additional check again the characters UnicodeBlock just in case.
|
||||
if (isJapanese) {
|
||||
hasJapaneseCodepoint = true;
|
||||
}
|
||||
|
||||
// Some codepoints take up more than one character in Java strings (e.g. high and low
|
||||
// surrogates).
|
||||
i += Character.charCount(codepoint);
|
||||
++codepoints;
|
||||
precedingUnicodeBlock = unicodeBlock;
|
||||
}
|
||||
|
||||
// A label with the KATAKANA MIDDLE DOT or IDEOGRAPHIC_CLOSING_MARK codepoint must also have
|
||||
// some Japanese character in the label. The Japanese "exception" characters do not count in
|
||||
// this regard.
|
||||
if (requiresJapaneseNonExceptionCodepoint && !hasJapaneseNonExceptionCodepoint) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Any label with Japanese characters (including "exception" characters) can only be 15
|
||||
// codepoints long.
|
||||
return !(hasJapaneseCodepoint && (codepoints > MAX_LENGTH_JAPANESE_STRING));
|
||||
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue