mv com/google/domain/registry google/registry

This change renames directories in preparation for the great package rename. The repository is now in a broken state because the code itself hasn't been updated. However this should ensure that git correctly preserves history for each file.
2025-07-22 02:36:03 +02:00 · 2016-05-13 18:55:08 -04:00 · 2016-05-13 18:55:08 -04:00 · 5012893c1d
commit 5012893c1d
parent a41677aea1
2396 changed files with 0 additions and 0 deletions
--- a/java/google/registry/tldconfig/idn/JapaneseLanguageValidator.java
+++ b/java/google/registry/tldconfig/idn/JapaneseLanguageValidator.java
@ -0,0 +1,122 @@
+// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.domain.registry.tldconfig.idn;
+
+import static java.lang.Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION;
+import static java.lang.Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS;
+import static java.lang.Character.UnicodeBlock.HIRAGANA;
+import static java.lang.Character.UnicodeBlock.KATAKANA;
+
+import com.google.common.collect.ImmutableRangeSet;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Range;
+
+import java.lang.Character.UnicodeBlock;
+
+/**
+ * Validates Japanese language domain labels. This class should only be used with a Japanese
+ * language IDN table.
+ */
+class JapaneseLanguageValidator extends LanguageValidator {
+
+  /** Any string with Japanese characters can have at most 15 characters. */
+  private static final int MAX_LENGTH_JAPANESE_STRING = 15;
+
+  /** Equals the codepoint for the character '〆'. */
+  private static final int IDEOGRAPHIC_CLOSING_MARK = 0x3006;
+
+  /** Equals the codepoint for the character '・'. */
+  private static final int KATAKANA_MIDDLE_DOT = 0x30FB;
+
+  /** Equals the codepoint for the character 'ー'. */
+  private static final int KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK = 0x30FC;
+
+  /** The set of {@link UnicodeBlock} objects containing valid Japanese codepoints. */
+  private static final ImmutableSet<UnicodeBlock> JAPANESE_UNICODE_BLOCKS = ImmutableSet.of(
+      CJK_SYMBOLS_AND_PUNCTUATION, HIRAGANA, KATAKANA, CJK_UNIFIED_IDEOGRAPHS);
+
+  /**
+   * Codepoints which are technically considered to be in the Japanese language, but are
+   * "exceptions" in that they can not appear in a label with a KATAKANA MIDDLE DOT or
+   * IDEOGRAPHIC_CLOSING_MARK unless other Japanese non-exception codepoints are also present.
+   */
+  private static final ImmutableRangeSet<Integer> JAPANESE_EXCEPTION_CODEPOINTS =
+      new ImmutableRangeSet.Builder<Integer>()
+      .add(Range.<Integer>singleton(IDEOGRAPHIC_CLOSING_MARK))
+      .add(Range.<Integer>singleton(KATAKANA_MIDDLE_DOT))
+      .add(Range.<Integer>singleton(KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK))
+      .build();
+
+  @Override
+  boolean isValidLabelForLanguage(String label) {
+    boolean requiresJapaneseNonExceptionCodepoint = false;
+    boolean hasJapaneseCodepoint = false;
+    boolean hasJapaneseNonExceptionCodepoint = false;
+
+    final int length = label.length();
+    int codepoints = 0;
+    UnicodeBlock precedingUnicodeBlock = null;
+    for (int i = 0; i < length; ) {
+      int codepoint = label.codePointAt(i);
+      UnicodeBlock unicodeBlock = UnicodeBlock.of(codepoint);
+      boolean isException = JAPANESE_EXCEPTION_CODEPOINTS.contains(codepoint);
+      boolean isJapanese = JAPANESE_UNICODE_BLOCKS.contains(unicodeBlock);
+
+      // A label containing KATAKANA_MIDDLE_DOT or IDEOGRAPHIC_CLOSING_MARK requires a Japanese
+      // language codepoint to also appear in the label.
+      if (codepoint == KATAKANA_MIDDLE_DOT || codepoint == IDEOGRAPHIC_CLOSING_MARK) {
+        requiresJapaneseNonExceptionCodepoint = true;
+      }
+
+      // The KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK can only occur after a HIRAGANA or KATAKANA
+      // character.
+      if (codepoint == KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK
+          && precedingUnicodeBlock != HIRAGANA && precedingUnicodeBlock != KATAKANA) {
+        return false;
+      }
+
+      // If a codepoint is Japanese but not an "exception" codepoint, then it must a non-exception
+      // Japanese codepoint.
+      if (isJapanese && !isException) {
+        hasJapaneseNonExceptionCodepoint = true;
+      }
+
+      // Make a note if we've seen any Japanese codepoint. Note that this object should really only
+      // be used on a Japanese IDN table, and thus any non-ASCII codepoint should really be
+      // Japanese. But we do the additional check again the characters UnicodeBlock just in case.
+      if (isJapanese) {
+        hasJapaneseCodepoint = true;
+      }
+
+      // Some codepoints take up more than one character in Java strings (e.g. high and low
+      // surrogates).
+      i += Character.charCount(codepoint);
+      ++codepoints;
+      precedingUnicodeBlock = unicodeBlock;
+    }
+
+    // A label with the KATAKANA MIDDLE DOT or IDEOGRAPHIC_CLOSING_MARK codepoint must also have
+    // some Japanese character in the label. The Japanese "exception" characters do not count in
+    // this regard.
+    if (requiresJapaneseNonExceptionCodepoint && !hasJapaneseNonExceptionCodepoint) {
+      return false;
+    }
+
+    // Any label with Japanese characters (including "exception" characters) can only be 15
+    // codepoints long.
+    return !(hasJapaneseCodepoint && (codepoints > MAX_LENGTH_JAPANESE_STRING));
+
+  }
+}