Canonicalize domain/host names in initial import script (#1347)

* Canonicalize domain/host names in initial import script

* Add tests and make reduce some method visibility
This commit is contained in:
Ben McIlwain 2021-10-07 11:59:46 -04:00 committed by GitHub
parent 7447afd12f
commit ba05a67b7c
4 changed files with 90 additions and 13 deletions

View file

@ -55,7 +55,7 @@ public final class CommitLogImports {
* represents the changes in one transaction. The {@code CommitLogManifest} contains deleted
* entity keys, whereas each {@code CommitLogMutation} contains one whole entity.
*/
public static ImmutableList<ImmutableList<VersionedEntity>> loadEntitiesByTransaction(
static ImmutableList<ImmutableList<VersionedEntity>> loadEntitiesByTransaction(
InputStream inputStream) {
try (InputStream input = new BufferedInputStream(inputStream)) {
Iterator<ImmutableObject> commitLogs = createDeserializingIterator(input, false);
@ -104,7 +104,7 @@ public final class CommitLogImports {
* represents the changes in one transaction. The {@code CommitLogManifest} contains deleted
* entity keys, whereas each {@code CommitLogMutation} contains one whole entity.
*/
public static ImmutableList<VersionedEntity> loadEntities(InputStream inputStream) {
static ImmutableList<VersionedEntity> loadEntities(InputStream inputStream) {
return loadEntitiesByTransaction(inputStream).stream()
.flatMap(ImmutableList::stream)
.collect(toImmutableList());

View file

@ -105,29 +105,29 @@ public abstract class VersionedEntity implements Serializable {
* VersionedEntity VersionedEntities}. See {@link CommitLogImports#loadEntities} for more
* information.
*/
public static Stream<VersionedEntity> fromManifest(CommitLogManifest manifest) {
static Stream<VersionedEntity> fromManifest(CommitLogManifest manifest) {
long commitTimeMillis = manifest.getCommitTime().getMillis();
return manifest.getDeletions().stream()
.map(com.googlecode.objectify.Key::getRaw)
.map(key -> builder().commitTimeMills(commitTimeMillis).key(key).build());
.map(key -> newBuilder().commitTimeMills(commitTimeMillis).key(key).build());
}
/* Converts a {@link CommitLogMutation} to a {@link VersionedEntity}. */
public static VersionedEntity fromMutation(CommitLogMutation mutation) {
static VersionedEntity fromMutation(CommitLogMutation mutation) {
return from(
com.googlecode.objectify.Key.create(mutation).getParent().getId(),
mutation.getEntityProtoBytes());
}
public static VersionedEntity from(long commitTimeMillis, byte[] entityProtoBytes) {
return builder()
return newBuilder()
.entityProtoBytes(entityProtoBytes)
.key(EntityTranslator.createFromPbBytes(entityProtoBytes).getKey())
.commitTimeMills(commitTimeMillis)
.build();
}
static Builder builder() {
private static Builder newBuilder() {
return new AutoValue_VersionedEntity.Builder();
}
@ -142,7 +142,7 @@ public abstract class VersionedEntity implements Serializable {
public abstract VersionedEntity build();
public Builder entityProtoBytes(byte[] bytes) {
Builder entityProtoBytes(byte[] bytes) {
return entityProtoBytes(new ImmutableBytes(bytes));
}
}

View file

@ -22,6 +22,7 @@ import static google.registry.beam.initsql.BackupPaths.getExportFilePatterns;
import static google.registry.model.ofy.ObjectifyService.auditedOfy;
import static google.registry.util.DateTimeUtils.START_OF_TIME;
import static google.registry.util.DateTimeUtils.isBeforeOrAt;
import static google.registry.util.DomainNameUtils.canonicalizeDomainName;
import static java.util.Comparator.comparing;
import static org.apache.beam.sdk.values.TypeDescriptors.kvs;
import static org.apache.beam.sdk.values.TypeDescriptors.strings;
@ -277,7 +278,7 @@ public final class Transforms {
// Prober contacts referencing phantom registrars. They and their associated history entries can
// be safely ignored.
private static final ImmutableSet IGNORED_CONTACTS =
private static final ImmutableSet<String> IGNORED_CONTACTS =
ImmutableSet.of(
"1_WJ0TEST-GOOGLE", "1_WJ1TEST-GOOGLE", "1_WJ2TEST-GOOGLE", "1_WJ3TEST-GOOGLE");
@ -320,7 +321,8 @@ public final class Transforms {
return true;
}
private static Entity repairBadData(Entity entity) {
@VisibleForTesting
static Entity repairBadData(Entity entity) {
if (entity.getKind().equals("Cancellation")
&& Objects.equals(entity.getProperty("reason"), "AUTO_RENEW")) {
// AUTO_RENEW has been moved from 'reason' to flags. Change reason to RENEW and add the
@ -329,6 +331,16 @@ public final class Transforms {
entity.setUnindexedProperty("reason", Reason.RENEW.name());
entity.setUnindexedProperty("flags", ImmutableList.of(Flag.AUTO_RENEW.name()));
}
// Canonicalize old domain/host names from 2016 and earlier before we were enforcing this.
else if (entity.getKind().equals("DomainBase")) {
entity.setIndexedProperty(
"fullyQualifiedDomainName",
canonicalizeDomainName((String) entity.getProperty("fullyQualifiedDomainName")));
} else if (entity.getKind().equals("HostResource")) {
entity.setIndexedProperty(
"fullyQualifiedHostName",
canonicalizeDomainName((String) entity.getProperty("fullyQualifiedHostName")));
}
return entity;
}
@ -365,7 +377,8 @@ public final class Transforms {
* Returns a {@link PTransform} that produces a {@link PCollection} containing all elements in the
* given {@link Iterable}.
*/
static PTransform<PBegin, PCollection<String>> toStringPCollection(Iterable<String> strings) {
private static PTransform<PBegin, PCollection<String>> toStringPCollection(
Iterable<String> strings) {
return Create.of(strings).withCoder(StringUtf8Coder.of());
}
@ -373,7 +386,7 @@ public final class Transforms {
* Returns a {@link PTransform} from file {@link Metadata} to {@link VersionedEntity} using
* caller-provided {@code transformer}.
*/
static PTransform<PCollection<Metadata>, PCollection<VersionedEntity>> processFiles(
private static PTransform<PCollection<Metadata>, PCollection<VersionedEntity>> processFiles(
DoFn<ReadableFile, VersionedEntity> transformer) {
return new PTransform<PCollection<Metadata>, PCollection<VersionedEntity>>() {
@Override
@ -389,7 +402,7 @@ public final class Transforms {
private final DateTime fromTime;
private final DateTime toTime;
public FilterCommitLogFileByTime(DateTime fromTime, DateTime toTime) {
FilterCommitLogFileByTime(DateTime fromTime, DateTime toTime) {
checkNotNull(fromTime, "fromTime");
checkNotNull(toTime, "toTime");
checkArgument(

View file

@ -0,0 +1,64 @@
// Copyright 2021 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.beam.initsql;
import static com.google.common.truth.Truth.assertThat;
import static google.registry.beam.initsql.Transforms.repairBadData;
import static google.registry.model.ofy.ObjectifyService.auditedOfy;
import static google.registry.persistence.transaction.TransactionManagerFactory.ofyTm;
import static google.registry.testing.DatabaseHelper.createTld;
import static google.registry.testing.DatabaseHelper.newDomainBase;
import static google.registry.testing.DatabaseHelper.newHostResource;
import com.google.appengine.api.datastore.Entity;
import google.registry.model.domain.DomainBase;
import google.registry.model.host.HostResource;
import google.registry.testing.AppEngineExtension;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
/** Unit tests for {@link Transforms}. */
public class TransformsTest {
@RegisterExtension
public final AppEngineExtension appEngine =
AppEngineExtension.builder().withDatastoreAndCloudSql().build();
@BeforeEach
void beforeEach() {
createTld("tld");
}
@Test
void testRepairBadData_canonicalizesDomainName() {
DomainBase domain = newDomainBase("foobar.tld");
Entity entity = ofyTm().transact(() -> auditedOfy().toEntity(domain));
entity.setIndexedProperty("fullyQualifiedDomainName", "FOOBäR.TLD");
assertThat(((DomainBase) auditedOfy().toPojo(repairBadData(entity))).getDomainName())
.isEqualTo("xn--foobr-jra.tld");
}
@Test
void testRepairBadData_canonicalizesHostName() {
HostResource host = newHostResource("baz.foobar.tld");
Entity entity = ofyTm().transact(() -> auditedOfy().toEntity(host));
entity.setIndexedProperty(
"fullyQualifiedHostName", "b̴̹͔͓̣̭̫͇͕̻̬̱͇͗͌́̆̋͒a̶̬̖͚̋̈́̽̇͝͠z̵͠.FOOBäR.TLD");
assertThat(((HostResource) auditedOfy().toPojo(repairBadData(entity))).getHostName())
.isEqualTo(
"xn--baz-kdcb2ajgzb4jtg6doej4e6b9am7c7b6c5nd4k7gpa2a9a7dufyewec.xn--foobr-jra.tld");
}
}