Canonicalize domain/host names in initial import script (#1347)

* Canonicalize domain/host names in initial import script

* Add tests and reduce some method visibility
This commit is contained in:
Ben McIlwain 2021-10-07 11:59:46 -04:00 committed by GitHub
parent 7447afd12f
commit ba05a67b7c
4 changed files with 90 additions and 13 deletions

View file

@ -55,7 +55,7 @@ public final class CommitLogImports {
* represents the changes in one transaction. The {@code CommitLogManifest} contains deleted * represents the changes in one transaction. The {@code CommitLogManifest} contains deleted
* entity keys, whereas each {@code CommitLogMutation} contains one whole entity. * entity keys, whereas each {@code CommitLogMutation} contains one whole entity.
*/ */
public static ImmutableList<ImmutableList<VersionedEntity>> loadEntitiesByTransaction( static ImmutableList<ImmutableList<VersionedEntity>> loadEntitiesByTransaction(
InputStream inputStream) { InputStream inputStream) {
try (InputStream input = new BufferedInputStream(inputStream)) { try (InputStream input = new BufferedInputStream(inputStream)) {
Iterator<ImmutableObject> commitLogs = createDeserializingIterator(input, false); Iterator<ImmutableObject> commitLogs = createDeserializingIterator(input, false);
@ -104,7 +104,7 @@ public final class CommitLogImports {
* represents the changes in one transaction. The {@code CommitLogManifest} contains deleted * represents the changes in one transaction. The {@code CommitLogManifest} contains deleted
* entity keys, whereas each {@code CommitLogMutation} contains one whole entity. * entity keys, whereas each {@code CommitLogMutation} contains one whole entity.
*/ */
public static ImmutableList<VersionedEntity> loadEntities(InputStream inputStream) { static ImmutableList<VersionedEntity> loadEntities(InputStream inputStream) {
return loadEntitiesByTransaction(inputStream).stream() return loadEntitiesByTransaction(inputStream).stream()
.flatMap(ImmutableList::stream) .flatMap(ImmutableList::stream)
.collect(toImmutableList()); .collect(toImmutableList());

View file

@ -105,29 +105,29 @@ public abstract class VersionedEntity implements Serializable {
* VersionedEntity VersionedEntities}. See {@link CommitLogImports#loadEntities} for more * VersionedEntity VersionedEntities}. See {@link CommitLogImports#loadEntities} for more
* information. * information.
*/ */
public static Stream<VersionedEntity> fromManifest(CommitLogManifest manifest) { static Stream<VersionedEntity> fromManifest(CommitLogManifest manifest) {
long commitTimeMillis = manifest.getCommitTime().getMillis(); long commitTimeMillis = manifest.getCommitTime().getMillis();
return manifest.getDeletions().stream() return manifest.getDeletions().stream()
.map(com.googlecode.objectify.Key::getRaw) .map(com.googlecode.objectify.Key::getRaw)
.map(key -> builder().commitTimeMills(commitTimeMillis).key(key).build()); .map(key -> newBuilder().commitTimeMills(commitTimeMillis).key(key).build());
} }
/* Converts a {@link CommitLogMutation} to a {@link VersionedEntity}. */ /* Converts a {@link CommitLogMutation} to a {@link VersionedEntity}. */
public static VersionedEntity fromMutation(CommitLogMutation mutation) { static VersionedEntity fromMutation(CommitLogMutation mutation) {
return from( return from(
com.googlecode.objectify.Key.create(mutation).getParent().getId(), com.googlecode.objectify.Key.create(mutation).getParent().getId(),
mutation.getEntityProtoBytes()); mutation.getEntityProtoBytes());
} }
public static VersionedEntity from(long commitTimeMillis, byte[] entityProtoBytes) { public static VersionedEntity from(long commitTimeMillis, byte[] entityProtoBytes) {
return builder() return newBuilder()
.entityProtoBytes(entityProtoBytes) .entityProtoBytes(entityProtoBytes)
.key(EntityTranslator.createFromPbBytes(entityProtoBytes).getKey()) .key(EntityTranslator.createFromPbBytes(entityProtoBytes).getKey())
.commitTimeMills(commitTimeMillis) .commitTimeMills(commitTimeMillis)
.build(); .build();
} }
static Builder builder() { private static Builder newBuilder() {
return new AutoValue_VersionedEntity.Builder(); return new AutoValue_VersionedEntity.Builder();
} }
@ -142,7 +142,7 @@ public abstract class VersionedEntity implements Serializable {
public abstract VersionedEntity build(); public abstract VersionedEntity build();
public Builder entityProtoBytes(byte[] bytes) { Builder entityProtoBytes(byte[] bytes) {
return entityProtoBytes(new ImmutableBytes(bytes)); return entityProtoBytes(new ImmutableBytes(bytes));
} }
} }

View file

@ -22,6 +22,7 @@ import static google.registry.beam.initsql.BackupPaths.getExportFilePatterns;
import static google.registry.model.ofy.ObjectifyService.auditedOfy; import static google.registry.model.ofy.ObjectifyService.auditedOfy;
import static google.registry.util.DateTimeUtils.START_OF_TIME; import static google.registry.util.DateTimeUtils.START_OF_TIME;
import static google.registry.util.DateTimeUtils.isBeforeOrAt; import static google.registry.util.DateTimeUtils.isBeforeOrAt;
import static google.registry.util.DomainNameUtils.canonicalizeDomainName;
import static java.util.Comparator.comparing; import static java.util.Comparator.comparing;
import static org.apache.beam.sdk.values.TypeDescriptors.kvs; import static org.apache.beam.sdk.values.TypeDescriptors.kvs;
import static org.apache.beam.sdk.values.TypeDescriptors.strings; import static org.apache.beam.sdk.values.TypeDescriptors.strings;
@ -277,7 +278,7 @@ public final class Transforms {
// Prober contacts referencing phantom registrars. They and their associated history entries can // Prober contacts referencing phantom registrars. They and their associated history entries can
// be safely ignored. // be safely ignored.
private static final ImmutableSet IGNORED_CONTACTS = private static final ImmutableSet<String> IGNORED_CONTACTS =
ImmutableSet.of( ImmutableSet.of(
"1_WJ0TEST-GOOGLE", "1_WJ1TEST-GOOGLE", "1_WJ2TEST-GOOGLE", "1_WJ3TEST-GOOGLE"); "1_WJ0TEST-GOOGLE", "1_WJ1TEST-GOOGLE", "1_WJ2TEST-GOOGLE", "1_WJ3TEST-GOOGLE");
@ -320,7 +321,8 @@ public final class Transforms {
return true; return true;
} }
private static Entity repairBadData(Entity entity) { @VisibleForTesting
static Entity repairBadData(Entity entity) {
if (entity.getKind().equals("Cancellation") if (entity.getKind().equals("Cancellation")
&& Objects.equals(entity.getProperty("reason"), "AUTO_RENEW")) { && Objects.equals(entity.getProperty("reason"), "AUTO_RENEW")) {
// AUTO_RENEW has been moved from 'reason' to flags. Change reason to RENEW and add the // AUTO_RENEW has been moved from 'reason' to flags. Change reason to RENEW and add the
@ -329,6 +331,16 @@ public final class Transforms {
entity.setUnindexedProperty("reason", Reason.RENEW.name()); entity.setUnindexedProperty("reason", Reason.RENEW.name());
entity.setUnindexedProperty("flags", ImmutableList.of(Flag.AUTO_RENEW.name())); entity.setUnindexedProperty("flags", ImmutableList.of(Flag.AUTO_RENEW.name()));
} }
// Canonicalize old domain/host names from 2016 and earlier before we were enforcing this.
else if (entity.getKind().equals("DomainBase")) {
entity.setIndexedProperty(
"fullyQualifiedDomainName",
canonicalizeDomainName((String) entity.getProperty("fullyQualifiedDomainName")));
} else if (entity.getKind().equals("HostResource")) {
entity.setIndexedProperty(
"fullyQualifiedHostName",
canonicalizeDomainName((String) entity.getProperty("fullyQualifiedHostName")));
}
return entity; return entity;
} }
@ -365,7 +377,8 @@ public final class Transforms {
* Returns a {@link PTransform} that produces a {@link PCollection} containing all elements in the * Returns a {@link PTransform} that produces a {@link PCollection} containing all elements in the
* given {@link Iterable}. * given {@link Iterable}.
*/ */
static PTransform<PBegin, PCollection<String>> toStringPCollection(Iterable<String> strings) { private static PTransform<PBegin, PCollection<String>> toStringPCollection(
Iterable<String> strings) {
return Create.of(strings).withCoder(StringUtf8Coder.of()); return Create.of(strings).withCoder(StringUtf8Coder.of());
} }
@ -373,7 +386,7 @@ public final class Transforms {
* Returns a {@link PTransform} from file {@link Metadata} to {@link VersionedEntity} using * Returns a {@link PTransform} from file {@link Metadata} to {@link VersionedEntity} using
* caller-provided {@code transformer}. * caller-provided {@code transformer}.
*/ */
static PTransform<PCollection<Metadata>, PCollection<VersionedEntity>> processFiles( private static PTransform<PCollection<Metadata>, PCollection<VersionedEntity>> processFiles(
DoFn<ReadableFile, VersionedEntity> transformer) { DoFn<ReadableFile, VersionedEntity> transformer) {
return new PTransform<PCollection<Metadata>, PCollection<VersionedEntity>>() { return new PTransform<PCollection<Metadata>, PCollection<VersionedEntity>>() {
@Override @Override
@ -389,7 +402,7 @@ public final class Transforms {
private final DateTime fromTime; private final DateTime fromTime;
private final DateTime toTime; private final DateTime toTime;
public FilterCommitLogFileByTime(DateTime fromTime, DateTime toTime) { FilterCommitLogFileByTime(DateTime fromTime, DateTime toTime) {
checkNotNull(fromTime, "fromTime"); checkNotNull(fromTime, "fromTime");
checkNotNull(toTime, "toTime"); checkNotNull(toTime, "toTime");
checkArgument( checkArgument(

View file

@ -0,0 +1,64 @@
// Copyright 2021 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.beam.initsql;
import static com.google.common.truth.Truth.assertThat;
import static google.registry.beam.initsql.Transforms.repairBadData;
import static google.registry.model.ofy.ObjectifyService.auditedOfy;
import static google.registry.persistence.transaction.TransactionManagerFactory.ofyTm;
import static google.registry.testing.DatabaseHelper.createTld;
import static google.registry.testing.DatabaseHelper.newDomainBase;
import static google.registry.testing.DatabaseHelper.newHostResource;
import com.google.appengine.api.datastore.Entity;
import google.registry.model.domain.DomainBase;
import google.registry.model.host.HostResource;
import google.registry.testing.AppEngineExtension;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
/**
 * Unit tests for {@link Transforms}.
 *
 * <p>Each test serializes a freshly built resource to a Datastore {@link Entity}, overwrites its
 * name property with a non-canonical value, runs {@code repairBadData} on it, and checks that the
 * name read back from the repaired entity is the expected canonical form.
 */
public class TransformsTest {

  @RegisterExtension
  public final AppEngineExtension appEngine =
      AppEngineExtension.builder().withDatastoreAndCloudSql().build();

  /** Creates the "tld" TLD that the domain and host fixtures below live under. */
  @BeforeEach
  void beforeEach() {
    createTld("tld");
  }

  /** A mixed-case, non-ASCII domain name must come back as its expected canonical form. */
  @Test
  void testRepairBadData_canonicalizesDomainName() {
    // Arrange: a domain entity whose stored name is deliberately non-canonical.
    DomainBase original = newDomainBase("foobar.tld");
    Entity rawEntity = ofyTm().transact(() -> auditedOfy().toEntity(original));
    rawEntity.setIndexedProperty("fullyQualifiedDomainName", "FOOBäR.TLD");

    // Act: repair the raw entity and convert it back to the model object.
    DomainBase repaired = (DomainBase) auditedOfy().toPojo(repairBadData(rawEntity));

    // Assert.
    assertThat(repaired.getDomainName()).isEqualTo("xn--foobr-jra.tld");
  }

  /** A host name containing combining marks and upper case must be canonicalized per label. */
  @Test
  void testRepairBadData_canonicalizesHostName() {
    // Arrange: a host entity whose stored name is deliberately non-canonical.
    HostResource original = newHostResource("baz.foobar.tld");
    Entity rawEntity = ofyTm().transact(() -> auditedOfy().toEntity(original));
    rawEntity.setIndexedProperty(
        "fullyQualifiedHostName", "b̴̹͔͓̣̭̫͇͕̻̬̱͇͗͌́̆̋͒a̶̬̖͚̋̈́̽̇͝͠z̵͠.FOOBäR.TLD");

    // Act: repair the raw entity and convert it back to the model object.
    HostResource repaired = (HostResource) auditedOfy().toPojo(repairBadData(rawEntity));

    // Assert.
    assertThat(repaired.getHostName())
        .isEqualTo(
            "xn--baz-kdcb2ajgzb4jtg6doej4e6b9am7c7b6c5nd4k7gpa2a9a7dufyewec.xn--foobr-jra.tld");
  }
}