Create pipeline to save synthetic DomainHistory objects (#1795)

This runs over all domains that weren't deleted as of September 5. This
will fix most of b/248112997, which is itself caused by b/245940594 --
creating synthetic history objects means that the RDE pipeline will look
at those instead of the potentially-no-longer-valid data in the old
history objects.
This commit is contained in:
gbrodman 2022-09-22 14:58:50 -04:00 committed by GitHub
parent 7f0ffb3d0b
commit 2388cece95
3 changed files with 212 additions and 0 deletions

View file

@ -707,6 +707,10 @@ createToolTask(
createToolTask( createToolTask(
'jpaDemoPipeline', 'google.registry.beam.common.JpaDemoPipeline') 'jpaDemoPipeline', 'google.registry.beam.common.JpaDemoPipeline')
createToolTask(
'createSyntheticDomainHistories',
'google.registry.tools.javascrap.CreateSyntheticDomainHistoriesPipeline')
project.tasks.create('generateSqlSchema', JavaExec) { project.tasks.create('generateSqlSchema', JavaExec) {
classpath = sourceSets.nonprod.runtimeClasspath classpath = sourceSets.nonprod.runtimeClasspath
main = 'google.registry.tools.DevTool' main = 'google.registry.tools.DevTool'

View file

@ -0,0 +1,130 @@
// Copyright 2022 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.tools.javascrap;
import static google.registry.persistence.transaction.TransactionManagerFactory.jpaTm;
import com.google.common.collect.ImmutableMap;
import dagger.Component;
import google.registry.beam.common.RegistryJpaIO;
import google.registry.beam.common.RegistryPipelineOptions;
import google.registry.config.RegistryConfig.Config;
import google.registry.config.RegistryConfig.ConfigModule;
import google.registry.model.domain.Domain;
import google.registry.model.reporting.HistoryEntry;
import google.registry.persistence.PersistenceModule.TransactionIsolationLevel;
import google.registry.persistence.VKey;
import java.io.Serializable;
import javax.inject.Singleton;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.joda.time.DateTime;
/**
* Pipeline that creates a synthetic history for every non-deleted {@link Domain} in SQL.
*
* <p>This is created to fix the issue identified in b/248112997. After b/245940594, there were some
* domains where the most recent history object did not represent the state of the domain as it
* exists in the world. Because RDE loads only from DomainHistory objects, this means that RDE was
* producing wrong data. This pipeline mitigates that issue by creating synthetic history events for
* every domain that was not deleted as of the start of the pipeline -- then, we can guarantee that
* this new history object represents the state of the domain as far as we know.
*
* <p>To run the pipeline (replace the environment as appropriate):
*
* <p><code>
* $ ./nom_build :core:createSyntheticDomainHistories --args="--region=us-central1
* --runner=DataflowRunner
* --registryEnvironment=CRASH
* --project={project-id}
* --workerMachineType=n2-standard-4"
* </code>
*/
public class CreateSyntheticDomainHistoriesPipeline implements Serializable {
private static final String HISTORY_REASON =
"Create synthetic domain histories to fix RDE for b/248112997";
private static final DateTime BAD_PIPELINE_START_TIME =
DateTime.parse("2022-09-05T00:00:00.000Z");
static void setup(Pipeline pipeline, String registryAdminRegistrarId) {
pipeline
.apply(
"Read all domain repo IDs",
RegistryJpaIO.read(
"SELECT repoId FROM Domain WHERE deletionTime > :badPipelineStartTime",
ImmutableMap.of("badPipelineStartTime", BAD_PIPELINE_START_TIME),
String.class,
repoId -> VKey.createSql(Domain.class, repoId)))
.apply(
"Save a synthetic DomainHistory for each domain",
ParDo.of(new DomainHistoryCreator(registryAdminRegistrarId)));
}
private static class DomainHistoryCreator extends DoFn<VKey<Domain>, Void> {
private final String registryAdminRegistrarId;
private DomainHistoryCreator(String registryAdminRegistrarId) {
this.registryAdminRegistrarId = registryAdminRegistrarId;
}
@ProcessElement
public void processElement(
@Element VKey<Domain> key, PipelineOptions options, OutputReceiver<Void> outputReceiver) {
jpaTm()
.transact(
() -> {
Domain domain = jpaTm().loadByKey(key);
jpaTm()
.put(
HistoryEntry.createBuilderForResource(domain)
.setRegistrarId(registryAdminRegistrarId)
.setBySuperuser(true)
.setRequestedByRegistrar(false)
.setModificationTime(jpaTm().getTransactionTime())
.setReason(HISTORY_REASON)
.setType(HistoryEntry.Type.SYNTHETIC)
.build());
outputReceiver.output(null);
});
}
}
public static void main(String[] args) {
RegistryPipelineOptions options =
PipelineOptionsFactory.fromArgs(args).withValidation().as(RegistryPipelineOptions.class);
RegistryPipelineOptions.validateRegistryPipelineOptions(options);
options.setIsolationOverride(TransactionIsolationLevel.TRANSACTION_READ_COMMITTED);
String registryAdminRegistrarId =
DaggerCreateSyntheticDomainHistoriesPipeline_ConfigComponent.create()
.getRegistryAdminRegistrarId();
Pipeline pipeline = Pipeline.create(options);
setup(pipeline, registryAdminRegistrarId);
pipeline.run();
}
@Singleton
@Component(modules = ConfigModule.class)
interface ConfigComponent {
@Config("registryAdminClientId")
String getRegistryAdminRegistrarId();
}
}

View file

@ -0,0 +1,78 @@
// Copyright 2022 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.tools.javascrap;
import static com.google.common.truth.Truth.assertThat;
import static google.registry.model.ImmutableObjectSubject.assertAboutImmutableObjects;
import static google.registry.persistence.transaction.TransactionManagerFactory.jpaTm;
import static google.registry.testing.DatabaseHelper.createTld;
import static google.registry.testing.DatabaseHelper.loadAllOf;
import static google.registry.testing.DatabaseHelper.newDomain;
import static google.registry.testing.DatabaseHelper.persistActiveHost;
import static google.registry.testing.DatabaseHelper.persistNewRegistrar;
import static google.registry.testing.DatabaseHelper.persistSimpleResource;
import com.google.common.collect.Iterables;
import google.registry.beam.TestPipelineExtension;
import google.registry.model.domain.Domain;
import google.registry.model.domain.DomainHistory;
import google.registry.model.reporting.HistoryEntry;
import google.registry.persistence.transaction.JpaTestExtensions;
import google.registry.testing.DatastoreEntityExtension;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
/** Tests for {@link CreateSyntheticDomainHistoriesPipeline}. */
public class CreateSyntheticDomainHistoriesPipelineTest {
@RegisterExtension
JpaTestExtensions.JpaIntegrationTestExtension jpaEextension =
new JpaTestExtensions.Builder().buildIntegrationTestExtension();
@RegisterExtension
DatastoreEntityExtension datastoreEntityExtension =
new DatastoreEntityExtension().allThreads(true);
@RegisterExtension TestPipelineExtension pipeline = TestPipelineExtension.create();
private Domain domain;
@BeforeEach
void beforeEach() {
persistNewRegistrar("TheRegistrar");
persistNewRegistrar("NewRegistrar");
createTld("tld");
domain =
persistSimpleResource(
newDomain("example.tld")
.asBuilder()
.setNameservers(persistActiveHost("external.com").createVKey())
.build());
}
@Test
void testSuccess() {
assertThat(jpaTm().transact(() -> jpaTm().loadAllOf(DomainHistory.class))).isEmpty();
CreateSyntheticDomainHistoriesPipeline.setup(pipeline, "NewRegistrar");
pipeline.run().waitUntilFinish();
DomainHistory domainHistory = Iterables.getOnlyElement(loadAllOf(DomainHistory.class));
assertThat(domainHistory.getType()).isEqualTo(HistoryEntry.Type.SYNTHETIC);
assertThat(domainHistory.getRegistrarId()).isEqualTo("NewRegistrar");
assertAboutImmutableObjects()
.that(domainHistory.getDomainBase().get())
.hasFieldsEqualTo(domain);
}
}