Refactor createSynthetic to be a command instead of a pipeline (#1813)

This commit is contained in:
gbrodman 2022-10-11 12:23:31 -04:00 committed by GitHub
parent 8eff021de2
commit 3a15a390e7
8 changed files with 218 additions and 244 deletions

View file

@ -48,7 +48,7 @@ import org.json.simple.JSONValue;
* <p>By default - connects to the TOOLS service. To create a Connection to another service, call
* the {@link #withService} function.
*/
class AppEngineConnection {
public class AppEngineConnection {
/** Pattern to heuristically extract title tag contents in HTML responses. */
private static final Pattern HTML_TITLE_TAG_PATTERN = Pattern.compile("<title>(.*?)</title>");

View file

@ -15,6 +15,6 @@
package google.registry.tools;
/** A command that can send HTTP requests to a backend module. */
interface CommandWithConnection extends Command {
public interface CommandWithConnection extends Command {
void setConnection(AppEngineConnection connection);
}

View file

@ -17,6 +17,7 @@ package google.registry.tools;
import com.google.common.collect.ImmutableMap;
import google.registry.tools.javascrap.CompareEscrowDepositsCommand;
import google.registry.tools.javascrap.CreateCancellationsForOneTimesCommand;
import google.registry.tools.javascrap.CreateSyntheticDomainHistoriesCommand;
/** Container class to create and run remote commands against a Datastore instance. */
public final class RegistryTool {
@ -47,6 +48,7 @@ public final class RegistryTool {
.put("create_registrar", CreateRegistrarCommand.class)
.put("create_registrar_groups", CreateRegistrarGroupsCommand.class)
.put("create_reserved_list", CreateReservedListCommand.class)
.put("create_synthetic_domain_histories", CreateSyntheticDomainHistoriesCommand.class)
.put("create_tld", CreateTldCommand.class)
.put("curl", CurlCommand.class)
.put("delete_allocation_tokens", DeleteAllocationTokensCommand.class)

View file

@ -43,6 +43,7 @@ import google.registry.request.Modules.UserServiceModule;
import google.registry.tools.AuthModule.LocalCredentialModule;
import google.registry.tools.javascrap.CompareEscrowDepositsCommand;
import google.registry.tools.javascrap.CreateCancellationsForOneTimesCommand;
import google.registry.tools.javascrap.CreateSyntheticDomainHistoriesCommand;
import google.registry.util.UtilsModule;
import google.registry.whois.NonCachingWhoisModule;
import javax.annotation.Nullable;
@ -106,6 +107,8 @@ interface RegistryToolComponent {
void inject(CreateRegistrarCommand command);
void inject(CreateSyntheticDomainHistoriesCommand command);
void inject(CreateTldCommand command);
void inject(EncryptEscrowDepositCommand command);

View file

@ -28,8 +28,8 @@ import java.lang.reflect.Method;
* {@link RemoteApiOptions} with a JSON representing a user credential.
*/
public class RemoteApiOptionsUtil {
static RemoteApiOptions useGoogleCredentialStream(RemoteApiOptions options, InputStream stream)
throws Exception {
public static RemoteApiOptions useGoogleCredentialStream(
RemoteApiOptions options, InputStream stream) throws Exception {
Method method =
options.getClass().getDeclaredMethod("useGoogleCredentialStream", InputStream.class);
checkState(

View file

@ -0,0 +1,209 @@
// Copyright 2022 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.tools.javascrap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static google.registry.persistence.transaction.TransactionManagerFactory.jpaTm;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.beust.jcommander.Parameters;
import com.google.appengine.tools.remoteapi.RemoteApiInstaller;
import com.google.appengine.tools.remoteapi.RemoteApiOptions;
import com.google.common.collect.ImmutableSet;
import com.google.common.flogger.FluentLogger;
import google.registry.config.CredentialModule;
import google.registry.config.RegistryConfig;
import google.registry.config.RegistryConfig.Config;
import google.registry.model.domain.Domain;
import google.registry.model.ofy.ObjectifyService;
import google.registry.model.reporting.HistoryEntry;
import google.registry.persistence.VKey;
import google.registry.tools.AppEngineConnection;
import google.registry.tools.CommandWithConnection;
import google.registry.tools.CommandWithRemoteApi;
import google.registry.tools.ConfirmingCommand;
import google.registry.tools.RemoteApiOptionsUtil;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import javax.inject.Inject;
import org.joda.time.DateTime;
/**
* Command that creates an additional synthetic history object for domains.
*
* <p>This is created to fix the issue identified in b/248112997. After b/245940594, there were some
* domains where the most recent history object did not represent the state of the domain as it
* exists in the world. Because RDE loads only from DomainHistory objects, this means that RDE was
* producing wrong data. This command mitigates that issue by creating synthetic history events for
* every domain that was not deleted as of the start of the bad {@link
* google.registry.beam.resave.ResaveAllEppResourcesPipeline} -- then, we can guarantee that this
* new history object represents the state of the domain as far as we know.
*
* <p>A previous run of this command (in pipeline form) attempted to do this and succeeded in most
* cases. Unfortunately, that pipeline had an issue where it used self-allocated IDs for some of the
* dependent objects (e.g. {@link google.registry.model.domain.secdns.DomainDsDataHistory}). As a
* result, we want to run this again as a command using Datastore-allocated IDs to re-create
* synthetic history objects for any domain whose last history object is one of the
* potentially-incorrect synthetic objects.
*
* <p>We further restrict the domains to domains whose latest history object is before October 4.
* This is an arbitrary date that is suitably far after the previous incorrect run of this synthetic
* history pipeline, with the purpose of making future runs of this command idempotent (in case the
* command fails, we can just run it again and again).
*/
@Parameters(
separators = " =",
commandDescription = "Create synthetic domain history objects to fix RDE.")
public class CreateSyntheticDomainHistoriesCommand extends ConfirmingCommand
implements CommandWithRemoteApi, CommandWithConnection {
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
private static final String HISTORY_REASON =
"Create synthetic domain histories to fix RDE for b/248112997";
private static final DateTime BAD_PIPELINE_END_TIME = DateTime.parse("2022-09-10T12:00:00.000Z");
private static final DateTime NEW_SYNTHETIC_ROUND_START =
DateTime.parse("2022-10-04T00:00:00.000Z");
private static final ExecutorService executor = Executors.newFixedThreadPool(20);
private static final AtomicInteger numDomainsProcessed = new AtomicInteger();
private AppEngineConnection connection;
@Inject
@Config("registryAdminClientId")
String registryAdminRegistrarId;
@Inject @CredentialModule.LocalCredentialJson String localCredentialJson;
private final ThreadLocal<RemoteApiInstaller> installerThreadLocal =
ThreadLocal.withInitial(this::createInstaller);
private ImmutableSet<String> domainRepoIds;
@Override
protected String prompt() {
jpaTm()
.transact(
() -> {
domainRepoIds =
jpaTm()
.query(
"SELECT dh.domainRepoId FROM DomainHistory dh JOIN Tld t ON t.tldStr ="
+ " dh.domainBase.tld WHERE t.tldType = 'REAL' AND dh.type ="
+ " 'SYNTHETIC' AND dh.modificationTime > :badPipelineEndTime AND"
+ " dh.modificationTime < :newSyntheticRoundStart AND"
+ " (dh.domainRepoId, dh.modificationTime) IN (SELECT domainRepoId,"
+ " MAX(modificationTime) FROM DomainHistory GROUP BY domainRepoId)",
String.class)
.setParameter("badPipelineEndTime", BAD_PIPELINE_END_TIME)
.setParameter("newSyntheticRoundStart", NEW_SYNTHETIC_ROUND_START)
.getResultStream()
.collect(toImmutableSet());
});
return String.format(
"Attempt to create synthetic history entries for %d domains?", domainRepoIds.size());
}
@Override
protected String execute() throws Exception {
List<Future<?>> futures = new ArrayList<>();
for (String domainRepoId : domainRepoIds) {
futures.add(
executor.submit(
() -> {
// Make sure the remote API is installed for ID generation
installerThreadLocal.get();
jpaTm()
.transact(
() -> {
Domain domain =
jpaTm().loadByKey(VKey.createSql(Domain.class, domainRepoId));
jpaTm()
.put(
HistoryEntry.createBuilderForResource(domain)
.setRegistrarId(registryAdminRegistrarId)
.setBySuperuser(true)
.setRequestedByRegistrar(false)
.setModificationTime(jpaTm().getTransactionTime())
.setReason(HISTORY_REASON)
.setType(HistoryEntry.Type.SYNTHETIC)
.build());
});
int numProcessed = numDomainsProcessed.incrementAndGet();
if (numProcessed % 1000 == 0) {
System.out.printf("Saved histories for %d domains%n", numProcessed);
}
return null;
}));
}
for (Future<?> future : futures) {
try {
future.get();
} catch (Exception e) {
logger.atSevere().withCause(e).log("Error");
}
}
executor.shutdown();
executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
return String.format("Saved entries for %d domains", numDomainsProcessed.get());
}
@Override
public void setConnection(AppEngineConnection connection) {
this.connection = connection;
}
/**
* Installs the remote API so that the worker threads can use Datastore for ID generation.
*
* <p>Lifted from the RegistryCli class
*/
private RemoteApiInstaller createInstaller() {
RemoteApiInstaller installer = new RemoteApiInstaller();
RemoteApiOptions options = new RemoteApiOptions();
options.server(connection.getServer().getHost(), getPort(connection.getServer()));
if (RegistryConfig.areServersLocal()) {
// Use dev credentials for localhost.
options.useDevelopmentServerCredential();
} else {
try {
RemoteApiOptionsUtil.useGoogleCredentialStream(
options, new ByteArrayInputStream(localCredentialJson.getBytes(UTF_8)));
} catch (Exception e) {
throw new RuntimeException(e);
}
}
try {
installer.install(options);
} catch (IOException e) {
throw new RuntimeException(e);
}
ObjectifyService.initOfy();
return installer;
}
private static int getPort(URL url) {
return url.getPort() == -1 ? url.getDefaultPort() : url.getPort();
}
}

View file

@ -1,130 +0,0 @@
// Copyright 2022 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.tools.javascrap;
import static google.registry.persistence.transaction.TransactionManagerFactory.jpaTm;
import com.google.common.collect.ImmutableMap;
import dagger.Component;
import google.registry.beam.common.RegistryJpaIO;
import google.registry.beam.common.RegistryPipelineOptions;
import google.registry.config.RegistryConfig.Config;
import google.registry.config.RegistryConfig.ConfigModule;
import google.registry.model.domain.Domain;
import google.registry.model.reporting.HistoryEntry;
import google.registry.persistence.PersistenceModule.TransactionIsolationLevel;
import google.registry.persistence.VKey;
import java.io.Serializable;
import javax.inject.Singleton;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.joda.time.DateTime;
/**
* Pipeline that creates a synthetic history for every non-deleted {@link Domain} in SQL.
*
* <p>This is created to fix the issue identified in b/248112997. After b/245940594, there were some
* domains where the most recent history object did not represent the state of the domain as it
* exists in the world. Because RDE loads only from DomainHistory objects, this means that RDE was
* producing wrong data. This pipeline mitigates that issue by creating synthetic history events for
* every domain that was not deleted as of the start of the pipeline -- then, we can guarantee that
* this new history object represents the state of the domain as far as we know.
*
* <p>To run the pipeline (replace the environment as appropriate):
*
* <p><code>
* $ ./nom_build :core:createSyntheticDomainHistories --args="--region=us-central1
* --runner=DataflowRunner
* --registryEnvironment=CRASH
* --project={project-id}
* --workerMachineType=n2-standard-4"
* </code>
*/
public class CreateSyntheticDomainHistoriesPipeline implements Serializable {
private static final String HISTORY_REASON =
"Create synthetic domain histories to fix RDE for b/248112997";
private static final DateTime BAD_PIPELINE_START_TIME =
DateTime.parse("2022-09-05T09:00:00.000Z");
static void setup(Pipeline pipeline, String registryAdminRegistrarId) {
pipeline
.apply(
"Read all domain repo IDs",
RegistryJpaIO.read(
"SELECT d.repoId FROM Domain d WHERE deletionTime > :badPipelineStartTime",
ImmutableMap.of("badPipelineStartTime", BAD_PIPELINE_START_TIME),
String.class,
repoId -> VKey.createSql(Domain.class, repoId)))
.apply(
"Save a synthetic DomainHistory for each domain",
ParDo.of(new DomainHistoryCreator(registryAdminRegistrarId)));
}
private static class DomainHistoryCreator extends DoFn<VKey<Domain>, Void> {
private final String registryAdminRegistrarId;
private DomainHistoryCreator(String registryAdminRegistrarId) {
this.registryAdminRegistrarId = registryAdminRegistrarId;
}
@ProcessElement
public void processElement(
@Element VKey<Domain> key, PipelineOptions options, OutputReceiver<Void> outputReceiver) {
jpaTm()
.transact(
() -> {
Domain domain = jpaTm().loadByKey(key);
jpaTm()
.put(
HistoryEntry.createBuilderForResource(domain)
.setRegistrarId(registryAdminRegistrarId)
.setBySuperuser(true)
.setRequestedByRegistrar(false)
.setModificationTime(jpaTm().getTransactionTime())
.setReason(HISTORY_REASON)
.setType(HistoryEntry.Type.SYNTHETIC)
.build());
outputReceiver.output(null);
});
}
}
public static void main(String[] args) {
RegistryPipelineOptions options =
PipelineOptionsFactory.fromArgs(args).withValidation().as(RegistryPipelineOptions.class);
RegistryPipelineOptions.validateRegistryPipelineOptions(options);
options.setIsolationOverride(TransactionIsolationLevel.TRANSACTION_REPEATABLE_READ);
String registryAdminRegistrarId =
DaggerCreateSyntheticDomainHistoriesPipeline_ConfigComponent.create()
.getRegistryAdminRegistrarId();
Pipeline pipeline = Pipeline.create(options);
setup(pipeline, registryAdminRegistrarId);
pipeline.run();
}
@Singleton
@Component(modules = ConfigModule.class)
interface ConfigComponent {
@Config("registryAdminClientId")
String getRegistryAdminRegistrarId();
}
}

View file

@ -1,110 +0,0 @@
// Copyright 2022 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.tools.javascrap;
import static com.google.common.truth.Truth.assertThat;
import static google.registry.model.ImmutableObjectSubject.assertAboutImmutableObjects;
import static google.registry.testing.DatabaseHelper.createTld;
import static google.registry.testing.DatabaseHelper.loadAllOf;
import static google.registry.testing.DatabaseHelper.newContact;
import static google.registry.testing.DatabaseHelper.persistActiveHost;
import static google.registry.testing.DatabaseHelper.persistDomainWithDependentResources;
import static google.registry.testing.DatabaseHelper.persistNewRegistrar;
import static google.registry.testing.DatabaseHelper.persistResource;
import static google.registry.testing.DatabaseHelper.persistSimpleResource;
import google.registry.beam.TestPipelineExtension;
import google.registry.model.domain.Domain;
import google.registry.model.domain.DomainHistory;
import google.registry.model.reporting.HistoryEntry;
import google.registry.model.reporting.HistoryEntryDao;
import google.registry.persistence.transaction.JpaTestExtensions;
import google.registry.testing.DatastoreEntityExtension;
import google.registry.testing.FakeClock;
import org.joda.time.DateTime;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
/** Tests for {@link CreateSyntheticDomainHistoriesPipeline}. */
public class CreateSyntheticDomainHistoriesPipelineTest {
private final FakeClock fakeClock = new FakeClock(DateTime.parse("2022-09-01T00:00:00.000Z"));
@RegisterExtension
JpaTestExtensions.JpaIntegrationTestExtension jpaEextension =
new JpaTestExtensions.Builder().withClock(fakeClock).buildIntegrationTestExtension();
@RegisterExtension
DatastoreEntityExtension datastoreEntityExtension =
new DatastoreEntityExtension().allThreads(true);
@RegisterExtension TestPipelineExtension pipeline = TestPipelineExtension.create();
private Domain domain;
@BeforeEach
void beforeEach() {
persistNewRegistrar("TheRegistrar");
persistNewRegistrar("NewRegistrar");
createTld("tld");
domain =
persistDomainWithDependentResources(
"example",
"tld",
persistResource(newContact("contact1234")),
fakeClock.nowUtc(),
DateTime.parse("2022-09-01T00:00:00.000Z"),
DateTime.parse("2024-09-01T00:00:00.000Z"));
domain =
persistSimpleResource(
domain
.asBuilder()
.setNameservers(persistActiveHost("external.com").createVKey())
.build());
fakeClock.setTo(DateTime.parse("2022-09-20T00:00:00.000Z"));
// shouldn't create any history objects for this domain
persistDomainWithDependentResources(
"ignored-example",
"tld",
persistResource(newContact("contact12345")),
fakeClock.nowUtc(),
DateTime.parse("2022-09-20T00:00:00.000Z"),
DateTime.parse("2024-09-20T00:00:00.000Z"));
}
@Test
void testSuccess() {
assertThat(loadAllOf(DomainHistory.class)).hasSize(2);
CreateSyntheticDomainHistoriesPipeline.setup(pipeline, "NewRegistrar");
pipeline.run().waitUntilFinish();
DomainHistory syntheticHistory =
HistoryEntryDao.loadHistoryObjectsForResource(domain.createVKey(), DomainHistory.class)
.get(1);
assertThat(syntheticHistory.getType()).isEqualTo(HistoryEntry.Type.SYNTHETIC);
assertThat(syntheticHistory.getRegistrarId()).isEqualTo("NewRegistrar");
assertAboutImmutableObjects()
.that(syntheticHistory.getDomainBase().get())
.isEqualExceptFields(domain, "updateTimestamp");
// four total histories, two CREATE and two SYNTHETIC
assertThat(loadAllOf(DomainHistory.class)).hasSize(4);
// can create multiple entries if we run it multiple times
pipeline.run().waitUntilFinish();
assertThat(HistoryEntryDao.loadHistoryObjectsForResource(domain.createVKey())).hasSize(3);
// six total histories, two CREATE and four SYNTHETIC
assertThat(loadAllOf(DomainHistory.class)).hasSize(6);
}
}