Add mapreduce action to create synthetic history entries (#1125)

* Add mapreduce action to create synthetic history entries

RDE and zone file generation require being able to tell what objects
looked like in the past (though not beyond 30 days, or whatever the
Datastore retention period is set to). In Datastore, to answer this we
look at commit logs, and in SQL we will look at the History objects
stored for each EPP resource. This action can be run once while in
Datastore-primary-SQL-secondary to make sure that every EPP resource has
at least one history entry for which the resource-at-this-time field is
filled out in the SQL world.
This commit is contained in:
gbrodman 2021-05-13 11:48:19 -04:00 committed by GitHub
parent 8cfefcef6e
commit a9adef6cc7
5 changed files with 301 additions and 47 deletions

View file

@ -391,6 +391,12 @@
<url-pattern>/_dr/task/wipeOutDatastore</url-pattern>
</servlet-mapping>
<!--
  Action to create synthetic history entries during async replication to SQL.
  Maps the /_dr/task/createSyntheticHistoryEntries task URL onto the backend servlet.
-->
<servlet-mapping>
<servlet-name>backend-servlet</servlet-name>
<url-pattern>/_dr/task/createSyntheticHistoryEntries</url-pattern>
</servlet-mapping>
<!-- Security config -->
<security-constraint>
<web-resource-collection>

View file

@ -86,6 +86,7 @@ import google.registry.tmch.TmchCrlAction;
import google.registry.tmch.TmchDnlAction;
import google.registry.tmch.TmchModule;
import google.registry.tmch.TmchSmdrlAction;
import google.registry.tools.javascrap.CreateSyntheticHistoryEntriesAction;
/** Dagger component with per-request lifetime for "backend" App Engine module. */
@RequestScope
@ -129,6 +130,8 @@ interface BackendRequestComponent {
CopyDetailReportsAction copyDetailReportAction();
CreateSyntheticHistoryEntriesAction createSyntheticHistoryEntriesAction();
DeleteContactsAndHostsAction deleteContactsAndHostsAction();
DeleteExpiredDomainsAction deleteExpiredDomainsAction();

View file

@ -0,0 +1,129 @@
// Copyright 2021 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.tools.javascrap;
import static google.registry.model.ofy.ObjectifyService.ofy;
import static google.registry.persistence.transaction.TransactionManagerFactory.tm;
import com.google.appengine.tools.mapreduce.Mapper;
import com.google.common.collect.ImmutableList;
import com.googlecode.objectify.Key;
import google.registry.config.RegistryConfig.Config;
import google.registry.mapreduce.MapreduceRunner;
import google.registry.mapreduce.inputs.EppResourceInputs;
import google.registry.model.EppResource;
import google.registry.model.domain.DomainHistory;
import google.registry.model.reporting.HistoryEntry;
import google.registry.rde.RdeStagingAction;
import google.registry.request.Action;
import google.registry.request.Response;
import google.registry.request.auth.Auth;
import google.registry.tools.server.GenerateZoneFilesAction;
import javax.inject.Inject;
/**
 * A mapreduce that creates synthetic history objects in SQL for all {@link EppResource} objects.
 *
 * <p>Certain operations, e.g. {@link RdeStagingAction} or {@link GenerateZoneFilesAction}, require
 * that we are able to answer the question of "What did this EPP resource look like at a point in
 * time?" In the Datastore world, we are able to answer this question using the commit logs, however
 * this is no longer possible in the SQL world. Instead, we will use the history objects, e.g.
 * {@link DomainHistory} to see what a particular resource looked like at that point in time, since
 * history objects store a snapshot of that resource.
 *
 * <p>This action creates a synthetic history object at the current point in time for every single
 * EPP resource to guarantee that later on, when examining in-the-past resources, we have some
 * history object for which the EppResource field is filled. This synthetic history object contains
 * basically nothing and its only purpose is to create a populated history object in SQL through
 * asynchronous replication.
 *
 * <p>NB: This class operates entirely in Datastore, which may be counterintuitive at first glance.
 * However, since this is meant to be run during the Datastore-primary, SQL-secondary stage of the
 * migration, we want to make sure that we are using the most up-to-date version of the data. The
 * resource field of the history objects will be populated during asynchronous migration, e.g. in
 * {@link DomainHistory#beforeSqlSave(DomainHistory)}.
 */
@Action(
    service = Action.Service.BACKEND,
    path = "/_dr/task/createSyntheticHistoryEntries",
    auth = Auth.AUTH_INTERNAL_OR_ADMIN)
public class CreateSyntheticHistoryEntriesAction implements Runnable {

  /**
   * The number of shards to run the map-only mapreduce on.
   *
   * <p>This is less than the default of 100 because we can afford it being slower, but we don't
   * want to write out lots of large commit logs in a short period of time. If we did so, the
   * asynchronous replication action (run every few minutes) might fall behind which may make the
   * migration tougher.
   */
  private static final int NUM_SHARDS = 10;

  private final MapreduceRunner mrRunner;
  private final Response response;
  private final String registryAdminRegistrarId;

  /**
   * Creates the action.
   *
   * @param mrRunner runner used to launch the map-only mapreduce
   * @param response response that receives the link to the mapreduce console
   * @param registryAdminRegistrarId registrar ID recorded as the client on every synthetic entry
   */
  @Inject
  CreateSyntheticHistoryEntriesAction(
      MapreduceRunner mrRunner,
      Response response,
      @Config("registryAdminClientId") String registryAdminRegistrarId) {
    this.mrRunner = mrRunner;
    this.response = response;
    this.registryAdminRegistrarId = registryAdminRegistrarId;
  }

  /** Launches the map-only mapreduce over the keys of all EPP resources. */
  @Override
  public void run() {
    mrRunner
        .setJobName("Create a synthetic HistoryEntry for each EPP resource")
        .setModuleName("backend")
        .setDefaultMapShards(NUM_SHARDS)
        .runMapOnly(
            new CreateSyntheticHistoryEntriesMapper(registryAdminRegistrarId),
            ImmutableList.of(EppResourceInputs.createKeyInput(EppResource.class)))
        .sendLinkToMapreduceConsole(response);
  }

  /** Mapper that saves one synthetic {@link HistoryEntry} for each EPP resource key it is given. */
  public static class CreateSyntheticHistoryEntriesMapper
      extends Mapper<Key<EppResource>, Void, Void> {

    private final String registryAdminRegistrarId;

    /**
     * Creates the mapper.
     *
     * @param registryAdminRegistrarId registrar ID recorded as the client on every synthetic entry
     */
    public CreateSyntheticHistoryEntriesMapper(String registryAdminRegistrarId) {
      this.registryAdminRegistrarId = registryAdminRegistrarId;
    }

    /**
     * Loads the resource and stores a synthetic history entry parented on it, all within a single
     * transaction.
     *
     * @param resourceKey key of the EPP resource to create a history entry for
     */
    @Override
    public final void map(final Key<EppResource> resourceKey) {
      tm().transact(
              () -> {
                EppResource eppResource = ofy().load().key(resourceKey).now();
                if (eppResource == null) {
                  // The key was enumerated by the input but the entity no longer exists by the
                  // time this shard runs. There is nothing to snapshot, so skip it instead of
                  // handing a null parent to the builder.
                  return;
                }
                tm().put(
                        new HistoryEntry.Builder<>()
                            .setClientId(registryAdminRegistrarId)
                            .setBySuperuser(true)
                            .setRequestedByRegistrar(false)
                            .setModificationTime(tm().getTransactionTime())
                            .setParent(eppResource)
                            .setReason(
                                "Backfill EppResource history objects during Cloud SQL migration")
                            .setType(HistoryEntry.Type.SYNTHETIC)
                            .build());
              });
    }
  }
}

View file

@ -0,0 +1,115 @@
// Copyright 2021 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.tools.javascrap;
import static com.google.common.truth.Truth.assertThat;
import static google.registry.testing.DatabaseHelper.createTld;
import static google.registry.testing.DatabaseHelper.loadByKey;
import static google.registry.testing.DatabaseHelper.persistActiveDomain;
import static google.registry.testing.DatabaseHelper.persistActiveHost;
import static google.registry.testing.DatabaseHelper.persistDomainAsDeleted;
import static google.registry.testing.DatabaseHelper.persistDomainWithDependentResources;
import static google.registry.util.DateTimeUtils.END_OF_TIME;
import static google.registry.util.DateTimeUtils.START_OF_TIME;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.googlecode.objectify.Key;
import google.registry.model.EppResource;
import google.registry.model.contact.ContactResource;
import google.registry.model.domain.DomainBase;
import google.registry.model.host.HostResource;
import google.registry.model.reporting.HistoryEntry;
import google.registry.model.reporting.HistoryEntryDao;
import google.registry.testing.FakeResponse;
import google.registry.testing.mapreduce.MapreduceTestCase;
import org.joda.time.DateTime;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
/** Unit tests for {@link CreateSyntheticHistoryEntriesAction}. */
public class CreateSyntheticHistoryEntriesActionTest
    extends MapreduceTestCase<CreateSyntheticHistoryEntriesAction> {

  private DomainBase domain;
  private ContactResource contact;

  @BeforeEach
  void beforeEach() {
    action =
        new CreateSyntheticHistoryEntriesAction(
            makeDefaultRunner(), new FakeResponse(), "adminRegistrarId");
    createTld("tld");
    domain = persistActiveDomain("example.tld");
    contact = loadByKey(domain.getAdminContact());
  }

  /** Every contact, domain and host ends up with exactly one synthetic history entry. */
  @Test
  void testCreation_forAllTypes() throws Exception {
    DomainBase secondDomain = persistActiveDomain("exampletwo.tld");
    ContactResource secondContact = loadByKey(secondDomain.getAdminContact());
    HostResource firstHost = persistActiveHost("ns1.foobar.tld");
    HostResource secondHost = persistActiveHost("ns1.baz.tld");
    assertThat(HistoryEntryDao.loadAllHistoryObjects(START_OF_TIME, END_OF_TIME)).isEmpty();

    runMapreduce();

    ImmutableList<EppResource> allResources =
        ImmutableList.of(contact, secondContact, domain, secondDomain, firstHost, secondHost);
    for (EppResource eppResource : allResources) {
      assertOnlySyntheticEntryFor(eppResource);
    }
    assertThat(HistoryEntryDao.loadAllHistoryObjects(START_OF_TIME, END_OF_TIME)).hasSize(6);
  }

  /** A resource that already has history gains one more entry, and the last one is synthetic. */
  @Test
  void testCreation_withPreviousHistoryEntry() throws Exception {
    DateTime creationTime = DateTime.parse("1999-04-03T22:00:00.0Z");
    DomainBase domainWithHistory =
        persistDomainWithDependentResources(
            "foobar", "tld", contact, creationTime, creationTime, creationTime.plusYears(1));
    HistoryEntry preexistingEntry = Iterables.getOnlyElement(historyOf(domainWithHistory));
    assertThat(preexistingEntry.getType()).isEqualTo(HistoryEntry.Type.DOMAIN_CREATE);

    runMapreduce();

    Iterable<? extends HistoryEntry> entriesAfterRun = historyOf(domainWithHistory);
    assertThat(entriesAfterRun).hasSize(2);
    HistoryEntry newestEntry = Iterables.getLast(entriesAfterRun);
    assertThat(newestEntry.getType()).isEqualTo(HistoryEntry.Type.SYNTHETIC);
  }

  /** A deleted domain still receives a synthetic history entry. */
  @Test
  void testCreation_forDeletedResource() throws Exception {
    persistDomainAsDeleted(domain, domain.getCreationTime().plusMonths(6));

    runMapreduce();

    HistoryEntry onlyEntry = Iterables.getOnlyElement(historyOf(domain));
    assertThat(onlyEntry.getType()).isEqualTo(HistoryEntry.Type.SYNTHETIC);
  }

  /** Asserts that the resource has a single history entry, parented on it, of type SYNTHETIC. */
  private static void assertOnlySyntheticEntryFor(EppResource eppResource) {
    HistoryEntry onlyEntry = Iterables.getOnlyElement(historyOf(eppResource));
    assertThat(onlyEntry.getParent()).isEqualTo(Key.create(eppResource));
    assertThat(onlyEntry.getType()).isEqualTo(HistoryEntry.Type.SYNTHETIC);
  }

  /** Loads the history objects stored for the given resource. */
  private static Iterable<? extends HistoryEntry> historyOf(EppResource eppResource) {
    return HistoryEntryDao.loadHistoryObjectsForResource(eppResource.createVKey());
  }

  /** Runs the action, then drains the "mapreduce" task queue. */
  private void runMapreduce() throws Exception {
    action.run();
    executeTasksUntilEmpty("mapreduce");
  }
}

View file

@ -8,6 +8,7 @@ PATH CLASS METHOD
/_dr/task/brdaCopy BrdaCopyAction POST y INTERNAL,API APP ADMIN
/_dr/task/checkDatastoreBackup CheckBackupAction POST,GET y INTERNAL,API APP ADMIN
/_dr/task/copyDetailReports CopyDetailReportsAction POST n INTERNAL,API APP ADMIN
/_dr/task/createSyntheticHistoryEntries CreateSyntheticHistoryEntriesAction GET n INTERNAL,API APP ADMIN
/_dr/task/deleteContactsAndHosts DeleteContactsAndHostsAction GET n INTERNAL,API APP ADMIN
/_dr/task/deleteExpiredDomains DeleteExpiredDomainsAction GET n INTERNAL,API APP ADMIN
/_dr/task/deleteLoadTestData DeleteLoadTestDataAction POST n INTERNAL,API APP ADMIN