Switch to SQL sequence based allocateId, remove SelfAllocateId (#1831)

This commit is contained in:
Pavlo Tkach 2022-11-10 18:25:40 -05:00 committed by GitHub
parent 1608927e03
commit 4add693d01
13 changed files with 83 additions and 153 deletions

View file

@ -16,7 +16,6 @@ package google.registry.beam.common;
import google.registry.beam.common.RegistryJpaIO.Write;
import google.registry.config.RegistryEnvironment;
import google.registry.model.annotations.DeleteAfterMigration;
import google.registry.persistence.PersistenceModule.JpaTransactionManagerType;
import google.registry.persistence.PersistenceModule.TransactionIsolationLevel;
import java.util.Objects;
@ -66,17 +65,6 @@ public interface RegistryPipelineOptions extends GcpOptions {
void setSqlWriteShards(int maxConcurrentSqlWriters);
@DeleteAfterMigration
@Description(
"Whether to use self allocated primary IDs when building entities. This should only be used"
+ " when the IDs are not significant and the resulting entities are not persisted back to"
+ " the database. Use with caution as self allocated IDs are not unique across workers,"
+ " and persisting entities with these IDs can be dangerous.")
@Default.Boolean(false)
boolean getUseSelfAllocatedId();
void setUseSelfAllocatedId(boolean useSelfAllocatedId);
static RegistryPipelineComponent toRegistryPipelineComponent(RegistryPipelineOptions options) {
return DaggerRegistryPipelineComponent.builder()
.isolationOverride(options.getIsolationOverride())

View file

@ -22,8 +22,6 @@ import dagger.Lazy;
import google.registry.config.RegistryEnvironment;
import google.registry.config.SystemPropertySetter;
import google.registry.model.AppEngineEnvironment;
import google.registry.model.IdService;
import google.registry.model.IdService.SelfAllocatedIdSupplier;
import google.registry.persistence.transaction.JpaTransactionManager;
import google.registry.persistence.transaction.TransactionManagerFactory;
import org.apache.beam.sdk.harness.JvmInitializer;
@ -69,15 +67,5 @@ public class RegistryPipelineWorkerInitializer implements JvmInitializer {
new AppEngineEnvironment("s~" + registryPipelineComponent.getProjectId())
.setEnvironmentForAllThreads();
SystemPropertySetter.PRODUCTION_IMPL.setProperty(PROPERTY, "true");
// Use self-allocated IDs if requested. Note that this inevitably results in duplicate IDs from
// multiple workers, which can also collide with existing IDs in the database. So they cannot be
// dependent upon for comparison or anything significant. The resulting entities can never be
// persisted back into the database. This is a stop-gap measure that should only be used when
// you need to create Buildables in Beam, but do not have control over how the IDs are
// allocated, and you don't care about the generated IDs as long
// as you can build the entities.
if (registryOptions.getUseSelfAllocatedId()) {
IdService.setIdSupplier(SelfAllocatedIdSupplier.getInstance());
}
}
}

View file

@ -689,12 +689,7 @@ public class RdePipeline implements Serializable {
PipelineOptionsFactory.register(RdePipelineOptions.class);
RdePipelineOptions options =
PipelineOptionsFactory.fromArgs(args).withValidation().as(RdePipelineOptions.class);
// We need to self allocate the IDs because the pipeline creates EPP resources from history
// entries and projects them to watermark. These buildable entities would otherwise request an
// ID from datastore, which Beam does not have access to. The IDs are not included in the
// deposits or are these entities persisted back to the database, so it is OK to use a self
// allocated ID to get around the limitations of beam.
options.setUseSelfAllocatedId(true);
RegistryPipelineOptions.validateRegistryPipelineOptions(options);
options.setIsolationOverride(TransactionIsolationLevel.TRANSACTION_READ_COMMITTED);
DaggerRdePipeline_RdePipelineComponent.builder().options(options).build().rdePipeline().run();

View file

@ -19,7 +19,7 @@ import static com.google.common.base.Preconditions.checkState;
import static google.registry.model.IdService.allocateId;
import static google.registry.model.ModelUtils.getAllFields;
import google.registry.model.annotations.OfyIdAllocation;
import google.registry.model.annotations.IdAllocation;
import google.registry.util.TypeUtils.TypeInstantiator;
import java.lang.reflect.Field;
import java.util.Optional;
@ -55,10 +55,10 @@ public interface Buildable {
/** Build the instance. */
public S build() {
try {
// If this object has a Long or long @OfyIdAllocation field that is not set, set it now.
// If this object has a Long or long @IdAllocation field that is not set, set it now.
Field idField =
getAllFields(instance.getClass()).values().stream()
.filter(field -> field.isAnnotationPresent(OfyIdAllocation.class))
.filter(field -> field.isAnnotationPresent(IdAllocation.class))
.findFirst()
.orElse(null);
if (idField != null

View file

@ -33,7 +33,7 @@ import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import google.registry.config.RegistryConfig;
import google.registry.model.CacheUtils.AppEngineEnvironmentCacheLoader;
import google.registry.model.annotations.OfyIdAllocation;
import google.registry.model.annotations.IdAllocation;
import google.registry.model.eppcommon.StatusValue;
import google.registry.model.transfer.TransferData;
import google.registry.persistence.VKey;
@ -67,7 +67,7 @@ public abstract class EppResource extends UpdateAutoTimestampEntity implements B
*
* @see <a href="https://tools.ietf.org/html/rfc5730">RFC 5730</a>
*/
@OfyIdAllocation @Transient String repoId;
@IdAllocation @Transient String repoId;
/**
* The ID of the registrar that is currently sponsoring this resource.

View file

@ -14,117 +14,63 @@
//
package google.registry.model;
import static com.google.common.base.Preconditions.checkState;
import static google.registry.persistence.transaction.TransactionManagerFactory.jpaTm;
import static org.joda.time.DateTimeZone.UTC;
import com.google.appengine.api.datastore.DatastoreServiceFactory;
import com.google.common.flogger.FluentLogger;
import google.registry.beam.common.RegistryPipelineWorkerInitializer;
import google.registry.config.RegistryEnvironment;
import google.registry.model.annotations.DeleteAfterMigration;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
import google.registry.model.common.DatabaseMigrationStateSchedule;
import google.registry.model.common.DatabaseMigrationStateSchedule.MigrationState;
import java.math.BigInteger;
import org.joda.time.DateTime;
/**
* Allocates a {@link long} to use as a {@code @Id}, (part) of the primary SQL key for an entity.
*
* <p>Normally, the ID is globally unique and allocated by Datastore. It is possible to override
* this behavior by providing an ID supplier, such as in unit tests, where a self-allocated ID based
* on a monotonically increasing atomic {@link long} is used. Such an ID supplier can also be used
* in other scenarios, such as in a Beam pipeline to get around the limitation of Beam's inability
* to use GAE SDK to access Datastore. The override should be used with great care lest it results
* in irreversible data corruption.
*
* @see #setIdSupplier(Supplier)
*/
@DeleteAfterMigration
public final class IdService {
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
/**
* A SQL Sequence based ID allocator that generates an ID from a monotonically increasing atomic
* {@link long}
*
* <p>The generated IDs are project-wide unique
*/
private static Long getSequenceBasedId() {
return jpaTm()
.transact(
() ->
(BigInteger)
jpaTm()
.getEntityManager()
.createNativeQuery("SELECT nextval('project_wide_unique_id_seq')")
.getSingleResult())
.longValue();
}
// TODO(ptkach): Remove once all instances switch to sequenceBasedId
/**
* A Datastore based ID allocator that generates an ID from a monotonically increasing atomic
* {@link long}
*
* <p>The generated IDs are project-wide unique
*/
private static Long getDatastoreBasedId() {
return DatastoreServiceFactory.getDatastoreService()
.allocateIds("common", 1)
.iterator()
.next()
.getId();
}
private IdService() {}
private static Supplier<Long> idSupplier =
RegistryEnvironment.UNITTEST.equals(RegistryEnvironment.get())
? SelfAllocatedIdSupplier.getInstance()
: DatastoreIdSupplier.getInstance();
/**
* Provides a {@link Supplier} of ID that overrides the default.
*
* <p>Currently, the only use case for an override is in the Beam pipeline, where access to
* Datastore is not possible through the App Engine API. As such, the setter explicitly checks if
* the runtime is Beam.
*
* <p>Because the provided supplier is not guaranteed to be globally unique and compatible with
* existing IDs in the database, one should proceed with great care. It is safe to use an
* arbitrary supplier when the resulting IDs are not significant and not persisted back to the
* database, i.e. the IDs are only required by the {@link Buildable} contract but are not used in
* any meaningful way. One example is the RDE pipeline where we project EPP resource entities from
* history entries to watermark time, which are then marshalled into XML elements in the RDE
* deposits, where the IDs are omitted.
*/
public static void setIdSupplier(Supplier<Long> idSupplier) {
checkState(
"true".equals(System.getProperty(RegistryPipelineWorkerInitializer.PROPERTY, "false")),
"Can only set ID supplier in a Beam pipeline");
logger.atWarning().log("Using ID supplier override!");
IdService.idSupplier = idSupplier;
}
/** Allocates an id. */
public static long allocateId() {
return idSupplier.get();
}
// TODO(b/201547855): Find a way to allocate a unique ID without datastore.
private static class DatastoreIdSupplier implements Supplier<Long> {
private static final DatastoreIdSupplier INSTANCE = new DatastoreIdSupplier();
/**
* A placeholder String passed into {@code DatastoreService.allocateIds} that ensures that all
* IDs are initialized from the same ID pool.
*/
private static final String APP_WIDE_ALLOCATION_KIND = "common";
public static DatastoreIdSupplier getInstance() {
return INSTANCE;
}
@Override
public Long get() {
return DatastoreServiceFactory.getDatastoreService()
.allocateIds(APP_WIDE_ALLOCATION_KIND, 1)
.iterator()
.next()
.getId();
}
}
/**
* An ID supplier that allocates an ID from a monotonically increasing atomic {@link long}.
*
* <p>The generated IDs are only unique within the same JVM. It is not suitable for production use
* unless in cases the IDs are not significant.
*/
public static class SelfAllocatedIdSupplier implements Supplier<Long> {
private static final SelfAllocatedIdSupplier INSTANCE = new SelfAllocatedIdSupplier();
/** Counts of used ids for self allocating IDs. */
private static final AtomicLong nextSelfAllocatedId = new AtomicLong(1); // ids cannot be zero
public static SelfAllocatedIdSupplier getInstance() {
return INSTANCE;
}
@Override
public Long get() {
return nextSelfAllocatedId.getAndIncrement();
}
public void reset() {
nextSelfAllocatedId.set(1);
}
return (DatabaseMigrationStateSchedule.getValueAtTime(DateTime.now(UTC))
.equals(MigrationState.SEQUENCE_BASED_ALLOCATE_ID)
|| RegistryEnvironment.UNITTEST.equals(RegistryEnvironment.get()))
? getSequenceBasedId()
: getDatastoreBasedId();
}
}

View file

@ -21,16 +21,9 @@ import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Annotation to indicate an ID field that needs to be allocated by Ofy.
*
* <p>This annotation is only used for the field of a {@link google.registry.model.Buildable} class
* that was previously annotated by both Ofy's and JPA's {@code @Id} annotations, of which the Ofy
* annotation has been removed. The field still needs to be allocated automatically by the builder,
* via the {@link IdService#allocateId()}.
*
* <p>It should be removed after we switch to using SQL to directly allocate IDs.
* This annotation is needed for any ID field that needs to be allocated with {@link IdService}
* class
*/
@DeleteAfterMigration
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.FIELD)
public @interface OfyIdAllocation {}
public @interface IdAllocation {}

View file

@ -28,7 +28,7 @@ import com.google.common.collect.ImmutableSet;
import google.registry.model.Buildable;
import google.registry.model.ImmutableObject;
import google.registry.model.UnsafeSerializable;
import google.registry.model.annotations.OfyIdAllocation;
import google.registry.model.annotations.IdAllocation;
import google.registry.model.common.TimeOfYear;
import google.registry.model.domain.DomainHistory;
import google.registry.model.domain.GracePeriod;
@ -146,7 +146,7 @@ public abstract class BillingEvent extends ImmutableObject
}
/** Entity id. */
@OfyIdAllocation @Id Long id;
@IdAllocation @Id Long id;
/** The registrar to bill. */
@Column(name = "registrarId", nullable = false)

View file

@ -82,7 +82,10 @@ public class DatabaseMigrationStateSchedule extends CrossTldSingleton {
SQL_PRIMARY(PrimaryDatabase.CLOUD_SQL, false, ReplayDirection.SQL_TO_DATASTORE),
/** Cloud SQL is the only DB being used. */
SQL_ONLY(PrimaryDatabase.CLOUD_SQL, false, ReplayDirection.NO_REPLAY);
SQL_ONLY(PrimaryDatabase.CLOUD_SQL, false, ReplayDirection.NO_REPLAY),
/** Toggles SQL Sequence based allocateId */
SEQUENCE_BASED_ALLOCATE_ID(PrimaryDatabase.CLOUD_SQL, false, ReplayDirection.NO_REPLAY);
private final PrimaryDatabase primaryDatabase;
private final boolean isReadOnly;
@ -160,7 +163,8 @@ public class DatabaseMigrationStateSchedule extends CrossTldSingleton {
.putAll(
MigrationState.SQL_ONLY,
MigrationState.SQL_PRIMARY_READ_ONLY,
MigrationState.SQL_PRIMARY);
MigrationState.SQL_PRIMARY)
.putAll(MigrationState.SQL_ONLY, MigrationState.SEQUENCE_BASED_ALLOCATE_ID);
// In addition, we can always transition from a state to itself (useful when updating the map).
Arrays.stream(MigrationState.values()).forEach(state -> builder.put(state, state));

View file

@ -25,7 +25,7 @@ import google.registry.model.EppResource;
import google.registry.model.ImmutableObject;
import google.registry.model.UnsafeSerializable;
import google.registry.model.annotations.ExternalMessagingName;
import google.registry.model.annotations.OfyIdAllocation;
import google.registry.model.annotations.IdAllocation;
import google.registry.model.contact.Contact;
import google.registry.model.contact.ContactHistory;
import google.registry.model.domain.Domain;
@ -120,7 +120,7 @@ public abstract class PollMessage extends ImmutableObject
/** Entity id. */
@Id
@OfyIdAllocation
@IdAllocation
@Column(name = "poll_message_id")
Long id;

View file

@ -21,7 +21,7 @@ import google.registry.model.Buildable;
import google.registry.model.EppResource;
import google.registry.model.ImmutableObject;
import google.registry.model.UnsafeSerializable;
import google.registry.model.annotations.OfyIdAllocation;
import google.registry.model.annotations.IdAllocation;
import google.registry.model.contact.ContactBase;
import google.registry.model.contact.ContactHistory;
import google.registry.model.domain.DomainBase;
@ -110,7 +110,7 @@ public abstract class HistoryEntry extends ImmutableObject
/** The autogenerated id of this event. */
@Id
@OfyIdAllocation
@IdAllocation
@Column(nullable = false, name = "historyRevisionId")
protected Long revisionId;

View file

@ -27,9 +27,12 @@ import com.googlecode.objectify.ObjectifyService;
import com.googlecode.objectify.annotation.Entity;
import com.googlecode.objectify.annotation.Id;
import google.registry.model.common.GaeUserIdConverter;
import google.registry.persistence.transaction.JpaTestExtensions;
import google.registry.persistence.transaction.JpaTestExtensions.JpaIntegrationTestExtension;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
/** Tests for our replacement Objectify filter. */
class OfyFilterTest {
@ -57,6 +60,10 @@ class OfyFilterTest {
helper.tearDown();
}
@RegisterExtension
final JpaIntegrationTestExtension database =
new JpaTestExtensions.Builder().buildIntegrationTestExtension();
/**
* Key.create looks up kind metadata for the class of the object it is given. If this happens
* before the first reference to ObjectifyService, which statically triggers type registrations,

View file

@ -17,6 +17,7 @@ package google.registry.testing;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.io.Files.asCharSink;
import static com.google.common.truth.Truth.assertWithMessage;
import static google.registry.persistence.transaction.TransactionManagerFactory.jpaTm;
import static google.registry.testing.DatabaseHelper.insertSimpleResources;
import static google.registry.util.PreconditionsUtils.checkArgumentNotNull;
import static google.registry.util.ResourceUtils.readResourceUtf8;
@ -40,7 +41,6 @@ import com.google.common.collect.Sets;
import com.google.common.io.Files;
import com.googlecode.objectify.Key;
import com.googlecode.objectify.ObjectifyFilter;
import google.registry.model.IdService.SelfAllocatedIdSupplier;
import google.registry.model.ofy.ObjectifyService;
import google.registry.model.registrar.Registrar;
import google.registry.model.registrar.Registrar.State;
@ -378,6 +378,17 @@ public final class AppEngineExtension implements BeforeEachCallback, AfterEachCa
jpaIntegrationTestExtension = builder.buildIntegrationTestExtension();
jpaIntegrationTestExtension.beforeEach(context);
}
// Reset SQL Sequence based id allocation so that ids are deterministic in tests.
jpaTm()
.transact(
() ->
jpaTm()
.getEntityManager()
.createNativeQuery(
"alter sequence if exists project_wide_unique_id_seq start 1 minvalue 1"
+ " restart with 1")
.executeUpdate());
}
if (withCloudSql) {
if (!withoutCannedData && !withJpaUnitTest) {
@ -440,8 +451,6 @@ public final class AppEngineExtension implements BeforeEachCallback, AfterEachCa
helper.setUp();
ObjectifyService.initOfy();
// Reset id allocation in ObjectifyService so that ids are deterministic in tests.
SelfAllocatedIdSupplier.getInstance().reset();
this.ofyTestEntities.forEach(AppEngineExtension::register);
}