Save bloom filters for premium list entries

This is the first step in the migration to remove the need to load all of
the premium list entries every time the cache expires (which causes slow-
downs). Once this is deployed, we can re-save all premium lists, creating
the bloom filters, and then the next step will be to read from them to
more efficiently determine if a label might be premium.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=147525017
This commit is contained in:
mcilwain 2017-02-14 14:56:48 -08:00 committed by Ben McIlwain
parent cdadb54acd
commit e8c5720826
8 changed files with 188 additions and 16 deletions

View file

@ -24,6 +24,7 @@ import com.google.appengine.api.datastore.AsyncDatastoreService;
import com.google.appengine.api.datastore.DatastoreServiceConfig;
import com.google.appengine.api.datastore.DatastoreServiceFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.googlecode.objectify.Key;
import com.googlecode.objectify.Objectify;
@ -35,6 +36,7 @@ import com.googlecode.objectify.impl.translate.opt.joda.MoneyStringTranslatorFac
import google.registry.config.RegistryEnvironment;
import google.registry.model.EntityClasses;
import google.registry.model.ImmutableObject;
import google.registry.model.translators.BloomFilterOfStringTranslatorFactory;
import google.registry.model.translators.CidrAddressBlockTranslatorFactory;
import google.registry.model.translators.CommitLogRevisionsTranslatorFactory;
import google.registry.model.translators.CreateAutoTimestampTranslatorFactory;
@ -43,7 +45,6 @@ import google.registry.model.translators.DurationTranslatorFactory;
import google.registry.model.translators.InetAddressTranslatorFactory;
import google.registry.model.translators.ReadableInstantUtcTranslatorFactory;
import google.registry.model.translators.UpdateAutoTimestampTranslatorFactory;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
@ -120,7 +121,8 @@ public class ObjectifyService {
/** Register translators that allow less common types to be stored directly in Datastore. */
private static void registerTranslators() {
for (TranslatorFactory<?> translatorFactory : Arrays.asList(
for (TranslatorFactory<?> translatorFactory : ImmutableList.of(
new BloomFilterOfStringTranslatorFactory(),
new CidrAddressBlockTranslatorFactory(),
new CommitLogRevisionsTranslatorFactory(),
new CreateAutoTimestampTranslatorFactory(),

View file

@ -19,6 +19,7 @@ import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.Iterables.partition;
import static com.google.common.hash.Funnels.unencodedCharsFunnel;
import static google.registry.config.RegistryConfig.getDomainLabelListCacheDuration;
import static google.registry.model.common.EntityGroupRoot.getCrossTldKey;
import static google.registry.model.ofy.ObjectifyService.allocateId;
@ -29,6 +30,7 @@ import static google.registry.util.CollectionUtils.nullToEmptyImmutableCopy;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import com.google.appengine.api.datastore.EntityNotFoundException;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
@ -38,6 +40,7 @@ import com.google.common.cache.CacheLoader.InvalidCacheLoadException;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.hash.BloomFilter;
import com.google.common.util.concurrent.UncheckedExecutionException;
import com.googlecode.objectify.Key;
import com.googlecode.objectify.VoidWork;
@ -52,11 +55,13 @@ import com.googlecode.objectify.cmd.Query;
import google.registry.model.Buildable;
import google.registry.model.ImmutableObject;
import google.registry.model.annotations.ReportedOn;
import google.registry.model.annotations.VirtualEntity;
import google.registry.model.registry.Registry;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import javax.annotation.Nullable;
import org.joda.money.Money;
@ -76,24 +81,65 @@ public final class PremiumList extends BaseDomainLabelList<Money, PremiumList.Pr
/** Stores the revision key for the set of currently used premium list entry entities. */
Key<PremiumListRevision> revisionKey;
/** The revision to be saved along with this entity. */
@Ignore
PremiumListRevision revision;
@Ignore
Map<String, PremiumListEntry> premiumListMap;
/** Virtual parent entity for premium list entry entities associated with a single revision. */
@ReportedOn
@Entity
@VirtualEntity
@Cache(expirationSeconds = RECOMMENDED_MEMCACHE_EXPIRATION)
public static class PremiumListRevision extends ImmutableObject {
@Parent
Key<PremiumList> parent;
@Id
long revisionId;
static Key<PremiumListRevision> createKey(PremiumList parent) {
/**
* A bloom filter that is used to determine efficiently and quickly whether a label might be
* premium.
*
* <p>If the label might be premium, then the premium list entry must be loaded by key and
* checked for existence. Otherwise, we know it's not premium, and no Datastore load is
* required.
*/
BloomFilter<String> probablePremiumLabels;
/**
* The maximum size of the bloom filter.
*
* <p>Trying to set it any larger will throw an error, as we know it won't fit into a Datastore
* entity. We use 90% of the 1 MB Datastore limit to leave some wriggle room for the other
* fields and miscellaneous entity serialization overhead.
*/
private static final int MAX_BLOOM_FILTER_BYTES = 900000;
/** Returns a new PremiumListRevision for the given key and premium list map. */
static PremiumListRevision create(PremiumList parent, Set<String> premiumLabels) {
PremiumListRevision revision = new PremiumListRevision();
revision.parent = Key.create(parent);
revision.revisionId = allocateId();
return Key.create(revision);
// All premium list labels are already punycoded, so don't perform any further character
// encoding on them.
revision.probablePremiumLabels =
BloomFilter.create(unencodedCharsFunnel(), premiumLabels.size());
for (String label : premiumLabels) {
revision.probablePremiumLabels.put(label);
}
try {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
revision.probablePremiumLabels.writeTo(bos);
checkArgument(bos.size() <= MAX_BLOOM_FILTER_BYTES,
"Too many premium labels were specified; bloom filter exceeds max entity size");
} catch (IOException e) {
throw new IllegalStateException("Could not serialize premium labels bloom filter", e);
}
return revision;
}
}
@ -132,7 +178,12 @@ public final class PremiumList extends BaseDomainLabelList<Money, PremiumList.Pr
}
@OnLoad
private void loadPremiumListMap() {
private void onLoad() {
if (revisionKey != null) {
revision = ofy().load().key(revisionKey).now();
}
// TODO(b/32383610): Don't load up the premium list entries.
try {
ImmutableMap.Builder<String, PremiumListEntry> entriesMap = new ImmutableMap.Builder<>();
if (revisionKey != null) {
@ -159,10 +210,16 @@ public final class PremiumList extends BaseDomainLabelList<Money, PremiumList.Pr
return nullToEmptyImmutableCopy(premiumListMap);
}
@VisibleForTesting
public Key<PremiumListRevision> getRevisionKey() {
return revisionKey;
}
@VisibleForTesting
public PremiumListRevision getRevision() {
return revision;
}
/** Returns the PremiumList with the specified name. */
public static Optional<PremiumList> get(String name) {
try {
@ -278,7 +335,8 @@ public final class PremiumList extends BaseDomainLabelList<Money, PremiumList.Pr
}});
}
}
// Save the new PremiumList itself.
// Save the new PremiumList and revision itself.
PremiumList updated = ofy().transactNew(new Work<PremiumList>() {
@Override
public PremiumList run() {
@ -294,14 +352,14 @@ public final class PremiumList extends BaseDomainLabelList<Money, PremiumList.Pr
.setCreationTime(
oldPremiumList.isPresent() ? oldPremiumList.get().creationTime : now)
.build();
ofy().save().entity(newList);
ofy().save().entities(newList, revision);
return newList;
}});
// Update the cache.
PremiumList.cache.put(name, updated);
// If needed and there are any, delete the entities under the old PremiumList.
if (entriesToUpdate && oldPremiumList.isPresent()) {
oldPremiumList.get().deleteEntries();
oldPremiumList.get().deleteRevisionAndEntries();
}
return updated;
}
@ -318,11 +376,11 @@ public final class PremiumList extends BaseDomainLabelList<Money, PremiumList.Pr
public void vrun() {
ofy().delete().entity(PremiumList.this);
}});
deleteEntries();
deleteRevisionAndEntries();
cache.invalidate(name);
}
private void deleteEntries() {
private void deleteRevisionAndEntries() {
if (revisionKey == null) {
return;
}
@ -335,6 +393,11 @@ public final class PremiumList extends BaseDomainLabelList<Money, PremiumList.Pr
ofy().delete().keys(batch);
}});
}
ofy().transactNew(new VoidWork() {
@Override
public void vrun() {
ofy().delete().key(revisionKey);
}});
}
private Query<PremiumListEntry> loadEntriesForCurrentRevision() {
@ -371,8 +434,9 @@ public final class PremiumList extends BaseDomainLabelList<Money, PremiumList.Pr
@Override
public PremiumList build() {
final PremiumList instance = getInstance();
if (getInstance().revisionKey == null || entriesWereUpdated) {
getInstance().revisionKey = PremiumListRevision.createKey(instance);
if (instance.revisionKey == null || entriesWereUpdated) {
instance.revision = PremiumListRevision.create(instance, instance.premiumListMap.keySet());
instance.revisionKey = Key.create(instance.revision);
}
// When we build an instance, make sure all entries are parented on its revisionKey.
instance.premiumListMap = Maps.transformValues(

View file

@ -0,0 +1,90 @@
// Copyright 2017 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.model.translators;
import static com.google.common.hash.Funnels.unencodedCharsFunnel;
import static com.googlecode.objectify.repackaged.gentyref.GenericTypeReflector.erase;
import static com.googlecode.objectify.repackaged.gentyref.GenericTypeReflector.getTypeParameter;
import com.google.appengine.api.datastore.Blob;
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnel;
import com.google.common.reflect.TypeToken;
import com.googlecode.objectify.impl.Path;
import com.googlecode.objectify.impl.Property;
import com.googlecode.objectify.impl.translate.CreateContext;
import com.googlecode.objectify.impl.translate.LoadContext;
import com.googlecode.objectify.impl.translate.SaveContext;
import com.googlecode.objectify.impl.translate.Translator;
import com.googlecode.objectify.impl.translate.TranslatorFactory;
import com.googlecode.objectify.impl.translate.ValueTranslator;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.lang.reflect.Type;
import javax.annotation.Nullable;
/** Stores CharSequence {@link BloomFilter}s as blobs. */
public class BloomFilterOfStringTranslatorFactory
implements TranslatorFactory<BloomFilter<String>> {
@Override
public Translator<BloomFilter<String>> create(
Path path, Property property, Type type, CreateContext ctx) {
if (!BloomFilter.class.equals(erase(type))) {
return null; // Skip me and try to find another matching translator
}
Type fieldBloomFilterType = getTypeParameter(type, BloomFilter.class.getTypeParameters()[0]);
if (fieldBloomFilterType == null) {
return null; // No type information is available
}
if (!TypeToken.of(String.class).getType().equals(fieldBloomFilterType)) {
return null; // We can only handle BloomFilters of CharSequences
}
return new ValueTranslator<BloomFilter<String>, Blob>(path, Blob.class) {
@Override
@Nullable
protected BloomFilter<String> loadValue(Blob value, LoadContext ctx) {
if (value == null) {
return null;
}
try {
@SuppressWarnings("unchecked")
Funnel<String> castedFunnel = (Funnel<String>) (Funnel<?>) unencodedCharsFunnel();
return BloomFilter.readFrom(new ByteArrayInputStream(value.getBytes()), castedFunnel);
} catch (IOException e) {
throw new IllegalStateException("Error loading bloom filter data", e);
}
}
@Override
@Nullable
protected Blob saveValue(BloomFilter<String> value, SaveContext ctx) {
if (value == null) {
return null;
}
ByteArrayOutputStream bos = new ByteArrayOutputStream();
try {
value.writeTo(bos);
} catch (IOException e) {
throw new IllegalStateException("Error saving bloom filter data", e);
}
return new Blob(bos.toByteArray());
}
};
}
}

View file

@ -17,6 +17,7 @@ OneTime
PollMessage
PremiumList
PremiumListEntry
PremiumListRevision
RdeRevision
Recurring
Registrar

View file

@ -13,6 +13,7 @@ Modification
OneTime
PremiumList
PremiumListEntry
PremiumListRevision
Recurring
Registrar
RegistrarContact

View file

@ -15,6 +15,7 @@
package google.registry.model.registry.label;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth.assertWithMessage;
import static google.registry.model.ofy.ObjectifyService.ofy;
import static google.registry.model.registry.label.PremiumList.getPremiumPrice;
import static google.registry.testing.DatastoreHelper.createTld;
@ -54,13 +55,14 @@ public class PremiumListTest {
@Before
public void before() throws Exception {
// createTld() overwrites the premium list, so call it first.
createTld("tld");
PremiumList pl = persistPremiumList(
"tld",
"lol,USD 999 # yup",
"rich,USD 1999 #tada",
"icann,JPY 100",
"johnny-be-goode,USD 20.50");
createTld("tld");
persistResource(Registry.get("tld").asBuilder().setPremiumList(pl).build());
}
@ -260,6 +262,17 @@ public class PremiumListTest {
.isNotEqualTo(pl.getRevisionKey());
}
@Test
public void testProbablePremiumLabels() throws Exception {
PremiumList pl = PremiumList.get("tld").get();
assertThat(pl.getRevision().probablePremiumLabels.mightContain("notpremium")).isFalse();
for (String label : ImmutableList.of("rich", "lol", "johnny-be-goode", "icann")) {
assertWithMessage(label + " should be a probable premium")
.that(pl.getRevision().probablePremiumLabels.mightContain(label))
.isTrue();
}
}
/** Persists a premium list with a specified number of nonsense entries. */
private PremiumList persistHumongousPremiumList(String name, int size) {
String[] entries = new String[size];

View file

@ -742,6 +742,7 @@ class google.registry.model.registry.label.PremiumList$PremiumListEntry {
class google.registry.model.registry.label.PremiumList$PremiumListRevision {
@Id long revisionId;
@Parent com.googlecode.objectify.Key<google.registry.model.registry.label.PremiumList> parent;
com.google.common.hash.BloomFilter<java.lang.String> probablePremiumLabels;
}
enum google.registry.model.registry.label.ReservationType {
ALLOWED_IN_SUNRISE;

View file

@ -357,7 +357,7 @@ public class DatastoreHelper {
// that we can avoid writing commit logs. This would cause issues since many tests replace the
// clock in Ofy with a non-advancing FakeClock, and commit logs currently require
// monotonically increasing timestamps.
ofy().saveWithoutBackup().entity(premiumList).now();
ofy().saveWithoutBackup().entities(premiumList, premiumList.getRevision()).now();
ofy().saveWithoutBackup().entities(premiumList.getPremiumListEntries().values()).now();
return premiumList;
}