Speed up updating of premium lists (#1482)

* Speed up updating of premium lists

There are two parts to this:
1. Don't load the premium entries in the command prompt (this isn't
necessary and we didn't display that information anyway).
2. Set a proper batch size (rather than just 1) when saving all the
premium entries. This means that we generate only one INSERT statement
rather than N statements.
This commit is contained in:
gbrodman 2022-01-10 16:33:35 -05:00 committed by GitHub
parent f9ef170a01
commit 61c9890b8b
12 changed files with 105 additions and 93 deletions

View file

@ -33,6 +33,7 @@ import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import dagger.Module;
import dagger.Provides;
import google.registry.persistence.transaction.JpaTransactionManager;
import google.registry.util.TaskQueueUtils;
import google.registry.util.YamlUtils;
import java.lang.annotation.Documented;
@ -1531,6 +1532,31 @@ public final class RegistryConfig {
return CONFIG_SETTINGS.get().hibernate.hikariIdleTimeout;
}
/**
* JDBC-specific: driver default batch size is 0, which means that every INSERT statement will be
* sent to the database individually. Batching allows us to group together multiple inserts into
* one single INSERT statement which can dramatically increase speed in situations with many
* inserts.
*
* <p>Hibernate docs, i.e.
* https://docs.jboss.org/hibernate/orm/5.6/userguide/html_single/Hibernate_User_Guide.html,
* recommend between 10 and 50.
*/
public static String getHibernateJdbcBatchSize() {
return CONFIG_SETTINGS.get().hibernate.jdbcBatchSize;
}
/**
* Returns the JDBC fetch size.
*
* <p>Postgresql-specific: driver default fetch size is 0, which disables streaming result sets.
* Here we set a small default geared toward Nomulus server transactions. Large queries can
* override the defaults using {@link JpaTransactionManager#setQueryFetchSize}.
*/
public static String getHibernateJdbcFetchSize() {
return CONFIG_SETTINGS.get().hibernate.jdbcFetchSize;
}
/** Returns the roid suffix to be used for the roids of all contacts and hosts. */
public static String getContactAndHostRoidSuffix() {
return CONFIG_SETTINGS.get().registryPolicy.contactAndHostRoidSuffix;

View file

@ -120,6 +120,8 @@ public class RegistryConfigSettings {
public String hikariMinimumIdle;
public String hikariMaximumPoolSize;
public String hikariIdleTimeout;
public String jdbcBatchSize;
public String jdbcFetchSize;
}
/** Configuration for Cloud SQL. */

View file

@ -221,6 +221,17 @@ hibernate:
hikariMinimumIdle: 1
hikariMaximumPoolSize: 10
hikariIdleTimeout: 300000
# The batch size is basically the number of insertions / updates in a single
# transaction that will be batched together into one INSERT/UPDATE statement.
# A larger batch size is useful when inserting or updating many entities in a
# single transaction. Hibernate docs
# (https://docs.jboss.org/hibernate/orm/5.6/userguide/html_single/Hibernate_User_Guide.html)
# recommend between 10 and 50.
jdbcBatchSize: 50
# The fetch size is the number of entities retrieved at a time from the
# database cursor. Here we set a small default geared toward Nomulus server
# transactions. Large queries can override the defaults on a per-query basis.
jdbcFetchSize: 20
cloudSql:
# jdbc url for the Cloud SQL database.

View file

@ -26,7 +26,6 @@ import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Streams;
import com.google.common.flogger.FluentLogger;
import com.google.common.net.MediaType;
import google.registry.config.RegistryConfig.Config;
@ -143,7 +142,7 @@ public class ExportPremiumTermsAction implements Runnable {
PremiumListDao.getLatestRevision(premiumListName).isPresent(),
"Could not load premium list for " + tld);
SortedSet<String> premiumTerms =
Streams.stream(PremiumListDao.loadAllPremiumEntries(premiumListName))
PremiumListDao.loadAllPremiumEntries(premiumListName).stream()
.map(PremiumEntry::toString)
.collect(ImmutableSortedSet.toImmutableSortedSet(String::compareTo));

View file

@ -21,7 +21,6 @@ import static com.google.common.hash.Funnels.stringFunnel;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Streams;
import com.google.common.hash.BloomFilter;
import google.registry.model.Buildable;
import google.registry.model.ImmutableObject;
@ -86,9 +85,8 @@ public final class PremiumList extends BaseDomainLabelList<BigDecimal, PremiumEn
*/
public synchronized ImmutableMap<String, BigDecimal> getLabelsToPrices() {
if (labelsToPrices == null) {
Iterable<PremiumEntry> entries = PremiumListDao.loadAllPremiumEntries(name);
labelsToPrices =
Streams.stream(entries)
PremiumListDao.loadAllPremiumEntries(name).stream()
.collect(
toImmutableMap(
PremiumEntry::getDomainLabel,

View file

@ -28,8 +28,8 @@ import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.CacheLoader.InvalidCacheLoadException;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Streams;
import google.registry.model.tld.label.PremiumList.PremiumEntry;
import google.registry.util.NonFinalForTesting;
import java.math.BigDecimal;
@ -56,8 +56,7 @@ public class PremiumListDao {
* <p>This is cached for a shorter duration because we need to periodically reload this entity to
* check if a new revision has been published, and if so, then use that.
*
* <p>We also cache the absence of premium lists with a given name to avoid unnecessary pointless
* lookups. Note that this cache is only applicable to PremiumList objects stored in SQL.
* <p>We also cache the absence of premium lists with a given name to avoid pointless lookups.
*/
@NonFinalForTesting
static LoadingCache<String, Optional<PremiumList>> premiumListCache =
@ -170,11 +169,10 @@ public class PremiumListDao {
if (!isNullOrEmpty(premiumList.getLabelsToPrices())) {
ImmutableSet.Builder<PremiumEntry> entries = new ImmutableSet.Builder<>();
premiumList.getLabelsToPrices().entrySet().stream()
premiumList
.getLabelsToPrices()
.forEach(
entry ->
entries.add(
PremiumEntry.create(revisionId, entry.getValue(), entry.getKey())));
(key, value) -> entries.add(PremiumEntry.create(revisionId, value, key)));
jpaTm().insertAll(entries.build());
}
});
@ -217,7 +215,7 @@ public class PremiumListDao {
*
* <p>This is an expensive operation and should only be used when the entire list is required.
*/
public static Iterable<PremiumEntry> loadPremiumEntries(PremiumList premiumList) {
public static List<PremiumEntry> loadPremiumEntries(PremiumList premiumList) {
return jpaTm()
.transact(
() ->
@ -254,15 +252,14 @@ public class PremiumListDao {
*
* <p>This is an expensive operation and should only be used when the entire list is required.
*/
public static Iterable<PremiumEntry> loadAllPremiumEntries(String premiumListName) {
public static ImmutableList<PremiumEntry> loadAllPremiumEntries(String premiumListName) {
PremiumList premiumList =
getLatestRevision(premiumListName)
.orElseThrow(
() ->
new IllegalArgumentException(
String.format("No premium list with name %s.", premiumListName)));
Iterable<PremiumEntry> entries = loadPremiumEntries(premiumList);
return Streams.stream(entries)
return loadPremiumEntries(premiumList).stream()
.map(
premiumEntry ->
new PremiumEntry.Builder()

View file

@ -20,6 +20,8 @@ import static google.registry.config.RegistryConfig.getHibernateHikariConnection
import static google.registry.config.RegistryConfig.getHibernateHikariIdleTimeout;
import static google.registry.config.RegistryConfig.getHibernateHikariMaximumPoolSize;
import static google.registry.config.RegistryConfig.getHibernateHikariMinimumIdle;
import static google.registry.config.RegistryConfig.getHibernateJdbcBatchSize;
import static google.registry.config.RegistryConfig.getHibernateJdbcFetchSize;
import static google.registry.config.RegistryConfig.getHibernateLogSqlQueries;
import static google.registry.persistence.transaction.TransactionManagerFactory.tm;
@ -76,15 +78,9 @@ public abstract class PersistenceModule {
public static final String HIKARI_DS_CLOUD_SQL_INSTANCE =
"hibernate.hikari.dataSource.cloudSqlInstance";
/**
* Postgresql-specific: driver default fetch size is 0, which disables streaming result sets. Here
* we set a small default geared toward Nomulus server transactions. Large queries can override
* the defaults using {@link JpaTransactionManager#setQueryFetchSize}.
*/
public static final String JDBC_BATCH_SIZE = "hibernate.jdbc.batch_size";
public static final String JDBC_FETCH_SIZE = "hibernate.jdbc.fetch_size";
private static final int DEFAULT_SERVER_FETCH_SIZE = 20;
@VisibleForTesting
@Provides
@DefaultHibernateConfigs
@ -111,7 +107,8 @@ public abstract class PersistenceModule {
properties.put(HIKARI_MAXIMUM_POOL_SIZE, getHibernateHikariMaximumPoolSize());
properties.put(HIKARI_IDLE_TIMEOUT, getHibernateHikariIdleTimeout());
properties.put(Environment.DIALECT, NomulusPostgreSQLDialect.class.getName());
properties.put(JDBC_FETCH_SIZE, Integer.toString(DEFAULT_SERVER_FETCH_SIZE));
properties.put(JDBC_BATCH_SIZE, getHibernateJdbcBatchSize());
properties.put(JDBC_FETCH_SIZE, getHibernateJdbcFetchSize());
return properties.build();
}

View file

@ -16,7 +16,6 @@ package google.registry.tools;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.google.common.collect.Streams;
import google.registry.model.tld.label.PremiumList;
import google.registry.model.tld.label.PremiumList.PremiumEntry;
import google.registry.model.tld.label.PremiumListDao;
@ -40,7 +39,7 @@ public class GetPremiumListCommand implements CommandWithRemoteApi {
System.out.printf(
"%s:\n%s\n",
premiumListName,
Streams.stream(PremiumListDao.loadAllPremiumEntries(premiumListName))
PremiumListDao.loadAllPremiumEntries(premiumListName).stream()
.sorted(Comparator.comparing(PremiumEntry::getDomainLabel))
.map(premiumEntry -> premiumEntry.toString(premiumList.get().getCurrency()))
.collect(Collectors.joining("\n")));

View file

@ -15,21 +15,15 @@
package google.registry.tools;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static google.registry.model.tld.label.PremiumListUtils.parseToPremiumList;
import static google.registry.persistence.transaction.TransactionManagerFactory.jpaTm;
import static google.registry.util.ListNamingUtils.convertFilePathToName;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.beust.jcommander.Parameters;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Streams;
import google.registry.model.tld.label.PremiumList;
import google.registry.model.tld.label.PremiumList.PremiumEntry;
import google.registry.model.tld.label.PremiumListDao;
import google.registry.model.tld.label.PremiumListUtils;
import java.nio.file.Files;
import java.util.List;
import java.util.Optional;
/** Command to safely update {@link PremiumList} in Database for a given TLD. */
@ -43,46 +37,12 @@ class UpdatePremiumListCommand extends CreateOrUpdatePremiumListCommand {
checkArgument(
list.isPresent(),
String.format("Could not update premium list %s because it doesn't exist.", name));
List<String> existingEntry = getExistingPremiumEntry(list.get()).asList();
inputData = Files.readAllLines(inputFile, UTF_8);
checkArgument(!inputData.isEmpty(), "New premium list data cannot be empty");
currency = list.get().getCurrency();
// reconstructing existing premium list to bypass Hibernate lazy initialization exception
PremiumList existingPremiumList = parseToPremiumList(name, currency, existingEntry);
PremiumList updatedPremiumList = parseToPremiumList(name, currency, inputData);
PremiumList updatedPremiumList = PremiumListUtils.parseToPremiumList(name, currency, inputData);
return String.format(
"Update premium list for %s?\n Old List: %s\n New List: %s",
name, existingPremiumList, updatedPremiumList);
}
/*
To get premium list content as a set of string. This is a workaround to avoid dealing with
Hibernate.LazyInitizationException error. It occurs when trying to access data of the
latest revision of an existing premium list.
"Cannot evaluate google.registry.model.tld.label.PremiumList.toString()'".
Ideally, the following should be the way to verify info in latest revision of a premium list:
PremiumList existingPremiumList =
PremiumListSqlDao.getLatestRevision(name)
.orElseThrow(
() ->
new IllegalArgumentException(
String.format(
"Could not update premium list %s because it doesn't exist.", name)));
assertThat(persistedList.getLabelsToPrices()).containsEntry("foo", new BigDecimal("9000.00"));
assertThat(persistedList.size()).isEqualTo(1);
*/
protected ImmutableSet<String> getExistingPremiumEntry(PremiumList list) {
Iterable<PremiumEntry> sqlListEntries =
jpaTm().transact(() -> PremiumListDao.loadPremiumEntries(list));
return Streams.stream(sqlListEntries)
.map(
premiumEntry ->
String.format(
"%s,%s %s",
premiumEntry.getDomainLabel(), list.getCurrency(), premiumEntry.getValue()))
.collect(toImmutableSet());
name, list, updatedPremiumList);
}
}

View file

@ -14,6 +14,7 @@
package google.registry.model.tld.label;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static google.registry.persistence.transaction.TransactionManagerFactory.jpaTm;
@ -24,14 +25,18 @@ import static org.joda.money.CurrencyUnit.USD;
import static org.joda.time.Duration.standardDays;
import static org.junit.jupiter.api.Assertions.assertThrows;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.flogger.FluentLogger;
import google.registry.persistence.transaction.TransactionManagerUtil;
import google.registry.testing.AppEngineExtension;
import google.registry.testing.FakeClock;
import google.registry.testing.TestCacheExtension;
import java.math.BigDecimal;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.stream.IntStream;
import org.joda.money.CurrencyUnit;
import org.joda.money.Money;
import org.junit.jupiter.api.BeforeEach;
@ -41,6 +46,8 @@ import org.junit.jupiter.api.extension.RegisterExtension;
/** Unit tests for {@link PremiumListDao}. */
public class PremiumListDaoTest {
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
private final FakeClock fakeClock = new FakeClock();
@RegisterExtension
@ -260,6 +267,27 @@ public class PremiumListDaoTest {
assertThat(PremiumListDao.premiumListCache.getIfPresent("testname")).isNull();
}
@Test
void testSave_largeSize_savedQuickly() {
Stopwatch stopwatch = Stopwatch.createStarted();
ImmutableMap<String, BigDecimal> prices =
IntStream.range(0, 20000).boxed().collect(toImmutableMap(String::valueOf, BigDecimal::new));
PremiumList list =
new PremiumList.Builder()
.setName("testname")
.setCurrency(USD)
.setLabelsToPrices(prices)
.setCreationTimestamp(fakeClock.nowUtc())
.build();
PremiumListDao.save(list);
long duration = stopwatch.stop().elapsed(TimeUnit.MILLISECONDS);
if (duration >= 6000) {
// Don't fail directly since we can't rely on what sort of machines the test is running on
logger.atSevere().log(
"Expected premium list update to take 2-3 seconds but it took %d ms", duration);
}
}
private static Money moneyOf(CurrencyUnit unit, double amount) {
return Money.of(unit, BigDecimal.valueOf(amount).setScale(unit.getDecimalPlaces()));
}

View file

@ -60,7 +60,6 @@ import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Streams;
import com.google.common.net.InetAddresses;
import com.googlecode.objectify.Key;
import google.registry.dns.writer.VoidDnsWriter;
@ -1264,7 +1263,7 @@ public class DatabaseHelper {
/** Returns the entire map of {@link PremiumEntry}s for the given {@link PremiumList}. */
public static ImmutableMap<String, PremiumEntry> loadPremiumEntries(PremiumList premiumList) {
return Streams.stream(PremiumListDao.loadAllPremiumEntries(premiumList.getName()))
return PremiumListDao.loadAllPremiumEntries(premiumList.getName()).stream()
.collect(toImmutableMap(PremiumEntry::getDomainLabel, Function.identity()));
}

View file

@ -14,17 +14,20 @@
package google.registry.tools;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.truth.Truth.assertThat;
import static google.registry.model.ImmutableObjectSubject.immutableObjectCorrespondence;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.joda.money.CurrencyUnit.USD;
import static org.junit.jupiter.api.Assertions.assertThrows;
import com.google.common.collect.ImmutableSet;
import com.google.common.io.Files;
import google.registry.model.tld.Registry;
import google.registry.model.tld.label.PremiumList;
import google.registry.model.tld.label.PremiumList.PremiumEntry;
import google.registry.model.tld.label.PremiumListDao;
import java.io.File;
import java.math.BigDecimal;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Optional;
@ -46,12 +49,10 @@ class UpdatePremiumListCommandTest<C extends UpdatePremiumListCommand>
Optional<PremiumList> list = PremiumListDao.getLatestRevision(TLD_TEST);
// ensure that no premium list is created before running the command
assertThat(list.isPresent()).isTrue();
// ensure that there's value in existing premium list;
UpdatePremiumListCommand command = new UpdatePremiumListCommand();
ImmutableSet<String> entries = command.getExistingPremiumEntry(list.get());
assertThat(entries.size()).isEqualTo(1);
// data from @beforeEach of CreateOrUpdatePremiumListCommandTestCase.java
assertThat(entries.contains("doge,USD 9090.00")).isTrue();
assertThat(PremiumListDao.loadPremiumEntries(list.get()))
.comparingElementsUsing(immutableObjectCorrespondence("revisionId"))
.containsExactly(PremiumEntry.create(0L, new BigDecimal("9090.00"), "doge"));
}
@Test
@ -77,11 +78,9 @@ class UpdatePremiumListCommandTest<C extends UpdatePremiumListCommand>
command.inputFile = Paths.get(tmpFile.getPath());
runCommandForced("--name=" + TLD_TEST, "--input=" + command.inputFile);
ImmutableSet<String> entries =
command.getExistingPremiumEntry(PremiumListDao.getLatestRevision(TLD_TEST).get());
assertThat(entries.size()).isEqualTo(1);
// verify that list is updated; cannot use only string since price is formatted;
assertThat(entries.contains("eth,USD 9999.00")).isTrue();
assertThat(PremiumListDao.loadAllPremiumEntries(TLD_TEST))
.comparingElementsUsing(immutableObjectCorrespondence("revisionId"))
.containsExactly(PremiumEntry.create(0L, new BigDecimal("9999.00"), "eth"));
}
@Test
@ -98,11 +97,9 @@ class UpdatePremiumListCommandTest<C extends UpdatePremiumListCommand>
command.inputFile = Paths.get(newPremiumFile.getPath());
runCommandForced("--name=" + TLD_TEST, "--input=" + command.inputFile);
ImmutableSet<String> entries =
command.getExistingPremiumEntry(PremiumListDao.getLatestRevision(TLD_TEST).get());
assertThat(entries.size()).isEqualTo(1);
// verify that list is updated; cannot use only string since price is formatted;
assertThat(entries.contains("eth,USD 9999.00")).isTrue();
assertThat(PremiumListDao.loadAllPremiumEntries(TLD_TEST))
.comparingElementsUsing(immutableObjectCorrespondence("revisionId"))
.containsExactly(PremiumEntry.create(0L, new BigDecimal("9999.00"), "eth"));
}
@Test
@ -116,12 +113,11 @@ class UpdatePremiumListCommandTest<C extends UpdatePremiumListCommand>
runCommandForced("--name=" + TLD_TEST, "--input=" + command.inputFile);
// assert all three lines from premiumTerms are added
ImmutableSet<String> entries =
command.getExistingPremiumEntry(PremiumListDao.getLatestRevision(TLD_TEST).get());
assertThat(entries.size()).isEqualTo(3);
assertThat(entries.contains("foo,USD 9000.00")).isTrue();
assertThat(entries.contains("doge,USD 100.00")).isTrue();
assertThat(entries.contains("elon,USD 2021.00")).isTrue();
assertThat(
PremiumListDao.loadAllPremiumEntries(TLD_TEST).stream()
.map(Object::toString)
.collect(toImmutableList()))
.containsExactly("foo, 9000.00", "doge, 100.00", "elon, 2021.00");
}
@Test