Reduce the QPS of the refresh DNS for all domains action (#2212)

This also adds a targeted QPS as a parameter in case we need to manually bump it
up (or down) for some reason without having to make code changes and re-deploy.
This commit is contained in:
Ben McIlwain 2023-11-08 13:47:37 -05:00 committed by GitHub
parent f50290ce1d
commit 992d1c1349
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 34 additions and 13 deletions

View file

@ -58,13 +58,25 @@ public class RefreshDnsForAllDomainsAction implements Runnable {
private static final FluentLogger logger = FluentLogger.forEnclosingClass(); private static final FluentLogger logger = FluentLogger.forEnclosingClass();
/** The number of DNS updates to enqueue per transaction. */
private static final int DEFAULT_BATCH_SIZE = 250; private static final int DEFAULT_BATCH_SIZE = 250;
/**
* The default number of DNS updates it is safe to execute per minute.
*
* <p>This is mostly a guess based on existing system performance, but the point is to be on the
* safe side and not cause contention with ongoing DNS updates from clients.
*/
private static final int DEFAULT_REFRESH_QPS = 7;
private final Response response; private final Response response;
private final ImmutableSet<String> tlds; private final ImmutableSet<String> tlds;
// Recommended value for batch size is between 200 and 500 // Recommended value for batch size is between 200 and 500
private final int batchSize; private final int batchSize;
private final int refreshQps;
private final Random random; private final Random random;
@Inject @Inject
@ -72,10 +84,12 @@ public class RefreshDnsForAllDomainsAction implements Runnable {
Response response, Response response,
@Parameter(PARAM_TLDS) ImmutableSet<String> tlds, @Parameter(PARAM_TLDS) ImmutableSet<String> tlds,
@Parameter("batchSize") Optional<Integer> batchSize, @Parameter("batchSize") Optional<Integer> batchSize,
@Parameter("refreshQps") Optional<Integer> refreshQps,
Random random) { Random random) {
this.response = response; this.response = response;
this.tlds = tlds; this.tlds = tlds;
this.batchSize = batchSize.orElse(DEFAULT_BATCH_SIZE); this.batchSize = batchSize.orElse(DEFAULT_BATCH_SIZE);
this.refreshQps = refreshQps.orElse(DEFAULT_REFRESH_QPS);
this.random = random; this.random = random;
} }
@ -83,7 +97,7 @@ public class RefreshDnsForAllDomainsAction implements Runnable {
public void run() { public void run() {
assertTldsExist(tlds); assertTldsExist(tlds);
checkArgument(batchSize > 0, "Must specify a positive number for batch size"); checkArgument(batchSize > 0, "Must specify a positive number for batch size");
int smearMinutes = tm().transact(this::calculateSmearMinutes, TRANSACTION_REPEATABLE_READ); Duration smear = tm().transact(this::calculateSmear, TRANSACTION_REPEATABLE_READ);
ImmutableList<String> domainsBatch; ImmutableList<String> domainsBatch;
@Nullable String lastInPreviousBatch = null; @Nullable String lastInPreviousBatch = null;
@ -91,17 +105,16 @@ public class RefreshDnsForAllDomainsAction implements Runnable {
Optional<String> lastInPreviousBatchOpt = Optional.ofNullable(lastInPreviousBatch); Optional<String> lastInPreviousBatchOpt = Optional.ofNullable(lastInPreviousBatch);
domainsBatch = domainsBatch =
tm().transact( tm().transact(
() -> refreshBatch(lastInPreviousBatchOpt, smearMinutes), () -> refreshBatch(lastInPreviousBatchOpt, smear), TRANSACTION_REPEATABLE_READ);
TRANSACTION_REPEATABLE_READ);
lastInPreviousBatch = domainsBatch.isEmpty() ? null : getLast(domainsBatch); lastInPreviousBatch = domainsBatch.isEmpty() ? null : getLast(domainsBatch);
} while (domainsBatch.size() == batchSize); } while (domainsBatch.size() == batchSize);
} }
/** /**
* Calculates the number of smear minutes to enqueue refreshes so that the DNS queue does not get * Calculates the smear duration to enqueue refreshes so that the DNS queue does not get
* overloaded. * overloaded.
*/ */
private int calculateSmearMinutes() { private Duration calculateSmear() {
Long activeDomains = Long activeDomains =
tm().query( tm().query(
"SELECT COUNT(*) FROM Domain WHERE tld IN (:tlds) AND deletionTime = :endOfTime", "SELECT COUNT(*) FROM Domain WHERE tld IN (:tlds) AND deletionTime = :endOfTime",
@ -109,7 +122,7 @@ public class RefreshDnsForAllDomainsAction implements Runnable {
.setParameter("tlds", tlds) .setParameter("tlds", tlds)
.setParameter("endOfTime", END_OF_TIME) .setParameter("endOfTime", END_OF_TIME)
.getSingleResult(); .getSingleResult();
return Math.max(activeDomains.intValue() / 1000, 1); return Duration.standardSeconds(Math.max(activeDomains / refreshQps, 1));
} }
private ImmutableList<String> getBatch(Optional<String> lastInPreviousBatch) { private ImmutableList<String> getBatch(Optional<String> lastInPreviousBatch) {
@ -127,11 +140,12 @@ public class RefreshDnsForAllDomainsAction implements Runnable {
} }
@VisibleForTesting @VisibleForTesting
ImmutableList<String> refreshBatch(Optional<String> lastInPreviousBatch, int smearMinutes) { ImmutableList<String> refreshBatch(Optional<String> lastInPreviousBatch, Duration smear) {
ImmutableList<String> domainBatch = getBatch(lastInPreviousBatch); ImmutableList<String> domainBatch = getBatch(lastInPreviousBatch);
try { try {
// Smear the task execution time over the next N minutes. // Smear the task execution time over the next N seconds.
requestDomainDnsRefresh(domainBatch, Duration.standardMinutes(random.nextInt(smearMinutes))); requestDomainDnsRefresh(
domainBatch, Duration.standardSeconds(random.nextInt((int) smear.getStandardSeconds())));
} catch (Throwable t) { } catch (Throwable t) {
logger.atSevere().withCause(t).log("Error while enqueuing DNS refresh batch"); logger.atSevere().withCause(t).log("Error while enqueuing DNS refresh batch");
response.setStatus(HttpStatus.SC_OK); response.setStatus(HttpStatus.SC_OK);

View file

@ -81,4 +81,10 @@ public class ToolsServerModule {
static Optional<Integer> provideBatchSize(HttpServletRequest req) { static Optional<Integer> provideBatchSize(HttpServletRequest req) {
return extractOptionalIntParameter(req, "batchSize"); return extractOptionalIntParameter(req, "batchSize");
} }
@Provides
@Parameter("refreshQps")
static Optional<Integer> provideRefreshQps(HttpServletRequest req) {
return extractOptionalIntParameter(req, "refreshQps");
}
} }

View file

@ -35,6 +35,7 @@ import google.registry.testing.FakeResponse;
import java.util.Optional; import java.util.Optional;
import java.util.Random; import java.util.Random;
import org.joda.time.DateTime; import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension; import org.junit.jupiter.api.extension.RegisterExtension;
@ -55,7 +56,7 @@ public class RefreshDnsForAllDomainsActionTest {
createTld("bar"); createTld("bar");
action = action =
new RefreshDnsForAllDomainsAction( new RefreshDnsForAllDomainsAction(
response, ImmutableSet.of("bar"), Optional.of(10), new Random()); response, ImmutableSet.of("bar"), Optional.of(10), Optional.empty(), new Random());
} }
@Test @Test
@ -74,9 +75,9 @@ public class RefreshDnsForAllDomainsActionTest {
// Set batch size to 1 since each batch will be enqueud at the same time // Set batch size to 1 since each batch will be enqueud at the same time
action = action =
new RefreshDnsForAllDomainsAction( new RefreshDnsForAllDomainsAction(
response, ImmutableSet.of("bar"), Optional.of(1), new Random()); response, ImmutableSet.of("bar"), Optional.of(1), Optional.of(7), new Random());
tm().transact(() -> action.refreshBatch(Optional.empty(), 1000)); tm().transact(() -> action.refreshBatch(Optional.empty(), Duration.standardMinutes(1000)));
tm().transact(() -> action.refreshBatch(Optional.empty(), 1000)); tm().transact(() -> action.refreshBatch(Optional.empty(), Duration.standardMinutes(1000)));
ImmutableList<DnsRefreshRequest> refreshRequests = ImmutableList<DnsRefreshRequest> refreshRequests =
tm().transact( tm().transact(
() -> () ->