From aa2af68af08d17b98aeb340200f90ff9ea5b5fdb Mon Sep 17 00:00:00 2001 From: mountford Date: Fri, 16 Sep 2016 13:24:51 -0700 Subject: [PATCH] RDAP: Add result set sizing logic for domain name searches Because we cannot weed out deleted domains in the query itself, the RDAP code must pull all domains with matching names, then throw out the deleted domains. So we don't know how many domains to fetch up front to fill up the desired maximum result set size. This CL adds a loop to attempt to fetch addition domains if the first fetch did not yield enough, while giving up after a while to avoid bogging down the system. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=133420297 --- .../registry/rdap/RdapDomainSearchAction.java | 68 ++++-- .../rdap/RdapDomainSearchActionTest.java | 229 ++++++++++++------ 2 files changed, 212 insertions(+), 85 deletions(-) diff --git a/java/google/registry/rdap/RdapDomainSearchAction.java b/java/google/registry/rdap/RdapDomainSearchAction.java index c8246ddaf..79d70bb29 100644 --- a/java/google/registry/rdap/RdapDomainSearchAction.java +++ b/java/google/registry/rdap/RdapDomainSearchAction.java @@ -29,6 +29,8 @@ import com.google.common.collect.Iterables; import com.google.common.primitives.Booleans; import com.googlecode.objectify.Key; import com.googlecode.objectify.cmd.Query; +import google.registry.config.ConfigModule.Config; +import google.registry.model.EppResourceUtils; import google.registry.model.domain.DomainResource; import google.registry.model.host.HostResource; import google.registry.rdap.RdapJsonFormatter.BoilerplateType; @@ -58,11 +60,15 @@ import org.joda.time.DateTime; public class RdapDomainSearchAction extends RdapActionBase { public static final String PATH = "/rdap/domains"; + + public static final int CHUNK_SIZE_SCALING_FACTOR = 5; + public static final int MAX_CHUNK_FETCHES = 20; @Inject Clock clock; @Inject @Parameter("name") Optional nameParam; @Inject @Parameter("nsLdhName") Optional nsLdhNameParam; @Inject @Parameter("nsIp") Optional nsIpParam; + @Inject @Config("rdapResultSetMaxSize") int rdapResultSetMaxSize; @Inject RdapDomainSearchAction() {} @Override @@ -137,21 +143,55 @@ public class RdapDomainSearchAction extends RdapActionBase { domainResource, false, rdapLinkBase, rdapWhoisServer, now)); // Handle queries with a wildcard. } else { - Query query = ofy().load() - .type(DomainResource.class) - // TODO(b/24463238): figure out how to limit the size of these queries effectively - .filter("fullyQualifiedDomainName >=", partialStringQuery.getInitialString()) - .filter("fullyQualifiedDomainName <", partialStringQuery.getNextInitialString()) - .limit(1000); - if (partialStringQuery.getSuffix() != null) { - query = query.filter("tld", partialStringQuery.getSuffix()); - } + // We can't query for undeleted domains as part of the query itself; that would require an + // inequality query on deletion time, and we are already using inequality queries on + // fullyQualifiedDomainName. So we need another way to limit the result set to the desired + // number of undeleted domains, which we do as follows. We query a batch of domains up to five + // times the size of the result set size limit (a factor picked out of thin air), and weed out + // all deleted domains. If we still have space in the result set (because there were an + // incredibly large number of deleted domains), we go back and query some more domains to try + // and find more results. We try this 20 times (meaning we search for 100 times as many + // domains as the result set size limit), then give up and return a result set that is smaller + // than the limit. Ugly? You bet! + // TODO(b/31546493): Add metrics to figure out how well this algorithm works. ImmutableList.Builder> builder = new ImmutableList.Builder<>(); - for (DomainResource domainResource : query) { - if (domainResource.getDeletionTime().isAfter(now)) { - builder.add( - RdapJsonFormatter.makeRdapJsonForDomain( - domainResource, false, rdapLinkBase, rdapWhoisServer, now)); + String previousChunkEnd = null; + for (int numResultsFound = 0, retry = 0; + (retry < MAX_CHUNK_FETCHES) && (numResultsFound < rdapResultSetMaxSize); + retry++) { + // Construct the query. + Query query = ofy().load() + .type(DomainResource.class) + .filter("fullyQualifiedDomainName <", partialStringQuery.getNextInitialString()); + if (previousChunkEnd == null) { + query = query.filter( + "fullyQualifiedDomainName >=", partialStringQuery.getInitialString()); + } else { + query = query.filter("fullyQualifiedDomainName >", previousChunkEnd); + } + if (partialStringQuery.getSuffix() != null) { + query = query.filter("tld", partialStringQuery.getSuffix()); + } + // Perform the query and weed out deleted domains. + previousChunkEnd = null; + int numDomainsInChunk = 0; + for (DomainResource domainResource : + query.limit(rdapResultSetMaxSize * CHUNK_SIZE_SCALING_FACTOR)) { + previousChunkEnd = domainResource.getFullyQualifiedDomainName(); + numDomainsInChunk++; + if (EppResourceUtils.isActive(domainResource, now)) { + builder.add( + RdapJsonFormatter.makeRdapJsonForDomain( + domainResource, false, rdapLinkBase, rdapWhoisServer, now)); + numResultsFound++; + if (numResultsFound >= rdapResultSetMaxSize) { + return builder.build(); + } + } + } + if ((previousChunkEnd == null) + || (numDomainsInChunk < rdapResultSetMaxSize * CHUNK_SIZE_SCALING_FACTOR)) { + break; } } return builder.build(); diff --git a/javatests/google/registry/rdap/RdapDomainSearchActionTest.java b/javatests/google/registry/rdap/RdapDomainSearchActionTest.java index 37f0c78b8..4708ddd9e 100644 --- a/javatests/google/registry/rdap/RdapDomainSearchActionTest.java +++ b/javatests/google/registry/rdap/RdapDomainSearchActionTest.java @@ -34,6 +34,7 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Range; import com.google.common.net.InetAddresses; import com.googlecode.objectify.Key; +import google.registry.model.contact.ContactResource; import google.registry.model.domain.DomainResource; import google.registry.model.domain.Period; import google.registry.model.host.HostResource; @@ -45,6 +46,8 @@ import google.registry.testing.AppEngineRule; import google.registry.testing.FakeClock; import google.registry.testing.FakeResponse; import google.registry.testing.InjectRule; +import java.util.List; +import java.util.Map; import org.joda.time.DateTime; import org.json.simple.JSONValue; import org.junit.Before; @@ -71,9 +74,13 @@ public class RdapDomainSearchActionTest { private final RdapDomainSearchAction action = new RdapDomainSearchAction(); + private Registrar registrar; private DomainResource domainCatLol; private DomainResource domainCatLol2; private DomainResource domainCatExample; + private ContactResource contact1; + private ContactResource contact2; + private ContactResource contact3; private HostResource hostNs1CatLol; private HostResource hostNs2CatLol; @@ -103,6 +110,7 @@ public class RdapDomainSearchActionTest { action.nsIpParam = Optional.absent(); break; } + action.rdapResultSetMaxSize = 5; action.run(); return JSONValue.parse(response.getPayload()); } @@ -113,22 +121,23 @@ public class RdapDomainSearchActionTest { // cat.lol and cat2.lol createTld("lol"); - Registrar registrar = persistResource( + registrar = persistResource( makeRegistrar("evilregistrar", "Yes Virginia