RDAP: Add result set sizing logic for domain name searches

Because we cannot weed out deleted domains in the query itself, the RDAP code must pull all domains with matching names, then throw out the deleted domains. So we don't know how many domains to fetch up front to fill up the desired maximum result set size. This CL adds a loop to attempt to fetch additional domains if the first fetch did not yield enough, while giving up after a while to avoid bogging down the system.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=133420297
This commit is contained in:
mountford 2016-09-16 13:24:51 -07:00 committed by Ben McIlwain
parent 743ff99ca3
commit aa2af68af0
2 changed files with 212 additions and 85 deletions

View file

@ -29,6 +29,8 @@ import com.google.common.collect.Iterables;
import com.google.common.primitives.Booleans; import com.google.common.primitives.Booleans;
import com.googlecode.objectify.Key; import com.googlecode.objectify.Key;
import com.googlecode.objectify.cmd.Query; import com.googlecode.objectify.cmd.Query;
import google.registry.config.ConfigModule.Config;
import google.registry.model.EppResourceUtils;
import google.registry.model.domain.DomainResource; import google.registry.model.domain.DomainResource;
import google.registry.model.host.HostResource; import google.registry.model.host.HostResource;
import google.registry.rdap.RdapJsonFormatter.BoilerplateType; import google.registry.rdap.RdapJsonFormatter.BoilerplateType;
@ -59,10 +61,14 @@ public class RdapDomainSearchAction extends RdapActionBase {
public static final String PATH = "/rdap/domains"; public static final String PATH = "/rdap/domains";
public static final int CHUNK_SIZE_SCALING_FACTOR = 5;
public static final int MAX_CHUNK_FETCHES = 20;
@Inject Clock clock; @Inject Clock clock;
@Inject @Parameter("name") Optional<String> nameParam; @Inject @Parameter("name") Optional<String> nameParam;
@Inject @Parameter("nsLdhName") Optional<String> nsLdhNameParam; @Inject @Parameter("nsLdhName") Optional<String> nsLdhNameParam;
@Inject @Parameter("nsIp") Optional<InetAddress> nsIpParam; @Inject @Parameter("nsIp") Optional<InetAddress> nsIpParam;
@Inject @Config("rdapResultSetMaxSize") int rdapResultSetMaxSize;
@Inject RdapDomainSearchAction() {} @Inject RdapDomainSearchAction() {}
@Override @Override
@ -137,21 +143,55 @@ public class RdapDomainSearchAction extends RdapActionBase {
domainResource, false, rdapLinkBase, rdapWhoisServer, now)); domainResource, false, rdapLinkBase, rdapWhoisServer, now));
// Handle queries with a wildcard. // Handle queries with a wildcard.
} else { } else {
// We can't query for undeleted domains as part of the query itself; that would require an
// inequality query on deletion time, and we are already using inequality queries on
// fullyQualifiedDomainName. So we need another way to limit the result set to the desired
// number of undeleted domains, which we do as follows. We query a batch of domains up to five
// times the size of the result set size limit (a factor picked out of thin air), and weed out
// all deleted domains. If we still have space in the result set (because there were an
// incredibly large number of deleted domains), we go back and query some more domains to try
// and find more results. We try this 20 times (meaning we search for 100 times as many
// domains as the result set size limit), then give up and return a result set that is smaller
// than the limit. Ugly? You bet!
// TODO(b/31546493): Add metrics to figure out how well this algorithm works.
ImmutableList.Builder<ImmutableMap<String, Object>> builder = new ImmutableList.Builder<>();
String previousChunkEnd = null;
for (int numResultsFound = 0, retry = 0;
(retry < MAX_CHUNK_FETCHES) && (numResultsFound < rdapResultSetMaxSize);
retry++) {
// Construct the query.
Query<DomainResource> query = ofy().load() Query<DomainResource> query = ofy().load()
.type(DomainResource.class) .type(DomainResource.class)
// TODO(b/24463238): figure out how to limit the size of these queries effectively .filter("fullyQualifiedDomainName <", partialStringQuery.getNextInitialString());
.filter("fullyQualifiedDomainName >=", partialStringQuery.getInitialString()) if (previousChunkEnd == null) {
.filter("fullyQualifiedDomainName <", partialStringQuery.getNextInitialString()) query = query.filter(
.limit(1000); "fullyQualifiedDomainName >=", partialStringQuery.getInitialString());
} else {
query = query.filter("fullyQualifiedDomainName >", previousChunkEnd);
}
if (partialStringQuery.getSuffix() != null) { if (partialStringQuery.getSuffix() != null) {
query = query.filter("tld", partialStringQuery.getSuffix()); query = query.filter("tld", partialStringQuery.getSuffix());
} }
ImmutableList.Builder<ImmutableMap<String, Object>> builder = new ImmutableList.Builder<>(); // Perform the query and weed out deleted domains.
for (DomainResource domainResource : query) { previousChunkEnd = null;
if (domainResource.getDeletionTime().isAfter(now)) { int numDomainsInChunk = 0;
for (DomainResource domainResource :
query.limit(rdapResultSetMaxSize * CHUNK_SIZE_SCALING_FACTOR)) {
previousChunkEnd = domainResource.getFullyQualifiedDomainName();
numDomainsInChunk++;
if (EppResourceUtils.isActive(domainResource, now)) {
builder.add( builder.add(
RdapJsonFormatter.makeRdapJsonForDomain( RdapJsonFormatter.makeRdapJsonForDomain(
domainResource, false, rdapLinkBase, rdapWhoisServer, now)); domainResource, false, rdapLinkBase, rdapWhoisServer, now));
numResultsFound++;
if (numResultsFound >= rdapResultSetMaxSize) {
return builder.build();
}
}
}
if ((previousChunkEnd == null)
|| (numDomainsInChunk < rdapResultSetMaxSize * CHUNK_SIZE_SCALING_FACTOR)) {
break;
} }
} }
return builder.build(); return builder.build();

View file

@ -34,6 +34,7 @@ import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Range; import com.google.common.collect.Range;
import com.google.common.net.InetAddresses; import com.google.common.net.InetAddresses;
import com.googlecode.objectify.Key; import com.googlecode.objectify.Key;
import google.registry.model.contact.ContactResource;
import google.registry.model.domain.DomainResource; import google.registry.model.domain.DomainResource;
import google.registry.model.domain.Period; import google.registry.model.domain.Period;
import google.registry.model.host.HostResource; import google.registry.model.host.HostResource;
@ -45,6 +46,8 @@ import google.registry.testing.AppEngineRule;
import google.registry.testing.FakeClock; import google.registry.testing.FakeClock;
import google.registry.testing.FakeResponse; import google.registry.testing.FakeResponse;
import google.registry.testing.InjectRule; import google.registry.testing.InjectRule;
import java.util.List;
import java.util.Map;
import org.joda.time.DateTime; import org.joda.time.DateTime;
import org.json.simple.JSONValue; import org.json.simple.JSONValue;
import org.junit.Before; import org.junit.Before;
@ -71,9 +74,13 @@ public class RdapDomainSearchActionTest {
private final RdapDomainSearchAction action = new RdapDomainSearchAction(); private final RdapDomainSearchAction action = new RdapDomainSearchAction();
private Registrar registrar;
private DomainResource domainCatLol; private DomainResource domainCatLol;
private DomainResource domainCatLol2; private DomainResource domainCatLol2;
private DomainResource domainCatExample; private DomainResource domainCatExample;
private ContactResource contact1;
private ContactResource contact2;
private ContactResource contact3;
private HostResource hostNs1CatLol; private HostResource hostNs1CatLol;
private HostResource hostNs2CatLol; private HostResource hostNs2CatLol;
@ -103,6 +110,7 @@ public class RdapDomainSearchActionTest {
action.nsIpParam = Optional.absent(); action.nsIpParam = Optional.absent();
break; break;
} }
action.rdapResultSetMaxSize = 5;
action.run(); action.run();
return JSONValue.parse(response.getPayload()); return JSONValue.parse(response.getPayload());
} }
@ -113,22 +121,23 @@ public class RdapDomainSearchActionTest {
// cat.lol and cat2.lol // cat.lol and cat2.lol
createTld("lol"); createTld("lol");
Registrar registrar = persistResource( registrar = persistResource(
makeRegistrar("evilregistrar", "Yes Virginia <script>", Registrar.State.ACTIVE)); makeRegistrar("evilregistrar", "Yes Virginia <script>", Registrar.State.ACTIVE));
persistSimpleResources(makeRegistrarContacts(registrar)); persistSimpleResources(makeRegistrarContacts(registrar));
domainCatLol = persistResource(makeDomainResource( domainCatLol = persistResource(
makeDomainResource(
"cat.lol", "cat.lol",
makeAndPersistContactResource( contact1 = makeAndPersistContactResource(
"5372808-ERL", "5372808-ERL",
"Goblin Market", "Goblin Market",
"lol@cat.lol", "lol@cat.lol",
clock.nowUtc().minusYears(1)), clock.nowUtc().minusYears(1)),
makeAndPersistContactResource( contact2 = makeAndPersistContactResource(
"5372808-IRL", "5372808-IRL",
"Santa Claus", "Santa Claus",
"BOFH@cat.lol", "BOFH@cat.lol",
clock.nowUtc().minusYears(2)), clock.nowUtc().minusYears(2)),
makeAndPersistContactResource( contact3 = makeAndPersistContactResource(
"5372808-TRL", "5372808-TRL",
"The Raven", "The Raven",
"bog@cat.lol", "bog@cat.lol",
@ -142,12 +151,16 @@ public class RdapDomainSearchActionTest {
"bad:f00d:cafe::15:beef", "bad:f00d:cafe::15:beef",
clock.nowUtc().minusYears(2)), clock.nowUtc().minusYears(2)),
registrar) registrar)
.asBuilder().setSubordinateHosts(ImmutableSet.of("ns1.cat.lol", "ns2.cat.lol")).build()); .asBuilder()
.setSubordinateHosts(ImmutableSet.of("ns1.cat.lol", "ns2.cat.lol"))
.setCreationTimeForTest(clock.nowUtc().minusYears(3))
.build());
persistResource( persistResource(
hostNs1CatLol.asBuilder().setSuperordinateDomain(Key.create(domainCatLol)).build()); hostNs1CatLol.asBuilder().setSuperordinateDomain(Key.create(domainCatLol)).build());
persistResource( persistResource(
hostNs2CatLol.asBuilder().setSuperordinateDomain(Key.create(domainCatLol)).build()); hostNs2CatLol.asBuilder().setSuperordinateDomain(Key.create(domainCatLol)).build());
domainCatLol2 = persistResource(makeDomainResource( domainCatLol2 = persistResource(
makeDomainResource(
"cat2.lol", "cat2.lol",
makeAndPersistContactResource( makeAndPersistContactResource(
"6372808-ERL", "6372808-ERL",
@ -168,13 +181,17 @@ public class RdapDomainSearchActionTest {
"ns1.cat.example", "10.20.30.40", clock.nowUtc().minusYears(1)), "ns1.cat.example", "10.20.30.40", clock.nowUtc().minusYears(1)),
makeAndPersistHostResource( makeAndPersistHostResource(
"ns2.dog.lol", "12:feed:5000::15:beef", clock.nowUtc().minusYears(2)), "ns2.dog.lol", "12:feed:5000::15:beef", clock.nowUtc().minusYears(2)),
registrar)); registrar)
.asBuilder()
.setCreationTimeForTest(clock.nowUtc().minusYears(3))
.build());
// cat.example // cat.example
createTld("example"); createTld("example");
registrar = persistResource( registrar = persistResource(
makeRegistrar("goodregistrar", "St. John Chrysostom", Registrar.State.ACTIVE)); makeRegistrar("goodregistrar", "St. John Chrysostom", Registrar.State.ACTIVE));
persistSimpleResources(makeRegistrarContacts(registrar)); persistSimpleResources(makeRegistrarContacts(registrar));
domainCatExample = persistResource(makeDomainResource( domainCatExample = persistResource(
makeDomainResource(
"cat.example", "cat.example",
makeAndPersistContactResource( makeAndPersistContactResource(
"7372808-ERL", "7372808-ERL",
@ -194,12 +211,16 @@ public class RdapDomainSearchActionTest {
hostNs1CatLol, hostNs1CatLol,
makeAndPersistHostResource( makeAndPersistHostResource(
"ns2.external.tld", "bad:f00d:cafe::15:beef", clock.nowUtc().minusYears(2)), "ns2.external.tld", "bad:f00d:cafe::15:beef", clock.nowUtc().minusYears(2)),
registrar)); registrar)
.asBuilder()
.setCreationTimeForTest(clock.nowUtc().minusYears(3))
.build());
// cat.みんな // cat.みんな
createTld("xn--q9jyb4c"); createTld("xn--q9jyb4c");
registrar = persistResource(makeRegistrar("unicoderegistrar", "みんな", Registrar.State.ACTIVE)); registrar = persistResource(makeRegistrar("unicoderegistrar", "みんな", Registrar.State.ACTIVE));
persistSimpleResources(makeRegistrarContacts(registrar)); persistSimpleResources(makeRegistrarContacts(registrar));
persistResource(makeDomainResource( persistResource(
makeDomainResource(
"cat.みんな", "cat.みんな",
makeAndPersistContactResource( makeAndPersistContactResource(
"8372808-ERL", "8372808-ERL",
@ -219,7 +240,10 @@ public class RdapDomainSearchActionTest {
makeAndPersistHostResource("ns1.cat.みんな", "1.2.3.5", clock.nowUtc().minusYears(1)), makeAndPersistHostResource("ns1.cat.みんな", "1.2.3.5", clock.nowUtc().minusYears(1)),
makeAndPersistHostResource( makeAndPersistHostResource(
"ns2.cat.みんな", "bad:f00d:cafe::14:beef", clock.nowUtc().minusYears(2)), "ns2.cat.みんな", "bad:f00d:cafe::14:beef", clock.nowUtc().minusYears(2)),
registrar)); registrar)
.asBuilder()
.setCreationTimeForTest(clock.nowUtc().minusYears(3))
.build());
// cat.1.test // cat.1.test
createTld("1.test"); createTld("1.test");
registrar = registrar =
@ -246,7 +270,10 @@ public class RdapDomainSearchActionTest {
makeAndPersistHostResource( makeAndPersistHostResource(
"ns2.cat.2.test", "bad:f00d:cafe::14:beef", clock.nowUtc().minusYears(2)), "ns2.cat.2.test", "bad:f00d:cafe::14:beef", clock.nowUtc().minusYears(2)),
registrar) registrar)
.asBuilder().setSubordinateHosts(ImmutableSet.of("ns1.cat.1.test")).build()); .asBuilder()
.setSubordinateHosts(ImmutableSet.of("ns1.cat.1.test"))
.setCreationTimeForTest(clock.nowUtc().minusYears(3))
.build());
// history entries // history entries
persistResource( persistResource(
@ -490,6 +517,66 @@ public class RdapDomainSearchActionTest {
assertThat(response.getStatus()).isEqualTo(404); assertThat(response.getStatus()).isEqualTo(404);
} }
/**
 * Persists {@code numActiveDomains * numTotalDomainsPerActiveDomain} domains named
 * {@code domain1.lol}, {@code domain2.lol}, and so on.
 *
 * <p>A domain whose sequence number is a multiple of {@code numTotalDomainsPerActiveDomain} is
 * stored via {@code persistResource}; every other domain is stored via
 * {@code persistDomainAsDeleted} using the current time of the test clock.
 *
 * @param numActiveDomains number of domains stored via {@code persistResource}
 * @param numTotalDomainsPerActiveDomain size of each group of domains, of which only the last
 *     one is stored via {@code persistResource}
 */
private void createManyDomains(int numActiveDomains, int numTotalDomainsPerActiveDomain) {
  for (int sequenceNumber = 1;
      sequenceNumber <= numActiveDomains * numTotalDomainsPerActiveDomain;
      sequenceNumber++) {
    DomainResource candidate =
        makeDomainResource(
                String.format("domain%d.lol", sequenceNumber),
                contact1,
                contact2,
                contact3,
                hostNs1CatLol,
                hostNs2CatLol,
                registrar)
            .asBuilder()
            .setCreationTimeForTest(clock.nowUtc().minusYears(3))
            .build();
    // Only the last domain of each group is kept; the rest are stored as deleted.
    boolean lastOfGroup = (sequenceNumber % numTotalDomainsPerActiveDomain) == 0;
    if (!lastOfGroup) {
      persistDomainAsDeleted(candidate, clock.nowUtc());
      continue;
    }
    persistResource(candidate);
  }
}
/**
 * Asserts that {@code obj} is a map whose {@code "domainSearchResults"} entry is a list with
 * exactly {@code expected} elements.
 */
private void checkNumberOfDomainsInResult(Object obj, int expected) {
  assertThat(obj).isInstanceOf(Map.class);
  // Both casts are unchecked; the value is the parsed JSON passed in by the caller.
  @SuppressWarnings("unchecked")
  List<Object> searchResults =
      (List<Object>) ((Map<String, Object>) obj).get("domainSearchResults");
  assertThat(searchResults).hasSize(expected);
}
/**
 * With 20 domains of which every fourth is not persisted as deleted, a wildcard search is
 * expected to return 5 results.
 */
@Test
public void testDomainMatch_manyDeletedDomains_fullResultSet() throws Exception {
  // 5 groups of 4 domains each; only the last domain of every group is not stored as deleted.
  createManyDomains(5, 4);
  Object searchJson = generateActualJson(RequestType.NAME, "domain*.lol");
  assertThat(response.getStatus()).isEqualTo(200);
  checkNumberOfDomainsInResult(searchJson, 5);
}
/**
 * With only 3 domains not persisted as deleted among 300, the search is expected to return a
 * result set of 3.
 */
@Test
public void testDomainMatch_manyDeletedDomains_partialResultSetDueToInsufficientDomains()
    throws Exception {
  // Too few surviving domains exist to fill a complete result set.
  createManyDomains(3, 100);
  Object searchJson = generateActualJson(RequestType.NAME, "domain*.lol");
  assertThat(response.getStatus()).isEqualTo(200);
  checkNumberOfDomainsInResult(searchJson, 3);
}
/**
 * With 5 surviving domains spread among 750, the search is expected to stop early and return
 * only 3 of them.
 */
@Test
public void testDomainMatch_manyDeletedDomains_partialResultSetDueToFetchingLimit()
    throws Exception {
  // Expected rather than desired: enough surviving domains exist to fill a complete result set,
  // but the deleted ones are so numerous that the search gives up before reaching all of them.
  createManyDomains(5, 150);
  Object searchJson = generateActualJson(RequestType.NAME, "domain*.lol");
  assertThat(response.getStatus()).isEqualTo(200);
  checkNumberOfDomainsInResult(searchJson, 3);
}
@Test @Test
public void testNameserverMatch_foundMultiple() throws Exception { public void testNameserverMatch_foundMultiple() throws Exception {
assertThat(generateActualJson(RequestType.NS_LDH_NAME, "ns1.cat.lol")) assertThat(generateActualJson(RequestType.NS_LDH_NAME, "ns1.cat.lol"))