Add metric for DNS UPDATE latency

Added:
- dns/update_latency, which measures the time since a DNS update was added to the pull queue until that updates is committed to the DnsWriter
- - It doesn't check that after being committed, it was actually published in the DNS.

- dns/publish_queue_delay, which measures how long since the initial insertion to the push queue until a publishDnsUpdate action was handled. It measures both for successes (which is what we care about) and various failures (which are important because the success for that publishDnsUpdate will be > than any of the previous failures)

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=185995678
This commit is contained in:
guyben 2018-02-16 07:49:06 -08:00 committed by jianglai
parent 15f871a605
commit 6e4b2bd6a8
9 changed files with 279 additions and 47 deletions

View file

@ -21,7 +21,9 @@ import static google.registry.testing.TaskQueueHelper.assertNoTasksEnqueued;
import static google.registry.testing.TaskQueueHelper.assertTasksEnqueued;
import google.registry.testing.AppEngineRule;
import google.registry.testing.FakeClock;
import google.registry.testing.TaskQueueHelper.TaskMatcher;
import org.joda.time.DateTime;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
@ -38,10 +40,11 @@ public class DnsQueueTest {
.withTaskQueue()
.build();
private DnsQueue dnsQueue;
private final FakeClock clock = new FakeClock(DateTime.parse("2010-01-01T10:00:00Z"));
@Before
public void init() {
dnsQueue = DnsQueue.create();
dnsQueue = DnsQueue.createForTesting(clock);
dnsQueue.leaseTasksBatchSize = 10;
}
@ -54,6 +57,7 @@ public class DnsQueueTest {
new TaskMatcher()
.param("Target-Type", "HOST")
.param("Target-Name", "octopus.tld")
.param("Create-Time", "2010-01-01T10:00:00.000Z")
.param("tld", "tld"));
}
@ -83,6 +87,7 @@ public class DnsQueueTest {
new TaskMatcher()
.param("Target-Type", "DOMAIN")
.param("Target-Name", "octopus.tld")
.param("Create-Time", "2010-01-01T10:00:00.000Z")
.param("tld", "tld"));
}
@ -101,4 +106,3 @@ public class DnsQueueTest {
assertThat(thrown).hasMessageThat().contains("TLD notatld does not exist");
}
}

View file

@ -20,12 +20,14 @@ import static google.registry.testing.DatastoreHelper.persistActiveDomain;
import static google.registry.testing.DatastoreHelper.persistActiveSubordinateHost;
import static google.registry.testing.DatastoreHelper.persistResource;
import static google.registry.testing.JUnitBackports.expectThrows;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import google.registry.dns.DnsMetrics.ActionStatus;
import google.registry.dns.DnsMetrics.CommitStatus;
import google.registry.dns.DnsMetrics.PublishStatus;
import google.registry.dns.writer.DnsWriter;
@ -37,6 +39,7 @@ import google.registry.testing.AppEngineRule;
import google.registry.testing.FakeClock;
import google.registry.testing.FakeLockHandler;
import google.registry.testing.InjectRule;
import java.util.Optional;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.junit.Before;
@ -87,6 +90,8 @@ public class PublishDnsUpdatesActionTest {
action.tld = tld;
action.hosts = ImmutableSet.of();
action.domains = ImmutableSet.of();
action.itemsCreateTime = Optional.of(clock.nowUtc().minusHours(2));
action.enqueuedTime = Optional.of(clock.nowUtc().minusHours(1));
action.dnsWriter = "correctWriter";
action.dnsWriterProxy = new DnsWriterProxy(ImmutableMap.of("correctWriter", dnsWriter));
action.dnsMetrics = dnsMetrics;
@ -111,6 +116,13 @@ public class PublishDnsUpdatesActionTest {
verify(dnsMetrics).incrementPublishHostRequests(1, PublishStatus.ACCEPTED);
verify(dnsMetrics).incrementPublishHostRequests(0, PublishStatus.REJECTED);
verify(dnsMetrics).recordCommit("correctWriter", CommitStatus.SUCCESS, Duration.ZERO, 0, 1);
verify(dnsMetrics)
.recordActionResult(
"correctWriter",
ActionStatus.SUCCESS,
1,
Duration.standardHours(2),
Duration.standardHours(1));
verifyNoMoreInteractions(dnsMetrics);
verifyNoMoreInteractions(dnsQueue);
@ -131,6 +143,40 @@ public class PublishDnsUpdatesActionTest {
verify(dnsMetrics).incrementPublishHostRequests(0, PublishStatus.ACCEPTED);
verify(dnsMetrics).incrementPublishHostRequests(0, PublishStatus.REJECTED);
verify(dnsMetrics).recordCommit("correctWriter", CommitStatus.SUCCESS, Duration.ZERO, 1, 0);
verify(dnsMetrics)
.recordActionResult(
"correctWriter",
ActionStatus.SUCCESS,
1,
Duration.standardHours(2),
Duration.standardHours(1));
verifyNoMoreInteractions(dnsMetrics);
verifyNoMoreInteractions(dnsQueue);
}
@Test
public void testPublish_commitFails() throws Exception {
action = createAction("xn--q9jyb4c");
action.domains = ImmutableSet.of("example.xn--q9jyb4c", "example2.xn--q9jyb4c");
action.hosts =
ImmutableSet.of(
"ns1.example.xn--q9jyb4c", "ns2.example.xn--q9jyb4c", "ns1.example2.xn--q9jyb4c");
doThrow(new RuntimeException()).when(dnsWriter).commit();
expectThrows(RuntimeException.class, action::run);
verify(dnsMetrics).incrementPublishDomainRequests(2, PublishStatus.ACCEPTED);
verify(dnsMetrics).incrementPublishDomainRequests(0, PublishStatus.REJECTED);
verify(dnsMetrics).incrementPublishHostRequests(3, PublishStatus.ACCEPTED);
verify(dnsMetrics).incrementPublishHostRequests(0, PublishStatus.REJECTED);
verify(dnsMetrics).recordCommit("correctWriter", CommitStatus.FAILURE, Duration.ZERO, 2, 3);
verify(dnsMetrics)
.recordActionResult(
"correctWriter",
ActionStatus.COMMIT_FAILURE,
5,
Duration.standardHours(2),
Duration.standardHours(1));
verifyNoMoreInteractions(dnsMetrics);
verifyNoMoreInteractions(dnsQueue);
@ -157,6 +203,13 @@ public class PublishDnsUpdatesActionTest {
verify(dnsMetrics).incrementPublishHostRequests(3, PublishStatus.ACCEPTED);
verify(dnsMetrics).incrementPublishHostRequests(0, PublishStatus.REJECTED);
verify(dnsMetrics).recordCommit("correctWriter", CommitStatus.SUCCESS, Duration.ZERO, 2, 3);
verify(dnsMetrics)
.recordActionResult(
"correctWriter",
ActionStatus.SUCCESS,
5,
Duration.standardHours(2),
Duration.standardHours(1));
verifyNoMoreInteractions(dnsMetrics);
verifyNoMoreInteractions(dnsQueue);
@ -177,6 +230,13 @@ public class PublishDnsUpdatesActionTest {
verify(dnsMetrics).incrementPublishHostRequests(0, PublishStatus.ACCEPTED);
verify(dnsMetrics).incrementPublishHostRequests(3, PublishStatus.REJECTED);
verify(dnsMetrics).recordCommit("correctWriter", CommitStatus.SUCCESS, Duration.ZERO, 0, 0);
verify(dnsMetrics)
.recordActionResult(
"correctWriter",
ActionStatus.SUCCESS,
5,
Duration.standardHours(2),
Duration.standardHours(1));
verifyNoMoreInteractions(dnsMetrics);
verifyNoMoreInteractions(dnsQueue);
@ -184,24 +244,26 @@ public class PublishDnsUpdatesActionTest {
@Test
public void testLockIsntAvailable() throws Exception {
action = createAction("xn--q9jyb4c");
action.domains = ImmutableSet.of("example.com", "example2.com");
action.hosts = ImmutableSet.of("ns1.example.com", "ns2.example.com", "ns1.example2.com");
action.lockHandler = new FakeLockHandler(false);
ServiceUnavailableException thrown =
expectThrows(
ServiceUnavailableException.class,
() -> {
action = createAction("xn--q9jyb4c");
action.domains = ImmutableSet.of("example.com", "example2.com");
action.hosts =
ImmutableSet.of("ns1.example.com", "ns2.example.com", "ns1.example2.com");
action.lockHandler = new FakeLockHandler(false);
action.run();
verifyNoMoreInteractions(dnsWriter);
verifyNoMoreInteractions(dnsMetrics);
verifyNoMoreInteractions(dnsQueue);
});
expectThrows(ServiceUnavailableException.class, action::run);
assertThat(thrown).hasMessageThat().contains("Lock failure");
verifyNoMoreInteractions(dnsWriter);
verify(dnsMetrics)
.recordActionResult(
"correctWriter",
ActionStatus.LOCK_FAILURE,
5,
Duration.standardHours(2),
Duration.standardHours(1));
verifyNoMoreInteractions(dnsMetrics);
verifyNoMoreInteractions(dnsQueue);
}
@Test
@ -214,6 +276,13 @@ public class PublishDnsUpdatesActionTest {
verifyNoMoreInteractions(dnsWriter);
verify(dnsMetrics)
.recordActionResult(
"wrongWriter",
ActionStatus.BAD_WRITER,
5,
Duration.standardHours(2),
Duration.standardHours(1));
verifyNoMoreInteractions(dnsMetrics);
verify(dnsQueue).addDomainRefreshTask("example.com");

View file

@ -21,6 +21,7 @@ import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static google.registry.dns.DnsConstants.DNS_PUBLISH_PUSH_QUEUE_NAME;
import static google.registry.dns.DnsConstants.DNS_PULL_QUEUE_NAME;
import static google.registry.dns.DnsConstants.DNS_TARGET_CREATE_TIME_PARAM;
import static google.registry.dns.DnsConstants.DNS_TARGET_NAME_PARAM;
import static google.registry.dns.DnsConstants.DNS_TARGET_TYPE_PARAM;
import static google.registry.request.RequestParameters.PARAM_TLD;
@ -52,7 +53,6 @@ import java.util.Map.Entry;
import java.util.Optional;
import java.util.stream.IntStream;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Duration;
import org.junit.Before;
import org.junit.Rule;
@ -66,7 +66,7 @@ public class ReadDnsQueueActionTest {
private static final int TEST_TLD_UPDATE_BATCH_SIZE = 100;
private DnsQueue dnsQueue;
private FakeClock clock = new FakeClock(DateTime.now(DateTimeZone.UTC));
private FakeClock clock = new FakeClock(DateTime.parse("3000-01-01TZ"));
@Rule
public final AppEngineRule appEngine = AppEngineRule.builder()
@ -88,7 +88,9 @@ public class ReadDnsQueueActionTest {
@Before
public void before() throws Exception {
clock.setTo(DateTime.now(DateTimeZone.UTC));
// Because of b/73372999 - the FakeClock can't be in the past, or the TaskQueues stop working.
// To make sure it's never in the past, we set the date far-far into the future
clock.setTo(DateTime.parse("3000-01-01TZ"));
createTlds("com", "net", "example");
persistResource(
Registry.get("com").asBuilder().setDnsWriters(ImmutableSet.of("comWriter")).build());
@ -100,7 +102,7 @@ public class ReadDnsQueueActionTest {
.setTldType(TldType.TEST)
.setDnsWriters(ImmutableSet.of("exampleWriter"))
.build());
dnsQueue = DnsQueue.create();
dnsQueue = DnsQueue.createForTesting(clock);
}
private void run() throws Exception {
@ -113,7 +115,7 @@ public class ReadDnsQueueActionTest {
action.taskEnqueuer = new TaskEnqueuer(new Retrier(null, 1));
action.jitterSeconds = Optional.empty();
// Advance the time a little, to ensure that leaseTasks() returns all tasks.
clock.setTo(DateTime.now(DateTimeZone.UTC).plusMillis(1));
clock.advanceBy(Duration.standardHours(1));
action.run();
}
@ -134,6 +136,8 @@ public class ReadDnsQueueActionTest {
private void assertTldsEnqueuedInPushQueue(ImmutableMultimap<String, String> tldsToDnsWriters)
throws Exception {
// By default, the publishDnsUpdates tasks will be enqueued one hour after the update items were
// created in the pull queue. This is because of the clock.advanceBy in run()
assertTasksEnqueued(
DNS_PUBLISH_PUSH_QUEUE_NAME,
transform(
@ -143,6 +147,8 @@ public class ReadDnsQueueActionTest {
.url(PublishDnsUpdatesAction.PATH)
.param("tld", tldToDnsWriter.getKey())
.param("dnsWriter", tldToDnsWriter.getValue())
.param("itemsCreated", "3000-01-01T00:00:00.000Z")
.param("enqueued", "3000-01-01T01:00:00.000Z")
.header("content-type", "application/x-www-form-urlencoded")));
}
@ -208,6 +214,29 @@ public class ReadDnsQueueActionTest {
assertTldsEnqueuedInPushQueue(ImmutableMultimap.of("com", "comWriter", "com", "otherWriter"));
}
@Test
public void testSuccess_differentUpdateTimes_usesMinimum() throws Exception {
clock.setTo(DateTime.parse("3000-02-03TZ"));
dnsQueue.addDomainRefreshTask("domain1.com");
clock.setTo(DateTime.parse("3000-02-04TZ"));
dnsQueue.addDomainRefreshTask("domain2.com");
clock.setTo(DateTime.parse("3000-02-05TZ"));
dnsQueue.addDomainRefreshTask("domain3.com");
run();
assertNoTasksEnqueued(DNS_PULL_QUEUE_NAME);
assertThat(getQueuedParams(DNS_PUBLISH_PUSH_QUEUE_NAME))
.containsExactly(
new ImmutableMultimap.Builder<String, String>()
.put("enqueued", "3000-02-05T01:00:00.000Z")
.put("itemsCreated", "3000-02-03T00:00:00.000Z")
.put("tld", "com")
.put("dnsWriter", "comWriter")
.put("domains", "domain1.com")
.put("domains", "domain2.com")
.put("domains", "domain3.com")
.build());
}
@Test
public void testSuccess_oneTldPaused_returnedToQueue() throws Exception {
persistResource(Registry.get("net").asBuilder().setDnsPaused(true).build());
@ -248,6 +277,7 @@ public class ReadDnsQueueActionTest {
.method(Method.PULL)
.param(DNS_TARGET_TYPE_PARAM, TargetType.DOMAIN.toString())
.param(DNS_TARGET_NAME_PARAM, "domain.wrongtld")
.param(DNS_TARGET_CREATE_TIME_PARAM, "3000-01-01TZ")
.param(PARAM_TLD, "net"));
run();
assertNoTasksEnqueued(DNS_PULL_QUEUE_NAME);