Add metrics for async batch operation processing

We want to know how long it's actually taking to process asynchronous
contact/host deletions and DNS refreshes on host renames. This adds
instrumentation. Five metrics are recorded as follows:

* An incrementable metric for each async task processed (split out by
  type of task and result).
* Two event metrics for processing time between when a task is enqueued
  and when it is processed -- tracked separately for contact/host
  deletion and DNS refresh on host rename.
* Two event metrics for batch size every time the two mapreduces are
  run (this is usually 0). Tracked separately for contact/host deletion
  and DNS refresh on host rename.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=157001310
This commit is contained in:
mcilwain 2017-05-24 10:00:14 -07:00 committed by Ben McIlwain
parent 1adeb57fea
commit bb67841884
14 changed files with 671 additions and 154 deletions

View file

@ -15,7 +15,9 @@
package google.registry.batch;
import static com.google.appengine.api.taskqueue.QueueFactory.getQueue;
import static google.registry.flows.async.AsyncFlowEnqueuer.QUEUE_ASYNC_DELETE;
import static google.registry.flows.async.AsyncFlowEnqueuer.QUEUE_ASYNC_HOST_RENAME;
import static google.registry.flows.async.AsyncFlowMetrics.OperationType.DNS_REFRESH;
import static google.registry.model.ofy.ObjectifyService.ofy;
import static google.registry.testing.DatastoreHelper.createTld;
import static google.registry.testing.DatastoreHelper.newDomainApplication;
@ -32,10 +34,17 @@ import static google.registry.util.DateTimeUtils.START_OF_TIME;
import static org.joda.time.Duration.millis;
import static org.joda.time.Duration.standardHours;
import static org.joda.time.Duration.standardSeconds;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
import com.google.common.collect.ImmutableSet;
import com.googlecode.objectify.Key;
import google.registry.batch.RefreshDnsOnHostRenameAction.RefreshDnsOnHostRenameReducer;
import google.registry.flows.async.AsyncFlowEnqueuer;
import google.registry.flows.async.AsyncFlowMetrics;
import google.registry.flows.async.AsyncFlowMetrics.OperationResult;
import google.registry.model.host.HostResource;
import google.registry.testing.ExceptionRule;
import google.registry.testing.FakeClock;
@ -48,6 +57,7 @@ import google.registry.util.Retrier;
import google.registry.util.Sleeper;
import google.registry.util.SystemSleeper;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
@ -71,12 +81,17 @@ public class RefreshDnsOnHostRenameActionTest
@Before
public void setup() throws Exception {
createTld("tld");
enqueuer = new AsyncFlowEnqueuer();
enqueuer.asyncDnsRefreshPullQueue = getQueue(QUEUE_ASYNC_HOST_RENAME);
enqueuer.retrier = new Retrier(new FakeSleeper(clock), 1);
enqueuer =
new AsyncFlowEnqueuer(
getQueue(QUEUE_ASYNC_DELETE),
getQueue(QUEUE_ASYNC_HOST_RENAME),
Duration.ZERO,
new Retrier(new FakeSleeper(clock), 1));
AsyncFlowMetrics asyncFlowMetricsMock = mock(AsyncFlowMetrics.class);
action = new RefreshDnsOnHostRenameAction();
action.asyncFlowMetrics = asyncFlowMetricsMock;
inject.setStaticField(
RefreshDnsOnHostRenameReducer.class, "asyncFlowMetrics", asyncFlowMetricsMock);
action.clock = clock;
action.mrRunner = makeDefaultRunner();
action.pullQueue = getQueue(QUEUE_ASYNC_HOST_RENAME);
@ -108,11 +123,15 @@ public class RefreshDnsOnHostRenameActionTest
persistResource(newDomainResource("example.tld", host));
persistResource(newDomainResource("otherexample.tld", host));
persistResource(newDomainResource("untouched.tld", persistActiveHost("ns2.example.tld")));
enqueuer.enqueueAsyncDnsRefresh(host);
DateTime timeEnqueued = clock.nowUtc();
enqueuer.enqueueAsyncDnsRefresh(host, timeEnqueued);
runMapreduce();
assertDnsTasksEnqueued("example.tld", "otherexample.tld");
assertNoTasksEnqueued(QUEUE_ASYNC_HOST_RENAME);
verify(action.asyncFlowMetrics).recordDnsRefreshBatchSize(1L);
verify(action.asyncFlowMetrics)
.recordAsyncFlowResult(DNS_REFRESH, OperationResult.SUCCESS, timeEnqueued);
verifyNoMoreInteractions(action.asyncFlowMetrics);
}
@Test
@ -123,23 +142,35 @@ public class RefreshDnsOnHostRenameActionTest
persistResource(newDomainResource("example1.tld", host1));
persistResource(newDomainResource("example2.tld", host2));
persistResource(newDomainResource("example3.tld", host3));
enqueuer.enqueueAsyncDnsRefresh(host1);
enqueuer.enqueueAsyncDnsRefresh(host2);
enqueuer.enqueueAsyncDnsRefresh(host3);
DateTime timeEnqueued = clock.nowUtc();
DateTime laterTimeEnqueued = timeEnqueued.plus(standardSeconds(10));
enqueuer.enqueueAsyncDnsRefresh(host1, timeEnqueued);
enqueuer.enqueueAsyncDnsRefresh(host2, timeEnqueued);
enqueuer.enqueueAsyncDnsRefresh(host3, laterTimeEnqueued);
runMapreduce();
assertDnsTasksEnqueued("example1.tld", "example2.tld", "example3.tld");
assertNoTasksEnqueued(QUEUE_ASYNC_HOST_RENAME);
verify(action.asyncFlowMetrics).recordDnsRefreshBatchSize(3L);
verify(action.asyncFlowMetrics, times(2))
.recordAsyncFlowResult(DNS_REFRESH, OperationResult.SUCCESS, timeEnqueued);
verify(action.asyncFlowMetrics)
.recordAsyncFlowResult(DNS_REFRESH, OperationResult.SUCCESS, laterTimeEnqueued);
verifyNoMoreInteractions(action.asyncFlowMetrics);
}
@Test
public void testSuccess_deletedHost_doesntTriggerDnsRefresh() throws Exception {
HostResource host = persistDeletedHost("ns11.fakesss.tld", clock.nowUtc().minusDays(4));
persistResource(newDomainResource("example1.tld", host));
enqueuer.enqueueAsyncDnsRefresh(host);
DateTime timeEnqueued = clock.nowUtc();
enqueuer.enqueueAsyncDnsRefresh(host, timeEnqueued);
runMapreduce();
assertNoDnsTasksEnqueued();
assertNoTasksEnqueued(QUEUE_ASYNC_HOST_RENAME);
verify(action.asyncFlowMetrics).recordDnsRefreshBatchSize(1L);
verify(action.asyncFlowMetrics)
.recordAsyncFlowResult(DNS_REFRESH, OperationResult.STALE, timeEnqueued);
verifyNoMoreInteractions(action.asyncFlowMetrics);
}
@Test
@ -150,7 +181,7 @@ public class RefreshDnsOnHostRenameActionTest
.asBuilder()
.setDeletionTime(START_OF_TIME)
.build());
enqueuer.enqueueAsyncDnsRefresh(renamedHost);
enqueuer.enqueueAsyncDnsRefresh(renamedHost, clock.nowUtc());
runMapreduce();
assertNoDnsTasksEnqueued();
assertNoTasksEnqueued(QUEUE_ASYNC_HOST_RENAME);
@ -159,7 +190,7 @@ public class RefreshDnsOnHostRenameActionTest
@Test
public void testRun_hostDoesntExist_delaysTask() throws Exception {
HostResource host = newHostResource("ns1.example.tld");
enqueuer.enqueueAsyncDnsRefresh(host);
enqueuer.enqueueAsyncDnsRefresh(host, clock.nowUtc());
runMapreduce();
assertNoDnsTasksEnqueued();
assertTasksEnqueued(