Add metric for lock contention

Also move the acquire-result logging out of the transactNew work, to prevent double logging on
transient failures (transactNew retries on failure).

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=172500772
This commit is contained in:
guyben 2017-10-17 13:03:14 -07:00 committed by jianglai
parent 6f9b039e72
commit 77ee3e3544
3 changed files with 165 additions and 47 deletions

View file

@ -18,6 +18,8 @@ import static com.google.common.base.Preconditions.checkArgument;
import static google.registry.model.ofy.ObjectifyService.ofy; import static google.registry.model.ofy.ObjectifyService.ofy;
import static google.registry.util.DateTimeUtils.isAtOrAfter; import static google.registry.util.DateTimeUtils.isAtOrAfter;
import com.google.auto.value.AutoValue;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings; import com.google.common.base.Strings;
import com.googlecode.objectify.VoidWork; import com.googlecode.objectify.VoidWork;
import com.googlecode.objectify.Work; import com.googlecode.objectify.Work;
@ -49,6 +51,12 @@ public class Lock extends ImmutableObject {
private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass(); private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();
/** State in which an acquisition attempt found the lock; reported for monitoring. */
enum LockState {
  /** An unexpired lock exists and the request that owns it is still running. */
  IN_USE,
  /** No lock existed for the resource. */
  FREE,
  /** A lock existed, but its expiration time had already been reached. */
  TIMED_OUT,
  /** An unexpired lock existed, but the request that owns it is no longer running. */
  OWNER_DIED
}
/**
 * Sink for lock acquisition metrics, invoked once per call to {@code acquire}.
 *
 * <p>Deliberately non-final so that tests can substitute their own instance.
 */
@VisibleForTesting
static LockMetrics lockMetrics = new LockMetrics();
/** The name of the locked resource. */ /** The name of the locked resource. */
@Id @Id
String lockId; String lockId;
@ -90,19 +98,82 @@ public class Lock extends ImmutableObject {
return String.format("%s-%s", tld, resourceName); return String.format("%s-%s", tld, resourceName);
} }
/**
 * Outcome of a single acquisition attempt, returned from the transaction so that logging and
 * metrics can be handled after the transaction has finished.
 */
@AutoValue
abstract static class AcquireResult {
/** The transaction time at which the attempt was evaluated. */
public abstract DateTime transactionTime();
/** The lock found in storage before the attempt, or null if there was none. */
public abstract @Nullable Lock existingLock();
/** The lock created by this attempt, or null if the lock could not be acquired. */
public abstract @Nullable Lock newLock();
/** The state of the existing lock, as determined before attempting to acquire. */
public abstract LockState lockState();
/** Creates a result; {@code existingLock} and {@code newLock} may each be null. */
public static AcquireResult create(
DateTime transactionTime,
@Nullable Lock existingLock,
@Nullable Lock newLock,
LockState lockState) {
return new AutoValue_Lock_AcquireResult(transactionTime, existingLock, newLock, lockState);
}
}
/**
 * Logs the outcome of a lock acquisition attempt.
 *
 * <p>First describes the previously existing lock according to the result's {@link LockState}
 * (nothing is logged for {@code FREE}), then reports the newly created lock if there is one. A
 * failure while producing these messages is logged as a warning instead of being propagated.
 *
 * @param acquireResult the result of the acquisition attempt to describe
 */
private static void logAcquireResult(AcquireResult acquireResult) {
  try {
    DateTime transactionTime = acquireResult.transactionTime();
    Lock existing = acquireResult.existingLock();
    switch (acquireResult.lockState()) {
      case FREE:
        // No lock existed beforehand, so there is nothing to say about it.
        break;
      case IN_USE:
        logger.infofmt(
            "Existing lock by request %s is still valid now %s (until %s) lock: %s",
            existing.requestLogId,
            transactionTime,
            existing.expirationTime,
            existing.lockId);
        break;
      case OWNER_DIED:
        logger.infofmt(
            "Existing lock is valid now %s (until %s), but owner (%s) isn't running lock: %s",
            transactionTime,
            existing.expirationTime,
            existing.requestLogId,
            existing.lockId);
        break;
      case TIMED_OUT:
        logger.infofmt(
            "Existing lock by request %s is timed out now %s (was valid until %s) lock: %s",
            existing.requestLogId,
            transactionTime,
            existing.expirationTime,
            existing.lockId);
        break;
    }
    Lock acquired = acquireResult.newLock();
    if (acquired != null) {
      logger.infofmt("acquire succeeded %s lock: %s", acquired, acquired.lockId);
    }
  } catch (Throwable e) {
    // Logging is best-effort: an inconsistent AcquireResult (for instance a missing existing lock
    // for a non-FREE state) would surface here as a NullPointerException. Record the problem for
    // debugging and carry on as though nothing happened.
    logger.warningfmt(e, "Error while logging AcquireResult %s. Continuing.", acquireResult);
  }
}
/** Try to acquire a lock. Returns absent if it can't be acquired. */ /** Try to acquire a lock. Returns absent if it can't be acquired. */
public static Optional<Lock> acquire( public static Optional<Lock> acquire(
final String resourceName, final String resourceName,
@Nullable final String tld, @Nullable final String tld,
final Duration leaseLength, final Duration leaseLength,
final RequestStatusChecker requestStatusChecker) { final RequestStatusChecker requestStatusChecker) {
String lockId = makeLockId(resourceName, tld);
// It's important to use transactNew rather than transact, because a Lock can be used to control // It's important to use transactNew rather than transact, because a Lock can be used to control
// access to resources like GCS that can't be transactionally rolled back. Therefore, the lock // access to resources like GCS that can't be transactionally rolled back. Therefore, the lock
// must be definitively acquired before it is used, even when called inside another transaction. // must be definitively acquired before it is used, even when called inside another transaction.
return Optional.ofNullable(ofy().transactNew(new Work<Lock>() { AcquireResult acquireResult = ofy().transactNew(new Work<AcquireResult>() {
@Override @Override
public Lock run() { public AcquireResult run() {
String lockId = makeLockId(resourceName, tld);
DateTime now = ofy().getTransactionTime(); DateTime now = ofy().getTransactionTime();
// Checking if an unexpired lock still exists - if so, the lock can't be acquired. // Checking if an unexpired lock still exists - if so, the lock can't be acquired.
@ -111,29 +182,18 @@ public class Lock extends ImmutableObject {
logger.infofmt( logger.infofmt(
"Loaded existing lock: %s for request: %s", lock.lockId, lock.requestLogId); "Loaded existing lock: %s for request: %s", lock.lockId, lock.requestLogId);
} }
// TODO(b/63982642): remove check on requestLogId being null once migration is done LockState lockState;
// Until then we assume missing requestLogId means the app is still running (since we have if (lock == null) {
// no information to the contrary) lockState = LockState.FREE;
if (lock != null } else if (isAtOrAfter(now, lock.expirationTime)) {
&& !isAtOrAfter(now, lock.expirationTime) lockState = LockState.TIMED_OUT;
&& (lock.requestLogId == null || requestStatusChecker.isRunning(lock.requestLogId))) { } else if (!requestStatusChecker.isRunning(lock.requestLogId)) {
logger.infofmt( lockState = LockState.OWNER_DIED;
"Existing lock by request %s is still valid now %s (until %s) lock: %s", } else {
lock.requestLogId, lockState = LockState.IN_USE;
now, return AcquireResult.create(now, lock, null, lockState);
lock.expirationTime,
lockId);
return null;
} }
if (lock != null) {
logger.infofmt(
"Existing lock by request %s is timed out now %s (was valid until %s) lock: %s",
lock.requestLogId,
now,
lock.expirationTime,
lockId);
}
Lock newLock = create( Lock newLock = create(
resourceName, resourceName,
tld, tld,
@ -142,12 +202,12 @@ public class Lock extends ImmutableObject {
// Locks are not parented under an EntityGroupRoot (so as to avoid write contention) and // Locks are not parented under an EntityGroupRoot (so as to avoid write contention) and
// don't need to be backed up. // don't need to be backed up.
ofy().saveWithoutBackup().entity(newLock); ofy().saveWithoutBackup().entity(newLock);
logger.infofmt( return AcquireResult.create(now, lock, newLock, lockState);
"acquire succeeded %s lock: %s", }});
newLock,
lockId); logAcquireResult(acquireResult);
return newLock; lockMetrics.record(resourceName, tld, acquireResult.lockState());
}})); return Optional.ofNullable(acquireResult.newLock());
} }
/** Release the lock. */ /** Release the lock. */

View file

@ -0,0 +1,45 @@
// Copyright 2017 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.model.server;
import com.google.common.collect.ImmutableSet;
import google.registry.model.server.Lock.LockState;
import google.registry.monitoring.metrics.IncrementableMetric;
import google.registry.monitoring.metrics.LabelDescriptor;
import google.registry.monitoring.metrics.MetricRegistryImpl;
import javax.annotation.Nullable;
/** Records lock acquisition attempts so that lock contention can be monitored. */
class LockMetrics {

  /** Labels attached to every data point, in the order the values are passed to the metric. */
  private static final ImmutableSet<LabelDescriptor> REQUEST_LABELS =
      ImmutableSet.of(
          LabelDescriptor.create("tld", "TLD"),
          LabelDescriptor.create("resource", "resource name"),
          LabelDescriptor.create(
              "state", "The existing lock state (before attempting to acquire)."));

  /** Counter incremented once for each recorded acquisition attempt. */
  private static final IncrementableMetric ACQUIRE_REQUESTS =
      MetricRegistryImpl.getDefault()
          .newIncrementableMetric(
              "/lock/acquire_lock_requests",
              "Count of lock acquisition attempts",
              "count",
              REQUEST_LABELS);

  /**
   * Counts one acquisition attempt.
   *
   * @param resourceName name of the resource the lock guards
   * @param tld TLD the lock is scoped to; a null value is recorded as the label "null"
   * @param state state the existing lock was found in before the attempt
   */
  void record(String resourceName, @Nullable String tld, LockState state) {
    String tldLabel = String.valueOf(tld);
    String stateLabel = state.name();
    ACQUIRE_REQUESTS.increment(tldLabel, resourceName, stateLabel);
  }
}

View file

@ -14,12 +14,18 @@
package google.registry.model.server; package google.registry.model.server;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat; import static com.google.common.truth.Truth8.assertThat;
import static google.registry.model.server.Lock.LockState.FREE;
import static google.registry.model.server.Lock.LockState.IN_USE;
import static google.registry.model.server.Lock.LockState.OWNER_DIED;
import static google.registry.model.server.Lock.LockState.TIMED_OUT;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
import google.registry.model.ofy.Ofy; import google.registry.model.ofy.Ofy;
import google.registry.model.server.Lock.LockState;
import google.registry.testing.AppEngineRule; import google.registry.testing.AppEngineRule;
import google.registry.testing.ExceptionRule; import google.registry.testing.ExceptionRule;
import google.registry.testing.FakeClock; import google.registry.testing.FakeClock;
@ -53,63 +59,70 @@ public class LockTest {
@Rule @Rule
public final ExceptionRule thrown = new ExceptionRule(); public final ExceptionRule thrown = new ExceptionRule();
private Optional<Lock> acquire(String tld, Duration leaseLength) { private Optional<Lock> acquire(String tld, Duration leaseLength, LockState expectedLockState) {
return Lock.acquire(RESOURCE_NAME, tld, leaseLength, requestStatusChecker); Lock.lockMetrics = mock(LockMetrics.class);
Optional<Lock> lock = Lock.acquire(RESOURCE_NAME, tld, leaseLength, requestStatusChecker);
verify(Lock.lockMetrics).record(RESOURCE_NAME, tld, expectedLockState);
verifyNoMoreInteractions(Lock.lockMetrics);
Lock.lockMetrics = null;
return lock;
} }
@Before public void setUp() { @Before public void setUp() {
Lock.lockMetrics = null;
when(requestStatusChecker.getLogId()).thenReturn("current-request-id"); when(requestStatusChecker.getLogId()).thenReturn("current-request-id");
when(requestStatusChecker.isRunning("current-request-id")).thenReturn(true); when(requestStatusChecker.isRunning("current-request-id")).thenReturn(true);
} }
@Test @Test
public void testReleasedExplicitly() throws Exception { public void testReleasedExplicitly() throws Exception {
Optional<Lock> lock = acquire("", ONE_DAY); Optional<Lock> lock = acquire("", ONE_DAY, FREE);
assertThat(lock).isPresent(); assertThat(lock).isPresent();
// We can't get it again at the same time. // We can't get it again at the same time.
assertThat(acquire("", ONE_DAY)).isEmpty(); assertThat(acquire("", ONE_DAY, IN_USE)).isEmpty();
// But if we release it, it's available. // But if we release it, it's available.
lock.get().release(); lock.get().release();
assertThat(acquire("", ONE_DAY)).isPresent(); assertThat(acquire("", ONE_DAY, FREE)).isPresent();
} }
@Test @Test
public void testReleasedAfterTimeout() throws Exception { public void testReleasedAfterTimeout() throws Exception {
FakeClock clock = new FakeClock(); FakeClock clock = new FakeClock();
inject.setStaticField(Ofy.class, "clock", clock); inject.setStaticField(Ofy.class, "clock", clock);
assertThat(acquire("", TWO_MILLIS)).isPresent(); assertThat(acquire("", TWO_MILLIS, FREE)).isPresent();
// We can't get it again at the same time. // We can't get it again at the same time.
assertThat(acquire("", TWO_MILLIS)).isEmpty(); assertThat(acquire("", TWO_MILLIS, IN_USE)).isEmpty();
// A millisecond later we still can't get the lock. // A millisecond later we still can't get the lock.
clock.advanceOneMilli(); clock.advanceOneMilli();
assertThat(acquire("", TWO_MILLIS)).isEmpty(); assertThat(acquire("", TWO_MILLIS, IN_USE)).isEmpty();
// But two milliseconds later we can get it. // But two milliseconds later we can get it.
clock.advanceOneMilli(); clock.advanceOneMilli();
assertThat(acquire("", TWO_MILLIS)).isPresent(); assertThat(acquire("", TWO_MILLIS, TIMED_OUT)).isPresent();
} }
@Test @Test
public void testReleasedAfterRequestFinish() throws Exception { public void testReleasedAfterRequestFinish() throws Exception {
assertThat(acquire("", ONE_DAY)).isPresent(); assertThat(acquire("", ONE_DAY, FREE)).isPresent();
// We can't get it again while request is active // We can't get it again while request is active
assertThat(acquire("", ONE_DAY)).isEmpty(); assertThat(acquire("", ONE_DAY, IN_USE)).isEmpty();
// But if request is finished, we can get it. // But if request is finished, we can get it.
when(requestStatusChecker.isRunning("current-request-id")).thenReturn(false); when(requestStatusChecker.isRunning("current-request-id")).thenReturn(false);
assertThat(acquire("", ONE_DAY)).isPresent(); assertThat(acquire("", ONE_DAY, OWNER_DIED)).isPresent();
} }
@Test @Test
public void testTldsAreIndependent() throws Exception { public void testTldsAreIndependent() throws Exception {
Optional<Lock> lockA = acquire("a", ONE_DAY); Optional<Lock> lockA = acquire("a", ONE_DAY, FREE);
assertThat(lockA).isPresent(); assertThat(lockA).isPresent();
// For a different tld we can still get a lock with the same name. // For a different tld we can still get a lock with the same name.
Optional<Lock> lockB = acquire("b", ONE_DAY); Optional<Lock> lockB = acquire("b", ONE_DAY, FREE);
assertThat(lockB).isPresent(); assertThat(lockB).isPresent();
// We can't get lockB again at the same time. // We can't get lockB again at the same time.
assertThat(acquire("b", ONE_DAY)).isEmpty(); assertThat(acquire("b", ONE_DAY, IN_USE)).isEmpty();
// Releasing lockA has no effect on lockB (even though we are still using the "b" tld). // Releasing lockA has no effect on lockB (even though we are still using the "b" tld).
lockA.get().release(); lockA.get().release();
assertThat(acquire("b", ONE_DAY)).isEmpty(); assertThat(acquire("b", ONE_DAY, IN_USE)).isEmpty();
} }
@Test @Test