Check that CommitLogManifest exists before deletion

It appears that there are some possible flows where the reducer runs twice on
the same key. Either because of some error in a subsequent key that makes the
entire shard become ignored and retried, or possible some obscure error outside
of the transaction on that key.

The result however is that sometimes the reducer runs on a key that has already
been deleted. We need to check for that to prevent a null pointer exception.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=166112173
This commit is contained in:
guyben 2017-08-22 14:13:00 -07:00 committed by Ben McIlwain
parent 91d4fdb9a8
commit f9a2415954
2 changed files with 46 additions and 13 deletions

View file

@ -20,6 +20,7 @@ java_library(
"@com_google_appengine_api_1_0_sdk",
"@com_google_appengine_tools_appengine_gcs_client",
"@com_google_appengine_tools_appengine_mapreduce",
"@com_google_auto_value",
"@com_google_code_findbugs_jsr305",
"@com_google_dagger",
"@com_google_guava",

View file

@ -25,6 +25,7 @@ import static java.lang.Boolean.FALSE;
import com.google.appengine.tools.mapreduce.Mapper;
import com.google.appengine.tools.mapreduce.Reducer;
import com.google.appengine.tools.mapreduce.ReducerInput;
import com.google.auto.value.AutoValue;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
@ -168,7 +169,7 @@ public final class DeleteOldCommitLogsAction implements Runnable {
* OK to delete this manifestKey. If even one source returns "false" (meaning "it's not OK to
* delete this manifest") then it won't be deleted.
*/
private static class DeleteOldCommitLogsReducer
static class DeleteOldCommitLogsReducer
extends Reducer<Key<CommitLogManifest>, Boolean, Void> {
private static final long serialVersionUID = -4918760187627937268L;
@ -176,6 +177,24 @@ public final class DeleteOldCommitLogsAction implements Runnable {
private final DateTime deletionThreshold;
private final boolean isDryRun;
@AutoValue
abstract static class DeletionResult {
enum Status {
ALREADY_DELETED,
AFTER_THRESHOLD,
SUCCESS;
}
public abstract Status status();
public abstract int numDeleted();
static DeletionResult create(Status status, int numDeleted) {
return
new AutoValue_DeleteOldCommitLogsAction_DeleteOldCommitLogsReducer_DeletionResult(
status, numDeleted);
}
}
DeleteOldCommitLogsReducer(DateTime deletionThreshold, boolean isDryRun) {
this.deletionThreshold = deletionThreshold;
this.isDryRun = isDryRun;
@ -190,15 +209,22 @@ public final class DeleteOldCommitLogsAction implements Runnable {
return;
}
Integer deletedCount = ofy().transactNew(new Work<Integer>() {
DeletionResult deletionResult = ofy().transactNew(new Work<DeletionResult>() {
@Override
public Integer run() {
// Doing a sanity check on the date. This is the only place we load the CommitLogManifest,
public DeletionResult run() {
CommitLogManifest manifest = ofy().load().key(manifestKey).now();
// It is possible that the same manifestKey was run twice, if a shard had to be restarted
// or some weird failure. If this happens, we want to exit immediately.
// Note that this can never happen in dryRun.
if (manifest == null) {
return DeletionResult.create(DeletionResult.Status.ALREADY_DELETED, 0);
}
// Doing a sanity check on the date. This is the only place we use the CommitLogManifest,
// so maybe removing this test will improve performance. However, unless it's proven that
// the performance boost is significant (and we've tested this enough to be sure it never
// happens)- the safty of "let's not delete stuff we need from prod" is more important.
if (ofy().load().key(manifestKey).now().getCommitTime().isAfter(deletionThreshold)) {
return 0;
if (manifest.getCommitTime().isAfter(deletionThreshold)) {
return DeletionResult.create(DeletionResult.Status.AFTER_THRESHOLD, 0);
}
Iterable<Key<CommitLogMutation>> commitLogMutationKeys = ofy().load()
.type(CommitLogMutation.class)
@ -214,16 +240,22 @@ public final class DeleteOldCommitLogsAction implements Runnable {
if (!isDryRun) {
ofy().deleteWithoutBackup().keys(keysToDelete);
}
return keysToDelete.size();
return DeletionResult.create(DeletionResult.Status.SUCCESS, keysToDelete.size());
}
});
if (deletedCount > 0) {
switch (deletionResult.status()) {
case SUCCESS:
getContext().incrementCounter("old commit log manifests deleted");
getContext().incrementCounter("total entities deleted", deletedCount);
} else {
getContext().incrementCounter("total entities deleted", deletionResult.numDeleted());
break;
case ALREADY_DELETED:
getContext().incrementCounter("attempts to delete an already deleted manifest");
break;
case AFTER_THRESHOLD:
logger.severefmt("Won't delete CommitLogManifest %s that is too recent.", manifestKey);
getContext().incrementCounter("manifests incorrectly assigned for deletion (SEE LOGS)");
break;
}
}
}