diff --git a/java/google/registry/backup/GcsDiffFileLister.java b/java/google/registry/backup/GcsDiffFileLister.java index 775fe540e..1b352244d 100644 --- a/java/google/registry/backup/GcsDiffFileLister.java +++ b/java/google/registry/backup/GcsDiffFileLister.java @@ -34,11 +34,11 @@ import google.registry.backup.BackupModule.Backups; import google.registry.config.RegistryConfig.Config; import google.registry.util.FormattingLogger; import java.io.IOException; -import java.util.HashMap; import java.util.Iterator; -import java.util.List; import java.util.Map; +import java.util.TreeMap; import java.util.concurrent.Callable; +import javax.annotation.Nullable; import javax.inject.Inject; import org.joda.time.DateTime; @@ -52,12 +52,53 @@ class GcsDiffFileLister { @Inject @Backups ListeningExecutorService executor; @Inject GcsDiffFileLister() {} - List listDiffFiles(DateTime fromTime) { + /** + * Traverses the sequence of diff files backwards from checkpointTime and inserts the file + * metadata into "sequence". Returns true if a complete sequence was discovered, false if one or + * more files are missing. + */ + private boolean constructDiffSequence( + Map> upperBoundTimesToMetadata, + DateTime fromTime, + DateTime lastTime, + TreeMap sequence) { + DateTime checkpointTime = lastTime; + while (isBeforeOrAt(fromTime, checkpointTime)) { + GcsFileMetadata metadata; + if (upperBoundTimesToMetadata.containsKey(checkpointTime)) { + metadata = Futures.getUnchecked(upperBoundTimesToMetadata.get(checkpointTime)); + } else { + String filename = DIFF_FILE_PREFIX + checkpointTime; + logger.info("Patching GCS list; discovered file " + filename); + metadata = getMetadata(filename); + + // If we hit a gap, quit. + if (metadata == null) { + logger.infofmt( + "Gap discovered in sequence terminating at %s, missing file %s", + sequence.lastKey(), + filename); + logger.infofmt("Found sequence from %s to %s", checkpointTime, lastTime); + return false; + } + } + sequence.put(checkpointTime, metadata); + checkpointTime = getLowerBoundTime(metadata); + } + logger.infofmt("Found sequence from %s to %s", checkpointTime, lastTime); + return true; + } + + ImmutableList listDiffFiles(DateTime fromTime, @Nullable DateTime toTime) { logger.info("Requested restore from time: " + fromTime); + if (toTime != null) { + logger.info(" Until time: " + toTime); + } // List all of the diff files on GCS and build a map from each file's upper checkpoint time // (extracted from the filename) to its asynchronously-loaded metadata, keeping only files with // an upper checkpoint time > fromTime. - Map> upperBoundTimesToMetadata = new HashMap<>(); + TreeMap> upperBoundTimesToMetadata + = new TreeMap<>(); Iterator listItems; try { // TODO(b/23554360): Use a smarter prefixing strategy to speed this up. @@ -71,44 +112,69 @@ class GcsDiffFileLister { while (listItems.hasNext()) { final String filename = listItems.next().getName(); DateTime upperBoundTime = DateTime.parse(filename.substring(DIFF_FILE_PREFIX.length())); - if (isBeforeOrAt(fromTime, upperBoundTime)) { + if (isInRange(upperBoundTime, fromTime, toTime)) { upperBoundTimesToMetadata.put(upperBoundTime, executor.submit( new Callable() { @Override public GcsFileMetadata call() throws Exception { return getMetadata(filename); }})); + lastUpperBoundTime = latestOf(upperBoundTime, lastUpperBoundTime); } - lastUpperBoundTime = latestOf(upperBoundTime, lastUpperBoundTime); } if (upperBoundTimesToMetadata.isEmpty()) { logger.info("No files found"); return ImmutableList.of(); } + + // Reconstruct the sequence of files by traversing backwards from "lastUpperBoundTime" (i.e. the + // last file that we found) and finding its previous file until we either run out of files or + // get to one that preceeds "fromTime". + // // GCS file listing is eventually consistent, so it's possible that we are missing a file. The // metadata of a file is sufficient to identify the preceding file, so if we start from the // last file and work backwards we can verify that we have no holes in our chain (although we // may be missing files at the end). - ImmutableList.Builder filesBuilder = new ImmutableList.Builder<>(); + TreeMap sequence = new TreeMap<>(); logger.info("Restoring until: " + lastUpperBoundTime); - DateTime checkpointTime = lastUpperBoundTime; - while (checkpointTime.isAfter(fromTime)) { - GcsFileMetadata metadata; - if (upperBoundTimesToMetadata.containsKey(checkpointTime)) { - metadata = Futures.getUnchecked(upperBoundTimesToMetadata.get(checkpointTime)); - } else { - String filename = DIFF_FILE_PREFIX + checkpointTime; - logger.info("Patching GCS list; discovered file " + filename); - metadata = getMetadata(filename); - checkState(metadata != null, "Could not read metadata for file %s", filename); + boolean inconsistentFileSet = !constructDiffSequence( + upperBoundTimesToMetadata, fromTime, lastUpperBoundTime, sequence); + + // Verify that all of the elements in the original set are represented in the sequence. If we + // find anything that's not represented, construct a sequence for it. + boolean checkForMoreExtraDiffs = true; // Always loop at least once. + while (checkForMoreExtraDiffs) { + checkForMoreExtraDiffs = false; + for (DateTime key : upperBoundTimesToMetadata.descendingKeySet()) { + if (!isInRange(key, fromTime, toTime)) { + break; + } + if (!sequence.containsKey(key)) { + constructDiffSequence(upperBoundTimesToMetadata, fromTime, key, sequence); + checkForMoreExtraDiffs = true; + inconsistentFileSet = true; + break; + } } - filesBuilder.add(metadata); - checkpointTime = getLowerBoundTime(metadata); } - ImmutableList files = filesBuilder.build().reverse(); - logger.info("Actual restore from time: " + getLowerBoundTime(files.get(0))); - logger.infofmt("Found %d files to restore", files.size()); - return files; + + checkState( + !inconsistentFileSet, + "Unable to compute commit diff history, there are either gaps or forks in the history " + + "file set. Check log for details."); + + logger.info("Actual restore from time: " + getLowerBoundTime(sequence.firstEntry().getValue())); + logger.infofmt("Found %d files to restore", sequence.size()); + return ImmutableList.copyOf(sequence.values()); + } + + /** + * Returns true if 'time' is in range of 'start' and 'end'. + * + *

If 'end' is null, returns true if 'time' is after 'start'. + */ + private boolean isInRange(DateTime time, DateTime start, @Nullable DateTime end) { + return isBeforeOrAt(start, time) && (end == null || isBeforeOrAt(time, end)); } private DateTime getLowerBoundTime(GcsFileMetadata metadata) { diff --git a/java/google/registry/backup/RestoreCommitLogsAction.java b/java/google/registry/backup/RestoreCommitLogsAction.java index e7df72631..9ebe12c40 100644 --- a/java/google/registry/backup/RestoreCommitLogsAction.java +++ b/java/google/registry/backup/RestoreCommitLogsAction.java @@ -87,7 +87,7 @@ public class RestoreCommitLogsAction implements Runnable { if (dryRun) { logger.info("Running in dryRun mode"); } - List diffFiles = diffLister.listDiffFiles(fromTime); + List diffFiles = diffLister.listDiffFiles(fromTime, null); if (diffFiles.isEmpty()) { logger.info("Nothing to restore"); return; diff --git a/javatests/google/registry/backup/BUILD b/javatests/google/registry/backup/BUILD index 06140331a..c9417ca44 100644 --- a/javatests/google/registry/backup/BUILD +++ b/javatests/google/registry/backup/BUILD @@ -22,6 +22,7 @@ java_library( "@com_google_appengine_tools_appengine_gcs_client", "@com_google_code_findbugs_jsr305", "@com_google_guava", + "@com_google_guava_testlib", "@com_google_truth", "@javax_servlet_api", "@joda_time", diff --git a/javatests/google/registry/backup/GcsDiffFileListerTest.java b/javatests/google/registry/backup/GcsDiffFileListerTest.java index 31e77be96..3d78875df 100644 --- a/javatests/google/registry/backup/GcsDiffFileListerTest.java +++ b/javatests/google/registry/backup/GcsDiffFileListerTest.java @@ -21,6 +21,7 @@ import static google.registry.backup.BackupUtils.GcsMetadataKeys.LOWER_BOUND_CHE import static google.registry.backup.ExportCommitLogDiffAction.DIFF_FILE_PREFIX; import static java.lang.reflect.Proxy.newProxyInstance; import static org.joda.time.DateTimeZone.UTC; +import static org.junit.Assert.fail; import com.google.appengine.tools.cloudstorage.GcsFileMetadata; import com.google.appengine.tools.cloudstorage.GcsFileOptions; @@ -31,13 +32,17 @@ import com.google.appengine.tools.cloudstorage.ListItem; import com.google.appengine.tools.cloudstorage.ListResult; import com.google.common.base.Function; import com.google.common.collect.Iterators; +import com.google.common.testing.TestLogHandler; import google.registry.testing.AppEngineRule; +import java.io.IOException; import java.lang.reflect.InvocationHandler; import java.lang.reflect.Method; import java.nio.ByteBuffer; import java.util.Iterator; import java.util.List; import java.util.concurrent.Callable; +import java.util.logging.LogRecord; +import java.util.logging.Logger; import org.joda.time.DateTime; import org.junit.Before; import org.junit.Rule; @@ -54,6 +59,7 @@ public class GcsDiffFileListerTest { final DateTime now = DateTime.now(UTC); final GcsDiffFileLister diffLister = new GcsDiffFileLister(); final GcsService gcsService = GcsServiceFactory.createGcsService(); + private final TestLogHandler logHandler = new TestLogHandler(); @Rule public final AppEngineRule appEngine = AppEngineRule.builder() @@ -73,6 +79,7 @@ public class GcsDiffFileListerTest { .build(), ByteBuffer.wrap(new byte[]{1, 2, 3})); } + Logger.getLogger(GcsDiffFileLister.class.getCanonicalName()).addHandler(logHandler); } private Iterable extractTimesFromDiffFiles(List diffFiles) { @@ -86,18 +93,32 @@ public class GcsDiffFileListerTest { }}); } - @Test - public void testList_noFilesFound() { - DateTime fromTime = now.plusMillis(1); - assertThat(extractTimesFromDiffFiles(diffLister.listDiffFiles(fromTime))).isEmpty(); + private Iterable listDiffFiles(DateTime fromTime, DateTime toTime) { + return extractTimesFromDiffFiles(diffLister.listDiffFiles(fromTime, toTime)); + } + + private void addGcsFile(int fileAge, int prevAge) throws IOException { + gcsService.createOrReplace( + new GcsFilename(GCS_BUCKET, DIFF_FILE_PREFIX + now.minusMinutes(fileAge)), + new GcsFileOptions.Builder() + .addUserMetadata(LOWER_BOUND_CHECKPOINT, now.minusMinutes(prevAge).toString()) + .build(), + ByteBuffer.wrap(new byte[]{1, 2, 3})); + } + + private void assertLogContains(String message) { + for (LogRecord entry : logHandler.getStoredLogRecords()) { + if (entry.getMessage().contains(message)) { + return; + } + } + fail("No log entry contains " + message); } @Test - public void testList_findsFiles_backToFromTimeExclusive() { - DateTime fromTime = now.minusMinutes(2); - assertThat(extractTimesFromDiffFiles(diffLister.listDiffFiles(fromTime))) - .containsExactly(now.minusMinutes(1), now) - .inOrder(); + public void testList_noFilesFound() { + DateTime fromTime = now.plusMillis(1); + assertThat(listDiffFiles(fromTime, null)).isEmpty(); } @Test @@ -135,7 +156,7 @@ public class GcsDiffFileListerTest { }}); DateTime fromTime = now.minusMinutes(4).minusSeconds(1); // Request all files with checkpoint > fromTime. - assertThat(extractTimesFromDiffFiles(diffLister.listDiffFiles(fromTime))) + assertThat(listDiffFiles(fromTime, null)) .containsExactly( now.minusMinutes(4), now.minusMinutes(3), @@ -144,4 +165,89 @@ public class GcsDiffFileListerTest { now) .inOrder(); } + + @Test + public void testList_failsOnFork() throws Exception { + // We currently have files for now-4m ... now, construct the following sequence: + // now-8m <- now-7m <- now-6m now-5m <- now-4m ... now + // ^___________________________| + addGcsFile(5, 8); + for (int i = 6; i < 9; ++i) { + addGcsFile(i, i + 1); + } + + try { + listDiffFiles(now.minusMinutes(9), null); + fail("Should have thrown IllegalStateException."); + } catch (IllegalStateException expected) { + } + assertLogContains(String.format( + "Found sequence from %s to %s", now.minusMinutes(9), now)); + assertLogContains(String.format( + "Found sequence from %s to %s", now.minusMinutes(9), now.minusMinutes(6))); + } + + @Test + public void testList_boundaries() throws Exception { + assertThat(listDiffFiles(now.minusMinutes(4), now)) + .containsExactly( + now.minusMinutes(4), + now.minusMinutes(3), + now.minusMinutes(2), + now.minusMinutes(1), + now) + .inOrder(); + + } + + @Test + public void testList_failsOnGaps() throws Exception { + // We currently have files for now-4m ... now, construct the following sequence: + // now-8m <- now-7m <- now-6m {missing} <- now-4m ... now + for (int i = 6; i < 9; ++i) { + addGcsFile(i, i + 1); + } + + try { + listDiffFiles(now.minusMinutes(9), null); + fail("Should have thrown IllegalStateException."); + } catch (IllegalStateException expected) { + } + assertLogContains(String.format( + "Gap discovered in sequence terminating at %s, missing file commit_diff_until_%s", + now, now.minusMinutes(5))); + assertLogContains(String.format( + "Found sequence from %s to %s", now.minusMinutes(9), now.minusMinutes(6))); + assertLogContains(String.format( + "Found sequence from %s to %s", now.minusMinutes(5), now)); + + // Verify that we can work around the gap. + DateTime fromTime = now.minusMinutes(4).minusSeconds(1); + assertThat(listDiffFiles(fromTime, null)) + .containsExactly( + now.minusMinutes(4), + now.minusMinutes(3), + now.minusMinutes(2), + now.minusMinutes(1), + now) + .inOrder(); + assertThat(listDiffFiles( + now.minusMinutes(8).minusSeconds(1), now.minusMinutes(6).plusSeconds(1))) + .containsExactly( + now.minusMinutes(8), + now.minusMinutes(7), + now.minusMinutes(6)) + .inOrder(); + } + + @Test + public void testList_toTimeSpecified() { + assertThat(listDiffFiles( + now.minusMinutes(4).minusSeconds(1), now.minusMinutes(2).plusSeconds(1))) + .containsExactly( + now.minusMinutes(4), + now.minusMinutes(3), + now.minusMinutes(2)) + .inOrder(); + } }