// Copyright 2016 The Nomulus Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package google.registry.backup; import static com.google.common.collect.ImmutableList.copyOf; import static com.google.common.collect.Iterables.concat; import static com.google.common.collect.Iterables.transform; import static google.registry.model.ofy.CommitLogBucket.getBucketKey; import static google.registry.request.Action.Method.POST; import com.google.common.base.Function; import com.googlecode.objectify.Key; import com.googlecode.objectify.Work; import com.googlecode.objectify.cmd.Loader; import com.googlecode.objectify.cmd.Query; import google.registry.config.ConfigModule.Config; import google.registry.model.ofy.CommitLogBucket; import google.registry.model.ofy.CommitLogManifest; import google.registry.model.ofy.CommitLogMutation; import google.registry.model.ofy.Ofy; import google.registry.request.Action; import google.registry.request.Parameter; import google.registry.util.Clock; import google.registry.util.FormattingLogger; import java.util.List; import javax.inject.Inject; import org.joda.time.DateTime; import org.joda.time.Duration; /** * Task that garbage collects old {@link CommitLogManifest} entities. * *
Once commit logs have been written to GCS, we don't really need them in datastore anymore, * except to reconstruct point-in-time snapshots of the database. But that functionality is not * useful after a certain amount of time, e.g. thirty days. So this task runs periodically to delete * the old data. * *
This task should be invoked in a fanout style for each {@link CommitLogBucket} ID. It then * queries {@code CommitLogManifest} entities older than the threshold, using an ancestor query * operating under the assumption under the assumption that the ID is the transaction timestamp in * milliseconds since the UNIX epoch. It then deletes them inside a transaction, along with their * associated {@link CommitLogMutation} entities. * *
If additional data is leftover, we show a warning at the INFO level, because it's not * actionable. If anything, it just shows that the system was under high load thirty days ago, and * therefore serves little use as an early warning to increase the number of buckets. * *
Before running, this task will perform an eventually consistent count query outside of a * transaction to see how much data actually exists to delete. If it's less than a tenth of * {@link #maxDeletes}, then we don't bother running the task. This is to minimize contention on the * bucket and avoid wasting resources. * *
This entire operation operates on a single entity group, within a single transaction. Since * there's a 10mB upper bound on transaction size and a four minute time limit, we can only delete * so many commit logs at once. So given the above constraints, five hundred would make a safe * default value for {@code maxDeletes}. See {@linkplain * google.registry.config.ConfigModule#provideCommitLogMaxDeletes() commitLogMaxDeletes} * for further documentation on this matter. * *
Finally, we need to pick an appropriate cron interval time for this task. Since a bucket * represents a single datastore entity group, it's only guaranteed to have one transaction per * second. So we just need to divide {@code maxDeletes} by sixty to get an appropriate minute * interval. Assuming {@code maxDeletes} is five hundred, this rounds up to ten minutes, which we'll * double, since this task can always catch up in off-peak hours. * *
There's little harm in keeping the data around a little longer, since this task is engaged in
* a zero-sum resource struggle with the EPP transactions. Each transaction we perform here, is one
* less transaction that's available to EPP. Furthermore, a well-administered system should have
* enough buckets that we'll never brush up against the 1/s entity group transaction SLA.
*/
@Action(path = "/_dr/task/deleteOldCommitLogs", method = POST, automaticallyPrintOk = true)
public final class DeleteOldCommitLogsAction implements Runnable {
private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();
@Inject Clock clock;
@Inject Ofy ofy;
@Inject @Parameter("bucket") int bucketNum;
@Inject @Config("commitLogDatastoreRetention") Duration maxAge;
@Inject @Config("commitLogMaxDeletes") int maxDeletes;
@Inject DeleteOldCommitLogsAction() {}
@Override
public void run() {
if (!doesEnoughDataExistThatThisTaskIsWorthRunning()) {
return;
}
Integer deleted = ofy.transact(new Work