From 34a8a940833d616a8e228508cefc03361e6ad8d6 Mon Sep 17 00:00:00 2001 From: Weimin Yu Date: Fri, 8 Mar 2024 17:08:09 -0500 Subject: [PATCH] Add BSA validation job (#2356) * Add BSA validation job Add the BsaValidateAction class with a first check (for inconsistency between downloaded and persisted labels). * Addressing comments * Addressing reviews --- .../registry/bsa/BsaValidateAction.java | 162 ++++++++++++++++++ .../bsa/persistence/DownloadScheduler.java | 21 ++- .../registry/bsa/persistence/Queries.java | 20 ++- .../registry/env/common/bsa/WEB-INF/web.xml | 6 + .../default/WEB-INF/cloud-scheduler-tasks.xml | 12 ++ .../registry/module/RequestComponent.java | 3 + .../module/bsa/BsaRequestComponent.java | 3 + .../registry/bsa/BsaValidateActionTest.java | 155 +++++++++++++++++ .../registry/bsa/persistence/QueriesTest.java | 12 ++ .../registry/module/bsa/bsa_routing.txt | 1 + 10 files changed, 383 insertions(+), 12 deletions(-) create mode 100644 core/src/main/java/google/registry/bsa/BsaValidateAction.java create mode 100644 core/src/test/java/google/registry/bsa/BsaValidateActionTest.java diff --git a/core/src/main/java/google/registry/bsa/BsaValidateAction.java b/core/src/main/java/google/registry/bsa/BsaValidateAction.java new file mode 100644 index 000000000..f190fec57 --- /dev/null +++ b/core/src/main/java/google/registry/bsa/BsaValidateAction.java @@ -0,0 +1,162 @@ +// Copyright 2024 The Nomulus Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package google.registry.bsa; + +import static com.google.common.base.Preconditions.checkArgument; +import static google.registry.bsa.persistence.DownloadScheduler.fetchMostRecentDownloadJobIdIfCompleted; +import static google.registry.bsa.persistence.Queries.batchReadBsaLabelText; +import static google.registry.request.Action.Method.GET; +import static google.registry.request.Action.Method.POST; +import static javax.servlet.http.HttpServletResponse.SC_OK; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; +import com.google.common.collect.Sets.SetView; +import com.google.common.flogger.FluentLogger; +import google.registry.config.RegistryConfig.Config; +import google.registry.request.Action; +import google.registry.request.Response; +import google.registry.request.auth.Auth; +import java.util.Optional; +import java.util.stream.Stream; +import javax.inject.Inject; + +/** Validates the BSA data in the database against the most recent block lists. */ +@Action( + service = Action.Service.BSA, + path = BsaValidateAction.PATH, + method = {GET, POST}, + auth = Auth.AUTH_API_ADMIN) +public class BsaValidateAction implements Runnable { + + private static final FluentLogger logger = FluentLogger.forEnclosingClass(); + + static final String PATH = "/_dr/task/bsaValidate"; + private final GcsClient gcsClient; + private final int transactionBatchSize; + private final BsaLock bsaLock; + private final Response response; + + @Inject + BsaValidateAction( + GcsClient gcsClient, + @Config("bsaTxnBatchSize") int transactionBatchSize, + BsaLock bsaLock, + Response response) { + this.gcsClient = gcsClient; + this.transactionBatchSize = transactionBatchSize; + this.bsaLock = bsaLock; + this.response = response; + } + + @Override + public void run() { + try { + if (!bsaLock.executeWithLock(this::runWithinLock)) { + logger.atInfo().log("Cannot execute action. Other BSA related task is executing."); + // TODO(blocked by go/r3pr/2354): send email + } + } catch (Throwable throwable) { + logger.atWarning().withCause(throwable).log("Failed to update block lists."); + // TODO(blocked by go/r3pr/2354): send email + } + // Always return OK. No need to retry since all queries and GCS accesses are already + // implicitly retried. + response.setStatus(SC_OK); + } + + /** Executes the validation action while holding the BSA lock. */ + Void runWithinLock() { + Optional downloadJobName = fetchMostRecentDownloadJobIdIfCompleted(); + if (downloadJobName.isEmpty()) { + logger.atInfo().log("Cannot validate: latest download not found or unfinished."); + return null; + } + logger.atInfo().log("Validating BSA with latest download: %s", downloadJobName.get()); + + ImmutableList.Builder errors = new ImmutableList.Builder(); + errors.addAll(checkBsaLabels(downloadJobName.get())); + + emailValidationResults(downloadJobName.get(), errors.build()); + logger.atInfo().log("Finished validating BSA with latest download: %s", downloadJobName.get()); + return null; + } + + void emailValidationResults(String job, ImmutableList errors) { + // TODO(blocked by go/r3pr/2354): send email + } + + ImmutableList checkBsaLabels(String jobName) { + ImmutableSet downloadedLabels = fetchDownloadedLabels(jobName); + ImmutableSet persistedLabels = fetchPersistedLabels(transactionBatchSize); + ImmutableList.Builder errors = new ImmutableList.Builder<>(); + + int nErrorExamples = 10; + SetView missingLabels = Sets.difference(downloadedLabels, persistedLabels); + if (!missingLabels.isEmpty()) { + String examples = Joiner.on(',').join(Iterables.limit(missingLabels, nErrorExamples)); + String errorMessage = + String.format( + "Found %d missing labels in the DB. Examples: [%s]", missingLabels.size(), examples); + logger.atInfo().log(errorMessage); + errors.add(errorMessage); + } + SetView unexpectedLabels = Sets.difference(persistedLabels, downloadedLabels); + if (!unexpectedLabels.isEmpty()) { + String examples = Joiner.on(',').join(Iterables.limit(unexpectedLabels, nErrorExamples)); + String errorMessage = + String.format( + "Found %d unexpected labels in the DB. Examples: [%s]", + unexpectedLabels.size(), examples); + logger.atInfo().log(errorMessage); + errors.add(errorMessage); + } + return errors.build(); + } + + /** Returns unique labels across all block lists in the download specified by {@code jobName}. */ + ImmutableSet fetchDownloadedLabels(String jobName) { + ImmutableSet.Builder labelsBuilder = new ImmutableSet.Builder<>(); + for (BlockListType blockListType : BlockListType.values()) { + try (Stream lines = gcsClient.readBlockList(jobName, blockListType)) { + lines.skip(1).map(BsaValidateAction::parseBlockListLine).forEach(labelsBuilder::add); + } + } + return labelsBuilder.build(); + } + + ImmutableSet fetchPersistedLabels(int batchSize) { + ImmutableSet.Builder labelsBuilder = new ImmutableSet.Builder<>(); + ImmutableList batch; + Optional lastRead = Optional.empty(); + do { + batch = batchReadBsaLabelText(lastRead, batchSize); + batch.forEach(labelsBuilder::add); + if (!batch.isEmpty()) { + lastRead = Optional.of(Iterables.getLast(batch)); + } + } while (batch.size() == batchSize); + return labelsBuilder.build(); + } + + static String parseBlockListLine(String line) { + int firstComma = line.indexOf(','); + checkArgument(firstComma > 0, "Invalid block list line: %s", line); + return line.substring(0, firstComma); + } +} diff --git a/core/src/main/java/google/registry/bsa/persistence/DownloadScheduler.java b/core/src/main/java/google/registry/bsa/persistence/DownloadScheduler.java index bfe121013..e1e32b67a 100644 --- a/core/src/main/java/google/registry/bsa/persistence/DownloadScheduler.java +++ b/core/src/main/java/google/registry/bsa/persistence/DownloadScheduler.java @@ -30,7 +30,6 @@ import google.registry.util.Clock; import java.util.Objects; import java.util.Optional; import javax.inject.Inject; -import org.joda.time.DateTime; import org.joda.time.Duration; /** @@ -115,16 +114,6 @@ public final class DownloadScheduler { }); } - Optional latestCompletedJobTime() { - return tm().transact( - () -> { - return fetchTwoMostRecentDownloads().stream() - .filter(job -> Objects.equals(job.getStage(), DONE)) - .map(BsaDownload::getCreationTime) - .findFirst(); - }); - } - private boolean isTimeAgain(BsaDownload mostRecent, Duration interval) { return mostRecent.getCreationTime().plus(interval).minus(CRON_JITTER).isBefore(clock.nowUtc()); } @@ -163,4 +152,14 @@ public final class DownloadScheduler { static Optional fetchMostRecentDownload() { return fetchTwoMostRecentDownloads().stream().findFirst(); } + + /** + * Returns the most recent download {@code jobName} if it has been fully processed, and {@code + * empty} if the download is still being processed. + */ + public static Optional fetchMostRecentDownloadJobIdIfCompleted() { + return fetchMostRecentDownload() + .filter(bsaDownload -> Objects.equals(bsaDownload.getStage(), DONE)) + .map(BsaDownload::getJobName); + } } diff --git a/core/src/main/java/google/registry/bsa/persistence/Queries.java b/core/src/main/java/google/registry/bsa/persistence/Queries.java index e92e764a7..0ee136917 100644 --- a/core/src/main/java/google/registry/bsa/persistence/Queries.java +++ b/core/src/main/java/google/registry/bsa/persistence/Queries.java @@ -30,15 +30,33 @@ import java.util.stream.Stream; import org.joda.time.DateTime; /** Helpers for querying BSA JPA entities. */ -class Queries { +public final class Queries { private Queries() {} + /** + * Entity objects that may be updated in the same query must be detached. See {@code + * JpaTransactionManagerImpl}. + */ private static Object detach(Object obj) { em().detach(obj); return obj; } + public static ImmutableList batchReadBsaLabelText( + Optional lastRead, int batchSize) { + + return ImmutableList.copyOf( + bsaQuery( + () -> + em().createQuery( + "SELECT b.label FROM BsaLabel b WHERE b.label > :lastRead ORDER BY b.label", + String.class) + .setParameter("lastRead", lastRead.orElse("")) + .setMaxResults(batchSize) + .getResultList())); + } + static Stream queryBsaUnblockableDomainByLabels( ImmutableCollection labels) { return ((Stream) diff --git a/core/src/main/java/google/registry/env/common/bsa/WEB-INF/web.xml b/core/src/main/java/google/registry/env/common/bsa/WEB-INF/web.xml index 88dc4904f..980da4059 100644 --- a/core/src/main/java/google/registry/env/common/bsa/WEB-INF/web.xml +++ b/core/src/main/java/google/registry/env/common/bsa/WEB-INF/web.xml @@ -25,6 +25,12 @@ /_dr/task/bsaRefresh + + + bsa-servlet + /_dr/task/bsaValidate + + bsa-servlet diff --git a/core/src/main/java/google/registry/env/production/default/WEB-INF/cloud-scheduler-tasks.xml b/core/src/main/java/google/registry/env/production/default/WEB-INF/cloud-scheduler-tasks.xml index 7f37b5e48..e6ed352ac 100644 --- a/core/src/main/java/google/registry/env/production/default/WEB-INF/cloud-scheduler-tasks.xml +++ b/core/src/main/java/google/registry/env/production/default/WEB-INF/cloud-scheduler-tasks.xml @@ -297,6 +297,18 @@ 15,45 * * * * + + + bsaValidate + bsa + + Validates the processed BSA data in the database against the original + block lists. + + + 50 9,21 * * * + + uploadBsaUnavailableNames diff --git a/core/src/main/java/google/registry/module/RequestComponent.java b/core/src/main/java/google/registry/module/RequestComponent.java index 3765f91e4..954aecb5f 100644 --- a/core/src/main/java/google/registry/module/RequestComponent.java +++ b/core/src/main/java/google/registry/module/RequestComponent.java @@ -29,6 +29,7 @@ import google.registry.batch.SendExpiringCertificateNotificationEmailAction; import google.registry.batch.WipeOutContactHistoryPiiAction; import google.registry.bsa.BsaDownloadAction; import google.registry.bsa.BsaRefreshAction; +import google.registry.bsa.BsaValidateAction; import google.registry.bsa.UploadBsaUnavailableDomainsAction; import google.registry.cron.CronModule; import google.registry.cron.TldFanoutAction; @@ -167,6 +168,8 @@ interface RequestComponent { BsaRefreshAction bsaRefreshAction(); + BsaValidateAction bsaValidateAction(); + CannedScriptExecutionAction cannedScriptExecutionAction(); CheckApiAction checkApiAction(); diff --git a/core/src/main/java/google/registry/module/bsa/BsaRequestComponent.java b/core/src/main/java/google/registry/module/bsa/BsaRequestComponent.java index d9811f183..1c706f72c 100644 --- a/core/src/main/java/google/registry/module/bsa/BsaRequestComponent.java +++ b/core/src/main/java/google/registry/module/bsa/BsaRequestComponent.java @@ -18,6 +18,7 @@ import dagger.Module; import dagger.Subcomponent; import google.registry.bsa.BsaDownloadAction; import google.registry.bsa.BsaRefreshAction; +import google.registry.bsa.BsaValidateAction; import google.registry.bsa.UploadBsaUnavailableDomainsAction; import google.registry.request.Modules.UrlConnectionServiceModule; import google.registry.request.RequestComponentBuilder; @@ -32,6 +33,8 @@ interface BsaRequestComponent { BsaRefreshAction bsaRefreshAction(); + BsaValidateAction bsaValidateAction(); + UploadBsaUnavailableDomainsAction uploadBsaUnavailableDomains(); @Subcomponent.Builder diff --git a/core/src/test/java/google/registry/bsa/BsaValidateActionTest.java b/core/src/test/java/google/registry/bsa/BsaValidateActionTest.java new file mode 100644 index 000000000..8813b82b7 --- /dev/null +++ b/core/src/test/java/google/registry/bsa/BsaValidateActionTest.java @@ -0,0 +1,155 @@ +// Copyright 2024 The Nomulus Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package google.registry.bsa; + +import static com.google.common.truth.Truth.assertThat; + +import com.google.cloud.storage.BlobId; +import com.google.cloud.storage.contrib.nio.testing.LocalStorageHelper; +import com.google.common.base.Joiner; +import google.registry.bsa.persistence.BsaTestingUtils; +import google.registry.gcs.GcsUtils; +import google.registry.persistence.transaction.JpaTestExtensions; +import google.registry.persistence.transaction.JpaTestExtensions.JpaIntegrationWithCoverageExtension; +import google.registry.request.Response; +import google.registry.testing.FakeClock; +import org.joda.time.DateTime; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +/** Unit tests for {@link BsaValidateAction}. */ +@ExtendWith(MockitoExtension.class) +public class BsaValidateActionTest { + + private static final String DOWNLOAD_JOB_NAME = "job"; + + FakeClock fakeClock = new FakeClock(DateTime.parse("2023-11-09T02:08:57.880Z")); + + @RegisterExtension + final JpaIntegrationWithCoverageExtension jpa = + new JpaTestExtensions.Builder().withClock(fakeClock).buildIntegrationWithCoverageExtension(); + + @Mock BsaLock bsaLock; + + @Mock Response response; + + private GcsClient gcsClient; + + private BsaValidateAction action; + + @BeforeEach + void setup() { + gcsClient = + new GcsClient(new GcsUtils(LocalStorageHelper.getOptions()), "my-bucket", "SHA-256"); + action = new BsaValidateAction(gcsClient, /* transactionBatchSize= */ 500, bsaLock, response); + } + + static void createBlockList(GcsClient gcsClient, BlockListType blockListType, String content) + throws Exception { + BlobId blobId = + gcsClient.getBlobId(DOWNLOAD_JOB_NAME, GcsClient.getBlockListFileName(blockListType)); + try (var writer = gcsClient.getWriter(blobId)) { + writer.write(content); + } + } + + @Test + void fetchDownloadedLabels_success() throws Exception { + String blockContent = + """ + domainLabel,orderIDs + test1,1;2 + test2,3 + """; + String blockPlusContent = + """ + domainLabel,orderIDs + test2,4 + test3,5 + """; + createBlockList(gcsClient, BlockListType.BLOCK, blockContent); + createBlockList(gcsClient, BlockListType.BLOCK_PLUS, blockPlusContent); + assertThat(action.fetchDownloadedLabels(DOWNLOAD_JOB_NAME)) + .containsExactly("test1", "test2", "test3"); + } + + @Test + void fetchPersistedLabels_multipleOfBatchSize_success() { + BsaTestingUtils.persistBsaLabel("a"); + BsaTestingUtils.persistBsaLabel("b"); + BsaTestingUtils.persistBsaLabel("c"); + + assertThat(action.fetchPersistedLabels(1)).containsExactly("a", "b", "c"); + } + + @Test + void fetchPersistedLabels_notMultipleOfBatchSize_success() { + BsaTestingUtils.persistBsaLabel("a"); + BsaTestingUtils.persistBsaLabel("b"); + BsaTestingUtils.persistBsaLabel("c"); + + assertThat(action.fetchPersistedLabels(2)).containsExactly("a", "b", "c"); + } + + @Test + void checkBsaLabels_noErrors() throws Exception { + String blockContent = + """ + domainLabel,orderIDs + test1,1;2 + test2,3 + """; + String blockPlusContent = + """ + domainLabel,orderIDs + test2,4 + test3,5 + """; + createBlockList(gcsClient, BlockListType.BLOCK, blockContent); + createBlockList(gcsClient, BlockListType.BLOCK_PLUS, blockPlusContent); + BsaTestingUtils.persistBsaLabel("test1"); + BsaTestingUtils.persistBsaLabel("test2"); + BsaTestingUtils.persistBsaLabel("test3"); + + assertThat(action.checkBsaLabels(DOWNLOAD_JOB_NAME)).isEmpty(); + } + + @Test + void checkBsaLabels_withErrors() throws Exception { + String blockContent = + """ + domainLabel,orderIDs + test1,1;2 + test2,3 + """; + String blockPlusContent = """ + domainLabel,orderIDs + test2,4 + """; + createBlockList(gcsClient, BlockListType.BLOCK, blockContent); + createBlockList(gcsClient, BlockListType.BLOCK_PLUS, blockPlusContent); + BsaTestingUtils.persistBsaLabel("test2"); + BsaTestingUtils.persistBsaLabel("test3"); + + String allErrors = Joiner.on('\n').join(action.checkBsaLabels(DOWNLOAD_JOB_NAME)); + + assertThat(allErrors).contains("Found 1 missing labels in the DB. Examples: [test1]"); + assertThat(allErrors).contains("Found 1 unexpected labels in the DB. Examples: [test3]"); + } +} diff --git a/core/src/test/java/google/registry/bsa/persistence/QueriesTest.java b/core/src/test/java/google/registry/bsa/persistence/QueriesTest.java index e07abf011..bbd7f22ba 100644 --- a/core/src/test/java/google/registry/bsa/persistence/QueriesTest.java +++ b/core/src/test/java/google/registry/bsa/persistence/QueriesTest.java @@ -17,6 +17,7 @@ package google.registry.bsa.persistence; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.truth.Truth.assertThat; import static google.registry.bsa.BsaTransactions.bsaQuery; +import static google.registry.bsa.persistence.Queries.batchReadBsaLabelText; import static google.registry.bsa.persistence.Queries.deleteBsaLabelByLabels; import static google.registry.bsa.persistence.Queries.queryBsaLabelByLabels; import static google.registry.bsa.persistence.Queries.queryBsaUnblockableDomainByLabels; @@ -35,6 +36,7 @@ import google.registry.bsa.persistence.BsaUnblockableDomain.Reason; import google.registry.persistence.transaction.JpaTestExtensions; import google.registry.persistence.transaction.JpaTestExtensions.JpaIntegrationWithCoverageExtension; import google.registry.testing.FakeClock; +import java.util.Optional; import org.joda.time.DateTime; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -114,6 +116,16 @@ class QueriesTest { new BsaLabel("label1", fakeClock.nowUtc()), new BsaLabel("label2", fakeClock.nowUtc())); } + @Test + void batchReadBsaLabelText_firstBatch() { + assertThat(batchReadBsaLabelText(Optional.empty(), 1)).containsExactly("label1"); + } + + @Test + void batchReadBsaLabelText_nextBatch() { + assertThat(batchReadBsaLabelText(Optional.of("label1"), 1)).containsExactly("label2"); + } + @Test void deleteBsaLabelByLabels_oneLabel() { assertThat(tm().transact(() -> deleteBsaLabelByLabels(ImmutableList.of("label1")))) diff --git a/core/src/test/resources/google/registry/module/bsa/bsa_routing.txt b/core/src/test/resources/google/registry/module/bsa/bsa_routing.txt index 708206c8b..dc52e63e9 100644 --- a/core/src/test/resources/google/registry/module/bsa/bsa_routing.txt +++ b/core/src/test/resources/google/registry/module/bsa/bsa_routing.txt @@ -1,4 +1,5 @@ PATH CLASS METHODS OK AUTH_METHODS MIN USER_POLICY /_dr/task/bsaDownload BsaDownloadAction GET,POST n API APP ADMIN /_dr/task/bsaRefresh BsaRefreshAction GET,POST n API APP ADMIN +/_dr/task/bsaValidate BsaValidateAction GET,POST n API APP ADMIN /_dr/task/uploadBsaUnavailableNames UploadBsaUnavailableDomainsAction GET,POST n API APP ADMIN