Add BSA validation job (#2356)

* Add BSA validation job

Add the BsaValidateAction class with a first check (for inconsistency
between downloaded and persisted labels).

* Addressing comments

* Addressing reviews
This commit is contained in:
Weimin Yu 2024-03-08 17:08:09 -05:00 committed by GitHub
parent 779dc36858
commit 34a8a94083
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 383 additions and 12 deletions

View file

@ -0,0 +1,162 @@
// Copyright 2024 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.bsa;
import static com.google.common.base.Preconditions.checkArgument;
import static google.registry.bsa.persistence.DownloadScheduler.fetchMostRecentDownloadJobIdIfCompleted;
import static google.registry.bsa.persistence.Queries.batchReadBsaLabelText;
import static google.registry.request.Action.Method.GET;
import static google.registry.request.Action.Method.POST;
import static javax.servlet.http.HttpServletResponse.SC_OK;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import com.google.common.flogger.FluentLogger;
import google.registry.config.RegistryConfig.Config;
import google.registry.request.Action;
import google.registry.request.Response;
import google.registry.request.auth.Auth;
import java.util.Optional;
import java.util.stream.Stream;
import javax.inject.Inject;
/** Validates the BSA data in the database against the most recent block lists. */
@Action(
service = Action.Service.BSA,
path = BsaValidateAction.PATH,
method = {GET, POST},
auth = Auth.AUTH_API_ADMIN)
public class BsaValidateAction implements Runnable {
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
static final String PATH = "/_dr/task/bsaValidate";
private final GcsClient gcsClient;
private final int transactionBatchSize;
private final BsaLock bsaLock;
private final Response response;
@Inject
BsaValidateAction(
GcsClient gcsClient,
@Config("bsaTxnBatchSize") int transactionBatchSize,
BsaLock bsaLock,
Response response) {
this.gcsClient = gcsClient;
this.transactionBatchSize = transactionBatchSize;
this.bsaLock = bsaLock;
this.response = response;
}
@Override
public void run() {
try {
if (!bsaLock.executeWithLock(this::runWithinLock)) {
logger.atInfo().log("Cannot execute action. Other BSA related task is executing.");
// TODO(blocked by go/r3pr/2354): send email
}
} catch (Throwable throwable) {
logger.atWarning().withCause(throwable).log("Failed to update block lists.");
// TODO(blocked by go/r3pr/2354): send email
}
// Always return OK. No need to retry since all queries and GCS accesses are already
// implicitly retried.
response.setStatus(SC_OK);
}
/** Executes the validation action while holding the BSA lock. */
Void runWithinLock() {
Optional<String> downloadJobName = fetchMostRecentDownloadJobIdIfCompleted();
if (downloadJobName.isEmpty()) {
logger.atInfo().log("Cannot validate: latest download not found or unfinished.");
return null;
}
logger.atInfo().log("Validating BSA with latest download: %s", downloadJobName.get());
ImmutableList.Builder<String> errors = new ImmutableList.Builder();
errors.addAll(checkBsaLabels(downloadJobName.get()));
emailValidationResults(downloadJobName.get(), errors.build());
logger.atInfo().log("Finished validating BSA with latest download: %s", downloadJobName.get());
return null;
}
void emailValidationResults(String job, ImmutableList<String> errors) {
// TODO(blocked by go/r3pr/2354): send email
}
ImmutableList<String> checkBsaLabels(String jobName) {
ImmutableSet<String> downloadedLabels = fetchDownloadedLabels(jobName);
ImmutableSet<String> persistedLabels = fetchPersistedLabels(transactionBatchSize);
ImmutableList.Builder<String> errors = new ImmutableList.Builder<>();
int nErrorExamples = 10;
SetView<String> missingLabels = Sets.difference(downloadedLabels, persistedLabels);
if (!missingLabels.isEmpty()) {
String examples = Joiner.on(',').join(Iterables.limit(missingLabels, nErrorExamples));
String errorMessage =
String.format(
"Found %d missing labels in the DB. Examples: [%s]", missingLabels.size(), examples);
logger.atInfo().log(errorMessage);
errors.add(errorMessage);
}
SetView<String> unexpectedLabels = Sets.difference(persistedLabels, downloadedLabels);
if (!unexpectedLabels.isEmpty()) {
String examples = Joiner.on(',').join(Iterables.limit(unexpectedLabels, nErrorExamples));
String errorMessage =
String.format(
"Found %d unexpected labels in the DB. Examples: [%s]",
unexpectedLabels.size(), examples);
logger.atInfo().log(errorMessage);
errors.add(errorMessage);
}
return errors.build();
}
/** Returns unique labels across all block lists in the download specified by {@code jobName}. */
ImmutableSet<String> fetchDownloadedLabels(String jobName) {
ImmutableSet.Builder<String> labelsBuilder = new ImmutableSet.Builder<>();
for (BlockListType blockListType : BlockListType.values()) {
try (Stream<String> lines = gcsClient.readBlockList(jobName, blockListType)) {
lines.skip(1).map(BsaValidateAction::parseBlockListLine).forEach(labelsBuilder::add);
}
}
return labelsBuilder.build();
}
ImmutableSet<String> fetchPersistedLabels(int batchSize) {
ImmutableSet.Builder<String> labelsBuilder = new ImmutableSet.Builder<>();
ImmutableList<String> batch;
Optional<String> lastRead = Optional.empty();
do {
batch = batchReadBsaLabelText(lastRead, batchSize);
batch.forEach(labelsBuilder::add);
if (!batch.isEmpty()) {
lastRead = Optional.of(Iterables.getLast(batch));
}
} while (batch.size() == batchSize);
return labelsBuilder.build();
}
static String parseBlockListLine(String line) {
int firstComma = line.indexOf(',');
checkArgument(firstComma > 0, "Invalid block list line: %s", line);
return line.substring(0, firstComma);
}
}

View file

@ -30,7 +30,6 @@ import google.registry.util.Clock;
import java.util.Objects;
import java.util.Optional;
import javax.inject.Inject;
import org.joda.time.DateTime;
import org.joda.time.Duration;
/**
@ -115,16 +114,6 @@ public final class DownloadScheduler {
});
}
Optional<DateTime> latestCompletedJobTime() {
return tm().transact(
() -> {
return fetchTwoMostRecentDownloads().stream()
.filter(job -> Objects.equals(job.getStage(), DONE))
.map(BsaDownload::getCreationTime)
.findFirst();
});
}
private boolean isTimeAgain(BsaDownload mostRecent, Duration interval) {
return mostRecent.getCreationTime().plus(interval).minus(CRON_JITTER).isBefore(clock.nowUtc());
}
@ -163,4 +152,14 @@ public final class DownloadScheduler {
static Optional<BsaDownload> fetchMostRecentDownload() {
return fetchTwoMostRecentDownloads().stream().findFirst();
}
/**
* Returns the most recent download {@code jobName} if it has been fully processed, and {@code
* empty} if the download is still being processed.
*/
public static Optional<String> fetchMostRecentDownloadJobIdIfCompleted() {
return fetchMostRecentDownload()
.filter(bsaDownload -> Objects.equals(bsaDownload.getStage(), DONE))
.map(BsaDownload::getJobName);
}
}

View file

@ -30,15 +30,33 @@ import java.util.stream.Stream;
import org.joda.time.DateTime;
/** Helpers for querying BSA JPA entities. */
class Queries {
public final class Queries {
private Queries() {}
/**
* Entity objects that may be updated in the same query must be detached. See {@code
* JpaTransactionManagerImpl}.
*/
private static Object detach(Object obj) {
em().detach(obj);
return obj;
}
public static ImmutableList<String> batchReadBsaLabelText(
Optional<String> lastRead, int batchSize) {
return ImmutableList.copyOf(
bsaQuery(
() ->
em().createQuery(
"SELECT b.label FROM BsaLabel b WHERE b.label > :lastRead ORDER BY b.label",
String.class)
.setParameter("lastRead", lastRead.orElse(""))
.setMaxResults(batchSize)
.getResultList()));
}
static Stream<BsaUnblockableDomain> queryBsaUnblockableDomainByLabels(
ImmutableCollection<String> labels) {
return ((Stream<?>)

View file

@ -25,6 +25,12 @@
<url-pattern>/_dr/task/bsaRefresh</url-pattern>
</servlet-mapping>
<!-- Refresh action -->
<servlet-mapping>
<servlet-name>bsa-servlet</servlet-name>
<url-pattern>/_dr/task/bsaValidate</url-pattern>
</servlet-mapping>
<!-- Upload unavailable domains to BSA action -->
<servlet-mapping>
<servlet-name>bsa-servlet</servlet-name>

View file

@ -297,6 +297,18 @@
<schedule>15,45 * * * *</schedule>
</task>
<task>
<url><![CDATA[/_dr/task/bsaValidate]]></url>
<name>bsaValidate</name>
<service>bsa</service>
<description>
Validates the processed BSA data in the database against the original
block lists.
</description>
<!-- Runs twice daily. -->
<schedule>50 9,21 * * *</schedule>
</task>
<task>
<url><![CDATA[/_dr/task/uploadBsaUnavailableNames]]></url>
<name>uploadBsaUnavailableNames</name>

View file

@ -29,6 +29,7 @@ import google.registry.batch.SendExpiringCertificateNotificationEmailAction;
import google.registry.batch.WipeOutContactHistoryPiiAction;
import google.registry.bsa.BsaDownloadAction;
import google.registry.bsa.BsaRefreshAction;
import google.registry.bsa.BsaValidateAction;
import google.registry.bsa.UploadBsaUnavailableDomainsAction;
import google.registry.cron.CronModule;
import google.registry.cron.TldFanoutAction;
@ -167,6 +168,8 @@ interface RequestComponent {
BsaRefreshAction bsaRefreshAction();
BsaValidateAction bsaValidateAction();
CannedScriptExecutionAction cannedScriptExecutionAction();
CheckApiAction checkApiAction();

View file

@ -18,6 +18,7 @@ import dagger.Module;
import dagger.Subcomponent;
import google.registry.bsa.BsaDownloadAction;
import google.registry.bsa.BsaRefreshAction;
import google.registry.bsa.BsaValidateAction;
import google.registry.bsa.UploadBsaUnavailableDomainsAction;
import google.registry.request.Modules.UrlConnectionServiceModule;
import google.registry.request.RequestComponentBuilder;
@ -32,6 +33,8 @@ interface BsaRequestComponent {
BsaRefreshAction bsaRefreshAction();
BsaValidateAction bsaValidateAction();
UploadBsaUnavailableDomainsAction uploadBsaUnavailableDomains();
@Subcomponent.Builder

View file

@ -0,0 +1,155 @@
// Copyright 2024 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.bsa;
import static com.google.common.truth.Truth.assertThat;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.contrib.nio.testing.LocalStorageHelper;
import com.google.common.base.Joiner;
import google.registry.bsa.persistence.BsaTestingUtils;
import google.registry.gcs.GcsUtils;
import google.registry.persistence.transaction.JpaTestExtensions;
import google.registry.persistence.transaction.JpaTestExtensions.JpaIntegrationWithCoverageExtension;
import google.registry.request.Response;
import google.registry.testing.FakeClock;
import org.joda.time.DateTime;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.extension.RegisterExtension;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
/** Unit tests for {@link BsaValidateAction}. */
@ExtendWith(MockitoExtension.class)
public class BsaValidateActionTest {
private static final String DOWNLOAD_JOB_NAME = "job";
FakeClock fakeClock = new FakeClock(DateTime.parse("2023-11-09T02:08:57.880Z"));
@RegisterExtension
final JpaIntegrationWithCoverageExtension jpa =
new JpaTestExtensions.Builder().withClock(fakeClock).buildIntegrationWithCoverageExtension();
@Mock BsaLock bsaLock;
@Mock Response response;
private GcsClient gcsClient;
private BsaValidateAction action;
@BeforeEach
void setup() {
gcsClient =
new GcsClient(new GcsUtils(LocalStorageHelper.getOptions()), "my-bucket", "SHA-256");
action = new BsaValidateAction(gcsClient, /* transactionBatchSize= */ 500, bsaLock, response);
}
static void createBlockList(GcsClient gcsClient, BlockListType blockListType, String content)
throws Exception {
BlobId blobId =
gcsClient.getBlobId(DOWNLOAD_JOB_NAME, GcsClient.getBlockListFileName(blockListType));
try (var writer = gcsClient.getWriter(blobId)) {
writer.write(content);
}
}
@Test
void fetchDownloadedLabels_success() throws Exception {
String blockContent =
"""
domainLabel,orderIDs
test1,1;2
test2,3
""";
String blockPlusContent =
"""
domainLabel,orderIDs
test2,4
test3,5
""";
createBlockList(gcsClient, BlockListType.BLOCK, blockContent);
createBlockList(gcsClient, BlockListType.BLOCK_PLUS, blockPlusContent);
assertThat(action.fetchDownloadedLabels(DOWNLOAD_JOB_NAME))
.containsExactly("test1", "test2", "test3");
}
@Test
void fetchPersistedLabels_multipleOfBatchSize_success() {
BsaTestingUtils.persistBsaLabel("a");
BsaTestingUtils.persistBsaLabel("b");
BsaTestingUtils.persistBsaLabel("c");
assertThat(action.fetchPersistedLabels(1)).containsExactly("a", "b", "c");
}
@Test
void fetchPersistedLabels_notMultipleOfBatchSize_success() {
BsaTestingUtils.persistBsaLabel("a");
BsaTestingUtils.persistBsaLabel("b");
BsaTestingUtils.persistBsaLabel("c");
assertThat(action.fetchPersistedLabels(2)).containsExactly("a", "b", "c");
}
@Test
void checkBsaLabels_noErrors() throws Exception {
String blockContent =
"""
domainLabel,orderIDs
test1,1;2
test2,3
""";
String blockPlusContent =
"""
domainLabel,orderIDs
test2,4
test3,5
""";
createBlockList(gcsClient, BlockListType.BLOCK, blockContent);
createBlockList(gcsClient, BlockListType.BLOCK_PLUS, blockPlusContent);
BsaTestingUtils.persistBsaLabel("test1");
BsaTestingUtils.persistBsaLabel("test2");
BsaTestingUtils.persistBsaLabel("test3");
assertThat(action.checkBsaLabels(DOWNLOAD_JOB_NAME)).isEmpty();
}
@Test
void checkBsaLabels_withErrors() throws Exception {
String blockContent =
"""
domainLabel,orderIDs
test1,1;2
test2,3
""";
String blockPlusContent = """
domainLabel,orderIDs
test2,4
""";
createBlockList(gcsClient, BlockListType.BLOCK, blockContent);
createBlockList(gcsClient, BlockListType.BLOCK_PLUS, blockPlusContent);
BsaTestingUtils.persistBsaLabel("test2");
BsaTestingUtils.persistBsaLabel("test3");
String allErrors = Joiner.on('\n').join(action.checkBsaLabels(DOWNLOAD_JOB_NAME));
assertThat(allErrors).contains("Found 1 missing labels in the DB. Examples: [test1]");
assertThat(allErrors).contains("Found 1 unexpected labels in the DB. Examples: [test3]");
}
}

View file

@ -17,6 +17,7 @@ package google.registry.bsa.persistence;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.truth.Truth.assertThat;
import static google.registry.bsa.BsaTransactions.bsaQuery;
import static google.registry.bsa.persistence.Queries.batchReadBsaLabelText;
import static google.registry.bsa.persistence.Queries.deleteBsaLabelByLabels;
import static google.registry.bsa.persistence.Queries.queryBsaLabelByLabels;
import static google.registry.bsa.persistence.Queries.queryBsaUnblockableDomainByLabels;
@ -35,6 +36,7 @@ import google.registry.bsa.persistence.BsaUnblockableDomain.Reason;
import google.registry.persistence.transaction.JpaTestExtensions;
import google.registry.persistence.transaction.JpaTestExtensions.JpaIntegrationWithCoverageExtension;
import google.registry.testing.FakeClock;
import java.util.Optional;
import org.joda.time.DateTime;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@ -114,6 +116,16 @@ class QueriesTest {
new BsaLabel("label1", fakeClock.nowUtc()), new BsaLabel("label2", fakeClock.nowUtc()));
}
@Test
void batchReadBsaLabelText_firstBatch() {
assertThat(batchReadBsaLabelText(Optional.empty(), 1)).containsExactly("label1");
}
@Test
void batchReadBsaLabelText_nextBatch() {
assertThat(batchReadBsaLabelText(Optional.of("label1"), 1)).containsExactly("label2");
}
@Test
void deleteBsaLabelByLabels_oneLabel() {
assertThat(tm().transact(() -> deleteBsaLabelByLabels(ImmutableList.of("label1"))))

View file

@ -1,4 +1,5 @@
PATH CLASS METHODS OK AUTH_METHODS MIN USER_POLICY
/_dr/task/bsaDownload BsaDownloadAction GET,POST n API APP ADMIN
/_dr/task/bsaRefresh BsaRefreshAction GET,POST n API APP ADMIN
/_dr/task/bsaValidate BsaValidateAction GET,POST n API APP ADMIN
/_dr/task/uploadBsaUnavailableNames UploadBsaUnavailableDomainsAction GET,POST n API APP ADMIN