Mirror of https://github.com/google/nomulus.git, synced 2025-06-04 19:47:26 +02:00
Use shared jar to stage BEAM pipeline if possible (#1008)

Allow multiple BEAM pipelines with the same classes and dependencies to share
one Uber jar. Added metadata for BulkDeleteDatastorePipeline. Updated the
shell and Cloud Build scripts to stage all pipelines in one step.

parent 24db87a4cf
commit 7c3d0dd1a9
6 changed files with 88 additions and 49 deletions
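
The sharing works because a Dataflow Flex Template records its Java main class
when the template is built, so a single uber jar can back several templates.
Below is a hedged sketch of staging two pipelines from the one shared jar; the
bucket, image paths, jar location, and metadata file name are assumptions, not
values taken from this commit.

# Stage two flex templates from the single shared uber jar. The jar has no
# Main-Class manifest entry, so each template names its own entry point via
# FLEX_TEMPLATE_JAVA_MAIN_CLASS. All paths below are placeholders.
SHARED_JAR=core/build/libs/beam_pipeline_common.jar

gcloud dataflow flex-template build gs://my-bucket/templates/init_sql_pipeline.json \
  --image-gcr-path "gcr.io/my-project/init-sql-pipeline:latest" \
  --sdk-language JAVA \
  --flex-template-base-image JAVA11 \
  --jar "${SHARED_JAR}" \
  --env FLEX_TEMPLATE_JAVA_MAIN_CLASS=google.registry.beam.initsql.InitSqlPipeline

gcloud dataflow flex-template build gs://my-bucket/templates/bulk_delete_datastore_pipeline.json \
  --image-gcr-path "gcr.io/my-project/bulk-delete-datastore:latest" \
  --sdk-language JAVA \
  --flex-template-base-image JAVA11 \
  --jar "${SHARED_JAR}" \
  --env FLEX_TEMPLATE_JAVA_MAIN_CLASS=google.registry.beam.datastore.BulkDeleteDatastorePipeline \
  --metadata-file bulk_delete_datastore_pipeline_metadata.json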

@@ -751,7 +751,8 @@ project.tasks.create('initSqlPipeline', JavaExec) {
 // nom_build :core:bulkDeleteDatastore --args="--project=domain-registry-crash \
 //     --region=us-central1 --runner=DataflowRunner --kindsToDelete=*"
 createToolTask(
-    'bulkDeleteDatastore', 'google.registry.beam.datastore.BulkDeletePipeline')
+    'bulkDeleteDatastore',
+    'google.registry.beam.datastore.BulkDeleteDatastorePipeline')

 project.tasks.create('generateSqlSchema', JavaExec) {
   classpath = sourceSets.nonprod.runtimeClasspath

@@ -782,10 +783,13 @@ generateGoldenImages.finalizedBy(findGoldenImages)

 createUberJar('nomulus', 'nomulus', 'google.registry.tools.RegistryTool')

+// Build the Uber jar shared by all flex-template based BEAM pipelines.
+// This packages more code and dependency than necessary. However, without
+// restructuring the source tree it is difficult to generate leaner jars.
 createUberJar(
-    'init_sql_pipeline',
-    'init_sql_pipeline',
-    'google.registry.beam.initsql.InitSqlPipeline')
+    'beam_pipeline_common',
+    'beam_pipeline_common',
+    '')

 // A jar with classes and resources from main sourceSet, excluding internal
 // data. See comments on configurations.nomulus_test above for details.
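
Passing '' as the main class presumably leaves beam_pipeline_common without a
Main-Class manifest entry, so running a pipeline directly from the shared jar
means naming the entry point on the command line. A minimal sketch; the jar
path and argument values are illustrative, not taken from the commit:

# With no Main-Class in the manifest, put the jar on the classpath and name
# the fully qualified pipeline class explicitly.
java -cp core/build/libs/beam_pipeline_common.jar \
  google.registry.beam.datastore.BulkDeleteDatastorePipeline \
  --project=my-project \
  --region=us-central1 \
  --runner=DataflowRunner \
  --kindsToDelete='*'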

@@ -78,7 +78,7 @@ import org.apache.beam.sdk.values.TupleTagList;
 * types in the Datastore using the {@code --numOfKindsHint} argument. If the default value for this
 * parameter is too low, performance will suffer.
 */
-public class BulkDeletePipeline {
+public class BulkDeleteDatastorePipeline {
   private static final FluentLogger logger = FluentLogger.forEnclosingClass();

   // This tool is not for use in our critical projects.

@@ -89,7 +89,7 @@ public class BulkDeletePipeline {

   private final Pipeline pipeline;

-  BulkDeletePipeline(BulkDeletePipelineOptions options) {
+  BulkDeleteDatastorePipeline(BulkDeletePipelineOptions options) {
     this.options = options;
     pipeline = Pipeline.create(options);
   }

@@ -303,7 +303,7 @@ public class BulkDeletePipeline {
   public static void main(String[] args) {
     BulkDeletePipelineOptions options =
         PipelineOptionsFactory.fromArgs(args).withValidation().as(BulkDeletePipelineOptions.class);
-    BulkDeletePipeline pipeline = new BulkDeletePipeline(options);
+    BulkDeleteDatastorePipeline pipeline = new BulkDeleteDatastorePipeline(options);
     pipeline.run();
     System.exit(0);
   }

@@ -0,0 +1,20 @@
+{
+  "name": "Bulk Delete Cloud Datastore",
+  "description": "An Apache Beam batch pipeline that deletes Cloud Datastore in bulk. This is easier to use than the GCP-provided template.",
+  "parameters": [
+    {
+      "name": "kindsToDelete",
+      "label": "The data KINDs to delete.",
+      "helpText": "The Datastore KINDs to be deleted. The format may be: the list of kinds to be deleted as a comma-separated string; or '*', which causes all kinds to be deleted."
+    },
+    {
+      "name": "getNumOfKindsHint",
+      "label": "An estimate of the number of KINDs to be deleted.",
+      "helpText": "An estimate of the number of KINDs to be deleted. This is recommended if --kindsToDelete is '*' and the default value is too low.",
+      "is_optional": true,
+      "regexes": [
+        "^[1-9][0-9]*$"
+      ]
+    }
+  ]
+}
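
Once staged, the template can be launched with parameters matching the
metadata above. The parameter names below follow the file verbatim (note the
spelling getNumOfKindsHint); the job name, template path, and region are
placeholders:

# Launch the staged flex template; parameter names must match those declared
# in the metadata file.
gcloud dataflow flex-template run "bulk-delete-datastore-job" \
  --template-file-gcs-location gs://my-bucket/templates/bulk_delete_datastore_pipeline.json \
  --region us-central1 \
  --parameters kindsToDelete='*' \
  --parameters getNumOfKindsHint=50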

@@ -14,9 +14,9 @@

 package google.registry.beam.datastore;

-import static google.registry.beam.datastore.BulkDeletePipeline.discoverEntityKinds;
-import static google.registry.beam.datastore.BulkDeletePipeline.getDeletionTags;
-import static google.registry.beam.datastore.BulkDeletePipeline.getOneDeletionTag;
+import static google.registry.beam.datastore.BulkDeleteDatastorePipeline.discoverEntityKinds;
+import static google.registry.beam.datastore.BulkDeleteDatastorePipeline.getDeletionTags;
+import static google.registry.beam.datastore.BulkDeleteDatastorePipeline.getOneDeletionTag;

 import com.google.common.base.Verify;
 import com.google.common.collect.ImmutableMap;

@@ -25,8 +25,8 @@ import com.google.datastore.v1.Entity;
 import com.google.datastore.v1.Key;
 import com.google.datastore.v1.Key.PathElement;
 import google.registry.beam.TestPipelineExtension;
-import google.registry.beam.datastore.BulkDeletePipeline.GenerateQueries;
-import google.registry.beam.datastore.BulkDeletePipeline.SplitEntities;
+import google.registry.beam.datastore.BulkDeleteDatastorePipeline.GenerateQueries;
+import google.registry.beam.datastore.BulkDeleteDatastorePipeline.SplitEntities;
 import java.io.Serializable;
 import java.util.Map;
 import org.apache.beam.sdk.testing.PAssert;

@@ -44,8 +44,8 @@ import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
 import org.junit.jupiter.api.extension.RegisterExtension;

-/** Unit tests for {@link BulkDeletePipeline}. */
-class BulkDeletePipelineTest implements Serializable {
+/** Unit tests for {@link BulkDeleteDatastorePipeline}. */
+class BulkDeleteDatastorePipelineTest implements Serializable {

   @RegisterExtension
   final transient TestPipelineExtension testPipeline =
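
After the rename, the test class can be run on its own with Gradle's standard
test filter; a sketch assuming the stock Gradle wrapper is invoked from the
repository root:

# Run only the renamed test class via Gradle's --tests filter.
./gradlew :core:test --tests "google.registry.beam.datastore.BulkDeleteDatastorePipelineTest"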

@@ -67,7 +67,7 @@ class BulkDeletePipelineTest implements Serializable {
     TupleTagList tags = getDeletionTags(2);
     PCollection<String> kinds = testPipeline.apply("InjectKinds", Create.of("A", "B"));
     PCollection<KV<String, TupleTag<Entity>>> kindToTagMapping =
-        BulkDeletePipeline.mapKindsToDeletionTags(kinds, tags);
+        BulkDeleteDatastorePipeline.mapKindsToDeletionTags(kinds, tags);
     PAssert.thatMap(kindToTagMapping)
         .isEqualTo(
             ImmutableMap.of(

@@ -81,7 +81,7 @@ class BulkDeletePipelineTest implements Serializable {
     TupleTagList tags = getDeletionTags(3);
     PCollection<String> kinds = testPipeline.apply("InjectKinds", Create.of("A", "B"));
     PCollection<KV<String, TupleTag<Entity>>> kindToTagMapping =
-        BulkDeletePipeline.mapKindsToDeletionTags(kinds, tags);
+        BulkDeleteDatastorePipeline.mapKindsToDeletionTags(kinds, tags);
     PAssert.thatMap(kindToTagMapping)
         .isEqualTo(
             ImmutableMap.of(

@@ -95,7 +95,7 @@ class BulkDeletePipelineTest implements Serializable {
     TupleTagList tags = getDeletionTags(2);
     PCollection<String> kinds = testPipeline.apply("InjectKinds", Create.of("A", "B", "C"));
     PCollection<KV<String, TupleTag<Entity>>> kindToTagMapping =
-        BulkDeletePipeline.mapKindsToDeletionTags(kinds, tags);
+        BulkDeleteDatastorePipeline.mapKindsToDeletionTags(kinds, tags);
     PAssert.thatMap(kindToTagMapping)
         .isEqualTo(
             ImmutableMap.of(

@@ -110,7 +110,7 @@ class BulkDeletePipelineTest implements Serializable {
     TupleTagList tags = getDeletionTags(2);
     PCollection<String> kinds = testPipeline.apply("InjectKinds", Create.of("A", "B"));
     PCollectionView<Map<String, TupleTag<Entity>>> kindToTagMapping =
-        BulkDeletePipeline.mapKindsToDeletionTags(kinds, tags).apply(View.asMap());
+        BulkDeleteDatastorePipeline.mapKindsToDeletionTags(kinds, tags).apply(View.asMap());
     Entity entityA = createTestEntity("A", 1);
     Entity entityB = createTestEntity("B", 2);
     PCollection<Entity> entities =