From 1bbc38c65e9beb77466f0ad1ee0b637e4ff18c24 Mon Sep 17 00:00:00 2001 From: Weimin Yu Date: Fri, 12 Mar 2021 10:36:57 -0500 Subject: [PATCH] Stage the init_sql_pipeline in CloudBuild (#1004) * Stage the init_sql_pipeline in CloudBuild Defined metadata file and added Gradle uberJar task for the pipeline, which are needed for staging. Updated cloud build script to stage this pipeline during the build process. --- core/build.gradle | 5 ++ .../beam/init_sql_pipeline_metadata.json | 68 +++++++++++++++++++ release/cloudbuild-nomulus.yaml | 17 ++++- release/stage_beam_pipeline.sh | 65 ++++++++++++++++++ 4 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 core/src/main/resources/google/registry/beam/init_sql_pipeline_metadata.json create mode 100755 release/stage_beam_pipeline.sh diff --git a/core/build.gradle b/core/build.gradle index 19c9a1631..122116217 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -782,6 +782,11 @@ generateGoldenImages.finalizedBy(findGoldenImages) createUberJar('nomulus', 'nomulus', 'google.registry.tools.RegistryTool') +createUberJar( + 'init_sql_pipeline', + 'init_sql_pipeline', + 'google.registry.beam.initsql.InitSqlPipeline') + // A jar with classes and resources from main sourceSet, excluding internal // data. See comments on configurations.nomulus_test above for details. 
// TODO(weiminyu): release process should build this using the public repo to eliminate the need diff --git a/core/src/main/resources/google/registry/beam/init_sql_pipeline_metadata.json b/core/src/main/resources/google/registry/beam/init_sql_pipeline_metadata.json new file mode 100644 index 000000000..69d96fa63 --- /dev/null +++ b/core/src/main/resources/google/registry/beam/init_sql_pipeline_metadata.json @@ -0,0 +1,68 @@ +{ + "name": "Init SQL From Datastore Backup", + "description": "An Apache Beam batch pipeline that reads a Datastore export and Nomulus CommitLog files, transforms data into JPA entities, and writes the results to a SQL database.", + "parameters": [ + { + "name": "registryEnvironment", + "label": "The Registry environment.", + "helpText": "The Registry environment, required if environment-specific initialization is needed on worker VMs.", + "is_optional": true, + "regexes": [ + "^[0-9A-Z_]+$" + ] + }, + { + "name": "isolationOverride", + "label": "The desired SQL transaction isolation level.", + "helpText": "The desired SQL transaction isolation level.", + "is_optional": true, + "regexes": [ + "^[0-9A-Z_]+$" + ] + }, + { + "name": "sqlWriteBatchSize", + "label": "SQL write batch size.", + "helpText": "The number of entities to write to the SQL database in one operation.", + "is_optional": true, + "regexes": [ + "^[1-9][0-9]*$" + ] + }, + { + "name": "sqlWriteShards", + "label": "Number of output shards to create when writing to SQL.", + "helpText": "Number of shards to create out of the data before writing to the SQL database. 
Please refer to the Javadoc of RegistryJpaIO.Write.shards() for how to choose this value.", + "is_optional": true, + "regexes": [ + "^[1-9][0-9]*$" + ] + }, + { + "name": "datastoreExportDir", + "label": "Datastore export dir", + "helpText": "The root directory of the export to load.", + "regexes": [ + "^gs:\\/\\/[^\\n\\r]+$" + ] + }, + { + "name": "commitLogDir", + "label": "Nomulus CommitLog dir", + "helpText": "The directory with all Nomulus CommitLogs.", + "regexes": [ + "^gs:\\/\\/[^\\n\\r]+$" + ] + }, + { + "name": "commitLogStartTimestamp", + "label": "Nomulus CommitLog start time", + "helpText": "The earliest CommitLogs to load, in ISO8601 format." + }, + { + "name": "commitLogEndTimestamp", + "label": "Nomulus CommitLog end time", + "helpText": "The latest CommitLogs to load, in ISO8601 format." + } + ] +} diff --git a/release/cloudbuild-nomulus.yaml b/release/cloudbuild-nomulus.yaml index 6333ba4c0..74f983521 100644 --- a/release/cloudbuild-nomulus.yaml +++ b/release/cloudbuild-nomulus.yaml @@ -73,12 +73,27 @@ steps: # Build and package the deployment files for production. - name: 'gcr.io/${PROJECT_ID}/builder:latest' args: ['release/build_nomulus_for_env.sh', 'production', 'output'] +# Build and stage init_sql_pipeline +- name: 'gcr.io/${PROJECT_ID}/builder:latest' + entrypoint: /bin/bash + # Set home for Gradle caches. Must be consistent with previous steps above + # and ./build_nomulus_for_env.sh + env: [ 'GRADLE_USER_HOME=./cloudbuild-caches' ] + args: + - -c + - | + ./release/stage_beam_pipeline.sh \ + init_sql_pipeline \ + google.registry.beam.initsql.InitSqlPipeline \ + google/registry/beam/init_sql_pipeline_metadata.json \ + ${TAG_NAME} \ + ${PROJECT_ID} # Tentatively build and publish Cloud SQL schema jar here, before schema release # process is finalized. Also publish nomulus:core jars that are needed for # server/schema compatibility tests. - name: 'gcr.io/${PROJECT_ID}/builder:latest' entrypoint: /bin/bash - # Set home for Gradle caches. 
Must be consistent with second step above + # Set home for Gradle caches. Must be consistent with previous steps above # and ./build_nomulus_for_env.sh env: [ 'GRADLE_USER_HOME=./cloudbuild-caches' ] args: diff --git a/release/stage_beam_pipeline.sh b/release/stage_beam_pipeline.sh new file mode 100755 index 000000000..766b99dca --- /dev/null +++ b/release/stage_beam_pipeline.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# Copyright 2019 The Nomulus Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This script builds and stages a flex-template based BEAM pipeline. The +# following parameters are required: +# - pipeline_name: this is also the name of a createUberJar task in :core and +# the name of the jar file created by that task. +# - main_class: the pipeline's main class name. +# - metadata_pathname: the pipeline's metadata file, which is in the resources +# folder of :core. This parameter should be the relative path from resources. +# - release_tag +# - dev_project +# +# If successful, this script will generate and upload two artifacts: +# - A template file to +# gs://${dev_project}-deploy/${release_tag}/beam/$(basename ${metadata_pathname}) +# - A docker image to gcr.io/${dev_project}/beam/${pipeline_name}:${release_tag} +# +# Please refer to gcloud documentation for how to start the pipeline. 
+
+# Abort on the first failing command so that the template is never staged
+# after a failed Gradle build.
+set -e
+
+# Exactly five positional arguments are required. The usage text is a
+# diagnostic, so it goes to stderr.
+if [ $# -ne 5 ];
+then
+  echo "Usage: $0 pipeline_name main_class metadata_pathname release_tag" \
+    "dev_project" >&2
+  exit 1
+fi
+
+pipeline_name="$1"
+main_class="$2"
+metadata_pathname="$3"
+release_tag="$4"
+dev_project="$5"
+
+# Docker image that will hold the pipeline, tagged with the release below.
+image_name="gcr.io/${dev_project}/beam/${pipeline_name}"
+# The template file in GCS is named after the metadata file. The expansion is
+# quoted so that a path with whitespace or glob characters stays one word.
+metadata_basename="$(basename "${metadata_pathname}")"
+
+# NOTE to reviewers: the gcs:// scheme is presumably resolved by the Gradle
+# build itself. Confirm before changing it.
+gcs_prefix="gcs://domain-registry-maven-repository"
+
+# Run the :core Gradle task that shares its name with the pipeline.
+./gradlew clean :core:"${pipeline_name}" \
+  -PmavenUrl="${gcs_prefix}"/maven \
+  -PpluginsUrl="${gcs_prefix}"/plugins
+
+# Build the flex template from the jar and the metadata file, then stage it.
+gcloud dataflow flex-template build \
+  "gs://${dev_project}-deploy/${release_tag}/beam/${metadata_basename}" \
+  --image-gcr-path "${image_name}:${release_tag}" \
+  --sdk-language "JAVA" \
+  --flex-template-base-image JAVA11 \
+  --metadata-file "./core/src/main/resources/${metadata_pathname}" \
+  --jar "./core/build/libs/${pipeline_name}.jar" \
+  --env FLEX_TEMPLATE_JAVA_MAIN_CLASS="${main_class}" \
+  --project "${dev_project}"