diff --git a/core/build.gradle b/core/build.gradle
index 19c9a1631..122116217 100644
--- a/core/build.gradle
+++ b/core/build.gradle
@@ -782,6 +782,11 @@ generateGoldenImages.finalizedBy(findGoldenImages)
 
 createUberJar('nomulus', 'nomulus', 'google.registry.tools.RegistryTool')
 
+createUberJar(
+    'init_sql_pipeline',
+    'init_sql_pipeline',
+    'google.registry.beam.initsql.InitSqlPipeline')
+
 // A jar with classes and resources from main sourceSet, excluding internal
 // data. See comments on configurations.nomulus_test above for details.
 // TODO(weiminyu): release process should build this using the public repo to eliminate the need
diff --git a/core/src/main/resources/google/registry/beam/init_sql_pipeline_metadata.json b/core/src/main/resources/google/registry/beam/init_sql_pipeline_metadata.json
new file mode 100644
index 000000000..69d96fa63
--- /dev/null
+++ b/core/src/main/resources/google/registry/beam/init_sql_pipeline_metadata.json
@@ -0,0 +1,68 @@
+{
+  "name": "Init SQL From Datastore Backup",
+  "description": "An Apache Beam batch pipeline that reads a Datastore export and Nomulus CommitLog files, transforms data into JPA entities, and writes the results to a SQL database.",
+  "parameters": [
+    {
+      "name": "registryEnvironment",
+      "label": "The Registry environment.",
+      "helpText": "The Registry environment, required if environment-specific initialization is needed on worker VMs.",
+      "is_optional": true,
+      "regexes": [
+        "^[0-9A-Z_]+$"
+      ]
+    },
+    {
+      "name": "isolationOverride",
+      "label": "The desired SQL transaction isolation level.",
+      "helpText": "The desired SQL transaction isolation level.",
+      "is_optional": true,
+      "regexes": [
+        "^[0-9A-Z_]+$"
+      ]
+    },
+    {
+      "name": "sqlWriteBatchSize",
+      "label": "SQL write batch size.",
+      "helpText": "The number of entities to write to the SQL database in one operation.",
+      "is_optional": true,
+      "regexes": [
+        "^[1-9][0-9]*$"
+      ]
+    },
+    {
+      "name": "sqlWriteShards",
+      "label": "Number of output shards to create when writing to SQL.",
+      "helpText": "Number of shards to create out of the data before writing to the SQL database. Please refer to the Javadoc of RegistryJpaIO.Write.shards() for how to choose this value.",
+      "is_optional": true,
+      "regexes": [
+        "^[1-9][0-9]*$"
+      ]
+    },
+    {
+      "name": "datastoreExportDir",
+      "label": "Datastore export dir",
+      "helpText": "The root directory of the export to load.",
+      "regexes": [
+        "^gs:\\/\\/[^\\n\\r]+$"
+      ]
+    },
+    {
+      "name": "commitLogDir",
+      "label": "Nomulus CommitLog dir",
+      "helpText": "The directory with all Nomulus CommitLogs.",
+      "regexes": [
+        "^gs:\\/\\/[^\\n\\r]+$"
+      ]
+    },
+    {
+      "name": "commitLogStartTimestamp",
+      "label": "Nomulus CommitLog start time",
+      "helpText": "The earliest CommitLogs to load, in ISO8601 format."
+    },
+    {
+      "name": "commitLogEndTimestamp",
+      "label": "Nomulus CommitLog end time",
+      "helpText": "The latest CommitLogs to load, in ISO8601 format."
+    }
+  ]
+}
diff --git a/release/cloudbuild-nomulus.yaml b/release/cloudbuild-nomulus.yaml
index 6333ba4c0..74f983521 100644
--- a/release/cloudbuild-nomulus.yaml
+++ b/release/cloudbuild-nomulus.yaml
@@ -73,12 +73,27 @@ steps:
 # Build and package the deployment files for production.
 - name: 'gcr.io/${PROJECT_ID}/builder:latest'
   args: ['release/build_nomulus_for_env.sh', 'production', 'output']
+# Build and stage init_sql_pipeline
+- name: 'gcr.io/${PROJECT_ID}/builder:latest'
+  entrypoint: /bin/bash
+  # Set home for Gradle caches. Must be consistent with previous steps above
+  # and ./build_nomulus_for_env.sh
+  env: [ 'GRADLE_USER_HOME=./cloudbuild-caches' ]
+  args:
+  - -c
+  - |
+    ./release/stage_beam_pipeline.sh \
+      init_sql_pipeline \
+      google.registry.beam.initsql.InitSqlPipeline \
+      google/registry/beam/init_sql_pipeline_metadata.json \
+      ${TAG_NAME} \
+      ${PROJECT_ID}
 # Tentatively build and publish Cloud SQL schema jar here, before schema release
 # process is finalized. Also publish nomulus:core jars that are needed for
 # server/schema compatibility tests.
 - name: 'gcr.io/${PROJECT_ID}/builder:latest'
   entrypoint: /bin/bash
-  # Set home for Gradle caches. Must be consistent with second step above
+  # Set home for Gradle caches. Must be consistent with previous steps above
   # and ./build_nomulus_for_env.sh
   env: [ 'GRADLE_USER_HOME=./cloudbuild-caches' ]
   args:
diff --git a/release/stage_beam_pipeline.sh b/release/stage_beam_pipeline.sh
new file mode 100755
index 000000000..766b99dca
--- /dev/null
+++ b/release/stage_beam_pipeline.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Copyright 2019 The Nomulus Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This script builds and stages a flex-template based Beam pipeline. The
+# following parameters are required:
+# - pipeline_name: this is also the name of a createUberJar task in :core and
+#   the name of the jar file created by that task.
+# - main_class: the pipeline's main class name.
+# - metadata_pathname: the pipeline's metadata file, which is in the resources
+#   folder of :core. This parameter should be the relative path from resources.
+# - release_tag
+# - dev_project
+#
+# If successful, this script will generate and upload two artifacts:
+# - A template file to
+#   gs://${dev_project}-deploy/${release_tag}/beam/$(basename metadata_pathname)
+# - A docker image to gcr.io/${dev_project}/beam/${pipeline_name}:${release_tag}
+#
+# Please refer to gcloud documentation for how to start the pipeline.
+
+set -e
+
+if [ $# -ne 5 ];
+then
+  echo "Usage: $0 pipeline_name main_class metadata_pathname release_tag" \
+    "dev_project" >&2
+  exit 1
+fi
+
+pipeline_name="$1"
+main_class="$2"
+metadata_pathname="$3"
+release_tag="$4"
+dev_project="$5"
+
+image_name="gcr.io/${dev_project}/beam/${pipeline_name}"
+metadata_basename=$(basename "${metadata_pathname}")
+
+gcs_prefix="gcs://domain-registry-maven-repository"
+
+./gradlew clean :core:"${pipeline_name}" \
+  -PmavenUrl="${gcs_prefix}"/maven \
+  -PpluginsUrl="${gcs_prefix}"/plugins
+
+gcloud dataflow flex-template build \
+  "gs://${dev_project}-deploy/${release_tag}/beam/${metadata_basename}" \
+  --image-gcr-path "${image_name}:${release_tag}" \
+  --sdk-language "JAVA" \
+  --flex-template-base-image JAVA11 \
+  --metadata-file "./core/src/main/resources/${metadata_pathname}" \
+  --jar "./core/build/libs/${pipeline_name}.jar" \
+  --env FLEX_TEMPLATE_JAVA_MAIN_CLASS="${main_class}" \
+  --project "${dev_project}"