Stage the init_sql_pipeline in CloudBuild (#1004)

* Stage the init_sql_pipeline in CloudBuild. Defined metadata file and added Gradle uberJar task for the pipeline, which are needed for staging. Updated cloud build script to stage this pipeline during the build process.
2025-07-19 01:06:00 +02:00 · 2021-03-12 10:36:57 -05:00 · 2021-03-12 10:36:57 -05:00 · 39816bf7cd
commit 39816bf7cd
parent bf5ac3a327
4 changed files with 154 additions and 1 deletions
--- a/core/build.gradle
+++ b/core/build.gradle
@ -782,6 +782,11 @@ generateGoldenImages.finalizedBy(findGoldenImages)
 createUberJar('nomulus', 'nomulus', 'google.registry.tools.RegistryTool')
 // Uber jar for the init_sql_pipeline Beam pipeline, with InitSqlPipeline as
 // its main class. release/stage_beam_pipeline.sh expects a :core task and a
 // jar file that are both named after the pipeline, so keep the two names in
 // sync with the pipeline name passed to that script.
 // NOTE(review): arguments are presumably (task name, jar name, main class);
 // confirm against the definition of createUberJar.
 createUberJar(
    'init_sql_pipeline',
    'init_sql_pipeline',
    'google.registry.beam.initsql.InitSqlPipeline')
 // A jar with classes and resources from main sourceSet, excluding internal
 // data. See comments on configurations.nomulus_test above for details.
 // TODO(weiminyu): release process should build this using the public repo to eliminate the need
--- a/core/src/main/resources/google/registry/beam/init_sql_pipeline_metadata.json
+++ b/core/src/main/resources/google/registry/beam/init_sql_pipeline_metadata.json
@ -0,0 +1,68 @@
 {
  "name": "Init SQL From Datastore Backup",
  "description": "An Apache Beam batch pipeline that reads a Datastore export and Nomulus CommitLog files, transforms data into JPA entities, and writes the results to a SQL database.",
  "parameters": [
    {
      "name": "registryEnvironment",
      "label": "The Registry environment.",
      "helpText": "The Registry environment, required if environment-specific initialization is needed on worker VMs.",
      "is_optional": true,
      "regexes": [
        "^[0-9A-Z_]+$"
      ]
    },
    {
      "name": "isolationOverride",
      "label": "The desired SQL transaction isolation level.",
      "helpText": "The desired SQL transaction isolation level.",
      "is_optional": true,
      "regexes": [
        "^[0-9A-Z_]+$"
      ]
    },
    {
      "name": "sqlWriteBatchSize",
      "label": "SQL write batch size.",
      "helpText": "The number of entities to write to the SQL database in one operation.",
      "is_optional": true,
      "regexes": [
        "^[1-9][0-9]*$"
      ]
    },
    {
      "name": "sqlWriteShards",
      "label": "Number of output shards to create when writing to SQL.",
      "helpText": "Number of shards to create out of the data before writing to the SQL database. Please refer to the Javadoc of RegistryJpaIO.Write.shards() for how to choose this value.",
      "is_optional": true,
      "regexes": [
        "^[1-9][0-9]*$"
      ]
    },
    {
      "name": "datastoreExportDir",
      "label": "Datastore export dir",
      "helpText": "The root directory of the export to load.",
      "regexes": [
        "^gs:\\/\\/[^\\n\\r]+$"
      ]
    },
    {
      "name": "commitLogDir",
      "label": "Nomulus CommitLog dir",
      "helpText": "The directory with all Nomulus CommitLogs.",
      "regexes": [
        "^gs:\\/\\/[^\\n\\r]+$"
      ]
    },
    {
      "name": "commitLogStartTimestamp",
      "label": "Nomulus CommitLog start time",
      "helpText": "The earliest CommitLogs to load, in ISO8601 format."
    },
    {
      "name": "commitLogEndTimestamp",
      "label": "Nomulus CommitLog end time",
      "helpText": "The latest CommitLogs to load, in ISO8601 format."
    }
  ]
 }
--- a/release/cloudbuild-nomulus.yaml
+++ b/release/cloudbuild-nomulus.yaml
@ -73,12 +73,27 @@ steps:
 # Build and package the deployment files for production.
 - name: 'gcr.io/${PROJECT_ID}/builder:latest'
  args: ['release/build_nomulus_for_env.sh', 'production', 'output']
 # Build and stage init_sql_pipeline. The arguments passed to
 # stage_beam_pipeline.sh are, in order: the pipeline name, the pipeline's main
 # class, the metadata file path relative to the :core resources folder, the
 # release tag, and the project to stage into.
 - name: 'gcr.io/${PROJECT_ID}/builder:latest'
  entrypoint: /bin/bash
  # Set home for Gradle caches. Must be consistent with previous steps above
  # and ./build_nomulus_for_env.sh
  env: [ 'GRADLE_USER_HOME=./cloudbuild-caches' ]
  args:
  - -c
  - |
    ./release/stage_beam_pipeline.sh \
      init_sql_pipeline \
      google.registry.beam.initsql.InitSqlPipeline \
      google/registry/beam/init_sql_pipeline_metadata.json \
      ${TAG_NAME} \
      ${PROJECT_ID}
 # Tentatively build and publish Cloud SQL schema jar here, before schema release
 # process is finalized. Also publish nomulus:core jars that are needed for
 # server/schema compatibility tests.
 - name: 'gcr.io/${PROJECT_ID}/builder:latest'
  entrypoint: /bin/bash
-  # Set home for Gradle caches. Must be consistent with second step above
+  # Set home for Gradle caches. Must be consistent with previous steps above
  # and ./build_nomulus_for_env.sh
  env: [ 'GRADLE_USER_HOME=./cloudbuild-caches' ]
  args:
--- a/release/stage_beam_pipeline.sh
+++ b/release/stage_beam_pipeline.sh
@ -0,0 +1,65 @@
 #!/bin/bash
 # Copyright 2019 The Nomulus Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 # This script builds and stages a flex-template based BEAM pipeline. The
 # following parameters are required:
 # - pipeline_name: this is also the name of a createUberJar task in :core and
 #   the name of the jar file created by that task.
 # - main_class: the pipeline's main class name.
 # - metadata_pathname: the pipeline's metadata file, which is in the resources
 #   folder of :core. This parameter should be the relative path from resources.
 # - release_tag
 # - dev_project
 #
 # If successful, this script will generate and upload two artifacts:
 # - A template file to
 #   gs://${dev_project}-deploy/${release_tag}/beam/$(basename metadata_pathname)
 # - A docker image to gcr.io/${dev_project}/beam/${pipeline_name}:${release_tag}
 #
 # Please refer to gcloud documentation for how to start the pipeline.
 # Fail fast: abort on any command error, on use of an unset variable, and on
 # a failure in any stage of a pipeline.
 set -euo pipefail
 # Exactly five positional arguments are required; see the header comment.
 if [[ $# -ne 5 ]]; then
   # Usage errors go to stderr so they are not mistaken for normal output.
   echo "Usage: $0 pipeline_name main_class metadata_pathname release_tag" \
        "dev_project" >&2
   exit 1
 fi
 pipeline_name="$1"
 main_class="$2"
 metadata_pathname="$3"
 release_tag="$4"
 dev_project="$5"
 # Docker image (without tag) that the flex template will point at.
 image_name="gcr.io/${dev_project}/beam/${pipeline_name}"
 # The template file is uploaded under the metadata file's base name. Quote the
 # expansion so a path containing whitespace or glob characters is not split.
 metadata_basename="$(basename -- "${metadata_pathname}")"
 # Repository root handed to Gradle for resolving dependencies and plugins.
 gcs_prefix="gcs://domain-registry-maven-repository"
 # Build the pipeline's uber jar. The Gradle task in :core shares its name with
 # the pipeline and is expected to produce core/build/libs/${pipeline_name}.jar.
 ./gradlew clean :core:"${pipeline_name}" \
    -PmavenUrl="${gcs_prefix}"/maven \
    -PpluginsUrl="${gcs_prefix}"/plugins
 # Build the flex template: uploads the template file to the deploy bucket and
 # pushes the docker image tagged with the release tag.
 gcloud dataflow flex-template build \
    "gs://${dev_project}-deploy/${release_tag}/beam/${metadata_basename}" \
    --image-gcr-path "${image_name}:${release_tag}" \
    --sdk-language "JAVA" \
    --flex-template-base-image JAVA11 \
    --metadata-file "./core/src/main/resources/${metadata_pathname}" \
    --jar "./core/build/libs/${pipeline_name}.jar" \
    --env FLEX_TEMPLATE_JAVA_MAIN_CLASS="${main_class}" \
    --project "${dev_project}"