Verify schema using Cloud Build (#1627)

* Add tool to compare golden and actual schema
This commit is contained in:
Weimin Yu 2022-05-16 16:10:09 -04:00 committed by GitHub
parent 33ffcfdab3
commit d03cd5bb76
6 changed files with 258 additions and 2 deletions

View file

@ -72,7 +72,8 @@ steps:
sed -i s/'$${_IMAGE}'/nomulus-tool/g release/cloudbuild-tag.yaml
sed -i s/':$${TAG_NAME}'/@$digest/g release/cloudbuild-tag.yaml
sed -i s/'nomulus-tool:latest'/nomulus-tool@$digest/g release/cloudbuild-deploy-*.yaml
sed -i s/'nomulus-tool:latest'/nomulus-tool@$digest/g release/cloudbuild-schema-deploy-*.yaml
# schema-deploy and schema-verify scripts
sed -i s/'nomulus-tool:latest'/nomulus-tool@$digest/g release/cloudbuild-schema-*.yaml
# Build and stage Dataflow Flex templates.
- name: 'gcr.io/${PROJECT_ID}/builder:latest'
entrypoint: /bin/bash
@ -143,6 +144,7 @@ artifacts:
- 'release/cloudbuild-deploy-*.yaml'
- 'release/cloudbuild-delete-*.yaml'
- 'release/cloudbuild-schema-deploy-*.yaml'
- 'release/cloudbuild-schema-verify-*.yaml'
timeout: 7200s
options:

View file

@ -113,7 +113,20 @@ steps:
docker push gcr.io/${PROJECT_ID}/schema_deployer:latest
docker push gcr.io/${PROJECT_ID}/schema_deployer:${TAG_NAME}
dir: 'release/schema-deployer/'
# Do text replacement in the schema-deploy config, hardcoding image digests.
# Build the schema_verifier image from release/schema-verifier/, tag it with
# both ${TAG_NAME} and 'latest', and push both tags to GCR.
- name: 'gcr.io/cloud-builders/docker'
  entrypoint: /bin/bash
  args:
  - -c
  - |
    set -e
    docker build -t gcr.io/${PROJECT_ID}/schema_verifier:${TAG_NAME} .
    docker tag gcr.io/${PROJECT_ID}/schema_verifier:${TAG_NAME} \
    gcr.io/${PROJECT_ID}/schema_verifier:latest
    docker push gcr.io/${PROJECT_ID}/schema_verifier:latest
    docker push gcr.io/${PROJECT_ID}/schema_verifier:${TAG_NAME}
  dir: 'release/schema-verifier/'
# Do text replacement in the schema-deploy and schema-verify configs.
- name: 'gcr.io/cloud-builders/gcloud'
entrypoint: /bin/bash
args:
@ -126,14 +139,23 @@ steps:
schema_deployer_digest=$( \
gcloud container images list-tags gcr.io/${PROJECT_ID}/schema_deployer \
--format='get(digest)' --filter='tags = ${TAG_NAME}')
schema_verifier_digest=$( \
gcloud container images list-tags gcr.io/${PROJECT_ID}/schema_verifier \
--format='get(digest)' --filter='tags = ${TAG_NAME}')
sed -i s/builder:latest/builder@$builder_digest/g \
release/cloudbuild-schema-deploy.yaml
sed -i s/builder:latest/builder@$builder_digest/g \
release/cloudbuild-schema-verify.yaml
sed -i s/schema_deployer:latest/schema_deployer@$schema_deployer_digest/g \
release/cloudbuild-schema-deploy.yaml
sed -i s/schema_verifier:latest/schema_verifier@$schema_verifier_digest/g \
release/cloudbuild-schema-verify.yaml
sed -i s/'$${TAG_NAME}'/${TAG_NAME}/g release/cloudbuild-schema-deploy.yaml
for environment in alpha crash sandbox production; do
sed s/'$${_ENV}'/${environment}/g release/cloudbuild-schema-deploy.yaml \
> release/cloudbuild-schema-deploy-${environment}.yaml
sed s/'$${_ENV}'/${environment}/g release/cloudbuild-schema-verify.yaml \
> release/cloudbuild-schema-verify-${environment}.yaml
done
# Upload the gradle binary to GCS if it does not exist and point URL in gradle wrapper to it.
- name: 'gcr.io/cloud-builders/gsutil'

View file

@ -0,0 +1,83 @@
# Verifies that the actual Cloud SQL schema in the environment specified by the
# '_ENV' variable is the same as the golden schema in the current release for
# that environment.
#
# To run the build locally, install cloud-build-local first.
# Then run:
# cloud-build-local --config=cloudbuild-schema-verify.yaml --dryrun=false \
# --substitutions=_ENV=[ENV] ..
#
# To manually trigger a build on GCB, run:
# gcloud builds submit --config=cloudbuild-schema-verify.yaml \
# --substitutions=_ENV=[ENV] ..
#
# To trigger a build automatically, follow the instructions below and add a trigger:
# https://cloud.google.com/cloud-build/docs/running-builds/automate-builds
#
# Note that the release process hardens the tags and variables in this file:
# - The 'latest' tag on docker images will be replaced by their image digests.
# - The ${_ENV} pattern will be replaced by the actual environment name.
# Please refer to ./cloudbuild-release.yaml for more details.
# Note 2: to work around an issue in Spinnaker's 'Deployment Manifest' stage,
# variable references must avoid the ${var} format. Valid formats include
# $var or ${"${var}"}. This file uses the former. Since _ENV is expanded in the
# copies sent to Spinnaker, we preserve the brackets around it for safe
# pattern matching during release.
# See https://github.com/spinnaker/spinnaker/issues/3028 for more information.
steps:
# Step 1: fetch the nomulus tool credential from Secret Manager and store it on
# the shared 'secrets' volume. This credential has the privilege to start Cloud
# SQL proxy to all environments, and it also authenticates the nomulus tool in
# the following step.
- name: 'gcr.io/$PROJECT_ID/builder:latest'
  volumes:
  - name: 'secrets'
    path: '/secrets'
  entrypoint: /bin/bash
  args:
  - -c
  - |
    set -e
    gcloud secrets versions access latest \
    --secret nomulus-tool-cloudbuild-credential \
    > /secrets/cloud_sql_credential.json
# Step 2: use the nomulus tool to write the schema_deployer user's Cloud SQL
# credential to the 'secrets' volume.
- name: 'gcr.io/$PROJECT_ID/nomulus-tool:latest'
  volumes:
  - name: 'secrets'
    path: '/secrets'
  args:
  - -e
  - ${_ENV}
  - --credential
  - /secrets/cloud_sql_credential.json
  - get_sql_credential
  - --user
  - schema_deployer
  - --output
  - /secrets/schema_deployer_credential.dec
# Step 3: look up the schema tag currently deployed to this environment and
# copy the matching schema.jar (the expected schema) to the 'schema' volume.
- name: 'gcr.io/$PROJECT_ID/builder:latest'
  volumes:
  - name: 'schema'
    path: '/schema'
  entrypoint: /bin/bash
  args:
  - -c
  - |
    set -e
    deployed_schema_tag=$(gsutil cat \
    gs://$PROJECT_ID-deployed-tags/sql.${_ENV}.tag)
    gsutil cp gs://$PROJECT_ID-deploy/${deployed_schema_tag}/schema.jar \
    /schema
# Step 4: run the schema verifier image with both volumes mounted. The image's
# entrypoint performs the comparison; no arguments are passed here.
- name: 'gcr.io/$PROJECT_ID/schema_verifier:latest'
  volumes:
  - name: 'secrets'
    path: '/secrets'
  - name: 'schema'
    path: '/schema'
timeout: 3600s
options:
  machineType: 'E2_HIGHCPU_32'

View file

@ -0,0 +1,54 @@
# Copyright 2022 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This Dockerfile builds an image that can be used in Google Cloud Build.
# We need the following programs to build the schema verifier:
# 1. Bash to execute a shell script.
# 2. Cloud SQL proxy for connection to the SQL instance.
# 3. The pg_dump tool.
# 4. The unzip command to extract the golden schema from the schema jar.
#
# Please refer to verify_deployed_sql_schema.sh for expected volumes and
# arguments.
FROM marketplace.gcr.io/google/ubuntu1804

# DEBIAN_FRONTEND=noninteractive keeps package configuration from prompting
# during the image build.
ENV DEBIAN_FRONTEND=noninteractive LANG=en_US.UTF-8

# Install pg_dump v11 (same as current server version). This needs to be
# downloaded from postgresql's own repo, because ubuntu1804 is too old. With a
# newer image 'apt-get install postgresql-client-11' may be sufficient.
# Note: apt-get (not apt) is used throughout, since apt's command line
# interface is not intended for use in scripts.
RUN apt-get update -y \
    && apt-get install locales -y \
    && locale-gen en_US.UTF-8 \
    && apt-get install curl gnupg lsb-release -y \
    && curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \
    && sh -c \
      'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" \
      > /etc/apt/sources.list.d/pgdg.list' \
    && apt-get update -y \
    && apt-get install postgresql-client-11 -y

# Use unzip to extract files from jars. unzip is named explicitly because
# verify_deployed_sql_schema.sh invokes it directly; relying on it being pulled
# in as a recommended package of zip is fragile.
RUN apt-get install zip unzip -y

# Get netstat, used for checking Cloud SQL proxy readiness. The -y flag is
# required so that the build never blocks on (or aborts at) a confirmation
# prompt.
RUN apt-get install net-tools -y

COPY verify_deployed_sql_schema.sh /usr/local/bin/
# Patterns for schema differences that are expected; read by the script via
# 'grep -f /allowed_diffs.txt'.
COPY allowed_diffs.txt /

ADD https://dl.google.com/cloudsql/cloud_sql_proxy.linux.amd64 \
    /usr/local/bin/cloud_sql_proxy
# Files fetched with ADD from a URL are not executable by default.
RUN chmod +x /usr/local/bin/cloud_sql_proxy

ENTRYPOINT [ "verify_deployed_sql_schema.sh" ]

View file

@ -0,0 +1,3 @@
CREATE EXTENSION IF NOT EXISTS pgaudit WITH SCHEMA public;
COMMENT ON EXTENSION pgaudit IS 'provides auditing functionality';
SET default_with_oids = false;

View file

@ -0,0 +1,92 @@
#!/bin/bash
# Copyright 2022 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script compares the actual schema in a Cloud SQL database with the golden
# schema in the corresponding release. It detects schema changes made outside
# the normal deployment process, e.g., those made during a troubleshooting
# session that were not cleaned up.
#
# Mounted volumes and required files in them:
# - /secrets/cloud_sql_credential.json: Cloud SQL proxy credential
# - /secrets/schema_deployer_credential.dec: the schema_deployer user's
# database login credential.
# - /schema/schema.jar: the jar with the golden schema.
# Exit status: 0 if the golden and actual schemas match (ignoring allowed
# differences); 1 if they differ or if any step (proxy startup, schema dump,
# comparison) fails.
set -e

# The credential file holds three whitespace-separated fields on its first
# line: Cloud SQL instance name, database user, and database password.
read -r cloud_sql_instance db_user db_password \
  <<<$(cat /secrets/schema_deployer_credential.dec | awk '{print $1, $2, $3}')

# Unpack the golden schema from schema.jar
unzip -p /schema/schema.jar sql/schema/nomulus.golden.sql \
  > /schema/nomulus.golden.sql

echo "$(date): Connecting to ${cloud_sql_instance}."

# Set up connection to the Cloud SQL instance.
# For now we use Cloud SQL Proxy to set up a SSL tunnel to the Cloud SQL
# instance. This has two drawbacks:
# - It starts a background process, which is an anti-pattern in Docker.
# - The main job needs to wait for a while for the proxy to come up.
# We will research for a better long-term solution.
#
# Other options considered:
# - Connect using Socket Factory in this script.
#   * Drawback: need to manage version and transitive dependencies
#     of the postgres-socket-factory jar.
# - Create a self-contained Java application that connects using socket factory
#   * Drawback: Seems an overkill

# Make sure the background proxy is stopped whenever this script exits.
trap "pkill cloud_sql_proxy" EXIT
cloud_sql_proxy -instances="${cloud_sql_instance}"=tcp:5432 \
  --credential_file=/secrets/cloud_sql_credential.json &

# From here on, commands are expected to return non-zero in normal operation
# (grep with no match, diff with differences), so errexit is turned off and
# every failure that matters is checked explicitly.
set +e

# Wait for cloud_sql_proxy to start:
# first sleep 1 second for the process to launch, then loop until port is ready
# or the proxy process dies.
sleep 1
while ! netstat -an | grep ':5432 ' && pgrep cloud_sql_proxy; do sleep 1; done

if ! pgrep cloud_sql_proxy; then
  echo "Cloud SQL Proxy failed to set up connection."
  exit 1
fi

# Download the actual sql schema. A failed dump must abort the script:
# otherwise the comparison below would run against a missing or partial file
# and could falsely report a match.
if ! PGPASSWORD=${db_password} pg_dump -h localhost -U "${db_user}" \
    -f /schema/nomulus.actual.sql --schema-only --no-owner --no-privileges \
    --exclude-table flyway_schema_history \
    postgres; then
  echo "Failed to dump the actual schema from ${cloud_sql_instance}."
  exit 1
fi

# diff exits with 0 when the files are identical, 1 when they differ, and 2
# when it could not perform the comparison (e.g., a file is missing). Only the
# first two are valid outcomes here.
raw_diff=$(diff /schema/nomulus.golden.sql /schema/nomulus.actual.sql)
diff_status=$?
if (( diff_status > 1 )); then
  echo "Failed to compare the golden and actual schemas."
  exit 1
fi

# Clean up the raw_diff:
# - Remove diff locations (e.g. "5,6c5,6"): grep "^[<>]"
# - Remove leading bracket for easier grepping later: sed -e "s/^[<>]\s//g"
# - Remove comments and blank lines: grep -v -E "^--|^$"
# - Remove patterns in allowed_diffs.txt, which are custom Cloud SQL configs we
#   cannot emulate in the golden schema.
effective_diff=$(echo "${raw_diff}" \
  | grep "^[<>]" | sed -e "s/^[<>]\s//g" \
  | grep -v -E "^--|^$" \
  | grep -v -f /allowed_diffs.txt )

if [[ ${effective_diff} == "" ]]
then
  echo "Golden and actual schemas match."
  exit 0
else
  # The full raw diff is printed (not just the effective one) to give context.
  echo "Golden and actual schemas do not match. Diff is:"
  echo "${raw_diff}"
  exit 1
fi