mirror of
https://github.com/google/nomulus.git
synced 2025-04-29 19:47:51 +02:00
Script to rolling-start Nomulus (#888)
* Script to rolling-start Nomulus Add a script to restart Nomulus non-disruptively. This can be used after a configuration change to external resources (e.g., Cloud SQL credential) to make Nomulus pick up the latest config. Also added proper support to paging based List api methods, replacing the current hack that forces the server to return everything in one response. The List method for instances has a lower limit on page size than others which is not sufficient for our project.
This commit is contained in:
parent
eb9342a22c
commit
195151728d
8 changed files with 552 additions and 59 deletions
|
@ -14,39 +14,20 @@
|
|||
"""Helper for using the AppEngine Admin REST API."""
|
||||
|
||||
import time
|
||||
from typing import Any, Dict, FrozenSet, Set
|
||||
from typing import FrozenSet, Optional, Set, Tuple
|
||||
|
||||
from googleapiclient import discovery
|
||||
from googleapiclient import http
|
||||
|
||||
import common
|
||||
|
||||
# AppEngine services under management.
|
||||
SERVICES = frozenset(['backend', 'default', 'pubapi', 'tools'])
|
||||
# Forces 'list' calls (for services and versions) to return all
|
||||
# results in one shot, to avoid having to handle pagination. This values
|
||||
# should be greater than the maximum allowed services and versions in any
|
||||
# project (
|
||||
# https://cloud.google.com/appengine/docs/standard/python/an-overview-of-app-engine#limits).
|
||||
_PAGE_SIZE = 250
|
||||
# Number of times to check the status of an operation before timing out.
|
||||
_STATUS_CHECK_TIMES = 5
|
||||
# Delay between status checks of a long-running operation, in seconds
|
||||
_STATUS_CHECK_INTERVAL = 5
|
||||
|
||||
|
||||
class PagingError(Exception):
|
||||
"""Error for unexpected partial results.
|
||||
|
||||
List calls in this module do not handle pagination. This error is raised
|
||||
when a partial result is received.
|
||||
"""
|
||||
def __init__(self, uri: str):
|
||||
super().__init__(
|
||||
self, f'Received paged response unexpectedly when calling {uri}. '
|
||||
'Consider increasing _PAGE_SIZE.')
|
||||
|
||||
|
||||
class AppEngineAdmin:
|
||||
"""Wrapper around the AppEngine Admin REST API client.
|
||||
|
||||
|
@ -55,9 +36,16 @@ class AppEngineAdmin:
|
|||
"""
|
||||
def __init__(self,
|
||||
project: str,
|
||||
service_lookup: discovery.Resource = None,
|
||||
service_lookup: Optional[discovery.Resource] = None,
|
||||
status_check_interval: int = _STATUS_CHECK_INTERVAL) -> None:
|
||||
"""Initialize this instance for an AppEngine(GCP) project."""
|
||||
"""Initialize this instance for an AppEngine(GCP) project.
|
||||
|
||||
Args:
|
||||
project: The GCP project name of this AppEngine instance.
|
||||
service_lookup: The GCP discovery handle for service API lookup.
|
||||
status_check_interval: The delay in seconds between status queries
|
||||
when executing long running operations.
|
||||
"""
|
||||
self._project = project
|
||||
|
||||
if service_lookup is not None:
|
||||
|
@ -66,6 +54,8 @@ class AppEngineAdmin:
|
|||
apps = discovery.build('appengine', 'v1beta').apps()
|
||||
|
||||
self._services = apps.services()
|
||||
self._versions = self._services.versions()
|
||||
self._instances = self._versions.instances()
|
||||
self._operations = apps.operations()
|
||||
self._status_check_interval = status_check_interval
|
||||
|
||||
|
@ -73,14 +63,6 @@ class AppEngineAdmin:
|
|||
def project(self):
|
||||
return self._project
|
||||
|
||||
def _checked_request(self, request: http.HttpRequest) -> Dict[str, Any]:
|
||||
"""Verifies that all results are returned for a request."""
|
||||
response = request.execute()
|
||||
if 'nextPageToken' in response:
|
||||
raise PagingError(request.uri)
|
||||
|
||||
return response
|
||||
|
||||
def get_serving_versions(self) -> FrozenSet[common.VersionKey]:
|
||||
"""Returns the serving versions of every Nomulus service.
|
||||
|
||||
|
@ -92,14 +74,15 @@ class AppEngineAdmin:
|
|||
Returns: An immutable collection of the serving versions grouped by
|
||||
service.
|
||||
"""
|
||||
response = self._checked_request(
|
||||
self._services.list(appsId=self._project, pageSize=_PAGE_SIZE))
|
||||
services = common.list_all_pages(self._services.list,
|
||||
'services',
|
||||
appsId=self._project)
|
||||
|
||||
# Response format is specified at
|
||||
# http://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta5.apps.services.html#list.
|
||||
# http://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta.apps.services.html#list.
|
||||
|
||||
versions = []
|
||||
for service in response.get('services', []):
|
||||
for service in services:
|
||||
if service['id'] in SERVICES:
|
||||
# yapf: disable
|
||||
versions_with_traffic = (
|
||||
|
@ -134,15 +117,15 @@ class AppEngineAdmin:
|
|||
# Sort the requested services for ease of testing. For now the mocked
|
||||
# AppEngine admin in appengine_test can only respond in a fixed order.
|
||||
for service_id in sorted(requested_services):
|
||||
response = self._checked_request(self._services.versions().list(
|
||||
appsId=self._project,
|
||||
servicesId=service_id,
|
||||
pageSize=_PAGE_SIZE))
|
||||
response = common.list_all_pages(self._versions.list,
|
||||
'versions',
|
||||
appsId=self._project,
|
||||
servicesId=service_id)
|
||||
|
||||
# Format of version_list is defined at
|
||||
# https://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta5.apps.services.versions.html#list.
|
||||
# https://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta.apps.services.versions.html#list.
|
||||
|
||||
for version in response.get('versions', []):
|
||||
for version in response:
|
||||
if common.VersionKey(service_id, version['id']) in versions:
|
||||
scalings = [
|
||||
s for s in list(common.AppEngineScaling)
|
||||
|
@ -165,16 +148,34 @@ class AppEngineAdmin:
|
|||
|
||||
return frozenset(version_configs)
|
||||
|
||||
def list_instances(
|
||||
self,
|
||||
version: common.VersionKey) -> Tuple[common.VmInstanceInfo, ...]:
|
||||
instances = common.list_all_pages(self._versions.instances().list,
|
||||
'instances',
|
||||
appsId=self._project,
|
||||
servicesId=version.service_id,
|
||||
versionsId=version.version_id)
|
||||
|
||||
# Format of version_list is defined at
|
||||
# https://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta.apps.services.versions.instances.html#list
|
||||
|
||||
return tuple([
|
||||
common.VmInstanceInfo(
|
||||
inst['id'], common.parse_gcp_timestamp(inst['startTime']))
|
||||
for inst in instances
|
||||
])
|
||||
|
||||
def set_manual_scaling_num_instance(self, service_id: str, version_id: str,
|
||||
manual_instances: int) -> None:
|
||||
"""Creates an request to change an AppEngine version's status."""
|
||||
update_mask = 'manualScaling.instances'
|
||||
body = {'manualScaling': {'instances': manual_instances}}
|
||||
response = self._services.versions().patch(appsId=self._project,
|
||||
servicesId=service_id,
|
||||
versionsId=version_id,
|
||||
updateMask=update_mask,
|
||||
body=body).execute()
|
||||
response = self._versions.patch(appsId=self._project,
|
||||
servicesId=service_id,
|
||||
versionsId=version_id,
|
||||
updateMask=update_mask,
|
||||
body=body).execute()
|
||||
|
||||
operation_id = response.get('name').split('operations/')[1]
|
||||
for _ in range(_STATUS_CHECK_TIMES):
|
||||
|
|
|
@ -17,11 +17,14 @@ import unittest
|
|||
from unittest import mock
|
||||
from unittest.mock import patch
|
||||
|
||||
from googleapiclient import http
|
||||
|
||||
import appengine
|
||||
import common
|
||||
|
||||
|
||||
def setup_appengine_admin() -> Tuple[object, object]:
|
||||
def setup_appengine_admin(
|
||||
) -> Tuple[appengine.AppEngineAdmin, http.HttpRequest]:
|
||||
"""Helper for setting up a mocked AppEngineAdmin instance.
|
||||
|
||||
Returns:
|
||||
|
@ -32,7 +35,7 @@ def setup_appengine_admin() -> Tuple[object, object]:
|
|||
# Assign mocked API response to mock_request.execute.
|
||||
mock_request = mock.MagicMock()
|
||||
mock_request.uri.return_value = 'myuri'
|
||||
# Mocked resource shared by services, versions, and operations.
|
||||
# Mocked resource shared by services, versions, instances, and operations.
|
||||
resource = mock.MagicMock()
|
||||
resource.list.return_value = mock_request
|
||||
resource.get.return_value = mock_request
|
||||
|
@ -41,6 +44,7 @@ def setup_appengine_admin() -> Tuple[object, object]:
|
|||
apps = mock.MagicMock()
|
||||
apps.services.return_value = resource
|
||||
resource.versions.return_value = resource
|
||||
resource.instances.return_value = resource
|
||||
apps.operations.return_value = resource
|
||||
service_lookup = mock.MagicMock()
|
||||
service_lookup.apps.return_value = apps
|
||||
|
@ -66,11 +70,6 @@ class AppEngineTestCase(unittest.TestCase):
|
|||
else:
|
||||
self._mock_request.execute.return_value = responses
|
||||
|
||||
def test_checked_request_multipage_raises(self) -> None:
|
||||
self._set_mocked_response({'nextPageToken': ''})
|
||||
self.assertRaises(appengine.PagingError,
|
||||
self._client.get_serving_versions)
|
||||
|
||||
def test_get_serving_versions(self) -> None:
|
||||
self._set_mocked_response({
|
||||
'services': [{
|
||||
|
|
|
@ -11,13 +11,16 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Declares data types that describe AppEngine services and versions."""
|
||||
"""Data types and utilities common to the other modules in this package."""
|
||||
|
||||
import dataclasses
|
||||
import datetime
|
||||
import enum
|
||||
import pathlib
|
||||
import re
|
||||
from typing import Optional
|
||||
from typing import Any, Optional, Tuple
|
||||
|
||||
from google.protobuf import timestamp_pb2
|
||||
|
||||
|
||||
class CannotRollbackError(Exception):
|
||||
|
@ -91,6 +94,13 @@ class VersionConfig(VersionKey):
|
|||
manual_scaling_instances: Optional[int] = None
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class VmInstanceInfo:
|
||||
"""Information about an AppEngine VM instance."""
|
||||
instance_name: str
|
||||
start_time: datetime.datetime
|
||||
|
||||
|
||||
def get_nomulus_root() -> str:
|
||||
"""Finds the current Nomulus root directory.
|
||||
|
||||
|
@ -109,3 +119,63 @@ def get_nomulus_root() -> str:
|
|||
|
||||
raise RuntimeError(
|
||||
'Do not move this file out of the Nomulus directory tree.')
|
||||
|
||||
|
||||
def list_all_pages(func, data_field: str, *args, **kwargs) -> Tuple[Any, ...]:
|
||||
"""Collects all data items from a paginator-based 'List' API.
|
||||
|
||||
Args:
|
||||
func: The GCP API method that supports paged responses.
|
||||
data_field: The field in a response object containing the data
|
||||
items to be returned. This is guaranteed to be an Iterable
|
||||
type.
|
||||
*args: Positional arguments passed to func.
|
||||
*kwargs: Keyword arguments passed to func.
|
||||
|
||||
Returns: An immutable collection of data items assembled from the
|
||||
paged responses.
|
||||
"""
|
||||
result_collector = []
|
||||
page_token = None
|
||||
while True:
|
||||
request = func(*args, pageToken=page_token, **kwargs)
|
||||
response = request.execute()
|
||||
result_collector.extend(response.get(data_field, []))
|
||||
page_token = response.get('nextPageToken')
|
||||
if not page_token:
|
||||
return tuple(result_collector)
|
||||
|
||||
|
||||
def parse_gcp_timestamp(timestamp: str) -> datetime.datetime:
|
||||
"""Parses a timestamp string in GCP API to datetime.
|
||||
|
||||
This method uses protobuf's Timestamp class to parse timestamp strings.
|
||||
This class is used by GCP APIs to parse timestamp strings, and is tolerant
|
||||
to certain cases which can break datetime as of Python 3.8, e.g., the
|
||||
trailing 'Z' as timezone, and fractional seconds with number of digits
|
||||
other than 3 or 6.
|
||||
|
||||
Args:
|
||||
timestamp: A string in RFC 3339 format.
|
||||
|
||||
Returns: A datetime instance.
|
||||
"""
|
||||
ts = timestamp_pb2.Timestamp()
|
||||
ts.FromJsonString(timestamp)
|
||||
return ts.ToDatetime()
|
||||
|
||||
|
||||
def to_gcp_timestamp(timestamp: datetime.datetime) -> str:
|
||||
"""Converts a datetime to string.
|
||||
|
||||
This method uses protobuf's Timestamp class to parse timestamp strings.
|
||||
This class is used by GCP APIs to parse timestamp strings.
|
||||
|
||||
Args:
|
||||
timestamp: The datetime instance to be converted.
|
||||
|
||||
Returns: A string in RFC 3339 format.
|
||||
"""
|
||||
ts = timestamp_pb2.Timestamp()
|
||||
ts.FromDatetime(timestamp)
|
||||
return ts.ToJsonString()
|
||||
|
|
70
release/rollback/common_test.py
Normal file
70
release/rollback/common_test.py
Normal file
|
@ -0,0 +1,70 @@
|
|||
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Unit tests for the common module."""
|
||||
import datetime
|
||||
import unittest
|
||||
from unittest import mock
|
||||
from unittest.mock import call, patch
|
||||
|
||||
import common
|
||||
|
||||
|
||||
class CommonTestCase(unittest.TestCase):
|
||||
"""Unit tests for the common module."""
|
||||
def setUp(self) -> None:
|
||||
self._mock_request = mock.MagicMock()
|
||||
self._mock_api = mock.MagicMock()
|
||||
self._mock_api.list.return_value = self._mock_request
|
||||
self.addCleanup(patch.stopall)
|
||||
|
||||
def test_list_all_pages_single_page(self):
|
||||
self._mock_request.execute.return_value = {'data': [1]}
|
||||
response = common.list_all_pages(self._mock_api.list,
|
||||
'data',
|
||||
appsId='project')
|
||||
self.assertSequenceEqual(response, [1])
|
||||
self._mock_api.list.assert_called_once_with(pageToken=None,
|
||||
appsId='project')
|
||||
|
||||
def test_list_all_pages_multi_page(self):
|
||||
self._mock_request.execute.side_effect = [{
|
||||
'data': [1],
|
||||
'nextPageToken': 'token'
|
||||
}, {
|
||||
'data': [2]
|
||||
}]
|
||||
response = common.list_all_pages(self._mock_api.list,
|
||||
'data',
|
||||
appsId='project')
|
||||
self.assertSequenceEqual(response, [1, 2])
|
||||
self.assertSequenceEqual(self._mock_api.list.call_args_list, [
|
||||
call(pageToken=None, appsId='project'),
|
||||
call(pageToken='token', appsId='project')
|
||||
])
|
||||
|
||||
def test_parse_timestamp(self):
|
||||
self.assertEqual(common.parse_gcp_timestamp('2020-01-01T00:00:00Z'),
|
||||
datetime.datetime(2020, 1, 1))
|
||||
|
||||
def test_parse_timestamp_irregular_nano_digits(self):
|
||||
# datetime only accepts 3 or 6 digits in fractional second.
|
||||
self.assertRaises(
|
||||
ValueError,
|
||||
lambda: datetime.datetime.fromisoformat('2020-01-01T00:00:00.9'))
|
||||
self.assertEqual(common.parse_gcp_timestamp('2020-01-01T00:00:00.9Z'),
|
||||
datetime.datetime(2020, 1, 1, microsecond=900000))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -51,7 +51,7 @@ class ServiceRollback:
|
|||
def _get_service_rollback_plan(
|
||||
target_configs: FrozenSet[common.VersionConfig],
|
||||
serving_configs: FrozenSet[common.VersionConfig]
|
||||
) -> Tuple[ServiceRollback]:
|
||||
) -> Tuple[ServiceRollback, ...]:
|
||||
# yapf: enable
|
||||
"""Determines the versions to bring up/down in each service.
|
||||
|
||||
|
@ -111,7 +111,7 @@ def _generate_steps(
|
|||
appengine_admin: appengine.AppEngineAdmin,
|
||||
env: str,
|
||||
target_release: str,
|
||||
rollback_plan: Tuple[ServiceRollback]
|
||||
rollback_plan: Tuple[ServiceRollback, ...]
|
||||
) -> Tuple[steps.RollbackStep, ...]:
|
||||
# yapf: enable
|
||||
"""Generates the sequence of operations for execution.
|
||||
|
@ -158,11 +158,11 @@ def _generate_steps(
|
|||
|
||||
for plan in rollback_plan:
|
||||
for version in plan.serving_versions:
|
||||
if plan.target_version.scaling != common.AppEngineScaling.AUTOMATIC:
|
||||
if version.scaling != common.AppEngineScaling.AUTOMATIC:
|
||||
rollback_steps.append(
|
||||
steps.start_or_stop_version(appengine_admin.project,
|
||||
'stop', version))
|
||||
if plan.target_version.scaling == common.AppEngineScaling.MANUAL:
|
||||
if version.scaling == common.AppEngineScaling.MANUAL:
|
||||
# Release all but one instances. Cannot set num_instances to 0
|
||||
# with this api.
|
||||
rollback_steps.append(
|
||||
|
@ -180,7 +180,7 @@ def _generate_steps(
|
|||
|
||||
def get_rollback_plan(gcs_client: gcs.GcsClient,
|
||||
appengine_admin: appengine.AppEngineAdmin, env: str,
|
||||
target_release: str) -> Tuple[steps.RollbackStep]:
|
||||
target_release: str) -> Tuple[steps.RollbackStep, ...]:
|
||||
"""Generates the sequence of rollback operations for execution."""
|
||||
target_versions = gcs_client.get_versions_by_release(env, target_release)
|
||||
serving_versions = appengine_admin.get_serving_versions()
|
||||
|
|
186
release/rollback/rolling_restart.py
Normal file
186
release/rollback/rolling_restart.py
Normal file
|
@ -0,0 +1,186 @@
|
|||
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Script to rolling-restart the Nomulus server on AppEngine.
|
||||
|
||||
This script effects a rolling restart of the Nomulus server by deleting VM
|
||||
instances at a controlled pace and leave it to the AppEngine scaling policy
|
||||
to bring up new VM instances.
|
||||
|
||||
For each service, this script gets a list of VM instances and sequentially
|
||||
handles each instance as follows:
|
||||
1. Issue a gcloud delete command for this instance.
|
||||
2. Poll the AppEngine at fixed intervals until this instance no longer exists.
|
||||
|
||||
Instance deletion is not instantaneous. An instance actively processing
|
||||
requests takes time to shutdown, and its replacement almost always comes
|
||||
up immediately after the shutdown. For this reason, we believe that our current
|
||||
implementation is sufficient safe, and will not pursue more sophisticated
|
||||
algorithms.
|
||||
|
||||
Note that for backend instances that may handle large queries, it may take tens
|
||||
of seconds, even minutes, to shut down one of them.
|
||||
|
||||
This script also accepts an optional start_time parameter that serves as a
|
||||
filter of instances to delete: only those instances that started before this
|
||||
time will be deleted. This parameter makes error handling easy. When this
|
||||
script fails, simply rerun with the same start_time until it succeeds.
|
||||
"""
|
||||
import argparse
|
||||
import datetime
|
||||
import sys
|
||||
import time
|
||||
from typing import Iterable, Optional, Tuple
|
||||
|
||||
import appengine
|
||||
import common
|
||||
import steps
|
||||
|
||||
HELP_MAIN = 'Script to rolling-restart the Nomulus server on AppEngine'
|
||||
HELP_MIN_DELAY = 'Minimum delay in seconds between instance deletions.'
|
||||
HELP_MIN_LIVE_INSTANCE_PERCENT = (
|
||||
'Minimum number of instances to keep, as a percentage '
|
||||
'of the total at the beginning of the restart process.')
|
||||
|
||||
|
||||
# yapf: disable
|
||||
def generate_steps(
|
||||
appengine_admin: appengine.AppEngineAdmin,
|
||||
version: common.VersionKey,
|
||||
started_before: datetime.datetime
|
||||
) -> Tuple[steps.KillNomulusInstance, ...]:
|
||||
# yapf: enable
|
||||
instances = appengine_admin.list_instances(version)
|
||||
return tuple([
|
||||
steps.kill_nomulus_instance(appengine_admin.project, version,
|
||||
inst.instance_name) for inst in instances
|
||||
if inst.start_time <= started_before
|
||||
])
|
||||
|
||||
|
||||
def execute_steps(appengine_admin: appengine.AppEngineAdmin,
|
||||
version: common.VersionKey,
|
||||
cmds: Tuple[steps.KillNomulusInstance, ...], min_delay: int,
|
||||
configured_num_instances: Optional[int]) -> None:
|
||||
print(f'Restarting {len(cmds)} instances in {version.service_id}')
|
||||
for cmd in cmds:
|
||||
print(cmd.info())
|
||||
cmd.execute()
|
||||
|
||||
while True:
|
||||
time.sleep(min_delay)
|
||||
running_instances = [
|
||||
inst.instance_name
|
||||
for inst in appengine_admin.list_instances(version)
|
||||
]
|
||||
if cmd.instance_name in running_instances:
|
||||
print('Waiting for VM to shut down...')
|
||||
continue
|
||||
if (configured_num_instances is not None
|
||||
and len(running_instances) < configured_num_instances):
|
||||
print('Waiting for new VM to come up...')
|
||||
continue
|
||||
break
|
||||
print('VM instance has shut down.\n')
|
||||
|
||||
print(f'Done: {len(cmds)} instances in {version.service_id}\n')
|
||||
|
||||
|
||||
# yapf: disable
|
||||
def restart_one_service(appengine_admin: appengine.AppEngineAdmin,
|
||||
version: common.VersionKey,
|
||||
min_delay: int,
|
||||
started_before: datetime.datetime,
|
||||
configured_num_instances: Optional[int]) -> None:
|
||||
# yapf: enable
|
||||
"""Restart VM instances in one service according to their start time.
|
||||
|
||||
Args:
|
||||
appengine_admin: The client of AppEngine Admin API.
|
||||
version: The Nomulus version to restart. This must be the currently
|
||||
serving version.
|
||||
min_delay: The minimum delay between successive deletions.
|
||||
started_before: Only VM instances started before this time are to be
|
||||
deleted.
|
||||
configured_num_instances: When present, the constant number of instances
|
||||
this version is configured with.
|
||||
"""
|
||||
cmds = generate_steps(appengine_admin, version, started_before)
|
||||
# yapf: disable
|
||||
execute_steps(
|
||||
appengine_admin, version, cmds, min_delay, configured_num_instances)
|
||||
# yapf: enable
|
||||
|
||||
|
||||
# yapf: disable
|
||||
def rolling_restart(project: str,
|
||||
services: Iterable[str],
|
||||
min_delay: int,
|
||||
started_before: datetime.datetime):
|
||||
# yapf: enable
|
||||
print(f'Rolling restart {project} at '
|
||||
f'{common.to_gcp_timestamp(started_before)}\n')
|
||||
appengine_admin = appengine.AppEngineAdmin(project)
|
||||
version_configs = appengine_admin.get_version_configs(
|
||||
set(appengine_admin.get_serving_versions()))
|
||||
restart_versions = [
|
||||
version for version in version_configs
|
||||
if version.service_id in services
|
||||
]
|
||||
# yapf: disable
|
||||
for version in restart_versions:
|
||||
restart_one_service(appengine_admin,
|
||||
version,
|
||||
min_delay,
|
||||
started_before,
|
||||
version.manual_scaling_instances)
|
||||
# yapf: enable
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(prog='rolling_restart',
|
||||
description=HELP_MAIN)
|
||||
parser.add_argument('--project',
|
||||
'-p',
|
||||
required=True,
|
||||
help='The GCP project of the Nomulus server.')
|
||||
parser.add_argument('--services',
|
||||
'-s',
|
||||
nargs='+',
|
||||
choices=appengine.SERVICES,
|
||||
default=appengine.SERVICES,
|
||||
help='The services to rollback.')
|
||||
parser.add_argument('--min_delay',
|
||||
'-d',
|
||||
type=int,
|
||||
default=5,
|
||||
choices=range(1, 100),
|
||||
help=HELP_MIN_DELAY)
|
||||
parser.add_argument(
|
||||
'--started_before',
|
||||
'-b',
|
||||
type=common.parse_gcp_timestamp,
|
||||
default=datetime.datetime.utcnow(),
|
||||
help='Only kill VM instances started before this time.')
|
||||
|
||||
args = parser.parse_args()
|
||||
rolling_restart(**vars(args))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
sys.exit(main())
|
||||
except Exception as ex: # pylint: disable=broad-except
|
||||
print(ex)
|
||||
sys.exit(1)
|
149
release/rollback/rolling_restart_test.py
Normal file
149
release/rollback/rolling_restart_test.py
Normal file
|
@ -0,0 +1,149 @@
|
|||
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Unit tests of rolling_restart."""
|
||||
|
||||
import datetime
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
import common
|
||||
import rolling_restart
|
||||
import steps
|
||||
|
||||
import appengine_test
|
||||
|
||||
|
||||
class RollingRestartTestCase(unittest.TestCase):
|
||||
"""Tests for rolling_restart."""
|
||||
def setUp(self) -> None:
|
||||
self._appengine_admin, self._appengine_request = (
|
||||
appengine_test.setup_appengine_admin())
|
||||
self._version = common.VersionKey('my_service', 'my_version')
|
||||
self.addCleanup(mock.patch.stopall)
|
||||
|
||||
def _setup_execute_steps_tests(self):
|
||||
self._appengine_request.execute.side_effect = [
|
||||
# First list_instance response.
|
||||
{
|
||||
'instances': [{
|
||||
'id': 'vm_to_delete',
|
||||
'startTime': '2019-01-01T00:00:00Z'
|
||||
}, {
|
||||
'id': 'vm_to_stay',
|
||||
'startTime': '2019-01-01T00:00:00Z'
|
||||
}]
|
||||
},
|
||||
# Second list_instance response
|
||||
{
|
||||
'instances': [{
|
||||
'id': 'vm_to_stay',
|
||||
'startTime': '2019-01-01T00:00:00Z'
|
||||
}]
|
||||
},
|
||||
# Third list_instance response
|
||||
{
|
||||
'instances': [{
|
||||
'id': 'vm_to_stay',
|
||||
'startTime': '2019-01-01T00:00:00Z'
|
||||
}, {
|
||||
'id': 'vm_new',
|
||||
'startTime': '2019-01-01T00:00:00Z'
|
||||
}]
|
||||
}
|
||||
]
|
||||
|
||||
def _setup_generate_steps_tests(self):
|
||||
self._appengine_request.execute.side_effect = [
|
||||
# First page of list_instance response.
|
||||
{
|
||||
'instances': [{
|
||||
'id': 'vm_2019',
|
||||
'startTime': '2019-01-01T00:00:00Z'
|
||||
}],
|
||||
'nextPageToken':
|
||||
'token'
|
||||
},
|
||||
# Second and final page of list_instance response
|
||||
{
|
||||
'instances': [{
|
||||
'id': 'vm_2020',
|
||||
'startTime': '2020-01-01T00:00:00Z'
|
||||
}]
|
||||
}
|
||||
]
|
||||
|
||||
def test_kill_vm_command(self) -> None:
|
||||
cmd = steps.kill_nomulus_instance(
|
||||
'my_project', common.VersionKey('my_service', 'my_version'),
|
||||
'my_inst')
|
||||
self.assertEqual(cmd.instance_name, 'my_inst')
|
||||
self.assertIn(('gcloud app instances delete my_inst --quiet '
|
||||
'--user-output-enabled=false --service my_service '
|
||||
'--version my_version --project my_project'),
|
||||
cmd.info())
|
||||
|
||||
def _generate_kill_vm_command(self, version: common.VersionKey,
|
||||
instance_name: str):
|
||||
return steps.kill_nomulus_instance(self._appengine_admin.project,
|
||||
version, instance_name)
|
||||
|
||||
def test_generate_commands(self):
|
||||
self._setup_generate_steps_tests()
|
||||
commands = rolling_restart.generate_steps(self._appengine_admin,
|
||||
self._version,
|
||||
datetime.datetime.utcnow())
|
||||
self.assertSequenceEqual(commands, [
|
||||
self._generate_kill_vm_command(self._version, 'vm_2019'),
|
||||
self._generate_kill_vm_command(self._version, 'vm_2020')
|
||||
])
|
||||
|
||||
def test_generate_commands_older_vm(self):
|
||||
self._setup_generate_steps_tests()
|
||||
version = common.VersionKey('my_service', 'my_version')
|
||||
# yapf: disable
|
||||
commands = rolling_restart.generate_steps(
|
||||
self._appengine_admin,
|
||||
version,
|
||||
common.parse_gcp_timestamp('2019-12-01T00:00:00Z'))
|
||||
# yapf: enable
|
||||
self.assertSequenceEqual(
|
||||
commands, [self._generate_kill_vm_command(version, 'vm_2019')])
|
||||
|
||||
def test_execute_steps_variable_instances(self):
|
||||
self._setup_execute_steps_tests()
|
||||
cmd = mock.MagicMock()
|
||||
cmd.instance_name = 'vm_to_delete'
|
||||
cmds = tuple([cmd]) # yapf does not format (cmd,) correctly.
|
||||
rolling_restart.execute_steps(appengine_admin=self._appengine_admin,
|
||||
version=self._version,
|
||||
cmds=cmds,
|
||||
min_delay=0,
|
||||
configured_num_instances=None)
|
||||
self.assertEqual(self._appengine_request.execute.call_count, 2)
|
||||
|
||||
def test_execute_steps_fixed_instances(self):
|
||||
self._setup_execute_steps_tests()
|
||||
cmd = mock.MagicMock()
|
||||
cmd.instance_name = 'vm_to_delete'
|
||||
cmds = tuple([cmd]) # yapf does not format (cmd,) correctly.
|
||||
rolling_restart.execute_steps(appengine_admin=self._appengine_admin,
|
||||
version=self._version,
|
||||
cmds=cmds,
|
||||
min_delay=0,
|
||||
configured_num_instances=2)
|
||||
self.assertEqual(self._appengine_request.execute.call_count, 3)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -122,6 +122,24 @@ def direct_service_traffic_to_version(
|
|||
'--quiet', f'--splits={version.version_id}=1', '--project', project))
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class KillNomulusInstance(RollbackStep):
|
||||
"""Step that kills a Nomulus VM instance."""
|
||||
instance_name: str
|
||||
|
||||
|
||||
# yapf: disable
|
||||
def kill_nomulus_instance(project: str,
|
||||
version: common.VersionKey,
|
||||
instance_name: str) -> KillNomulusInstance:
|
||||
# yapf: enable
|
||||
return KillNomulusInstance(
|
||||
'Delete one VM instance.',
|
||||
('gcloud', 'app', 'instances', 'delete', instance_name, '--quiet',
|
||||
'--user-output-enabled=false', '--service', version.service_id,
|
||||
'--version', version.version_id, '--project', project), instance_name)
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class _UpdateDeployTag(RollbackStep):
|
||||
"""Updates the deployment tag on GCS."""
|
||||
|
|
Loading…
Add table
Reference in a new issue