Script to rolling-start Nomulus (#888)

* Script to rolling-start Nomulus

Add a script to restart Nomulus non-disruptively. This can be used after
a configuration change to external resources (e.g., a Cloud SQL
credential) to make Nomulus pick up the latest config.

Also add proper support for pagination in the List API methods, replacing
the current hack that forces the server to return everything in one
response. The List method for instances has a lower maximum page size than
the others, which is not sufficient for our project.
This commit is contained in:
Weimin Yu 2020-12-01 10:14:05 -05:00 committed by GitHub
parent eb9342a22c
commit 195151728d
8 changed files with 552 additions and 59 deletions

View file

@ -14,39 +14,20 @@
"""Helper for using the AppEngine Admin REST API."""
import time
from typing import Any, Dict, FrozenSet, Set
from typing import FrozenSet, Optional, Set, Tuple
from googleapiclient import discovery
from googleapiclient import http
import common
# AppEngine services under management.
SERVICES = frozenset(['backend', 'default', 'pubapi', 'tools'])
# Forces 'list' calls (for services and versions) to return all
# results in one shot, to avoid having to handle pagination. This value
# should be greater than the maximum number of services and versions
# allowed in any project. See
# https://cloud.google.com/appengine/docs/standard/python/an-overview-of-app-engine#limits
_PAGE_SIZE = 250
# Number of times to check the status of an operation before timing out.
_STATUS_CHECK_TIMES = 5
# Delay between status checks of a long-running operation, in seconds
_STATUS_CHECK_INTERVAL = 5
class PagingError(Exception):
    """Error for unexpected partial results.

    List calls in this module do not handle pagination. This error is raised
    when a partial result is received.
    """

    def __init__(self, uri: str):
        """Builds the error for the request that returned a partial result.

        Args:
            uri: The URI of the request whose response was paged.
        """
        # Only the message is forwarded to Exception.__init__. Passing `self`
        # as well would make `args` equal to (self, message), which breaks
        # str()/repr() of the exception.
        super().__init__(
            f'Received paged response unexpectedly when calling {uri}. '
            'Consider increasing _PAGE_SIZE.')
class AppEngineAdmin:
"""Wrapper around the AppEngine Admin REST API client.
@ -55,9 +36,16 @@ class AppEngineAdmin:
"""
def __init__(self,
project: str,
service_lookup: discovery.Resource = None,
service_lookup: Optional[discovery.Resource] = None,
status_check_interval: int = _STATUS_CHECK_INTERVAL) -> None:
"""Initialize this instance for an AppEngine(GCP) project."""
"""Initialize this instance for an AppEngine(GCP) project.
Args:
project: The GCP project name of this AppEngine instance.
service_lookup: The GCP discovery handle for service API lookup.
status_check_interval: The delay in seconds between status queries
when executing long running operations.
"""
self._project = project
if service_lookup is not None:
@ -66,6 +54,8 @@ class AppEngineAdmin:
apps = discovery.build('appengine', 'v1beta').apps()
self._services = apps.services()
self._versions = self._services.versions()
self._instances = self._versions.instances()
self._operations = apps.operations()
self._status_check_interval = status_check_interval
@ -73,14 +63,6 @@ class AppEngineAdmin:
def project(self):
return self._project
def _checked_request(self, request: http.HttpRequest) -> Dict[str, Any]:
    """Executes a request and rejects responses that are only partial.

    Args:
        request: The prepared API request to execute.

    Returns:
        The response of the executed request.

    Raises:
        PagingError: The response carries a 'nextPageToken' entry, i.e.,
            more results remain on the server.
    """
    result = request.execute()
    # No page token means the response holds the complete result.
    if 'nextPageToken' not in result:
        return result
    raise PagingError(request.uri)
def get_serving_versions(self) -> FrozenSet[common.VersionKey]:
"""Returns the serving versions of every Nomulus service.
@ -92,14 +74,15 @@ class AppEngineAdmin:
Returns: An immutable collection of the serving versions grouped by
service.
"""
response = self._checked_request(
self._services.list(appsId=self._project, pageSize=_PAGE_SIZE))
services = common.list_all_pages(self._services.list,
'services',
appsId=self._project)
# Response format is specified at
# http://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta5.apps.services.html#list.
# http://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta.apps.services.html#list.
versions = []
for service in response.get('services', []):
for service in services:
if service['id'] in SERVICES:
# yapf: disable
versions_with_traffic = (
@ -134,15 +117,15 @@ class AppEngineAdmin:
# Sort the requested services for ease of testing. For now the mocked
# AppEngine admin in appengine_test can only respond in a fixed order.
for service_id in sorted(requested_services):
response = self._checked_request(self._services.versions().list(
appsId=self._project,
servicesId=service_id,
pageSize=_PAGE_SIZE))
response = common.list_all_pages(self._versions.list,
'versions',
appsId=self._project,
servicesId=service_id)
# Format of version_list is defined at
# https://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta5.apps.services.versions.html#list.
# https://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta.apps.services.versions.html#list.
for version in response.get('versions', []):
for version in response:
if common.VersionKey(service_id, version['id']) in versions:
scalings = [
s for s in list(common.AppEngineScaling)
@ -165,16 +148,34 @@ class AppEngineAdmin:
return frozenset(version_configs)
def list_instances(
        self,
        version: common.VersionKey) -> Tuple[common.VmInstanceInfo, ...]:
    """Lists the VM instances of an AppEngine version.

    Args:
        version: Identifies the service and the version whose instances
            are to be listed.

    Returns:
        A tuple holding one VmInstanceInfo per listed instance, built from
        the instance's id and its parsed start time.
    """
    # Use the instances handle cached in __init__, consistent with how the
    # services and versions handles are used by the other list calls.
    # NOTE(review): common.list_all_pages is expected to gather the items
    # under the 'instances' key across all pages -- confirm in common.py.
    instances = common.list_all_pages(self._instances.list,
                                      'instances',
                                      appsId=self._project,
                                      servicesId=version.service_id,
                                      versionsId=version.version_id)

    # Format of each instance is defined at
    # https://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta.apps.services.versions.instances.html#list
    return tuple(
        common.VmInstanceInfo(inst['id'],
                              common.parse_gcp_timestamp(inst['startTime']))
        for inst in instances)
def set_manual_scaling_num_instance(self, service_id: str, version_id: str,
manual_instances: int) -> None:
"""Creates an request to change an AppEngine version's status."""
update_mask = 'manualScaling.instances'
body = {'manualScaling': {'instances': manual_instances}}
response = self._services.versions().patch(appsId=self._project,
servicesId=service_id,
versionsId=version_id,
updateMask=update_mask,
body=body).execute()
response = self._versions.patch(appsId=self._project,
servicesId=service_id,
versionsId=version_id,
updateMask=update_mask,
body=body).execute()
operation_id = response.get('name').split('operations/')[1]
for _ in range(_STATUS_CHECK_TIMES):