diff --git a/core/src/main/java/google/registry/config/RegistryConfig.java b/core/src/main/java/google/registry/config/RegistryConfig.java index fe443adf5..428c1b819 100644 --- a/core/src/main/java/google/registry/config/RegistryConfig.java +++ b/core/src/main/java/google/registry/config/RegistryConfig.java @@ -576,6 +576,19 @@ public final class RegistryConfig { return config.beam.highPerformanceMachineType; } + /** + * Returns the initial number of workers used for a Beam pipeline. Autoscaling is still in effect. + * + * @see <a + * href="https://cloud.google.com/dataflow/docs/horizontal-autoscaling"> + * Horizontal Autoscaling</a> + */ + @Provides + @Config("initialWorkerCount") + public static int provideInitialWorkerCount(RegistryConfigSettings config) { + return config.beam.initialWorkerCount; + } + /** * Returns the default job region to run Apache Beam (Cloud Dataflow) jobs in. * diff --git a/core/src/main/java/google/registry/config/RegistryConfigSettings.java b/core/src/main/java/google/registry/config/RegistryConfigSettings.java index bf706f06e..098250707 100644 --- a/core/src/main/java/google/registry/config/RegistryConfigSettings.java +++ b/core/src/main/java/google/registry/config/RegistryConfigSettings.java @@ -137,6 +137,7 @@ public class RegistryConfigSettings { public static class Beam { public String defaultJobRegion; public String highPerformanceMachineType; + public int initialWorkerCount; public String stagingBucketUrl; } diff --git a/core/src/main/java/google/registry/config/files/default-config.yaml b/core/src/main/java/google/registry/config/files/default-config.yaml index a28f4ed94..ae1bcde03 100644 --- a/core/src/main/java/google/registry/config/files/default-config.yaml +++ b/core/src/main/java/google/registry/config/files/default-config.yaml @@ -442,6 +442,11 @@ beam: # core count per machine may be preferable in order to preserve IP addresses. # See: https://cloud.google.com/compute/quotas#cpu_quota highPerformanceMachineType: n2-standard-4 + # The initial number of workers requested. 
This can help speed up the pipeline + # which otherwise would take some time to spin up the necessary number of + # workers. Autoscaling is still in effect to reduce the number of workers if + # not in use. + initialWorkerCount: 24 stagingBucketUrl: gcs-bucket-with-staged-templates keyring: diff --git a/core/src/main/java/google/registry/rde/RdeStagingAction.java b/core/src/main/java/google/registry/rde/RdeStagingAction.java index c82466737..db965f58d 100644 --- a/core/src/main/java/google/registry/rde/RdeStagingAction.java +++ b/core/src/main/java/google/registry/rde/RdeStagingAction.java @@ -261,6 +261,10 @@ public final class RdeStagingAction implements Runnable { @Config("highPerformanceMachineType") String machineType; + @Inject + @Config("initialWorkerCount") + int numWorkers; + @Inject @Config("transactionCooldown") Duration transactionCooldown; @Inject @Config("beamStagingBucketUrl") String stagingBucketUrl; @Inject @Config("rdeBucket") String rdeBucket; @@ -341,6 +345,7 @@ public final class RdeStagingAction implements Runnable { .encode(stagingKeyBytes)) .put("registryEnvironment", RegistryEnvironment.get().name()) .put("workerMachineType", machineType) + .put("numWorkers", String.valueOf(numWorkers)) .put( "jpaTransactionManagerType", JpaTransactionManagerType.READ_ONLY_REPLICA.toString()) diff --git a/core/src/main/resources/google/registry/beam/rde_pipeline_metadata.json b/core/src/main/resources/google/registry/beam/rde_pipeline_metadata.json index 410ff5e7d..eff49d887 100644 --- a/core/src/main/resources/google/registry/beam/rde_pipeline_metadata.json +++ b/core/src/main/resources/google/registry/beam/rde_pipeline_metadata.json @@ -49,22 +49,6 @@ "regexes": [ "[A-Za-z0-9\\-_]+" ] - }, - { - "name": "workerMachineType", - "label": "The GCE machine type for the dataflow job workers.", - "helpText": "See https://cloud.google.com/dataflow/quotas#compute-engine-quotas for available machine types.", - "regexes": [ - "[a-z0-9\\-]+" - ] - }, - { - "name": 
"usePublicIps", - "label": "Whether the GCE workers are assigned public IPs", - "helpText": "Public IPs have an associated cost and there's a quota per region on the total number of public IPs assigned at a given time. If the service only needs to access GCP APIs, it's better to not use public IP, but one needs to configure the network accordingly. See https://cloud.google.com/dataflow/docs/guides/routes-firewall.", - "regexes": [ - "true|false" - ] } ] }