Mirror of https://github.com/google/nomulus.git
Set the initial worker count for the RDE beam pipeline at 24 (#1572)

This likely will speed up the pipeline by skipping the initially slow process of spinning up instances.
commit fc0e461c44 (parent 7135219151)
5 changed files with 24 additions and 16 deletions
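
For orientation, the new setting feeds Dataflow's standard worker-pool sizing. A minimal sketch, assuming a plain Beam/Dataflow options setup rather than the Nomulus launch path shown in the hunks below, of the option that an initial worker count like 24 ultimately controls:

    // Sketch only: shows the standard Dataflow option that an "initialWorkerCount" style
    // setting ultimately controls. Class and method names are from Apache Beam's Dataflow
    // runner; the value 24 comes from this commit, everything else is illustrative.
    import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;

    public class InitialWorkerCountSketch {
      public static DataflowPipelineOptions withInitialWorkers(String[] args) {
        DataflowPipelineOptions options =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(DataflowPipelineOptions.class);
        // Start the job with 24 VMs instead of letting autoscaling ramp up from a small pool;
        // horizontal autoscaling can still grow or shrink the pool after startup.
        options.setNumWorkers(24);
        return options;
      }
    }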
RegistryConfig.java:

@@ -576,6 +576,19 @@ public final class RegistryConfig {
       return config.beam.highPerformanceMachineType;
     }

+    /**
+     * Returns the initial number of workers used for a Beam pipeline. Autoscaling can still be in effect.
+     *
+     * @see <a
+     *     href=https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#horizontal-autoscaling>
+     *     Horizontal Autoscaling</a>
+     */
+    @Provides
+    @Config("initialWorkerCount")
+    public static int provideInitialWorkerCount(RegistryConfigSettings config) {
+      return config.beam.initialWorkerCount;
+    }
+
     /**
      * Returns the default job region to run Apache Beam (Cloud Dataflow) jobs in.
      *
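
The @Provides/@Config pairing above relies on a string-keyed Dagger qualifier. A sketch of the shape such a qualifier takes, offered as an assumption about @Config rather than a copy of its actual definition in RegistryConfig:

    // Assumed shape of the @Config qualifier implied by the @Provides/@Inject pairing in this
    // diff; the real annotation is defined in RegistryConfig and may differ in details.
    import java.lang.annotation.Documented;
    import java.lang.annotation.Retention;
    import java.lang.annotation.RetentionPolicy;
    import javax.inject.Qualifier;

    @Qualifier
    @Documented
    @Retention(RetentionPolicy.RUNTIME)
    public @interface Config {
      // The key, e.g. "initialWorkerCount", that matches a provider to an injection site.
      String value() default "";
    }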
RegistryConfigSettings.java:

@@ -137,6 +137,7 @@ public class RegistryConfigSettings {
   public static class Beam {
     public String defaultJobRegion;
     public String highPerformanceMachineType;
+    public int initialWorkerCount;
     public String stagingBucketUrl;
   }

Beam section of the YAML configuration:

@@ -442,6 +442,11 @@ beam:
   # core count per machine may be preferable in order to preserve IP addresses.
   # See: https://cloud.google.com/compute/quotas#cpu_quota
   highPerformanceMachineType: n2-standard-4
+  # The initial number of workers requested. This can help speed up the pipeline,
+  # which otherwise would take some time to spin up the necessary number of
+  # workers. Autoscaling is still in effect to reduce the number of workers if
+  # they are not in use.
+  initialWorkerCount: 24
   stagingBucketUrl: gcs-bucket-with-staged-templates

 keyring:
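
The new YAML key maps straight onto the field added to RegistryConfigSettings.Beam above. A minimal sketch of that mapping, assuming a SnakeYAML-style bean load (the project routes configuration loading through its own utilities, so this is illustrative only):

    // Illustrative mapping from the "beam:" YAML block to the public fields shown in the
    // RegistryConfigSettings hunk. The defaultJobRegion value is a placeholder; the other
    // values are the ones that appear in the diff.
    import org.yaml.snakeyaml.Yaml;

    public class BeamConfigSketch {
      public static class Beam {
        public String defaultJobRegion;
        public String highPerformanceMachineType;
        public int initialWorkerCount;
        public String stagingBucketUrl;
      }

      public static void main(String[] args) {
        String yaml =
            "defaultJobRegion: us-east1\n"
                + "highPerformanceMachineType: n2-standard-4\n"
                + "initialWorkerCount: 24\n"
                + "stagingBucketUrl: gcs-bucket-with-staged-templates\n";
        Beam beam = new Yaml().loadAs(yaml, Beam.class);
        System.out.println(beam.initialWorkerCount); // prints 24
      }
    }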
RdeStagingAction.java:

@@ -261,6 +261,10 @@ public final class RdeStagingAction implements Runnable {
   @Config("highPerformanceMachineType")
   String machineType;

+  @Inject
+  @Config("initialWorkerCount")
+  int numWorkers;
+
   @Inject @Config("transactionCooldown") Duration transactionCooldown;
   @Inject @Config("beamStagingBucketUrl") String stagingBucketUrl;
   @Inject @Config("rdeBucket") String rdeBucket;
@@ -341,6 +345,7 @@ public final class RdeStagingAction implements Runnable {
                         .encode(stagingKeyBytes))
             .put("registryEnvironment", RegistryEnvironment.get().name())
             .put("workerMachineType", machineType)
+            .put("numWorkers", String.valueOf(numWorkers))
             .put(
                 "jpaTransactionManagerType",
                 JpaTransactionManagerType.READ_ONLY_REPLICA.toString())
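
The map being assembled here is what ultimately reaches the Dataflow service. A sketch of how such a parameter map is handed to a Flex Template launch, assuming the google-api-services-dataflow client; the project ID, region, job name, and container spec path are placeholders, not values from this repository:

    // Sketch, not the project's launch code: passes a parameter map like the one built above
    // to a Dataflow Flex Template launch. All identifiers in string literals are placeholders.
    import com.google.api.services.dataflow.Dataflow;
    import com.google.api.services.dataflow.model.LaunchFlexTemplateParameter;
    import com.google.api.services.dataflow.model.LaunchFlexTemplateRequest;
    import java.io.IOException;
    import java.util.Map;

    public class FlexTemplateLaunchSketch {
      static void launch(Dataflow dataflow, Map<String, String> parameters) throws IOException {
        LaunchFlexTemplateParameter launchParameter =
            new LaunchFlexTemplateParameter()
                .setJobName("rde-staging-job")
                .setContainerSpecGcsPath("gs://example-staging-bucket/pipeline_metadata.json")
                // "numWorkers" and "workerMachineType" are interpreted as standard Dataflow
                // worker-pool options; the remaining entries are options the pipeline defines.
                .setParameters(parameters);
        dataflow
            .projects()
            .locations()
            .flexTemplates()
            .launch(
                "example-project",
                "us-east1",
                new LaunchFlexTemplateRequest().setLaunchParameter(launchParameter))
            .execute();
      }
    }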
Pipeline parameter metadata (JSON):

@@ -49,22 +49,6 @@
       "regexes": [
         "[A-Za-z0-9\\-_]+"
       ]
-    },
-    {
-      "name": "workerMachineType",
-      "label": "The GCE machine type for the dataflow job workers.",
-      "helpText": "See https://cloud.google.com/dataflow/quotas#compute-engine-quotas for available machine types.",
-      "regexes": [
-        "[a-z0-9\\-]+"
-      ]
-    },
-    {
-      "name": "usePublicIps",
-      "label": "Whether the GCE workers are assigned public IPs",
-      "helpText": "Public IPs have an associated cost and there's a quota per region on the total number of public IPs assigned at a given time. If the service only needs to access GCP APIs, it's better to not use public IP, but one needs to configure the network accordingly. See https://cloud.google.com/dataflow/docs/guides/routes-firewall.",
-      "regexes": [
-        "true|false"
-      ]
     }
   ]
 }
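
The two parameters removed above, workerMachineType and usePublicIps, share their names with Dataflow's built-in worker-pool options. A sketch of setting them directly on the options object, using the option names from Beam's DataflowPipelineWorkerPoolOptions; the values shown are illustrative, not taken from this change:

    // Sketch only: the removed template parameters match built-in Dataflow worker-pool options.
    import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;

    public class WorkerPoolOptionsSketch {
      public static void main(String[] args) {
        DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
        // GCE machine type for the job's workers (compare the removed "workerMachineType" entry).
        options.setWorkerMachineType("n2-standard-4");
        // Whether workers get public IPs; disabling avoids the public-IP cost and quota noted
        // in the removed "usePublicIps" help text, but requires suitable network configuration.
        options.setUsePublicIps(false);
      }
    }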