Fix time inversion error when writing metrics (#2086)

The instance ID used to be uniquely determined by the App Engine SDK. Since
we no longer call the SDK, we need a way to distinguish instances so
that their metrics do not stomp on each other and result in a time
inversion error (as we have seen frequently in the logs since the
removal of the App Engine SDK).
This commit is contained in:
Lai Jiang 2023-07-27 13:05:11 -04:00 committed by GitHub
parent 4aa1bd0856
commit 0c824fed5a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -25,17 +25,31 @@ import dagger.Module;
import dagger.Provides; import dagger.Provides;
import google.registry.config.CredentialModule.ApplicationDefaultCredential; import google.registry.config.CredentialModule.ApplicationDefaultCredential;
import google.registry.config.RegistryConfig.Config; import google.registry.config.RegistryConfig.Config;
import google.registry.util.Clock;
import google.registry.util.GoogleCredentialsBundle; import google.registry.util.GoogleCredentialsBundle;
import javax.inject.Named;
import javax.inject.Singleton;
import org.joda.time.Duration; import org.joda.time.Duration;
/** Dagger module for Google Stackdriver service connection objects. */ /** Dagger module for Google Stackdriver service connection objects. */
@Module @Module
public final class StackdriverModule { public final class StackdriverModule {
// Private constructor: this module only exposes static provider methods and is not instantiated.
private StackdriverModule() {}
// We need a fake GCE zone to appease Stackdriver's resource model. // We need a fake GCE zone to appease Stackdriver's resource model.
// TODO(b/265973059): Switch to resource type "gke_container". // TODO(b/265973059): Switch to resource type "gke_container".
private static final String SPOOFED_GCE_ZONE = "us-central1-f"; private static final String SPOOFED_GCE_ZONE = "us-central1-f";
private static final String SPOOFED_GCE_INSTANCE = "fake-instance";
// We cannot use a static fake instance ID that is shared by all instances, because metrics
// might be flushed to Stackdriver with delays, which leads to time inversion errors when
// another instance has already written a data point at a later time.
/**
 * Provides the string bound to {@code @Named("spoofedGceInstanceId")}, which is used as the
 * {@code "instance_id"} label of the monitored resource.
 *
 * <p>The value is the string form of the current UTC time reported by {@code clock}. The binding
 * is {@code @Singleton}-scoped, so the time is read once and the same ID is reused afterwards.
 *
 * <p>NOTE(review): instances that read exactly the same clock value would end up with the same
 * ID; presumably this is rare enough to be acceptable -- confirm.
 *
 * @param clock source of the current time
 * @return the textual representation of {@code clock.nowUtc()}
 */
@Provides
@Singleton
@Named("spoofedGceInstanceId")
static String providesSpoofedGceInstanceId(Clock clock) {
  // Capture the time once; its string form serves as the per-instance identifier.
  String startTimeAsId = clock.nowUtc().toString();
  return startTimeAsId;
}
@Provides @Provides
static Monitoring provideMonitoring( static Monitoring provideMonitoring(
@ -54,7 +68,8 @@ public final class StackdriverModule {
Monitoring monitoringClient, Monitoring monitoringClient,
@Config("projectId") String projectId, @Config("projectId") String projectId,
@Config("stackdriverMaxQps") int maxQps, @Config("stackdriverMaxQps") int maxQps,
@Config("stackdriverMaxPointsPerRequest") int maxPointsPerRequest) { @Config("stackdriverMaxPointsPerRequest") int maxPointsPerRequest,
@Named("spoofedGceInstanceId") String instanceId) {
// The MonitoredResource for GAE apps is not writable (and missing fields anyway) so we just // The MonitoredResource for GAE apps is not writable (and missing fields anyway) so we just
// use the gce_instance resource type instead. // use the gce_instance resource type instead.
return new StackdriverWriter( return new StackdriverWriter(
@ -65,7 +80,7 @@ public final class StackdriverModule {
.setLabels( .setLabels(
ImmutableMap.of( ImmutableMap.of(
// The "zone" field MUST be a valid GCE zone, so we fake one. // The "zone" field MUST be a valid GCE zone, so we fake one.
"zone", SPOOFED_GCE_ZONE, "instance_id", SPOOFED_GCE_INSTANCE)), "zone", SPOOFED_GCE_ZONE, "instance_id", instanceId)),
maxQps, maxQps,
maxPointsPerRequest); maxPointsPerRequest);
} }