diff --git a/docs/documentation/features.md b/docs/documentation/features.md index 2a27250559..9a62271e6c 100644 --- a/docs/documentation/features.md +++ b/docs/documentation/features.md @@ -774,33 +774,62 @@ ConfigurationServiceProvider.overrideCurrent(overrider->overrider.withMetrics(me ### Micrometer implementation -The micrometer implementation records a lot of metrics associated to each resource handled by the operator by default. -In order to be efficient, the implementation removes meters associated with resources when they are deleted. Since it -might be useful to keep these metrics around for a bit before they are deleted, it is possible to configure a delay -before their removal. As this is done asynchronously, it is also possible to configure how many threads you want to -devote to these operations. Both aspects are controlled by the `MicrometerMetrics` constructor so changing the defaults -is a matter of instantiating `MicrometerMetrics` with the desired values and tell `ConfigurationServiceProvider` about -it as shown above. +The micrometer implementation is typically created using one of the provided factory methods which, depending on which +is used, will return either a ready to use instance or a builder allowing users to customized how the implementation +behaves, in particular when it comes to the granularity of collected metrics. It is, for example, possible to collect +metrics on a per-resource basis via tags that are associated with meters. This is the default, historical behavior but +this will change in a future version of JOSDK because this dramatically increases the cardinality of metrics, which +could lead to performance issues. -The micrometer implementation records the following metrics: +To create a `MicrometerMetrics` implementation that behaves how it has historically behaved, you can just create an +instance via: + +```java +MeterRegistry registry= …; +Metrics metrics=new MicrometerMetrics(registry) +``` + +Note, however, that this constructor is deprecated and we encourage you to use the factory methods instead, which either +return a fully pre-configured instance or a builder object that will allow you to configure more easily how the instance +will behave. You can, for example, configure whether or not the implementation should collect metrics on a per-resource +basis, whether or not associated meters should be removed when a resource is deleted and how the clean-up is performed. +See the relevant classes documentation for more details. -| Meter name | Type | Tags | Description | -|-----------------------------------------------------------|----------------|------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------| -| operator.sdk.reconciliations.executions. | gauge | group, version, kind | Number of executions of the named reconciler | -| operator.sdk.reconciliations.queue.size. | gauge | group, version, kind | How many resources are queued to get reconciled by named reconciler | -| operator.sdk..size | gauge map size | | Gauge tracking the size of a specified map (currently unused but could be used to monitor caches size) | -| operator.sdk.events.received | counter | group, version, kind, name, namespace, scope, event, action | Number of received Kubernetes events | -| operator.sdk.events.delete | counter | group, version, kind, name, namespace, scope | Number of received Kubernetes delete events | -| operator.sdk.reconciliations.started | counter | group, version, kind, name, namespace, scope, reconciliations.retries.last, reconciliations.retries.number | Number of started reconciliations per resource type | -| operator.sdk.reconciliations.failed | counter | group, version, kind, name, namespace, scope, exception | Number of failed reconciliations per resource type | -| operator.sdk.reconciliations.success | counter | group, version, kind, name, namespace, scope | Number of successful reconciliations per resource type | -| operator.sdk.controllers.execution.reconcile.success | counter | controller, type | Number of successful reconciliations per controller | -| operator.sdk.controllers.execution.reconcile.failure | counter | controller, exception | Number of failed reconciliations per controller | -| operator.sdk.controllers.execution.cleanup.success | counter | controller, type | Number of successful cleanups per controller | -| operator.sdk.controllers.execution.cleanup.failure | counter | controller, exception | Number of failed cleanups per controller | - -As you can see all the recorded metrics start with the `operator.sdk` prefix. +For example, the following will create a `MicrometerMetrics` instance configured to collect metrics on a per-resource +basis, deleting the associated meters after 5 seconds when a resource is deleted, using up to 2 threads to do so. + +```java +MicrometerMetrics.newPerResourceCollectingMicrometerMetricsBuilder(registry) + .withCleanUpDelayInSeconds(5) + .withCleaningThreadNumber(2) + .build() +``` + +The micrometer implementation records the following metrics: +| Meter name | Type | Tag names | Description | +|-----------------------------------------------------------|----------------|-----------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------| +| operator.sdk.reconciliations.executions. | gauge | group, version, kind | Number of executions of the named reconciler | +| operator.sdk.reconciliations.queue.size. | gauge | group, version, kind | How many resources are queued to get reconciled by named reconciler | +| operator.sdk..size | gauge map size | | Gauge tracking the size of a specified map (currently unused but could be used to monitor caches size) | +| operator.sdk.events.received | counter | , event, action | Number of received Kubernetes events | +| operator.sdk.events.delete | counter | | Number of received Kubernetes delete events | +| operator.sdk.reconciliations.started | counter | , reconciliations.retries.last, reconciliations.retries.number | Number of started reconciliations per resource type | +| operator.sdk.reconciliations.failed | counter | , exception | Number of failed reconciliations per resource type | +| operator.sdk.reconciliations.success | counter | | Number of successful reconciliations per resource type | +| operator.sdk.controllers.execution.reconcile | timer | , controller | Time taken for reconciliations per controller | +| operator.sdk.controllers.execution.cleanup | timer | , controller | Time taken for cleanups per controller | +| operator.sdk.controllers.execution.reconcile.success | counter | controller, type | Number of successful reconciliations per controller | +| operator.sdk.controllers.execution.reconcile.failure | counter | controller, exception | Number of failed reconciliations per controller | +| operator.sdk.controllers.execution.cleanup.success | counter | controller, type | Number of successful cleanups per controller | +| operator.sdk.controllers.execution.cleanup.failure | counter | controller, exception | Number of failed cleanups per controller | + +As you can see all the recorded metrics start with the `operator.sdk` prefix. ``, in the table above, +refers to resource-specific metadata and depends on the considered metric and how the implementation is configured and +could be summed up as follows: `group?, version, kind, [name, namespace?], scope` where the tags in square +brackets (`[]`) won't be present when per-resource collection is disabled and tags followed by a question mark are +omitted if the associated value is empty. Of note, when in the context of controllers' execution metrics, these tag +names are prefixed with `resource.`. This prefix might be removed in a future version for greater consistency. ## Optimizing Caches diff --git a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java index 5b3a83a1c6..61ff1562ee 100644 --- a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java +++ b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java @@ -28,100 +28,132 @@ public class MicrometerMetrics implements Metrics { private static final String PREFIX = "operator.sdk."; private static final String RECONCILIATIONS = "reconciliations."; + private static final String RECONCILIATIONS_FAILED = RECONCILIATIONS + "failed"; + private static final String RECONCILIATIONS_SUCCESS = RECONCILIATIONS + "success"; + private static final String RECONCILIATIONS_RETRIES_LAST = RECONCILIATIONS + "retries.last"; + private static final String RECONCILIATIONS_RETRIES_NUMBER = RECONCILIATIONS + "retries.number"; + private static final String RECONCILIATIONS_STARTED = RECONCILIATIONS + "started"; private static final String RECONCILIATIONS_EXECUTIONS = PREFIX + RECONCILIATIONS + "executions."; private static final String RECONCILIATIONS_QUEUE_SIZE = PREFIX + RECONCILIATIONS + "queue.size."; + private static final String NAME = "name"; + private static final String NAMESPACE = "namespace"; + private static final String GROUP = "group"; + private static final String VERSION = "version"; + private static final String KIND = "kind"; + private static final String SCOPE = "scope"; + private static final String METADATA_PREFIX = "resource."; + private static final String CONTROLLERS_EXECUTION = "controllers.execution."; + private static final String CONTROLLER = "controller"; + private static final String SUCCESS_SUFFIX = ".success"; + private static final String FAILURE_SUFFIX = ".failure"; + private static final String TYPE = "type"; + private static final String EXCEPTION = "exception"; + private static final String EVENT = "event"; + private static final String ACTION = "action"; + private static final String EVENTS_RECEIVED = "events.received"; + private static final String EVENTS_DELETE = "events.delete"; + private static final String CLUSTER = "cluster"; + private static final String SIZE_SUFFIX = ".size"; + private final boolean collectPerResourceMetrics; private final MeterRegistry registry; private final Map gauges = new ConcurrentHashMap<>(); - private final Map> metersPerResource = new ConcurrentHashMap<>(); private final Cleaner cleaner; /** - * Creates a non-delayed, micrometer-based Metrics implementation. The non-delayed part refers to - * the cleaning of meters associated with deleted resources. + * Creates a default micrometer-based Metrics implementation, collecting metrics on a per resource + * basis and not dealing with cleaning these after these resources are deleted. Note that this + * probably will change in a future release. If you want more control over what the implementation + * actually does, please use the static factory methods instead. * * @param registry the {@link MeterRegistry} instance to use for metrics recording + * @deprecated Use the factory methods / builders instead */ + @Deprecated public MicrometerMetrics(MeterRegistry registry) { - this(registry, 0); + this(registry, Cleaner.NOOP, true); } /** - * Creates a micrometer-based Metrics implementation that delays cleaning up {@link Meter}s - * associated with deleted resources by the specified amount of seconds, using a single thread for - * that process. + * Creates a MicrometerMetrics instance configured to not collect per-resource metrics, just + * aggregates per resource **type** * * @param registry the {@link MeterRegistry} instance to use for metrics recording - * @param cleanUpDelayInSeconds the number of seconds to wait before meters are removed for - * deleted resources + * @return a MicrometerMetrics instance configured to not collect per-resource metrics */ - public MicrometerMetrics(MeterRegistry registry, int cleanUpDelayInSeconds) { - this(registry, cleanUpDelayInSeconds, 1); + public static MicrometerMetrics withoutPerResourceMetrics(MeterRegistry registry) { + return new MicrometerMetrics(registry, Cleaner.NOOP, false); } /** - * Creates a micrometer-based Metrics implementation that delays cleaning up {@link Meter}s - * associated with deleted resources by the specified amount of seconds, using the specified - * (maximally) number of threads for that process. + * Creates a new builder to configure how the eventual MicrometerMetrics instance will behave. * * @param registry the {@link MeterRegistry} instance to use for metrics recording - * @param cleanUpDelayInSeconds the number of seconds to wait before meters are removed for - * deleted resources - * @param cleaningThreadsNumber the number of threads to use for the cleaning process + * @return a MicrometerMetrics instance configured to not collect per-resource metrics + * @see MicrometerMetricsBuilder */ - public MicrometerMetrics(MeterRegistry registry, int cleanUpDelayInSeconds, - int cleaningThreadsNumber) { + public static MicrometerMetricsBuilder newMicrometerMetricsBuilder(MeterRegistry registry) { + return new MicrometerMetricsBuilder(registry); + } + + /** + * Creates a new builder to configure how the eventual MicrometerMetrics instance will behave, + * pre-configuring it to collect metrics per resource. + * + * @param registry the {@link MeterRegistry} instance to use for metrics recording + * @return a MicrometerMetrics instance configured to not collect per-resource metrics + * @see PerResourceCollectingMicrometerMetricsBuilder + */ + public static PerResourceCollectingMicrometerMetricsBuilder newPerResourceCollectingMicrometerMetricsBuilder( + MeterRegistry registry) { + return new PerResourceCollectingMicrometerMetricsBuilder(registry); + } + + /** + * Creates a micrometer-based Metrics implementation that cleans up {@link Meter}s associated with + * deleted resources as specified by the (possibly {@code null}) provided {@link Cleaner} + * instance. + * + * @param registry the {@link MeterRegistry} instance to use for metrics recording + * @param cleaner the {@link Cleaner} to use + * @param collectingPerResourceMetrics whether to collect per resource metrics + */ + private MicrometerMetrics(MeterRegistry registry, Cleaner cleaner, + boolean collectingPerResourceMetrics) { this.registry = registry; - if (cleanUpDelayInSeconds < 0) { - cleaner = new NoDelayCleaner(); - } else { - cleaningThreadsNumber = - cleaningThreadsNumber <= 0 ? Runtime.getRuntime().availableProcessors() - : cleaningThreadsNumber; - cleaner = new DelayedCleaner(cleanUpDelayInSeconds, cleaningThreadsNumber); - } + this.cleaner = cleaner; + this.collectPerResourceMetrics = collectingPerResourceMetrics; } @Override - public void controllerRegistered(Controller controller) { - String executingThreadsName = - RECONCILIATIONS_EXECUTIONS + controller.getConfiguration().getName(); + public void controllerRegistered(Controller controller) { + final var configuration = controller.getConfiguration(); + final var name = configuration.getName(); + final var executingThreadsName = RECONCILIATIONS_EXECUTIONS + name; + final var resourceClass = configuration.getResourceClass(); + final var tags = new ArrayList(3); + addGVKTags(GroupVersionKind.gvkFor(resourceClass), tags, false); AtomicInteger executingThreads = - registry.gauge(executingThreadsName, - gvkTags(controller.getConfiguration().getResourceClass()), - new AtomicInteger(0)); + registry.gauge(executingThreadsName, tags, new AtomicInteger(0)); gauges.put(executingThreadsName, executingThreads); - String controllerQueueName = - RECONCILIATIONS_QUEUE_SIZE + controller.getConfiguration().getName(); + final var controllerQueueName = RECONCILIATIONS_QUEUE_SIZE + name; AtomicInteger controllerQueueSize = - registry.gauge(controllerQueueName, - gvkTags(controller.getConfiguration().getResourceClass()), - new AtomicInteger(0)); + registry.gauge(controllerQueueName, tags, new AtomicInteger(0)); gauges.put(controllerQueueName, controllerQueueSize); } @Override public T timeControllerExecution(ControllerExecution execution) { final var name = execution.controllerName(); - final var execName = PREFIX + "controllers.execution." + execution.name(); + final var execName = PREFIX + CONTROLLERS_EXECUTION + execution.name(); final var resourceID = execution.resourceID(); final var metadata = execution.metadata(); - final var tags = new ArrayList(metadata.size() + 4); - tags.addAll(List.of( - "controller", name, - "resource.name", resourceID.getName(), - "resource.namespace", resourceID.getNamespace().orElse(""), - "resource.scope", getScope(resourceID))); - final var gvk = (GroupVersionKind) metadata.get(Constants.RESOURCE_GVK_KEY); - if (gvk != null) { - tags.addAll(List.of( - "resource.group", gvk.group, - "resource.version", gvk.version, - "resource.kind", gvk.kind)); - } + final var tags = new ArrayList(16); + tags.add(Tag.of(CONTROLLER, name)); + addMetadataTags(resourceID, metadata, tags, true); final var timer = Timer.builder(execName) - .tags(tags.toArray(new String[0])) + .tags(tags) .publishPercentiles(0.3, 0.5, 0.95) .publishPercentileHistogram() .register(registry); @@ -135,40 +167,35 @@ public T timeControllerExecution(ControllerExecution execution) { }); final var successType = execution.successTypeName(result); registry - .counter(execName + ".success", "controller", name, "type", successType) + .counter(execName + SUCCESS_SUFFIX, CONTROLLER, name, TYPE, successType) .increment(); return result; } catch (Exception e) { final var exception = e.getClass().getSimpleName(); registry - .counter(execName + ".failure", "controller", name, "exception", exception) + .counter(execName + FAILURE_SUFFIX, CONTROLLER, name, EXCEPTION, exception) .increment(); throw e; } } - private static String getScope(ResourceID resourceID) { - return resourceID.getNamespace().isPresent() ? "namespace" : "cluster"; - } - @Override public void receivedEvent(Event event, Map metadata) { - final String[] tags; if (event instanceof ResourceEvent) { - tags = new String[] {"event", event.getClass().getSimpleName(), "action", - ((ResourceEvent) event).getAction().toString()}; + incrementCounter(event.getRelatedCustomResourceID(), EVENTS_RECEIVED, + metadata, + Tag.of(EVENT, event.getClass().getSimpleName()), + Tag.of(ACTION, ((ResourceEvent) event).getAction().toString())); } else { - tags = new String[] {"event", event.getClass().getSimpleName()}; + incrementCounter(event.getRelatedCustomResourceID(), EVENTS_RECEIVED, + metadata, + Tag.of(EVENT, event.getClass().getSimpleName())); } - - incrementCounter(event.getRelatedCustomResourceID(), "events.received", - metadata, - tags); } @Override public void cleanupDoneFor(ResourceID resourceID, Map metadata) { - incrementCounter(resourceID, "events.delete", metadata); + incrementCounter(resourceID, EVENTS_DELETE, metadata); cleaner.removeMetersFor(resourceID); } @@ -177,12 +204,12 @@ public void cleanupDoneFor(ResourceID resourceID, Map metadata) public void reconcileCustomResource(HasMetadata resource, RetryInfo retryInfoNullable, Map metadata) { Optional retryInfo = Optional.ofNullable(retryInfoNullable); - incrementCounter(ResourceID.fromResource(resource), RECONCILIATIONS + "started", + incrementCounter(ResourceID.fromResource(resource), RECONCILIATIONS_STARTED, metadata, - RECONCILIATIONS + "retries.number", - String.valueOf(retryInfo.map(RetryInfo::getAttemptCount).orElse(0)), - RECONCILIATIONS + "retries.last", - String.valueOf(retryInfo.map(RetryInfo::isLastAttempt).orElse(true))); + Tag.of(RECONCILIATIONS_RETRIES_NUMBER, + String.valueOf(retryInfo.map(RetryInfo::getAttemptCount).orElse(0))), + Tag.of(RECONCILIATIONS_RETRIES_LAST, + String.valueOf(retryInfo.map(RetryInfo::isLastAttempt).orElse(true)))); var controllerQueueSize = gauges.get(RECONCILIATIONS_QUEUE_SIZE + metadata.get(CONTROLLER_NAME)); @@ -191,7 +218,7 @@ public void reconcileCustomResource(HasMetadata resource, RetryInfo retryInfoNul @Override public void finishedReconciliation(HasMetadata resource, Map metadata) { - incrementCounter(ResourceID.fromResource(resource), RECONCILIATIONS + "success", metadata); + incrementCounter(ResourceID.fromResource(resource), RECONCILIATIONS_SUCCESS, metadata); } @Override @@ -221,77 +248,197 @@ public void failedReconciliation(HasMetadata resource, Exception exception, } else if (cause instanceof RuntimeException) { cause = cause.getCause() != null ? cause.getCause() : cause; } - incrementCounter(ResourceID.fromResource(resource), RECONCILIATIONS + "failed", metadata, - "exception", - cause.getClass().getSimpleName()); + incrementCounter(ResourceID.fromResource(resource), RECONCILIATIONS_FAILED, metadata, + Tag.of(EXCEPTION, cause.getClass().getSimpleName())); } @Override public > T monitorSizeOf(T map, String name) { - return registry.gaugeMapSize(PREFIX + name + ".size", Collections.emptyList(), map); + return registry.gaugeMapSize(PREFIX + name + SIZE_SUFFIX, Collections.emptyList(), map); + } + + + private void addMetadataTags(ResourceID resourceID, Map metadata, + List tags, boolean prefixed) { + if (collectPerResourceMetrics) { + addTag(NAME, resourceID.getName(), tags, prefixed); + addTagOmittingOnEmptyValue(NAMESPACE, resourceID.getNamespace().orElse(null), tags, prefixed); + } + addTag(SCOPE, getScope(resourceID), tags, prefixed); + final var gvk = (GroupVersionKind) metadata.get(Constants.RESOURCE_GVK_KEY); + if (gvk != null) { + addGVKTags(gvk, tags, prefixed); + } + } + + private static void addTag(String name, String value, List tags, boolean prefixed) { + tags.add(Tag.of(getPrefixedMetadataTag(name, prefixed), value)); + } + + private static void addTagOmittingOnEmptyValue(String name, String value, List tags, + boolean prefixed) { + if (value != null && !value.isBlank()) { + addTag(name, value, tags, prefixed); + } + } + + private static String getPrefixedMetadataTag(String tagName, boolean prefixed) { + return prefixed ? METADATA_PREFIX + tagName : tagName; } - public static List gvkTags(Class resourceClass) { - final var gvk = GroupVersionKind.gvkFor(resourceClass); - return List.of(Tag.of("group", gvk.group), Tag.of("version", gvk.version), - Tag.of("kind", gvk.kind)); + private static String getScope(ResourceID resourceID) { + return resourceID.getNamespace().isPresent() ? NAMESPACE : CLUSTER; + } + + private static void addGVKTags(GroupVersionKind gvk, List tags, boolean prefixed) { + addTagOmittingOnEmptyValue(GROUP, gvk.group, tags, prefixed); + addTag(VERSION, gvk.version, tags, prefixed); + addTag(KIND, gvk.kind, tags, prefixed); } private void incrementCounter(ResourceID id, String counterName, Map metadata, - String... additionalTags) { + Tag... additionalTags) { final var additionalTagsNb = additionalTags != null && additionalTags.length > 0 ? additionalTags.length : 0; final var metadataNb = metadata != null ? metadata.size() : 0; - final var tags = new ArrayList(6 + additionalTagsNb + metadataNb); - tags.addAll(List.of( - "name", id.getName(), - "namespace", id.getNamespace().orElse(""), - "scope", getScope(id))); + final var tags = new ArrayList(6 + additionalTagsNb + metadataNb); + addMetadataTags(id, metadata, tags, false); if (additionalTagsNb > 0) { tags.addAll(List.of(additionalTags)); } - if (metadataNb > 0) { - final var gvk = (GroupVersionKind) metadata.get(Constants.RESOURCE_GVK_KEY); - tags.addAll(List.of( - "group", gvk.group, - "version", gvk.version, - "kind", gvk.kind)); - } - final var counter = registry.counter(PREFIX + counterName, tags.toArray(new String[0])); - metersPerResource.computeIfAbsent(id, resourceID -> new HashSet<>()).add(counter.getId()); + + final var counter = registry.counter(PREFIX + counterName, tags); + cleaner.recordAssociation(id, counter); counter.increment(); } protected Set recordedMeterIdsFor(ResourceID resourceID) { - return metersPerResource.get(resourceID); + return cleaner.recordedMeterIdsFor(resourceID); + } + + public static class PerResourceCollectingMicrometerMetricsBuilder + extends MicrometerMetricsBuilder { + + private int cleaningThreadsNumber; + private int cleanUpDelayInSeconds; + + private PerResourceCollectingMicrometerMetricsBuilder(MeterRegistry registry) { + super(registry); + } + + /** + * @param cleaningThreadsNumber the maximal number of threads that can be assigned to the + * removal of {@link Meter}s associated with deleted resources, defaults to 1 if not + * specified or if the provided number is lesser or equal to 0 + */ + public PerResourceCollectingMicrometerMetricsBuilder withCleaningThreadNumber( + int cleaningThreadsNumber) { + this.cleaningThreadsNumber = cleaningThreadsNumber <= 0 ? 1 : cleaningThreadsNumber; + return this; + } + + /** + * @param cleanUpDelayInSeconds the number of seconds to wait before {@link Meter}s are removed + * for deleted resources, defaults to 1 (meaning meters will be removed one second after + * the associated resource is deleted) if not specified or if the provided number is + * lesser than 0. Threading and the general interaction model of interacting with the API + * server means that it's not possible to ensure that meters are immediately deleted in + * all cases so a minimal delay of one second is always enforced + */ + public PerResourceCollectingMicrometerMetricsBuilder withCleanUpDelayInSeconds( + int cleanUpDelayInSeconds) { + this.cleanUpDelayInSeconds = Math.max(cleanUpDelayInSeconds, 1); + return this; + } + + @Override + public MicrometerMetrics build() { + final var cleaner = + new DelayedCleaner(registry, cleanUpDelayInSeconds, cleaningThreadsNumber); + return new MicrometerMetrics(registry, cleaner, true); + } } + public static class MicrometerMetricsBuilder { + protected final MeterRegistry registry; + private boolean collectingPerResourceMetrics = true; + + private MicrometerMetricsBuilder(MeterRegistry registry) { + this.registry = registry; + } + + /** + * Configures the instance to collect metrics on a per-resource basis. + */ + @SuppressWarnings("unused") + public PerResourceCollectingMicrometerMetricsBuilder collectingMetricsPerResource() { + collectingPerResourceMetrics = true; + return new PerResourceCollectingMicrometerMetricsBuilder(registry); + } + + /** + * Configures the instance to only collect metrics per resource **type**, in an aggregate + * fashion, instead of per resource instance. + */ + @SuppressWarnings("unused") + public MicrometerMetricsBuilder notCollectingMetricsPerResource() { + collectingPerResourceMetrics = false; + return this; + } - private interface Cleaner { - void removeMetersFor(ResourceID resourceID); + public MicrometerMetrics build() { + return new MicrometerMetrics(registry, Cleaner.NOOP, collectingPerResourceMetrics); + } } - private void removeMetersFor(ResourceID resourceID) { - // remove each meter - final var toClean = metersPerResource.get(resourceID); - if (toClean != null) { - toClean.forEach(registry::remove); + interface Cleaner { + Cleaner NOOP = new Cleaner() {}; + + default void removeMetersFor(ResourceID resourceID) {} + + default void recordAssociation(ResourceID resourceID, Meter meter) {} + + default Set recordedMeterIdsFor(ResourceID resourceID) { + return Collections.emptySet(); } - // then clean-up local recording of associations - metersPerResource.remove(resourceID); } - private class NoDelayCleaner implements Cleaner { + static class DefaultCleaner implements Cleaner { + private final Map> metersPerResource = new ConcurrentHashMap<>(); + private final MeterRegistry registry; + + private DefaultCleaner(MeterRegistry registry) { + this.registry = registry; + } + @Override public void removeMetersFor(ResourceID resourceID) { - MicrometerMetrics.this.removeMetersFor(resourceID); + // remove each meter + final var toClean = metersPerResource.get(resourceID); + if (toClean != null) { + toClean.forEach(registry::remove); + } + // then clean-up local recording of associations + metersPerResource.remove(resourceID); + } + + @Override + public void recordAssociation(ResourceID resourceID, Meter meter) { + metersPerResource.computeIfAbsent(resourceID, id -> new HashSet<>()).add(meter.getId()); + } + + @Override + public Set recordedMeterIdsFor(ResourceID resourceID) { + return metersPerResource.get(resourceID); } } - private class DelayedCleaner implements Cleaner { + static class DelayedCleaner extends MicrometerMetrics.DefaultCleaner { private final ScheduledExecutorService metersCleaner; private final int cleanUpDelayInSeconds; - private DelayedCleaner(int cleanUpDelayInSeconds, int cleaningThreadsNumber) { + private DelayedCleaner(MeterRegistry registry, int cleanUpDelayInSeconds, + int cleaningThreadsNumber) { + super(registry); this.cleanUpDelayInSeconds = cleanUpDelayInSeconds; this.metersCleaner = Executors.newScheduledThreadPool(cleaningThreadsNumber); } @@ -299,7 +446,7 @@ private DelayedCleaner(int cleanUpDelayInSeconds, int cleaningThreadsNumber) { @Override public void removeMetersFor(ResourceID resourceID) { // schedule deletion of meters associated with ResourceID - metersCleaner.schedule(() -> MicrometerMetrics.this.removeMetersFor(resourceID), + metersCleaner.schedule(() -> super.removeMetersFor(resourceID), cleanUpDelayInSeconds, TimeUnit.SECONDS); } } diff --git a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/AbstractMicrometerMetricsTestFixture.java similarity index 67% rename from micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java rename to micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/AbstractMicrometerMetricsTestFixture.java index 717f3a11f9..aa67c75f76 100644 --- a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java +++ b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/AbstractMicrometerMetricsTestFixture.java @@ -1,12 +1,12 @@ package io.javaoperatorsdk.operator.monitoring.micrometer; -import java.time.Duration; import java.util.HashSet; import java.util.Set; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.extension.RegisterExtension; import io.fabric8.kubernetes.api.model.ConfigMap; @@ -21,29 +21,31 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; -public class MetricsCleaningOnDeleteIT { +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public abstract class AbstractMicrometerMetricsTestFixture { @RegisterExtension - static LocallyRunOperatorExtension operator = + LocallyRunOperatorExtension operator = LocallyRunOperatorExtension.builder().withReconciler(new MetricsCleaningTestReconciler()) .build(); - private static final TestSimpleMeterRegistry registry = new TestSimpleMeterRegistry(); - private static final int testDelay = 1; - private static final MicrometerMetrics metrics = new MicrometerMetrics(registry, testDelay, 2); - private static final String testResourceName = "cleaning-metrics-cr"; + protected final TestSimpleMeterRegistry registry = new TestSimpleMeterRegistry(); + protected final MicrometerMetrics metrics = getMetrics(); + protected static final String testResourceName = "micrometer-metrics-cr"; + + protected abstract MicrometerMetrics getMetrics(); @BeforeAll - static void setup() { + void setup() { ConfigurationServiceProvider.overrideCurrent(overrider -> overrider.withMetrics(metrics)); } @AfterAll - static void reset() { + void reset() { ConfigurationServiceProvider.reset(); } @Test - void removesMetersAssociatedWithResourceAfterItsDeletion() throws InterruptedException { + void properlyHandlesResourceDeletion() throws Exception { var testResource = new ConfigMapBuilder() .withNewMetadata() .withName(testResourceName) @@ -55,21 +57,29 @@ void removesMetersAssociatedWithResourceAfterItsDeletion() throws InterruptedExc await().until(() -> !operator.get(ConfigMap.class, testResourceName) .getMetadata().getFinalizers().isEmpty()); - // check that we properly recorded meters associated with the resource - final var meters = metrics.recordedMeterIdsFor(ResourceID.fromResource(created)); - assertThat(meters).isNotNull(); - assertThat(meters).isNotEmpty(); + final var resourceID = ResourceID.fromResource(created); + final var meters = preDeleteChecks(resourceID); // delete the resource and wait for it to be deleted operator.delete(testResource); await().until(() -> operator.get(ConfigMap.class, testResourceName) == null); - // check that the meters are properly removed after the specified delay - Thread.sleep(Duration.ofSeconds(testDelay).toMillis()); - assertThat(registry.removed).isEqualTo(meters); - assertThat(metrics.recordedMeterIdsFor(ResourceID.fromResource(created))).isNull(); + postDeleteChecks(resourceID, meters); } + protected Set preDeleteChecks(ResourceID resourceID) { + // check that we properly recorded meters associated with the resource + final var meters = metrics.recordedMeterIdsFor(resourceID); + // metrics are collected per resource + assertThat(registry.getMetersAsString()).contains(resourceID.getName()); + assertThat(meters).isNotNull(); + assertThat(meters).isNotEmpty(); + return meters; + } + + protected void postDeleteChecks(ResourceID resourceID, Set recordedMeters) + throws Exception {} + @ControllerConfiguration private static class MetricsCleaningTestReconciler implements Reconciler, Cleaner { @@ -84,7 +94,7 @@ public DeleteControl cleanup(ConfigMap resource, Context context) { } } - private static class TestSimpleMeterRegistry extends SimpleMeterRegistry { + static class TestSimpleMeterRegistry extends SimpleMeterRegistry { private final Set removed = new HashSet<>(); @Override @@ -93,5 +103,9 @@ public Meter remove(Meter.Id mappedId) { this.removed.add(removed.getId()); return removed; } + + public Set getRemoved() { + return removed; + } } } diff --git a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/DefaultBehaviorIT.java b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/DefaultBehaviorIT.java new file mode 100644 index 0000000000..6f0388c49b --- /dev/null +++ b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/DefaultBehaviorIT.java @@ -0,0 +1,31 @@ +package io.javaoperatorsdk.operator.monitoring.micrometer; + +import java.util.Collections; +import java.util.Set; + +import io.javaoperatorsdk.operator.processing.event.ResourceID; +import io.micrometer.core.instrument.Meter; + +import static org.assertj.core.api.Assertions.assertThat; + +public class DefaultBehaviorIT extends AbstractMicrometerMetricsTestFixture { + @Override + protected MicrometerMetrics getMetrics() { + return MicrometerMetrics.newMicrometerMetricsBuilder(registry).build(); + } + + @Override + protected Set preDeleteChecks(ResourceID resourceID) { + // no meter should be recorded because we're not tracking anything to be deleted later + assertThat(metrics.recordedMeterIdsFor(resourceID)).isEmpty(); + // metrics are collected per resource by default for now, this will change in a future release + assertThat(registry.getMetersAsString()).contains(resourceID.getName()); + return Collections.emptySet(); + } + + @Override + protected void postDeleteChecks(ResourceID resourceID, Set recordedMeters) { + // meters should be neither recorded, nor removed by default + assertThat(registry.getRemoved()).isEmpty(); + } +} diff --git a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/DelayedMetricsCleaningOnDeleteIT.java b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/DelayedMetricsCleaningOnDeleteIT.java new file mode 100644 index 0000000000..26dfe59f84 --- /dev/null +++ b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/DelayedMetricsCleaningOnDeleteIT.java @@ -0,0 +1,29 @@ +package io.javaoperatorsdk.operator.monitoring.micrometer; + +import java.time.Duration; +import java.util.Set; + +import io.javaoperatorsdk.operator.processing.event.ResourceID; +import io.micrometer.core.instrument.Meter; + +import static org.assertj.core.api.Assertions.assertThat; + +public class DelayedMetricsCleaningOnDeleteIT extends AbstractMicrometerMetricsTestFixture { + + private static final int testDelay = 1; + + @Override + protected MicrometerMetrics getMetrics() { + return MicrometerMetrics.newPerResourceCollectingMicrometerMetricsBuilder(registry) + .withCleanUpDelayInSeconds(testDelay).withCleaningThreadNumber(2).build(); + } + + @Override + protected void postDeleteChecks(ResourceID resourceID, Set recordedMeters) + throws Exception { + // check that the meters are properly removed after the specified delay + Thread.sleep(Duration.ofSeconds(testDelay).toMillis()); + assertThat(registry.getRemoved()).isEqualTo(recordedMeters); + assertThat(metrics.recordedMeterIdsFor(resourceID)).isNull(); + } +} diff --git a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/NoPerResourceCollectionIT.java b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/NoPerResourceCollectionIT.java new file mode 100644 index 0000000000..ac35347697 --- /dev/null +++ b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/NoPerResourceCollectionIT.java @@ -0,0 +1,23 @@ +package io.javaoperatorsdk.operator.monitoring.micrometer; + +import java.util.Collections; +import java.util.Set; + +import io.javaoperatorsdk.operator.processing.event.ResourceID; +import io.micrometer.core.instrument.Meter; + +import static org.assertj.core.api.Assertions.assertThat; + +public class NoPerResourceCollectionIT extends AbstractMicrometerMetricsTestFixture { + @Override + protected MicrometerMetrics getMetrics() { + return MicrometerMetrics.withoutPerResourceMetrics(registry); + } + + @Override + protected Set preDeleteChecks(ResourceID resourceID) { + assertThat(metrics.recordedMeterIdsFor(resourceID)).isEmpty(); + assertThat(registry.getMetersAsString()).doesNotContain(resourceID.getName()); + return Collections.emptySet(); + } +} diff --git a/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/monitoring/Metrics.java b/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/monitoring/Metrics.java index 3f73e6c9a4..1ce27d29df 100644 --- a/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/monitoring/Metrics.java +++ b/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/monitoring/Metrics.java @@ -24,7 +24,7 @@ public interface Metrics { /** * Do initialization if necessary; */ - default void controllerRegistered(Controller controller) {} + default void controllerRegistered(Controller controller) {} /** * Called when an event has been accepted by the SDK from an event source, which would result in @@ -39,6 +39,7 @@ default void receivedEvent(Event event, Map metadata) {} * @deprecated Use {@link #reconcileCustomResource(HasMetadata, RetryInfo, Map)} instead */ @Deprecated(forRemoval = true) + @SuppressWarnings("unused") default void reconcileCustomResource(ResourceID resourceID, RetryInfo retryInfo, Map metadata) {} @@ -58,6 +59,7 @@ default void reconcileCustomResource(HasMetadata resource, RetryInfo retryInfo, * @deprecated Use {@link #failedReconciliation(HasMetadata, Exception, Map)} instead */ @Deprecated(forRemoval = true) + @SuppressWarnings("unused") default void failedReconciliation(ResourceID resourceID, Exception exception, Map metadata) {} @@ -112,6 +114,7 @@ default void finishedReconciliation(ResourceID resourceID) { * @deprecated Use {@link #finishedReconciliation(HasMetadata, Map)} instead */ @Deprecated(forRemoval = true) + @SuppressWarnings("unused") default void finishedReconciliation(ResourceID resourceID, Map metadata) {} /** diff --git a/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/Controller.java b/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/Controller.java index f547fee778..e59d2a789b 100644 --- a/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/Controller.java +++ b/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/Controller.java @@ -1,11 +1,6 @@ package io.javaoperatorsdk.operator.processing; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; +import java.util.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,16 +20,7 @@ import io.javaoperatorsdk.operator.api.config.ControllerConfiguration; import io.javaoperatorsdk.operator.api.monitoring.Metrics; import io.javaoperatorsdk.operator.api.monitoring.Metrics.ControllerExecution; -import io.javaoperatorsdk.operator.api.reconciler.Cleaner; -import io.javaoperatorsdk.operator.api.reconciler.Constants; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.ContextInitializer; -import io.javaoperatorsdk.operator.api.reconciler.DeleteControl; -import io.javaoperatorsdk.operator.api.reconciler.EventSourceContext; -import io.javaoperatorsdk.operator.api.reconciler.EventSourceInitializer; -import io.javaoperatorsdk.operator.api.reconciler.Ignore; -import io.javaoperatorsdk.operator.api.reconciler.Reconciler; -import io.javaoperatorsdk.operator.api.reconciler.UpdateControl; +import io.javaoperatorsdk.operator.api.reconciler.*; import io.javaoperatorsdk.operator.api.reconciler.dependent.EventSourceNotFoundException; import io.javaoperatorsdk.operator.api.reconciler.dependent.EventSourceProvider; import io.javaoperatorsdk.operator.api.reconciler.dependent.EventSourceReferencer; @@ -56,6 +42,13 @@ public class Controller

RegisteredController

{ private static final Logger log = LoggerFactory.getLogger(Controller.class); + private static final String CLEANUP = "cleanup"; + private static final String DELETE = "delete"; + private static final String FINALIZER_NOT_REMOVED = "finalizerNotRemoved"; + private static final String RECONCILE = "reconcile"; + private static final String RESOURCE = "resource"; + private static final String STATUS = "status"; + private static final String BOTH = "both"; private final Reconciler

reconciler; private final ControllerConfiguration

configuration; @@ -103,7 +96,7 @@ public UpdateControl

reconcile(P resource, Context

context) throws Excepti new ControllerExecution<>() { @Override public String name() { - return "reconcile"; + return RECONCILE; } @Override @@ -113,12 +106,12 @@ public String controllerName() { @Override public String successTypeName(UpdateControl

result) { - String successType = "resource"; + String successType = RESOURCE; if (result.isUpdateStatus()) { - successType = "status"; + successType = STATUS; } if (result.isUpdateResourceAndStatus()) { - successType = "both"; + successType = BOTH; } return successType; } @@ -154,7 +147,7 @@ public DeleteControl cleanup(P resource, Context

context) { new ControllerExecution<>() { @Override public String name() { - return "cleanup"; + return CLEANUP; } @Override @@ -164,7 +157,7 @@ public String controllerName() { @Override public String successTypeName(DeleteControl deleteControl) { - return deleteControl.isRemoveFinalizer() ? "delete" : "finalizerNotRemoved"; + return deleteControl.isRemoveFinalizer() ? DELETE : FINALIZER_NOT_REMOVED; } @Override diff --git a/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/EventProcessor.java b/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/EventProcessor.java index f973a7d354..7bf499c1c3 100644 --- a/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/EventProcessor.java +++ b/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/EventProcessor.java @@ -238,6 +238,7 @@ synchronized void eventProcessingFinished( cleanupForDeletedEvent(executionScope.getResourceID()); } else if (postExecutionControl.isFinalizerRemoved()) { state.markProcessedMarkForDeletion(); + metrics.cleanupDoneFor(resourceID, metricsMetadata); } else { postExecutionControl .getUpdatedCustomResource() diff --git a/sample-operators/mysql-schema/src/main/java/io/javaoperatorsdk/operator/sample/MySQLSchemaOperator.java b/sample-operators/mysql-schema/src/main/java/io/javaoperatorsdk/operator/sample/MySQLSchemaOperator.java index 4128dd0ea8..f312716f44 100644 --- a/sample-operators/mysql-schema/src/main/java/io/javaoperatorsdk/operator/sample/MySQLSchemaOperator.java +++ b/sample-operators/mysql-schema/src/main/java/io/javaoperatorsdk/operator/sample/MySQLSchemaOperator.java @@ -9,7 +9,8 @@ import org.takes.http.Exit; import org.takes.http.FtBasic; -import io.fabric8.kubernetes.client.*; +import io.fabric8.kubernetes.client.KubernetesClient; +import io.fabric8.kubernetes.client.KubernetesClientBuilder; import io.javaoperatorsdk.operator.Operator; import io.javaoperatorsdk.operator.monitoring.micrometer.MicrometerMetrics; import io.javaoperatorsdk.operator.sample.dependent.ResourcePollerConfig; @@ -25,7 +26,8 @@ public static void main(String[] args) throws IOException { KubernetesClient client = new KubernetesClientBuilder().build(); Operator operator = new Operator(client, - overrider -> overrider.withMetrics(new MicrometerMetrics(new LoggingMeterRegistry()))); + overrider -> overrider + .withMetrics(MicrometerMetrics.withoutPerResourceMetrics(new LoggingMeterRegistry()))); MySQLSchemaReconciler schemaReconciler = new MySQLSchemaReconciler();