Skip to content

feat: error handler improvements #1033

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 15, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,13 @@ public <T> T timeControllerExecution(ControllerExecution<T> execution) {
.publishPercentileHistogram()
.register(registry);
try {
final var result = timer.record(execution::execute);
final var result = timer.record(() -> {
try {
return execution.execute();
} catch (Exception e) {
throw new IllegalStateException(e);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why throw an IllegalStateException here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed it to OperatorException.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The goal is to wrap it to a runtime exception (but not the RuntimeException), not sure what is the best approach.

}
});
final var successType = execution.successTypeName(result);
registry
.counter(execName + ".success", "controller", name, "type", successType)
Expand Down Expand Up @@ -72,7 +78,7 @@ public void finishedReconciliation(ResourceID resourceID) {
incrementCounter(resourceID, RECONCILIATIONS + "success");
}

public void failedReconciliation(ResourceID resourceID, RuntimeException exception) {
public void failedReconciliation(ResourceID resourceID, Exception exception) {
var cause = exception.getCause();
if (cause == null) {
cause = exception;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ default void receivedEvent(Event event) {}

default void reconcileCustomResource(ResourceID resourceID, RetryInfo retryInfo) {}

default void failedReconciliation(ResourceID resourceID, RuntimeException exception) {}
default void failedReconciliation(ResourceID resourceID, Exception exception) {}

default void cleanupDoneFor(ResourceID customResourceUid) {}

Expand All @@ -27,10 +27,10 @@ interface ControllerExecution<T> {

String successTypeName(T result);

T execute();
T execute() throws Exception;
}

default <T> T timeControllerExecution(ControllerExecution<T> execution) {
default <T> T timeControllerExecution(ControllerExecution<T> execution) throws Exception {
return execution.execute();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

public class DefaultContext<P extends HasMetadata> implements Context<P> {

private final RetryInfo retryInfo;
private RetryInfo retryInfo;
private final Controller<P> controller;
private final P primaryResource;
private final ControllerConfiguration<P> controllerConfiguration;
Expand Down Expand Up @@ -44,4 +44,9 @@ public ControllerConfiguration<P> getControllerConfiguration() {
public ManagedDependentResourceContext managedDependentResourceContext() {
return managedDependentResourceContext;
}

public DefaultContext<P> setRetryInfo(RetryInfo retryInfo) {
this.retryInfo = retryInfo;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
package io.javaoperatorsdk.operator.api.reconciler;

import java.util.Optional;

import io.fabric8.kubernetes.api.model.HasMetadata;

public interface ErrorStatusHandler<T extends HasMetadata> {
public interface ErrorStatusHandler<P extends HasMetadata> {

/**
* <p>
* Reconciler can implement this interface in order to update the status sub-resource in the case
* an exception in thrown. In that case
* {@link #updateErrorStatus(HasMetadata, RetryInfo, RuntimeException)} is called automatically.
* {@link #updateErrorStatus(HasMetadata, Context, Exception)} is called automatically.
* <p>
* The result of the method call is used to make a status update on the custom resource. This is
* always a sub-resource update request, so no update on custom resource itself (like spec of
Expand All @@ -21,10 +19,10 @@ public interface ErrorStatusHandler<T extends HasMetadata> {
* should not be updates on custom resource after it is marked for deletion.
*
* @param resource to update the status on
* @param retryInfo the current retry status
* @param context the current contex
* @param e exception thrown from the reconciler
* @return the updated resource
*/
Optional<T> updateErrorStatus(T resource, RetryInfo retryInfo, RuntimeException e);
ErrorStatusUpdateControl<P> updateErrorStatus(P resource, Context<P> context, Exception e);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package io.javaoperatorsdk.operator.api.reconciler;

import java.util.Optional;

import io.fabric8.kubernetes.api.model.HasMetadata;

public class ErrorStatusUpdateControl<P extends HasMetadata> {

private final P resource;
private boolean noRetry = false;

public static <T extends HasMetadata> ErrorStatusUpdateControl<T> updateStatus(T resource) {
return new ErrorStatusUpdateControl<>(resource);
}

public static <T extends HasMetadata> ErrorStatusUpdateControl<T> noStatusUpdate() {
return new ErrorStatusUpdateControl<>(null);
}

private ErrorStatusUpdateControl(P resource) {
this.resource = resource;
}

/**
* Instructs the controller to not retry the error. This is useful for non-recoverable errors.
*
* @return ErrorStatusUpdateControl
*/
public ErrorStatusUpdateControl<P> withNoRetry() {
this.noRetry = true;
return this;
}

public Optional<P> getResource() {
return Optional.ofNullable(resource);
}

public boolean isNoRetry() {
return noRetry;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public interface Reconciler<R extends HasMetadata> {
* @return UpdateControl to manage updates on the custom resource (usually the status) after
* reconciliation.
*/
UpdateControl<R> reconcile(R resource, Context<R> context);
UpdateControl<R> reconcile(R resource, Context<R> context) throws Exception;

/**
* Note that this method is used in combination with finalizers. If automatic finalizer handling
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,32 +60,36 @@ public Controller(Reconciler<P> reconciler,
public DeleteControl cleanup(P resource, Context<P> context) {
dependents.cleanup(resource, context);

return metrics().timeControllerExecution(
new ControllerExecution<>() {
@Override
public String name() {
return "cleanup";
}
try {
return metrics().timeControllerExecution(
new ControllerExecution<>() {
@Override
public String name() {
return "cleanup";
}

@Override
public String controllerName() {
return configuration.getName();
}
@Override
public String controllerName() {
return configuration.getName();
}

@Override
public String successTypeName(DeleteControl deleteControl) {
return deleteControl.isRemoveFinalizer() ? "delete" : "finalizerNotRemoved";
}
@Override
public String successTypeName(DeleteControl deleteControl) {
return deleteControl.isRemoveFinalizer() ? "delete" : "finalizerNotRemoved";
}

@Override
public DeleteControl execute() {
return reconciler.cleanup(resource, context);
}
});
@Override
public DeleteControl execute() {
return reconciler.cleanup(resource, context);
}
});
} catch (Exception e) {
throw new IllegalStateException(e);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why IllegalStateException?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed it to OperatorException.

}
}

@Override
public UpdateControl<P> reconcile(P resource, Context<P> context) {
public UpdateControl<P> reconcile(P resource, Context<P> context) throws Exception {
dependents.reconcile(resource, context);

return metrics().timeControllerExecution(
Expand Down Expand Up @@ -113,7 +117,7 @@ public String successTypeName(UpdateControl<P> result) {
}

@Override
public UpdateControl<P> execute() {
public UpdateControl<P> execute() throws Exception {
return reconciler.reconcile(resource, context);
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ TimerEventSource<R> retryEventSource() {
* according to the retry timing if there was an exception.
*/
private void handleRetryOnException(
ExecutionScope<R> executionScope, RuntimeException exception) {
ExecutionScope<R> executionScope, Exception exception) {
RetryExecution execution = getOrInitRetryExecution(executionScope);
var customResourceID = executionScope.getCustomResourceID();
boolean eventPresent = eventMarker.eventPresent(customResourceID);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ final class PostExecutionControl<R extends HasMetadata> {

private final boolean onlyFinalizerHandled;
private final R updatedCustomResource;
private final RuntimeException runtimeException;
private final Exception runtimeException;

private Long reScheduleDelay = null;

private PostExecutionControl(
boolean onlyFinalizerHandled,
R updatedCustomResource,
RuntimeException runtimeException) {
Exception runtimeException) {
this.onlyFinalizerHandled = onlyFinalizerHandled;
this.updatedCustomResource = updatedCustomResource;
this.runtimeException = runtimeException;
Expand All @@ -35,7 +35,7 @@ public static <R extends HasMetadata> PostExecutionControl<R> customResourceUpda
}

public static <R extends HasMetadata> PostExecutionControl<R> exceptionDuringExecution(
RuntimeException exception) {
Exception exception) {
return new PostExecutionControl<>(false, null, exception);
}

Expand All @@ -60,7 +60,7 @@ public PostExecutionControl<R> withReSchedule(long delay) {
return this;
}

public Optional<RuntimeException> getRuntimeException() {
public Optional<Exception> getRuntimeException() {
return Optional.ofNullable(runtimeException);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,14 @@ public ReconciliationDispatcher(Controller<R> controller) {
public PostExecutionControl<R> handleExecution(ExecutionScope<R> executionScope) {
try {
return handleDispatch(executionScope);
} catch (RuntimeException e) {
} catch (Exception e) {
log.error("Error during event processing {} failed.", executionScope, e);
return PostExecutionControl.exceptionDuringExecution(e);
}
}

private PostExecutionControl<R> handleDispatch(ExecutionScope<R> executionScope) {
private PostExecutionControl<R> handleDispatch(ExecutionScope<R> executionScope)
throws Exception {
R resource = executionScope.getResource();
log.debug("Handling dispatch for resource {}", getName(resource));

Expand All @@ -67,7 +68,7 @@ private PostExecutionControl<R> handleDispatch(ExecutionScope<R> executionScope)
return PostExecutionControl.defaultDispatch();
}

Context context = new DefaultContext<>(executionScope.getRetryInfo(), controller, resource);
Context<R> context = new DefaultContext<>(executionScope.getRetryInfo(), controller, resource);
if (markedForDeletion) {
return handleCleanup(resource, context);
} else {
Expand All @@ -91,7 +92,7 @@ private boolean shouldNotDispatchToDelete(R resource) {
}

private PostExecutionControl<R> handleReconcile(
ExecutionScope<R> executionScope, R originalResource, Context context) {
ExecutionScope<R> executionScope, R originalResource, Context<R> context) throws Exception {
if (configuration().useFinalizer()
&& !originalResource.hasFinalizer(configuration().getFinalizer())) {
/*
Expand All @@ -105,11 +106,10 @@ private PostExecutionControl<R> handleReconcile(
} else {
try {
var resourceForExecution =
cloneResourceForErrorStatusHandlerIfNeeded(originalResource, context);
cloneResourceForErrorStatusHandlerIfNeeded(originalResource);
return reconcileExecution(executionScope, resourceForExecution, originalResource, context);
} catch (RuntimeException e) {
handleErrorStatusHandler(originalResource, context, e);
throw e;
} catch (Exception e) {
return handleErrorStatusHandler(originalResource, context, e);
}
}
}
Expand All @@ -121,7 +121,7 @@ private PostExecutionControl<R> handleReconcile(
* resource is changed during an execution, and it's much cleaner to have to original resource in
* place for status update.
*/
private R cloneResourceForErrorStatusHandlerIfNeeded(R resource, Context context) {
private R cloneResourceForErrorStatusHandlerIfNeeded(R resource) {
if (isErrorStatusHandlerPresent() ||
shouldUpdateObservedGenerationAutomatically(resource)) {
final var cloner = ConfigurationServiceProvider.instance().getResourceCloner();
Expand All @@ -132,7 +132,7 @@ private R cloneResourceForErrorStatusHandlerIfNeeded(R resource, Context context
}

private PostExecutionControl<R> reconcileExecution(ExecutionScope<R> executionScope,
R resourceForExecution, R originalResource, Context context) {
R resourceForExecution, R originalResource, Context<R> context) throws Exception {
log.debug(
"Reconciling resource {} with version: {} with execution scope: {}",
getName(resourceForExecution),
Expand Down Expand Up @@ -163,8 +163,9 @@ && shouldUpdateObservedGenerationAutomatically(resourceForExecution)) {
}

@SuppressWarnings("unchecked")
private void handleErrorStatusHandler(R resource, Context<R> context,
RuntimeException e) {
private PostExecutionControl<R> handleErrorStatusHandler(R resource, Context<R> context,
Exception e) throws Exception {
// todo unit + integration test
if (isErrorStatusHandlerPresent()) {
try {
RetryInfo retryInfo = context.getRetryInfo().orElse(new RetryInfo() {
Expand All @@ -178,13 +179,18 @@ public boolean isLastAttempt() {
return controller.getConfiguration().getRetryConfiguration() == null;
}
});
var updatedResource = ((ErrorStatusHandler<R>) controller.getReconciler())
.updateErrorStatus(resource, retryInfo, e);
updatedResource.ifPresent(customResourceFacade::updateStatus);
((DefaultContext<R>) context).setRetryInfo(retryInfo);
var errorStatusUpdateControl = ((ErrorStatusHandler<R>) controller.getReconciler())
.updateErrorStatus(resource, context, e);
errorStatusUpdateControl.getResource().ifPresent(customResourceFacade::updateStatus);
if (errorStatusUpdateControl.isNoRetry()) {
return PostExecutionControl.defaultDispatch();
}
} catch (RuntimeException ex) {
log.error("Error during error status handling.", ex);
}
}
throw e;
}

private boolean isErrorStatusHandlerPresent() {
Expand Down
Loading