Skip to content

feat: retry remove finalizer #1249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
May 30, 2022
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ public void receivedEvent(Event event) {
}

@Override
public void cleanupDoneFor(ResourceID customResourceUid) {
incrementCounter(customResourceUid, "events.delete");
public void cleanupDoneFor(ResourceID resourceID) {
incrementCounter(resourceID, "events.delete");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ default void reconcileCustomResource(ResourceID resourceID, RetryInfo retryInfo)

default void failedReconciliation(ResourceID resourceID, Exception exception) {}

default void cleanupDoneFor(ResourceID customResourceUid) {}
default void cleanupDoneFor(ResourceID resourceID) {}

default void finishedReconciliation(ResourceID resourceID) {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.stream.Collectors;

import static io.javaoperatorsdk.operator.processing.event.EventMarker.EventingState.NO_EVENT_PRESENT;
import static io.javaoperatorsdk.operator.processing.event.EventMarker.EventingState.PROCESSED_MARK_FOR_DELETION;

/**
* Manages the state of received events. Basically there can be only three distinct states relevant
Expand All @@ -19,7 +20,7 @@ class EventMarker {

public enum EventingState {
/** Event but NOT Delete event present */
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This javadoc is confusing since it's unclear to which constant it applies…

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

EVENT_PRESENT, NO_EVENT_PRESENT,
EVENT_PRESENT, NO_EVENT_PRESENT, PROCESSED_MARK_FOR_DELETION,
/** Delete event present, from this point other events are not relevant */
DELETE_EVENT_PRESENT,
}
Expand Down Expand Up @@ -53,11 +54,21 @@ public void unMarkEventReceived(ResourceID resourceID) {
setEventingState(resourceID,
NO_EVENT_PRESENT);
break;
case PROCESSED_MARK_FOR_DELETION:
throw new IllegalStateException("Cannot unmark processed marked for deletion.");
case DELETE_EVENT_PRESENT:
throw new IllegalStateException("Cannot unmark delete event.");
}
}

public void markProcessedMarkForDeletion(ResourceID resourceID) {
setEventingState(resourceID, PROCESSED_MARK_FOR_DELETION);
}

public boolean processedMarkForDeletionPresent(ResourceID resourceID) {
return getEventingState(resourceID) == PROCESSED_MARK_FOR_DELETION;
}

public void markDeleteEventReceived(Event event) {
markDeleteEventReceived(event.getRelatedCustomResourceID());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,10 @@ public void handleEvent(Event event) {
}

private void handleMarkedEventForResource(ResourceID resourceID) {
if (!eventMarker.deleteEventPresent(resourceID)) {
submitReconciliationExecution(resourceID);
} else {
if (eventMarker.deleteEventPresent(resourceID)) {
cleanupForDeletedEvent(resourceID);
} else if (!eventMarker.processedMarkForDeletionPresent(resourceID)) {
submitReconciliationExecution(resourceID);
}
}

Expand Down Expand Up @@ -157,14 +157,50 @@ private void submitReconciliationExecution(ResourceID resourceID) {
}

private void handleEventMarking(Event event) {
if (event instanceof ResourceEvent
&& ((ResourceEvent) event).getAction() == ResourceAction.DELETED) {
log.debug("Marking delete event received for: {}", event.getRelatedCustomResourceID());
eventMarker.markDeleteEventReceived(event);
} else if (!eventMarker.deleteEventPresent(event.getRelatedCustomResourceID())) {
log.debug("Marking event received for: {}", event.getRelatedCustomResourceID());
eventMarker.markEventReceived(event);
if (event instanceof ResourceEvent) {
var resourceEvent = (ResourceEvent) event;
if (resourceEvent.getAction() == ResourceAction.DELETED) {
log.debug("Marking delete event received for: {}", event.getRelatedCustomResourceID());
eventMarker.markDeleteEventReceived(event);
} else {
/*
* if already processed mark for deletion we want to override that mark in case a custom
* resource in the cache and not it is not marked for deletion. This could happen in an edge
* case, when the resource is deleted while websocket was disconnected. And meanwhile a
* resource with same ResourceID was deleted and created again. So resource should not be
* marked received if processedMarkForDeletion present it's still marked for deletion, but
* otherwise yes.
*/
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really understand the comment…

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reformulated, it's more like documenting an edge case that is not explicit, hope its better now

if (eventMarker.processedMarkForDeletionPresent(event.getRelatedCustomResourceID())
&& customResourceMarkedForDeletion(resourceEvent)) {
log.debug(
"Skipping mark of event received, since already processed mark for deletion and resource "
+
"marked for deletion: {}",
event.getRelatedCustomResourceID());
return;
}
markEventReceived(event);
}
} else if (!eventMarker.deleteEventPresent(event.getRelatedCustomResourceID()) ||
!eventMarker.processedMarkForDeletionPresent(event.getRelatedCustomResourceID())) {
markEventReceived(event);
} else if (log.isDebugEnabled()) {
log.debug(
"Skipped marking event as received. Delete event present: {}, processed mark for deletion: {}",
eventMarker.deleteEventPresent(event.getRelatedCustomResourceID()),
eventMarker.processedMarkForDeletionPresent(event.getRelatedCustomResourceID()));
}

}

private void markEventReceived(Event event) {
log.debug("Marking event received for: {}", event.getRelatedCustomResourceID());
eventMarker.markEventReceived(event);
}

private boolean customResourceMarkedForDeletion(ResourceEvent resourceEvent) {
return resourceEvent.getResource().map(HasMetadata::isMarkedForDeletion).orElse(false);
}

private RetryInfo retryInfo(ResourceID customResourceUid) {
Expand Down Expand Up @@ -199,6 +235,8 @@ void eventProcessingFinished(
metrics.finishedReconciliation(resourceID);
if (eventMarker.deleteEventPresent(resourceID)) {
cleanupForDeletedEvent(executionScope.getResourceID());
} else if (postExecutionControl.isFinalizerRemoved()) {
eventMarker.markProcessedMarkForDeletion(resourceID);
} else {
postExecutionControl
.getUpdatedCustomResource()
Expand Down Expand Up @@ -288,10 +326,10 @@ private RetryExecution getOrInitRetryExecution(ExecutionScope<R> executionScope)
return retryExecution;
}

private void cleanupForDeletedEvent(ResourceID customResourceUid) {
log.debug("Cleaning up for delete event for: {}", customResourceUid);
eventMarker.cleanup(customResourceUid);
metrics.cleanupDoneFor(customResourceUid);
private void cleanupForDeletedEvent(ResourceID resourceID) {
log.debug("Cleaning up for delete event for: {}", resourceID);
eventMarker.cleanup(resourceID);
metrics.cleanupDoneFor(resourceID);
}

private boolean isControllerUnderExecution(ResourceID customResourceUid) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,26 @@

final class PostExecutionControl<R extends HasMetadata> {

private final boolean onlyFinalizerHandled;
private final boolean finalizerRemoved;
private final R updatedCustomResource;
private final boolean updateIsStatusPatch;
private final Exception runtimeException;

private Long reScheduleDelay = null;

private PostExecutionControl(
boolean onlyFinalizerHandled,
boolean finalizerRemoved,
R updatedCustomResource,
boolean updateIsStatusPatch, Exception runtimeException) {
this.onlyFinalizerHandled = onlyFinalizerHandled;
this.finalizerRemoved = finalizerRemoved;
this.updatedCustomResource = updatedCustomResource;
this.updateIsStatusPatch = updateIsStatusPatch;
this.runtimeException = runtimeException;
}

public static <R extends HasMetadata> PostExecutionControl<R> onlyFinalizerAdded(
R updatedCustomResource) {
return new PostExecutionControl<>(true, updatedCustomResource, false, null);
return new PostExecutionControl<>(false, updatedCustomResource, false, null);
}

public static <R extends HasMetadata> PostExecutionControl<R> defaultDispatch() {
Expand All @@ -42,6 +42,11 @@ public static <R extends HasMetadata> PostExecutionControl<R> customResourceUpda
return new PostExecutionControl<>(false, updatedCustomResource, false, null);
}

public static <R extends HasMetadata> PostExecutionControl<R> customResourceFinalizerRemoved(
R updatedCustomResource) {
return new PostExecutionControl<>(true, updatedCustomResource, false, null);
}

public static <R extends HasMetadata> PostExecutionControl<R> exceptionDuringExecution(
Exception exception) {
return new PostExecutionControl<>(false, null, false, exception);
Expand Down Expand Up @@ -76,11 +81,15 @@ public boolean updateIsStatusPatch() {
public String toString() {
return "PostExecutionControl{"
+ "onlyFinalizerHandled="
+ onlyFinalizerHandled
+ finalizerRemoved
+ ", updatedCustomResource="
+ updatedCustomResource
+ ", runtimeException="
+ runtimeException
+ '}';
}

public boolean isFinalizerRemoved() {
return finalizerRemoved;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import io.fabric8.kubernetes.api.model.HasMetadata;
import io.fabric8.kubernetes.api.model.KubernetesResourceList;
import io.fabric8.kubernetes.client.CustomResource;
import io.fabric8.kubernetes.client.KubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClientException;
import io.fabric8.kubernetes.client.dsl.MixedOperation;
import io.fabric8.kubernetes.client.dsl.Resource;
import io.fabric8.kubernetes.client.dsl.internal.HasMetadataOperationsImpl;
Expand All @@ -35,6 +37,8 @@
*/
class ReconciliationDispatcher<R extends HasMetadata> {

public static final int MAX_FINALIZER_REMOVAL_RETRY = 10;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe that should be configurable from the ConfigurationService?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would do it a separate PR TBH. Ideally this retry passes after 1-2 attemt, it's unlikely that there will be more than 3-4 finalizers.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this should be done in a different PR for sure.


private static final Logger log = LoggerFactory.getLogger(ReconciliationDispatcher.class);

private final Controller<R> controller;
Expand All @@ -47,7 +51,7 @@ class ReconciliationDispatcher<R extends HasMetadata> {
}

public ReconciliationDispatcher(Controller<R> controller) {
this(controller, new CustomResourceFacade<>(controller.getCRClient()));
this(controller, new CustomResourceFacade<>(controller.getCRClient(), controller.getClient()));
}

public PostExecutionControl<R> handleExecution(ExecutionScope<R> executionScope) {
Expand All @@ -61,35 +65,42 @@ public PostExecutionControl<R> handleExecution(ExecutionScope<R> executionScope)

private PostExecutionControl<R> handleDispatch(ExecutionScope<R> executionScope)
throws Exception {
R resource = executionScope.getResource();
log.debug("Handling dispatch for resource {}", getName(resource));
R originalResource = executionScope.getResource();
var resourceForExecution = cloneResource(originalResource);
log.debug("Handling dispatch for resource {}", getName(originalResource));

final var markedForDeletion = resource.isMarkedForDeletion();
if (markedForDeletion && shouldNotDispatchToCleanup(resource)) {
final var markedForDeletion = originalResource.isMarkedForDeletion();
if (markedForDeletion && shouldNotDispatchToCleanup(originalResource)) {
log.debug(
"Skipping cleanup of resource {} because finalizer(s) {} don't allow processing yet",
getName(resource),
resource.getMetadata().getFinalizers());
getName(originalResource),
originalResource.getMetadata().getFinalizers());
return PostExecutionControl.defaultDispatch();
}

Context<R> context = new DefaultContext<>(executionScope.getRetryInfo(), controller, resource);
if (markedForDeletion) {
return handleCleanup(resource, context);
Context<R> context =
new DefaultContext<>(executionScope.getRetryInfo(), controller, originalResource);
if (originalResource.isMarkedForDeletion()) {
return handleCleanup(resourceForExecution, context);
} else {
return handleReconcile(executionScope, resource, context);
return handleReconcile(executionScope, resourceForExecution, originalResource, context);
}
}

private boolean shouldNotDispatchToCleanup(R resource) {
// we don't dispatch to cleanup if the controller is configured to use a finalizer but that
// finalizer is not present (which means it's already been removed)
return !controller.useFinalizer() || (controller.useFinalizer()
&& !resource.hasFinalizer(configuration().getFinalizerName()));
var markedForDeletion = resource.isMarkedForDeletion();
var alreadyRemovedFinalizer = controller.useFinalizer()
&& !resource.hasFinalizer(configuration().getFinalizerName());
if (markedForDeletion && alreadyRemovedFinalizer) {
log.warn("This should not happen. Marked for deletion & already removed finalizer: {}",
ResourceID.fromResource(resource));
}
return markedForDeletion && (!controller.useFinalizer() || alreadyRemovedFinalizer);
}

private PostExecutionControl<R> handleReconcile(
ExecutionScope<R> executionScope, R originalResource, Context<R> context) throws Exception {
ExecutionScope<R> executionScope, R resourceForExecution, R originalResource,
Context<R> context) throws Exception {
if (controller.useFinalizer()
&& !originalResource.hasFinalizer(configuration().getFinalizerName())) {
/*
Expand All @@ -101,8 +112,6 @@ private PostExecutionControl<R> handleReconcile(
var updatedResource = updateCustomResourceWithFinalizer(originalResource);
return PostExecutionControl.onlyFinalizerAdded(updatedResource);
} else {
var resourceForExecution =
cloneResource(originalResource);
try {
return reconcileExecution(executionScope, resourceForExecution, originalResource, context);
} catch (Exception e) {
Expand Down Expand Up @@ -280,8 +289,8 @@ private PostExecutionControl<R> handleCleanup(R resource, Context<R> context) {
// cleanup is finished, nothing left to done
if (deleteControl.isRemoveFinalizer()
&& resource.hasFinalizer(configuration().getFinalizerName())) {
R customResource = removeFinalizer(resource);
return PostExecutionControl.customResourceUpdated(customResource);
R customResource = removeFinalizer(resource, configuration().getFinalizerName());
return PostExecutionControl.customResourceFinalizerRemoved(customResource);
}
}
log.debug(
Expand All @@ -308,28 +317,53 @@ private R updateCustomResource(R resource) {
return customResourceFacade.replaceResourceWithLock(resource);
}

private R removeFinalizer(R resource) {
log.debug(
"Removing finalizer on resource: {} with version: {}",
getUID(resource),
getVersion(resource));
resource.removeFinalizer(configuration().getFinalizerName());
return customResourceFacade.replaceResourceWithLock(resource);
}


ControllerConfiguration<R> configuration() {
return controller.getConfiguration();
}

@SuppressWarnings("unchecked")
public R removeFinalizer(R resource, String finalizer) {
if (log.isDebugEnabled()) {
log.debug("Removing finalizer on resource: {}", ResourceID.fromResource(resource));
}
int retryIndex = 0;
while (true) {
try {
var removed = resource.removeFinalizer(finalizer);
if (!removed) {
return resource;
}
return customResourceFacade.replaceResourceWithLock(resource);
} catch (KubernetesClientException e) {
log.trace("Exception during finalizer removal for resource: {}", resource);
retryIndex++;
if (e.getCode() != 409 || retryIndex >= MAX_FINALIZER_REMOVAL_RETRY) {
throw e;
}
Class<R> rClass = (Class<R>) resource.getClass();
resource = customResourceFacade.getResource(rClass, resource.getMetadata().getNamespace(),
resource.getMetadata().getName());
}
}
}

// created to support unit testing
static class CustomResourceFacade<R extends HasMetadata> {

private final MixedOperation<R, KubernetesResourceList<R>, Resource<R>> resourceOperation;
private final KubernetesClient client;

public CustomResourceFacade(
MixedOperation<R, KubernetesResourceList<R>, Resource<R>> resourceOperation) {
MixedOperation<R, KubernetesResourceList<R>, Resource<R>> resourceOperation,
KubernetesClient client) {
this.resourceOperation = resourceOperation;
this.client = client;
}

public R getResource(Class<R> rClass, String namespace, String name) {
return client.resources(rClass)
.inNamespace(namespace)
.withName(name).get();
}

public R replaceResourceWithLock(R resource) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public void eventReceived(ResourceAction action, T resource, T oldResource) {
controller.getEventSourceManager().broadcastOnResourceEvent(action, resource, oldResource);
if (filter.acceptChange(controller, oldResource, resource)) {
getEventHandler().handleEvent(
new ResourceEvent(action, ResourceID.fromResource(resource)));
new ResourceEvent(action, ResourceID.fromResource(resource), resource));
} else {
log.debug("Skipping event handling resource {} with version: {}", getUID(resource),
getVersion(resource));
Expand Down
Loading