-
Notifications
You must be signed in to change notification settings - Fork 221
feat: retry remove finalizer #1249
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
b9b15ab
55ba149
5b31b19
5fb255e
a978304
7d2604c
5c61904
7d29a37
18332c7
17bb764
3330b19
6c867c2
743f73f
125aed2
02bb3f2
78c5013
9fb0192
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -121,10 +121,10 @@ public void handleEvent(Event event) { | |
} | ||
|
||
private void handleMarkedEventForResource(ResourceID resourceID) { | ||
if (!eventMarker.deleteEventPresent(resourceID)) { | ||
submitReconciliationExecution(resourceID); | ||
} else { | ||
if (eventMarker.deleteEventPresent(resourceID)) { | ||
cleanupForDeletedEvent(resourceID); | ||
} else if (!eventMarker.processedMarkForDeletionPresent(resourceID)) { | ||
submitReconciliationExecution(resourceID); | ||
} | ||
} | ||
|
||
|
@@ -157,14 +157,50 @@ private void submitReconciliationExecution(ResourceID resourceID) { | |
} | ||
|
||
private void handleEventMarking(Event event) { | ||
if (event instanceof ResourceEvent | ||
&& ((ResourceEvent) event).getAction() == ResourceAction.DELETED) { | ||
log.debug("Marking delete event received for: {}", event.getRelatedCustomResourceID()); | ||
eventMarker.markDeleteEventReceived(event); | ||
} else if (!eventMarker.deleteEventPresent(event.getRelatedCustomResourceID())) { | ||
log.debug("Marking event received for: {}", event.getRelatedCustomResourceID()); | ||
eventMarker.markEventReceived(event); | ||
if (event instanceof ResourceEvent) { | ||
var resourceEvent = (ResourceEvent) event; | ||
if (resourceEvent.getAction() == ResourceAction.DELETED) { | ||
log.debug("Marking delete event received for: {}", event.getRelatedCustomResourceID()); | ||
eventMarker.markDeleteEventReceived(event); | ||
} else { | ||
/* | ||
* if already processed mark for deletion we want to override that mark in case a custom | ||
* resource in the cache and not it is not marked for deletion. This could happen in an edge | ||
* case, when the resource is deleted while websocket was disconnected. And meanwhile a | ||
* resource with same ResourceID was deleted and created again. So resource should not be | ||
* marked received if processedMarkForDeletion present it's still marked for deletion, but | ||
* otherwise yes. | ||
*/ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't really understand the comment… There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. reformulated, it's more like documenting an edge case that is not explicit, hope its better now |
||
if (eventMarker.processedMarkForDeletionPresent(event.getRelatedCustomResourceID()) | ||
&& customResourceMarkedForDeletion(resourceEvent)) { | ||
log.debug( | ||
"Skipping mark of event received, since already processed mark for deletion and resource " | ||
+ | ||
"marked for deletion: {}", | ||
event.getRelatedCustomResourceID()); | ||
return; | ||
} | ||
markEventReceived(event); | ||
} | ||
} else if (!eventMarker.deleteEventPresent(event.getRelatedCustomResourceID()) || | ||
!eventMarker.processedMarkForDeletionPresent(event.getRelatedCustomResourceID())) { | ||
markEventReceived(event); | ||
} else if (log.isDebugEnabled()) { | ||
log.debug( | ||
"Skipped marking event as received. Delete event present: {}, processed mark for deletion: {}", | ||
eventMarker.deleteEventPresent(event.getRelatedCustomResourceID()), | ||
eventMarker.processedMarkForDeletionPresent(event.getRelatedCustomResourceID())); | ||
} | ||
|
||
} | ||
|
||
private void markEventReceived(Event event) { | ||
log.debug("Marking event received for: {}", event.getRelatedCustomResourceID()); | ||
eventMarker.markEventReceived(event); | ||
} | ||
|
||
private boolean customResourceMarkedForDeletion(ResourceEvent resourceEvent) { | ||
return resourceEvent.getResource().map(HasMetadata::isMarkedForDeletion).orElse(false); | ||
} | ||
|
||
private RetryInfo retryInfo(ResourceID customResourceUid) { | ||
|
@@ -199,6 +235,8 @@ void eventProcessingFinished( | |
metrics.finishedReconciliation(resourceID); | ||
if (eventMarker.deleteEventPresent(resourceID)) { | ||
cleanupForDeletedEvent(executionScope.getResourceID()); | ||
} else if (postExecutionControl.isFinalizerRemoved()) { | ||
eventMarker.markProcessedMarkForDeletion(resourceID); | ||
} else { | ||
postExecutionControl | ||
.getUpdatedCustomResource() | ||
|
@@ -288,10 +326,10 @@ private RetryExecution getOrInitRetryExecution(ExecutionScope<R> executionScope) | |
return retryExecution; | ||
} | ||
|
||
private void cleanupForDeletedEvent(ResourceID customResourceUid) { | ||
log.debug("Cleaning up for delete event for: {}", customResourceUid); | ||
eventMarker.cleanup(customResourceUid); | ||
metrics.cleanupDoneFor(customResourceUid); | ||
private void cleanupForDeletedEvent(ResourceID resourceID) { | ||
log.debug("Cleaning up for delete event for: {}", resourceID); | ||
eventMarker.cleanup(resourceID); | ||
metrics.cleanupDoneFor(resourceID); | ||
} | ||
|
||
private boolean isControllerUnderExecution(ResourceID customResourceUid) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,8 @@ | |
import io.fabric8.kubernetes.api.model.HasMetadata; | ||
import io.fabric8.kubernetes.api.model.KubernetesResourceList; | ||
import io.fabric8.kubernetes.client.CustomResource; | ||
import io.fabric8.kubernetes.client.KubernetesClient; | ||
import io.fabric8.kubernetes.client.KubernetesClientException; | ||
import io.fabric8.kubernetes.client.dsl.MixedOperation; | ||
import io.fabric8.kubernetes.client.dsl.Resource; | ||
import io.fabric8.kubernetes.client.dsl.internal.HasMetadataOperationsImpl; | ||
|
@@ -35,6 +37,8 @@ | |
*/ | ||
class ReconciliationDispatcher<R extends HasMetadata> { | ||
|
||
public static final int MAX_FINALIZER_REMOVAL_RETRY = 10; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe that should be configurable from the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would do it a separate PR TBH. Ideally this retry passes after 1-2 attemt, it's unlikely that there will be more than 3-4 finalizers. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this should be done in a different PR for sure. |
||
|
||
private static final Logger log = LoggerFactory.getLogger(ReconciliationDispatcher.class); | ||
|
||
private final Controller<R> controller; | ||
|
@@ -47,7 +51,7 @@ class ReconciliationDispatcher<R extends HasMetadata> { | |
} | ||
|
||
public ReconciliationDispatcher(Controller<R> controller) { | ||
this(controller, new CustomResourceFacade<>(controller.getCRClient())); | ||
this(controller, new CustomResourceFacade<>(controller.getCRClient(), controller.getClient())); | ||
} | ||
|
||
public PostExecutionControl<R> handleExecution(ExecutionScope<R> executionScope) { | ||
|
@@ -61,35 +65,42 @@ public PostExecutionControl<R> handleExecution(ExecutionScope<R> executionScope) | |
|
||
private PostExecutionControl<R> handleDispatch(ExecutionScope<R> executionScope) | ||
throws Exception { | ||
R resource = executionScope.getResource(); | ||
log.debug("Handling dispatch for resource {}", getName(resource)); | ||
R originalResource = executionScope.getResource(); | ||
var resourceForExecution = cloneResource(originalResource); | ||
log.debug("Handling dispatch for resource {}", getName(originalResource)); | ||
|
||
final var markedForDeletion = resource.isMarkedForDeletion(); | ||
if (markedForDeletion && shouldNotDispatchToCleanup(resource)) { | ||
final var markedForDeletion = originalResource.isMarkedForDeletion(); | ||
if (markedForDeletion && shouldNotDispatchToCleanup(originalResource)) { | ||
log.debug( | ||
"Skipping cleanup of resource {} because finalizer(s) {} don't allow processing yet", | ||
getName(resource), | ||
resource.getMetadata().getFinalizers()); | ||
getName(originalResource), | ||
originalResource.getMetadata().getFinalizers()); | ||
return PostExecutionControl.defaultDispatch(); | ||
} | ||
|
||
Context<R> context = new DefaultContext<>(executionScope.getRetryInfo(), controller, resource); | ||
if (markedForDeletion) { | ||
return handleCleanup(resource, context); | ||
Context<R> context = | ||
new DefaultContext<>(executionScope.getRetryInfo(), controller, originalResource); | ||
if (originalResource.isMarkedForDeletion()) { | ||
return handleCleanup(resourceForExecution, context); | ||
} else { | ||
return handleReconcile(executionScope, resource, context); | ||
return handleReconcile(executionScope, resourceForExecution, originalResource, context); | ||
} | ||
} | ||
|
||
private boolean shouldNotDispatchToCleanup(R resource) { | ||
// we don't dispatch to cleanup if the controller is configured to use a finalizer but that | ||
// finalizer is not present (which means it's already been removed) | ||
return !controller.useFinalizer() || (controller.useFinalizer() | ||
&& !resource.hasFinalizer(configuration().getFinalizerName())); | ||
var markedForDeletion = resource.isMarkedForDeletion(); | ||
var alreadyRemovedFinalizer = controller.useFinalizer() | ||
&& !resource.hasFinalizer(configuration().getFinalizerName()); | ||
if (markedForDeletion && alreadyRemovedFinalizer) { | ||
log.warn("This should not happen. Marked for deletion & already removed finalizer: {}", | ||
ResourceID.fromResource(resource)); | ||
} | ||
return markedForDeletion && (!controller.useFinalizer() || alreadyRemovedFinalizer); | ||
} | ||
|
||
private PostExecutionControl<R> handleReconcile( | ||
ExecutionScope<R> executionScope, R originalResource, Context<R> context) throws Exception { | ||
ExecutionScope<R> executionScope, R resourceForExecution, R originalResource, | ||
Context<R> context) throws Exception { | ||
if (controller.useFinalizer() | ||
&& !originalResource.hasFinalizer(configuration().getFinalizerName())) { | ||
/* | ||
|
@@ -101,8 +112,6 @@ private PostExecutionControl<R> handleReconcile( | |
var updatedResource = updateCustomResourceWithFinalizer(originalResource); | ||
return PostExecutionControl.onlyFinalizerAdded(updatedResource); | ||
} else { | ||
var resourceForExecution = | ||
cloneResource(originalResource); | ||
try { | ||
return reconcileExecution(executionScope, resourceForExecution, originalResource, context); | ||
} catch (Exception e) { | ||
|
@@ -280,8 +289,8 @@ private PostExecutionControl<R> handleCleanup(R resource, Context<R> context) { | |
// cleanup is finished, nothing left to done | ||
if (deleteControl.isRemoveFinalizer() | ||
&& resource.hasFinalizer(configuration().getFinalizerName())) { | ||
R customResource = removeFinalizer(resource); | ||
return PostExecutionControl.customResourceUpdated(customResource); | ||
R customResource = removeFinalizer(resource, configuration().getFinalizerName()); | ||
return PostExecutionControl.customResourceFinalizerRemoved(customResource); | ||
} | ||
} | ||
log.debug( | ||
|
@@ -308,28 +317,53 @@ private R updateCustomResource(R resource) { | |
return customResourceFacade.replaceResourceWithLock(resource); | ||
} | ||
|
||
private R removeFinalizer(R resource) { | ||
log.debug( | ||
"Removing finalizer on resource: {} with version: {}", | ||
getUID(resource), | ||
getVersion(resource)); | ||
resource.removeFinalizer(configuration().getFinalizerName()); | ||
return customResourceFacade.replaceResourceWithLock(resource); | ||
} | ||
|
||
|
||
ControllerConfiguration<R> configuration() { | ||
return controller.getConfiguration(); | ||
} | ||
|
||
@SuppressWarnings("unchecked") | ||
public R removeFinalizer(R resource, String finalizer) { | ||
if (log.isDebugEnabled()) { | ||
log.debug("Removing finalizer on resource: {}", ResourceID.fromResource(resource)); | ||
} | ||
int retryIndex = 0; | ||
while (true) { | ||
try { | ||
var removed = resource.removeFinalizer(finalizer); | ||
if (!removed) { | ||
return resource; | ||
} | ||
return customResourceFacade.replaceResourceWithLock(resource); | ||
} catch (KubernetesClientException e) { | ||
log.trace("Exception during finalizer removal for resource: {}", resource); | ||
retryIndex++; | ||
if (e.getCode() != 409 || retryIndex >= MAX_FINALIZER_REMOVAL_RETRY) { | ||
throw e; | ||
} | ||
Class<R> rClass = (Class<R>) resource.getClass(); | ||
resource = customResourceFacade.getResource(rClass, resource.getMetadata().getNamespace(), | ||
resource.getMetadata().getName()); | ||
} | ||
} | ||
} | ||
|
||
// created to support unit testing | ||
static class CustomResourceFacade<R extends HasMetadata> { | ||
|
||
private final MixedOperation<R, KubernetesResourceList<R>, Resource<R>> resourceOperation; | ||
private final KubernetesClient client; | ||
|
||
public CustomResourceFacade( | ||
MixedOperation<R, KubernetesResourceList<R>, Resource<R>> resourceOperation) { | ||
MixedOperation<R, KubernetesResourceList<R>, Resource<R>> resourceOperation, | ||
KubernetesClient client) { | ||
this.resourceOperation = resourceOperation; | ||
this.client = client; | ||
} | ||
|
||
public R getResource(Class<R> rClass, String namespace, String name) { | ||
return client.resources(rClass) | ||
.inNamespace(namespace) | ||
.withName(name).get(); | ||
} | ||
|
||
public R replaceResourceWithLock(R resource) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This javadoc is confusing since it's unclear to which constant it applies…
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed