Skip to content

Try to get lease namespace if unspecified #1450

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Sep 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import io.fabric8.kubernetes.client.extended.leaderelection.LeaderElector;
import io.fabric8.kubernetes.client.extended.leaderelection.LeaderElectorBuilder;
import io.fabric8.kubernetes.client.extended.leaderelection.resourcelock.LeaseLock;
import io.fabric8.kubernetes.client.extended.leaderelection.resourcelock.Lock;
import io.javaoperatorsdk.operator.api.config.ConfigurationServiceProvider;
import io.javaoperatorsdk.operator.api.config.LeaderElectionConfiguration;

Expand All @@ -31,24 +30,43 @@ public LeaderElectionManager(ControllerManager controllerManager) {

public void init(LeaderElectionConfiguration config, KubernetesClient client) {
this.identity = identity(config);
Lock lock = new LeaseLock(config.getLeaseNamespace(), config.getLeaseName(), identity);
final var leaseNamespace =
config.getLeaseNamespace().orElseGet(
() -> ConfigurationServiceProvider.instance().getClientConfiguration().getNamespace());
if (leaseNamespace == null) {
final var message =
"Lease namespace is not set and cannot be inferred. Leader election cannot continue.";
log.error(message);
throw new IllegalArgumentException(message);
}
final var lock = new LeaseLock(leaseNamespace, config.getLeaseName(), identity);
// releaseOnCancel is not used in the underlying implementation
leaderElector = new LeaderElectorBuilder(client,
ConfigurationServiceProvider.instance().getExecutorService())
.withConfig(
new LeaderElectionConfig(lock, config.getLeaseDuration(), config.getRenewDeadline(),
config.getRetryPeriod(), leaderCallbacks(), true, config.getLeaseName()))
.build();
leaderElector =
new LeaderElectorBuilder(
client, ConfigurationServiceProvider.instance().getExecutorService())
.withConfig(
new LeaderElectionConfig(
lock,
config.getLeaseDuration(),
config.getRenewDeadline(),
config.getRetryPeriod(),
leaderCallbacks(),
true,
config.getLeaseName()))
.build();
}

public boolean isLeaderElectionEnabled() {
return leaderElector != null;
}

private LeaderCallbacks leaderCallbacks() {
return new LeaderCallbacks(this::startLeading, this::stopLeading, leader -> {
log.info("New leader with identity: {}", leader);
});
return new LeaderCallbacks(
this::startLeading,
this::stopLeading,
leader -> {
log.info("New leader with identity: {}", leader);
});
}

private void startLeading() {
Expand All @@ -64,7 +82,7 @@ private void stopLeading() {
}

private String identity(LeaderElectionConfiguration config) {
String id = config.getIdentity().orElse(System.getenv("HOSTNAME"));
var id = config.getIdentity().orElse(System.getenv("HOSTNAME"));
if (id == null || id.isBlank()) {
id = UUID.randomUUID().toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ public LeaderElectionConfiguration(String leaseName, String leaseNamespace) {
RETRY_PERIOD_DEFAULT_VALUE, null);
}

public LeaderElectionConfiguration(String leaseName) {
this(
leaseName,
null,
LEASE_DURATION_DEFAULT_VALUE,
RENEW_DEADLINE_DEFAULT_VALUE,
RETRY_PERIOD_DEFAULT_VALUE, null);
}

public LeaderElectionConfiguration(
String leaseName,
String leaseNamespace,
Expand All @@ -59,8 +68,8 @@ public LeaderElectionConfiguration(
this.identity = identity;
}

public String getLeaseNamespace() {
return leaseNamespace;
public Optional<String> getLeaseNamespace() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: this is a breaking change. I can mark it as deprecated and provide another method getOptionalLeaseNamespace, if needed.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh normally I would prefer backwards compatible with deprecated. getOptionalLeaseNamespace does not sound too good, maybe just stick with this this time.

Copy link
Collaborator

@metacosm metacosm Sep 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably would be better, yes. Then again, I don't know if anyone is using Leader Election already…

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think for users, direct usage of this API will be rare even they do use leader election feature. Probably only in unit test.

return Optional.ofNullable(leaseNamespace);
}

public String getLeaseName() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package io.javaoperatorsdk.operator;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

import io.fabric8.kubernetes.client.KubernetesClient;
import io.javaoperatorsdk.operator.api.config.ConfigurationServiceProvider;
import io.javaoperatorsdk.operator.api.config.LeaderElectionConfiguration;

import static io.fabric8.kubernetes.client.Config.KUBERNETES_AUTH_TRYKUBECONFIG_SYSTEM_PROPERTY;
import static io.fabric8.kubernetes.client.Config.KUBERNETES_NAMESPACE_FILE;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;

class LeaderElectionManagerTest {

private ControllerManager controllerManager;
private KubernetesClient kubernetesClient;
private LeaderElectionManager leaderElectionManager;

@BeforeEach
void setUp() {
controllerManager = mock(ControllerManager.class);
kubernetesClient = mock(KubernetesClient.class);
leaderElectionManager = new LeaderElectionManager(controllerManager);
}

@AfterEach
void tearDown() {
ConfigurationServiceProvider.reset();
System.getProperties().remove(KUBERNETES_NAMESPACE_FILE);
System.getProperties().remove(KUBERNETES_AUTH_TRYKUBECONFIG_SYSTEM_PROPERTY);
}

@Test
void testInit() {
leaderElectionManager.init(new LeaderElectionConfiguration("test", "testns"), kubernetesClient);
assertTrue(leaderElectionManager.isLeaderElectionEnabled());
}

@Test
void testInitInferLeaseNamespace(@TempDir Path tempDir) throws IOException {
var namespace = "foo";
var namespacePath = tempDir.resolve("namespace");
Files.writeString(namespacePath, namespace);

System.setProperty(KUBERNETES_AUTH_TRYKUBECONFIG_SYSTEM_PROPERTY, "false");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know why it's needed to set this property to false?

Copy link
Contributor Author

@honnix honnix Sep 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is because of https://github.com/fabric8io/kubernetes-client/blob/0742e942eb09f8ed2672f7f82e176b76e595ad50/kubernetes-client-api/src/main/java/io/fabric8/kubernetes/client/Config.java#L600. Otherwise it will try to read .kube/config. By setting this the test case makes sure it tries to read the file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm however still not sure this gets the correct client config.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The order also seems to be wrong:

configurationService.getLeaderElectionConfiguration()
    .ifPresent(c -> leaderElectionManager.init(c, this.kubernetesClient));
ConfigurationServiceProvider.set(configurationService);

Now the PR as it is, tries to access ConfigurationServiceProvider.instance() before ConfigurationServiceProvider.set(configurationService) is called.

For the use case creating k8s client manually, this seems to be required but maybe it is supposed to be so?

var operator =
        new Operator(
            kubernetesClient,
            configurationServiceOverrider ->
                configurationServiceOverrider
                    .withCloseClientOnStop(false)
                    .withClientConfiguration(kubernetesClient.getConfiguration()));

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, the instance is already set in ConfigurationServiceProvider::overrideCurrent, so ConfigurationServiceProvider.set(configurationService); doesn't do anything.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean manually created? In the background it always created, if the namespace is not set it should use always one where the pod is running.

For example:

var kubernetesClient = ...; # build the client; may not be from .kube/config
var operator =
        new Operator(
            kubernetesClient,
            configurationServiceOverrider ->
                configurationServiceOverrider
                    .withCloseClientOnStop(false)
                    .withClientConfiguration(kubernetesClient.getConfiguration()));

If here withClientConfiguration is not called, I think a default configuration will be created, which might be different than the configuration used to build kubernetesClient. Then we have an inconsistency. I'm not sure of the consequence though.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that is absolutely true. The thing is that on the other hand on quarkus extension it would make issues when the config from config service not used. Note however that if the namespace is configured for a client, it's not our api.
So we have api for namespace config on controller level. And we have api for leader election to config namespace. So I would expect that the leader election is either uses the pod namespace or a setting explicitly for it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I understand that. I think as you said this is a separated issue to track, not related to leader election. I can create a new issue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I modified the existing e2e test a bit. PTAL, thanks.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably need to clean things up or, at the very least, document them better. The intention is to create client instance(s) based on the configuration retrieved from the ConfigurationService. I'm not sure that we do follow that ourselves in our code but we should review it. You should only create client instances with a different configuration only for very specific purposes.

System.setProperty(KUBERNETES_NAMESPACE_FILE, namespacePath.toString());

leaderElectionManager.init(new LeaderElectionConfiguration("test"), kubernetesClient);
assertTrue(leaderElectionManager.isLeaderElectionEnabled());
}

@Test
void testFailedToInitInferLeaseNamespace() {
System.setProperty(KUBERNETES_AUTH_TRYKUBECONFIG_SYSTEM_PROPERTY, "false");
assertThrows(
IllegalArgumentException.class,
() -> leaderElectionManager.init(new LeaderElectionConfiguration("test"),
kubernetesClient));
}

@Test
void testFailedToInitInferLeaseNamespaceProbablyUsingKubeConfig() {
assertThrows(
IllegalArgumentException.class,
() -> leaderElectionManager.init(new LeaderElectionConfiguration("test"),
kubernetesClient));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
apiVersion: v1
kind: Pod
metadata:
name: leader-election-operator-2
spec:
serviceAccountName: leader-election-operator
containers:
- name: operator
image: leader-election-operator
imagePullPolicy: Never
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: leader-election-operator

---
apiVersion: v1
kind: Pod
metadata:
name: leader-election-operator-1
spec:
serviceAccountName: leader-election-operator
containers:
- name: operator
image: leader-election-operator
imagePullPolicy: Never
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: operator-admin
subjects:
- kind: ServiceAccount
name: leader-election-operator
roleRef:
kind: ClusterRole
name: leader-election-operator
apiGroup: ""

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: leader-election-operator
rules:
- apiGroups:
- "apiextensions.k8s.io"
resources:
- customresourcedefinitions
verbs:
- '*'
- apiGroups:
- "sample.javaoperatorsdk"
resources:
- leaderelectiontestcustomresources
- leaderelectiontestcustomresources/status
verbs:
- '*'
- apiGroups:
- "coordination.k8s.io"
resources:
- "leases"
verbs:
- '*'
7 changes: 6 additions & 1 deletion sample-operators/leader-election/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
Expand All @@ -75,4 +80,4 @@
</plugins>
</build>

</project>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@ public static void main(String[] args) {

log.info("Starting operator with identity: {}", identity);

LeaderElectionConfiguration leaderElectionConfiguration =
namespace == null
? new LeaderElectionConfiguration("leader-election-test")
: new LeaderElectionConfiguration("leader-election-test", namespace, identity);

var client = new KubernetesClientBuilder().build();
Operator operator = new Operator(client,
c -> c.withLeaderElectionConfiguration(
new LeaderElectionConfiguration("leader-election-test", namespace, identity)));
Operator operator =
new Operator(client, c -> c.withLeaderElectionConfiguration(leaderElectionConfiguration));

operator.register(new LeaderElectionTestReconciler(identity));
operator.installShutdownHook();
operator.start();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@

import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -46,14 +47,15 @@ class LeaderElectionE2E {
private String namespace;
private KubernetesClient client;

@Test
@ParameterizedTest
@ValueSource(strings = {"namespace-inferred-", ""})
// not for local mode by design
@EnabledIfSystemProperty(named = "test.deployment", matches = "remote")
void otherInstancesTakesOverWhenSteppingDown() {
void otherInstancesTakesOverWhenSteppingDown(String yamlFilePrefix) {
log.info("Applying custom resource");
applyCustomResource();
log.info("Deploying operator instances");
deployOperatorsInOrder();
deployOperatorsInOrder(yamlFilePrefix);

log.info("Awaiting custom resource reconciliations");
await().pollDelay(Duration.ofSeconds(MINIMAL_SECONDS_FOR_RENEWAL))
Expand Down Expand Up @@ -130,16 +132,16 @@ void tearDown() {
.untilAsserted(() -> assertThat(client.namespaces().withName(namespace).get()).isNull());
}

private void deployOperatorsInOrder() {
private void deployOperatorsInOrder(String yamlFilePrefix) {
log.info("Installing 1st instance");
applyResources("k8s/operator.yaml");
applyResources("k8s/" + yamlFilePrefix + "operator.yaml");
await().atMost(Duration.ofSeconds(POD_STARTUP_TIMEOUT)).untilAsserted(() -> {
var pod = client.pods().inNamespace(namespace).withName(OPERATOR_1_POD_NAME).get();
assertThat(pod.getStatus().getContainerStatuses().get(0).getReady()).isTrue();
});

log.info("Installing 2nd instance");
applyResources("k8s/operator-instance-2.yaml");
applyResources("k8s/" + yamlFilePrefix + "operator-instance-2.yaml");
await().atMost(Duration.ofSeconds(POD_STARTUP_TIMEOUT)).untilAsserted(() -> {
var pod = client.pods().inNamespace(namespace).withName(OPERATOR_2_POD_NAME).get();
assertThat(pod.getStatus().getContainerStatuses().get(0).getReady()).isTrue();
Expand Down