Initialize the cluster client cache from the kubeconfig in secrets #412

Merged
2 changes: 0 additions & 2 deletions controllers/toolchaincluster/healthchecker.go
@@ -15,13 +15,11 @@ const (

// getClusterHealthStatus gets the kubernetes cluster health status by requesting "/healthz"
func getClusterHealthStatus(ctx context.Context, remoteClusterClientset *kubeclientset.Clientset) (bool, error) {

lgr := log.FromContext(ctx)
body, err := remoteClusterClientset.DiscoveryClient.RESTClient().Get().AbsPath("/healthz").Do(ctx).Raw()
if err != nil {
lgr.Error(err, "Failed to do cluster health check for a ToolchainCluster")
return false, err
}
return strings.EqualFold(string(body), "ok"), nil

}
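For orientation, here is a minimal, self-contained sketch of the probe getClusterHealthStatus performs. Loading the rest.Config from a local kubeconfig path is an illustrative assumption made only for this sketch; in the controller, the config comes from the cached ToolchainCluster instead.

```go
package main

import (
	"context"
	"fmt"
	"strings"

	kubeclientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Illustrative assumption: build the rest.Config from a local kubeconfig
	// path; the controller uses cachedCluster.RestConfig instead.
	restCfg, err := clientcmd.BuildConfigFromFlags("", "/path/to/kubeconfig")
	if err != nil {
		panic(err)
	}
	clientset, err := kubeclientset.NewForConfig(restCfg)
	if err != nil {
		panic(err)
	}
	// The same probe as getClusterHealthStatus: GET /healthz on the remote
	// cluster and compare the raw body to "ok" case-insensitively.
	body, err := clientset.DiscoveryClient.RESTClient().Get().AbsPath("/healthz").Do(context.TODO()).Raw()
	if err != nil {
		panic(err)
	}
	fmt.Println("healthy:", strings.EqualFold(string(body), "ok"))
}
```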
18 changes: 13 additions & 5 deletions controllers/toolchaincluster/toolchaincluster_controller.go
@@ -60,7 +60,7 @@
cachedCluster, ok := cluster.GetCachedToolchainCluster(toolchainCluster.Name)
if !ok {
err := fmt.Errorf("cluster %s not found in cache", toolchainCluster.Name)
if err := r.updateStatus(ctx, toolchainCluster, clusterOfflineCondition(err.Error())); err != nil {
if err := r.updateStatus(ctx, toolchainCluster, nil, clusterOfflineCondition(err.Error())); err != nil {
reqLogger.Error(err, "unable to update cluster status of ToolchainCluster")
}
return reconcile.Result{}, err
@@ -73,7 +73,7 @@
clientSet, err := kubeclientset.NewForConfig(cachedCluster.RestConfig)
if err != nil {
reqLogger.Error(err, "cannot create ClientSet for the ToolchainCluster")
if err := r.updateStatus(ctx, toolchainCluster, clusterOfflineCondition(err.Error())); err != nil {
if err := r.updateStatus(ctx, toolchainCluster, cachedCluster, clusterOfflineCondition(err.Error())); err != nil {

[Codecov: added line controllers/toolchaincluster/toolchaincluster_controller.go#L76 was not covered by tests]
reqLogger.Error(err, "unable to update cluster status of ToolchainCluster")
}
return reconcile.Result{}, err
@@ -83,15 +83,24 @@
healthCheckResult := r.getClusterHealthCondition(ctx, clientSet)

// update the status of the individual cluster.
if err := r.updateStatus(ctx, toolchainCluster, healthCheckResult); err != nil {
if err := r.updateStatus(ctx, toolchainCluster, cachedCluster, healthCheckResult); err != nil {
reqLogger.Error(err, "unable to update cluster status of ToolchainCluster")
return reconcile.Result{}, err
}
return reconcile.Result{RequeueAfter: r.RequeAfter}, nil
}

func (r *Reconciler) updateStatus(ctx context.Context, toolchainCluster *toolchainv1alpha1.ToolchainCluster, currentConditions ...toolchainv1alpha1.Condition) error {
func (r *Reconciler) updateStatus(ctx context.Context, toolchainCluster *toolchainv1alpha1.ToolchainCluster, cachedToolchainCluster *cluster.CachedToolchainCluster, currentConditions ...toolchainv1alpha1.Condition) error {
toolchainCluster.Status.Conditions = condition.AddOrUpdateStatusConditionsWithLastUpdatedTimestamp(toolchainCluster.Status.Conditions, currentConditions...)

if cachedToolchainCluster != nil {
toolchainCluster.Status.APIEndpoint = cachedToolchainCluster.APIEndpoint
toolchainCluster.Status.OperatorNamespace = cachedToolchainCluster.OperatorNamespace
} else {
toolchainCluster.Status.APIEndpoint = ""
toolchainCluster.Status.OperatorNamespace = ""
}

if err := r.Client.Status().Update(ctx, toolchainCluster); err != nil {
return fmt.Errorf("failed to update the status of cluster - %s: %w", toolchainCluster.Name, err)
}
@@ -107,7 +116,6 @@
return clusterNotReadyCondition()
}
return clusterReadyCondition()

}

func (r *Reconciler) getClusterHealth(ctx context.Context, remoteClusterClientset *kubeclientset.Clientset) (bool, error) {
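The net effect of the updateStatus change above: the ToolchainCluster status now mirrors the API endpoint and operator namespace of the cached cluster, and clears both fields when no cluster is cached. A condensed sketch of that contract with simplified stand-in types (the real ones live in toolchainv1alpha1 and the cluster package):

```go
package main

import "fmt"

// Stand-ins for cluster.CachedToolchainCluster and ToolchainClusterStatus;
// only the two mirrored fields are modeled here.
type cachedCluster struct{ APIEndpoint, OperatorNamespace string }
type status struct{ APIEndpoint, OperatorNamespace string }

// mirrorStatus reproduces the branch added to updateStatus: copy the cached
// connection details into the status, or clear them when nothing is cached.
func mirrorStatus(st *status, cached *cachedCluster) {
	if cached != nil {
		st.APIEndpoint = cached.APIEndpoint
		st.OperatorNamespace = cached.OperatorNamespace
	} else {
		st.APIEndpoint = ""
		st.OperatorNamespace = ""
	}
}

func main() {
	st := status{}
	mirrorStatus(&st, &cachedCluster{APIEndpoint: "https://api.example.com", OperatorNamespace: "member-operator"})
	fmt.Printf("%+v\n", st) // populated from the cache
	mirrorStatus(&st, nil)
	fmt.Printf("%+v\n", st) // cleared: cluster no longer cached
}
```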
59 changes: 48 additions & 11 deletions pkg/cluster/service.go
@@ -160,16 +160,9 @@

// NewClusterConfig generates a new cluster config by fetching the necessary info from the given ToolchainCluster's associated Secret and taking all remaining data from the ToolchainCluster CR
func NewClusterConfig(cl client.Client, toolchainCluster *toolchainv1alpha1.ToolchainCluster, timeout time.Duration) (*Config, error) {
clusterName := toolchainCluster.Name

apiEndpoint := toolchainCluster.Spec.APIEndpoint
if apiEndpoint == "" {
return nil, errors.Errorf("the api endpoint of cluster %s is empty", clusterName)
}

secretName := toolchainCluster.Spec.SecretRef.Name
if secretName == "" {
return nil, errors.Errorf("cluster %s does not have a secret name", clusterName)
return nil, errors.Errorf("cluster %s does not have a secret name", toolchainCluster.Name)

[Codecov: added line pkg/cluster/service.go#L165 was not covered by tests]
}
secret := &v1.Secret{}
name := types.NamespacedName{
@@ -178,7 +171,22 @@
}
err := cl.Get(context.TODO(), name, secret)
if err != nil {
return nil, errors.Wrapf(err, "unable to get secret %s for cluster %s", name, clusterName)
return nil, errors.Wrapf(err, "unable to get secret %s for cluster %s", name, toolchainCluster.Name)
}

if _, ok := secret.Data["kubeconfig"]; ok {
return loadConfigFromKubeConfig(toolchainCluster, secret, timeout)
} else {
return loadConfigFromLegacyToolchainCluster(toolchainCluster, secret, timeout)
}
}

func loadConfigFromLegacyToolchainCluster(toolchainCluster *toolchainv1alpha1.ToolchainCluster, secret *v1.Secret, timeout time.Duration) (*Config, error) {
clusterName := toolchainCluster.Name

apiEndpoint := toolchainCluster.Spec.APIEndpoint
if apiEndpoint == "" {
return nil, errors.Errorf("the api endpoint of cluster %s is empty", clusterName)

[Codecov: added line pkg/cluster/service.go#L189 was not covered by tests]
}

token, tokenFound := secret.Data[toolchainTokenKey]
@@ -201,15 +209,44 @@
restConfig.Timeout = timeout

return &Config{
Name: toolchainCluster.Name,
APIEndpoint: toolchainCluster.Spec.APIEndpoint,
Name: clusterName,
APIEndpoint: apiEndpoint,
RestConfig: restConfig,
OperatorNamespace: toolchainCluster.Labels[labelNamespace],
OwnerClusterName: toolchainCluster.Labels[labelOwnerClusterName],
Labels: toolchainCluster.Labels,
}, nil
}

func loadConfigFromKubeConfig(toolchainCluster *toolchainv1alpha1.ToolchainCluster, secret *v1.Secret, timeout time.Duration) (*Config, error) {
cfg, err := clientcmd.Load(secret.Data["kubeconfig"])
if err != nil {
return nil, err

[Codecov: added line pkg/cluster/service.go#L224 was not covered by tests]
}
clientCfg := clientcmd.NewDefaultClientConfig(*cfg, &clientcmd.ConfigOverrides{})
restCfg, err := clientCfg.ClientConfig()
if err != nil {
return nil, err

[Codecov: added line pkg/cluster/service.go#L229 was not covered by tests]
}

// This is questionable, but the timeout is currently configurable in the member configuration so let's keep it here...
restCfg.Timeout = timeout

operatorNamespace, _, err := clientCfg.Namespace()
if err != nil {
return nil, fmt.Errorf("Could not determine the operator namespace from the current context in the provided kubeconfig because of: %w", err)

[Codecov: added line pkg/cluster/service.go#L237 was not covered by tests]
}

return &Config{
Name: toolchainCluster.Name,
APIEndpoint: restCfg.Host,
RestConfig: restCfg,
OperatorNamespace: operatorNamespace,
OwnerClusterName: toolchainCluster.Labels[labelOwnerClusterName],
Labels: toolchainCluster.Labels,
}, nil
}

func IsReady(clusterStatus *toolchainv1alpha1.ToolchainClusterStatus) bool {
for _, condition := range clusterStatus.Conditions {
if condition.Type == toolchainv1alpha1.ConditionReady {
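To make the new loading path concrete, here is a standalone round-trip sketch built on the same client-go calls loadConfigFromKubeConfig uses (clientcmd.Load, NewDefaultClientConfig, ClientConfig, Namespace). The endpoint, namespace, and the "cluster"/"ctx"/"auth" names are illustrative assumptions, not values from this PR.

```go
package main

import (
	"fmt"

	"k8s.io/client-go/tools/clientcmd"
	clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
)

func main() {
	// Build the minimal kubeconfig shape the new secret format carries;
	// all names and values here are illustrative.
	kubeconfig := clientcmdapi.Config{
		Clusters:       map[string]*clientcmdapi.Cluster{"cluster": {Server: "https://api.example.com"}},
		Contexts:       map[string]*clientcmdapi.Context{"ctx": {Cluster: "cluster", AuthInfo: "auth", Namespace: "member-operator"}},
		AuthInfos:      map[string]*clientcmdapi.AuthInfo{"auth": {Token: "token"}},
		CurrentContext: "ctx",
	}
	raw, err := clientcmd.Write(kubeconfig)
	if err != nil {
		panic(err)
	}

	// Mirror loadConfigFromKubeConfig: parse the raw bytes, build a client
	// config, then read the API endpoint and the current context's namespace.
	cfg, err := clientcmd.Load(raw)
	if err != nil {
		panic(err)
	}
	clientCfg := clientcmd.NewDefaultClientConfig(*cfg, &clientcmd.ConfigOverrides{})
	restCfg, err := clientCfg.ClientConfig()
	if err != nil {
		panic(err)
	}
	ns, _, err := clientCfg.Namespace()
	if err != nil {
		panic(err)
	}
	fmt.Println(restCfg.Host, ns) // https://api.example.com member-operator
}
```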
135 changes: 132 additions & 3 deletions pkg/cluster/service_test.go
@@ -6,13 +6,18 @@ import (
"testing"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

toolchainv1alpha1 "github.com/codeready-toolchain/api/api/v1alpha1"
"github.com/codeready-toolchain/toolchain-common/pkg/cluster"
"github.com/codeready-toolchain/toolchain-common/pkg/test"
"github.com/codeready-toolchain/toolchain-common/pkg/test/verify"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/clientcmd"
clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
"sigs.k8s.io/controller-runtime/pkg/client"
)

@@ -22,7 +27,6 @@ func TestAddToolchainClusterAsMember(t *testing.T) {
// when
return service.AddOrUpdateToolchainCluster(toolchainCluster)
})

}

func TestAddToolchainClusterAsHost(t *testing.T) {
@@ -132,7 +136,7 @@ func TestListToolchainClusterConfigs(t *testing.T) {
})

t.Run("when list fails", func(t *testing.T) {
//given
// given
cl := test.NewFakeClient(t, m1, m2, host, noise, sec1, sec2, secHost, secNoise)
cl.MockList = func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
return fmt.Errorf("some error")
@@ -147,7 +151,7 @@
})

t.Run("when get secret fails", func(t *testing.T) {
//given
// given
cl := test.NewFakeClient(t, m1, m2, host, noise, sec1, sec2, secHost, secNoise)
cl.MockGet = func(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
return fmt.Errorf("some error")
@@ -161,3 +165,128 @@
require.Empty(t, clusterConfigs)
})
}

func TestNewClusterConfig(t *testing.T) {
legacyTc := func() *toolchainv1alpha1.ToolchainCluster {
return &toolchainv1alpha1.ToolchainCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "tc",
Namespace: "ns",
Labels: map[string]string{
"namespace": "operatorns",
},
},
Spec: toolchainv1alpha1.ToolchainClusterSpec{
APIEndpoint: "https://over.the.rainbow",
SecretRef: toolchainv1alpha1.LocalSecretReference{
Name: "secret",
},
},
}
}
newFormTc := func() *toolchainv1alpha1.ToolchainCluster {
return &toolchainv1alpha1.ToolchainCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "tc",
Namespace: "ns",
},
Spec: toolchainv1alpha1.ToolchainClusterSpec{
SecretRef: toolchainv1alpha1.LocalSecretReference{
Name: "secret",
},
},
}
}

legacySecret := func() *corev1.Secret {
return &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "secret",
Namespace: "ns",
},
Data: map[string][]byte{
"token": []byte("token"),
},
}
}
kubeconfigSecret := func(t *testing.T) *corev1.Secret {
t.Helper()
kubeconfig := clientcmdapi.Config{
Clusters: map[string]*clientcmdapi.Cluster{
"cluster": {
Server: "https://over.the.rainbow",
},
},
Contexts: map[string]*clientcmdapi.Context{
"ctx": {
Cluster: "cluster",
AuthInfo: "auth",
Namespace: "operatorns",
},
},
AuthInfos: map[string]*clientcmdapi.AuthInfo{
"auth": {
Token: "token",
},
},
CurrentContext: "ctx",
}
kubeconfigContents, err := clientcmd.Write(kubeconfig)
require.NoError(t, err)

return &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "secret",
Namespace: "ns",
},
Data: map[string][]byte{
"kubeconfig": kubeconfigContents,
},
}
}

t.Run("using legacy fields in ToolchainCluster and token in secret", func(t *testing.T) {
tc := legacyTc()
secret := legacySecret()

cl := test.NewFakeClient(t, tc, secret)

cfg, err := cluster.NewClusterConfig(cl, tc, 1*time.Second)
require.NoError(t, err)

assert.Equal(t, "https://over.the.rainbow", cfg.APIEndpoint)
assert.Equal(t, "operatorns", cfg.OperatorNamespace)
assert.Equal(t, "token", cfg.RestConfig.BearerToken)
})

t.Run("using kubeconfig in secret", func(t *testing.T) {
tc := newFormTc()
secret := kubeconfigSecret(t)

cl := test.NewFakeClient(t, tc, secret)

cfg, err := cluster.NewClusterConfig(cl, tc, 1*time.Second)
require.NoError(t, err)

assert.Equal(t, "https://over.the.rainbow", cfg.APIEndpoint)
assert.Equal(t, "operatorns", cfg.OperatorNamespace)
assert.Equal(t, "token", cfg.RestConfig.BearerToken)
})

t.Run("uses kubeconfig in precedence over legacy fields", func(t *testing.T) {
tc := newFormTc()
[Review thread]
Contributor: Do we need to add a test case that uses the legacyTc? Basically:

```go
tc := legacyTc()
// Combine the kubeconfig and the token in the same secret.
// We should see auth from the kubeconfig used...
secret := kubeconfigSecret(t)
secret.Data["token"] = []byte("not-the-token-we-want")
```

Contributor Author: This essentially checks that if we detect a kubeconfig data field in the secret, we use it instead of the data in the ToolchainCluster and the token in the secret. So as soon as we see the data coming from the kubeconfig, we know that happened. You have a very good idea for a "defensive test" that would make sure future changes don't alter this behavior, but we plan to completely remove the loading from the legacy fields in upcoming PRs, so I think we can afford to not have it and instead just make sure we use the new format over the old.

Contributor: Right, I was asking since as soon as you push this PR to prod, the tc will still have the legacy format, and only later the new one. But I guess it's not a concern, since the only thing that matters is the loading mechanism and not the tc format.
[End of review thread]

// Combine the kubeconfig and the token in the same secret.
// We should see auth from the kubeconfig used...
secret := kubeconfigSecret(t)
secret.Data["token"] = []byte("not-the-token-we-want")

cl := test.NewFakeClient(t, tc, secret)

cfg, err := cluster.NewClusterConfig(cl, tc, 1*time.Second)
require.NoError(t, err)

assert.Equal(t, "https://over.the.rainbow", cfg.APIEndpoint)
assert.Equal(t, "operatorns", cfg.OperatorNamespace)
assert.Equal(t, "token", cfg.RestConfig.BearerToken)
})
}