Skip to content

Commit

Permalink
fix: operator retries and error logging (#511)
Browse files Browse the repository at this point in the history
## Description

This PR has a number of changes, mostly tied to operator behaviors and
bug fixes around failure logging. Included:
- URI Encoding of client ID on deletion/updates - when we call
updates/deletions on KC clients it gets appended to our URL for our
request and must be encoded
- Moves around the try/catch to only wrap the Keycloak API call so that
errors are surfaced more accurately in events and modifies the thrown
error from the Keycloak response to include the keycloak response status
and data (see #448)
- Adds a new Phase to Package for `Retrying` - this differentiates from
a `Pending` package that is already being reconciled to allow retries to
function as expected (see
#492)
- Moves uidSeen addition to patch status to handle infinite failure ->
pending -> failure ... loop on first apply (see
#525)

## Related Issue

Fixes #492

Fixes #448

Fixes #525

## Type of change

- [x] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Other (security config, docs update, etc)

## Checklist before merging

- [x] Test, docs, adr added or updated as needed
- [x] [Contributor Guide
Steps](https://github.com/defenseunicorns/uds-template-capability/blob/main/CONTRIBUTING.md)(https://github.com/defenseunicorns/uds-template-capability/blob/main/CONTRIBUTING.md#submitting-a-pull-request)
followed

---------

Co-authored-by: Megamind <882485+jeff-mccoy@users.noreply.github.com>
  • Loading branch information
mjnagel and jeff-mccoy authored Jul 3, 2024
1 parent 499058a commit cae5aab
Show file tree
Hide file tree
Showing 9 changed files with 371 additions and 428 deletions.
630 changes: 262 additions & 368 deletions package-lock.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/istio/common/manifests/pepr-istio-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Have to pre-create the namespace and also patch it with the istio-injection label later because
# Helm is kind of dumb: https://github.com/helm/helm/issues/350
kind: Namespace
apiVersion: v1
metadata:
name: pepr-system
labels:
Expand Down
130 changes: 79 additions & 51 deletions src/pepr/operator/controllers/keycloak/client-sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,16 @@ import { Sso, UDSPackage } from "../../crd";
import { getOwnerRef } from "../utils";
import { Client } from "./types";

const apiURL =
let apiURL =
"http://keycloak-http.keycloak.svc.cluster.local:8080/realms/uds/clients-registrations/default";
const samlDescriptorUrl =
"http://keycloak-http.keycloak.svc.cluster.local:8080/realms/uds/protocol/saml/descriptor";

// Support dev mode with port-forwarded keycloak svc
if (process.env.PEPR_MODE === "dev") {
apiURL = "http://localhost:8080/realms/uds/clients-registrations/default";
}

// Template regex to match clientField() references, see https://regex101.com/r/e41Dsk/3 for details
const secretTemplateRegex = new RegExp(
'clientField\\(([a-zA-Z]+)\\)(?:\\["?([\\w]+)"?\\]|(\\.json\\(\\)))?',
Expand Down Expand Up @@ -78,70 +83,88 @@ async function syncClient(
isRetry = false,
) {
Log.debug(pkg.metadata, `Processing client request: ${clientReq.clientId}`);
// Not including the CR data in the ref because Keycloak client IDs must be unique already
const name = `sso-client-${clientReq.clientId}`;
let client: Client;
handleClientGroups(clientReq);

try {
// Not including the CR data in the ref because Keycloak client IDs must be unique already
const name = `sso-client-${clientReq.clientId}`;
const token = Store.getItem(name);

let client: Client;
// Get keycloak client token from the store if this is an existing client
const token = Store.getItem(name);

handleClientGroups(clientReq);

// If an existing client is found, update it
try {
// If an existing client is found, use the token to update the client
if (token && !isRetry) {
Log.debug(pkg.metadata, `Found existing token for ${clientReq.clientId}`);
client = await apiCall(clientReq, "PUT", token);
} else {
Log.debug(pkg.metadata, `Creating new client for ${clientReq.clientId}`);
client = await apiCall(clientReq);
}
} catch (err) {
const msg =
`Failed to process Keycloak request for client '${clientReq.clientId}', package ` +
`${pkg.metadata?.namespace}/${pkg.metadata?.name}. Error: ${err.message}`;

// Throw the error if this is the retry or was an initial client creation attempt
if (isRetry || !token) {
Log.error(`${msg}, retry failed.`);
// Throw the original error captured from the first attempt
throw new Error(msg);
} else {
// Retry the request without the token in case we have a bad token stored
Log.error(msg);

try {
return await syncClient(clientReq, pkg, true);
} catch (retryErr) {
// If the retry fails, log the retry error and throw the original error
const retryMsg =
`Retry of Keycloak request failed for client '${clientReq.clientId}', package ` +
`${pkg.metadata?.namespace}/${pkg.metadata?.name}. Error: ${retryErr.message}`;
Log.error(retryMsg);
// Throw the error from the original attempt since our retry without token failed
throw new Error(msg);
}
}
}

// Write the new token to the store
// Write the new token to the store
try {
await Store.setItemAndWait(name, client.registrationAccessToken!);
} catch (err) {
throw Error(
`Failed to set token in store for client '${clientReq.clientId}', package ` +
`${pkg.metadata?.namespace}/${pkg.metadata?.name}`,
);
}

// Remove the registrationAccessToken from the client object to avoid problems (one-time use token)
delete client.registrationAccessToken;
// Remove the registrationAccessToken from the client object to avoid problems (one-time use token)
delete client.registrationAccessToken;

if (clientReq.protocol === "saml") {
client.samlIdpCertificate = await getSamlCertificate();
}
if (clientReq.protocol === "saml") {
client.samlIdpCertificate = await getSamlCertificate();
}

// Create or update the client secret
await K8s(kind.Secret).Apply({
metadata: {
namespace: pkg.metadata!.namespace,
// Use the CR secret name if provided, otherwise use the client name
name: secretName || name,
labels: {
"uds/package": pkg.metadata!.name,
},
// Use the CR as the owner ref for each VirtualService
ownerReferences: getOwnerRef(pkg),
// Create or update the client secret
await K8s(kind.Secret).Apply({
metadata: {
namespace: pkg.metadata!.namespace,
// Use the CR secret name if provided, otherwise use the client name
name: secretName || name,
labels: {
"uds/package": pkg.metadata!.name,
},
data: generateSecretData(client, secretTemplate),
});

if (isAuthSvcClient) {
// Do things here
}

return name;
} catch (err) {
const msg =
`Failed to process client request '${clientReq.clientId}' for ` +
`${pkg.metadata?.namespace}/${pkg.metadata?.name}. This can occur if a client already exists with the same ID that Pepr isn't tracking.`;
Log.error({ err }, msg);

if (isRetry) {
Log.error(`${msg}, retry failed, aborting`);
throw new Error(`${msg}. RETRY FAILED, aborting: ${JSON.stringify(err)}`);
}

// Retry the request
Log.warn(`${msg}, retrying`);
return syncClient(clientReq, pkg, true);
// Use the CR as the owner ref for each VirtualService
ownerReferences: getOwnerRef(pkg),
},
data: generateSecretData(client, secretTemplate),
});

if (isAuthSvcClient) {
// Do things here
}

return name;
}

/**
Expand Down Expand Up @@ -183,7 +206,8 @@ async function apiCall(sso: Partial<Sso>, method = "POST", authToken = "") {
// When not creating a new client, add the client ID and registrationAccessToken
if (authToken) {
req.headers.Authorization = `Bearer ${authToken}`;
url += `/${sso.clientId}`;
// Ensure that we URI encode the clientId in the request URL
url += `/${encodeURIComponent(sso.clientId!)}`;
}

// Remove the body for DELETE requests
Expand All @@ -195,7 +219,11 @@ async function apiCall(sso: Partial<Sso>, method = "POST", authToken = "") {
const resp = await fetch<Client>(url, req);

if (!resp.ok) {
throw new Error(`Failed to ${method} client: ${resp.statusText}`);
if (resp.data) {
throw new Error(`${JSON.stringify(resp.statusText)}, ${JSON.stringify(resp.data)}`);
} else {
throw new Error(`${JSON.stringify(resp.statusText)}`);
}
}

return resp.data;
Expand Down
3 changes: 2 additions & 1 deletion src/pepr/operator/crd/generated/package-v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ export enum ClientAuthenticatorType {
*/
export interface Groups {
/**
* List of group allowed to access to client
* List of groups allowed to access to client
*/
anyOf?: string[];
}
Expand All @@ -562,6 +562,7 @@ export enum Phase {
Failed = "Failed",
Pending = "Pending",
Ready = "Ready",
Retrying = "Retrying",
}

RegisterKind(Package, {
Expand Down
2 changes: 1 addition & 1 deletion src/pepr/operator/crd/sources/package/v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ export const v1alpha1: V1CustomResourceDefinitionVersion = {
type: "integer",
},
phase: {
enum: ["Pending", "Ready", "Failed"],
enum: ["Pending", "Ready", "Failed", "Retrying"],
type: "string",
},
ssoClients: {
Expand Down
6 changes: 6 additions & 0 deletions src/pepr/operator/crd/validators/package-validator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ export async function validator(req: PeprValidateRequest<UDSPackage>) {
return req.Deny(`The client ID "${client.clientId}" is not unique`);
}
clientIDs.add(client.clientId);
// Don't allow illegal k8s resource names for the secret name
if (client.secretName && client.secretName !== sanitizeResourceName(client.secretName)) {
return req.Deny(
`The client ID "${client.clientId}" uses an invalid secret name ${client.secretName}`,
);
}
}

return req.Approve();
Expand Down
2 changes: 2 additions & 0 deletions src/pepr/operator/reconcilers/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ describe("handleFailure", () => {
expect(PatchStatus).toHaveBeenCalledWith({
metadata: { namespace: "default", name: "test" },
status: {
phase: Phase.Retrying,
retryAttempt: 1,
},
});
Expand Down Expand Up @@ -224,6 +225,7 @@ describe("handleFailure", () => {
status: {
observedGeneration: 1,
phase: Phase.Failed,
retryAttempt: 0,
},
});
});
Expand Down
20 changes: 17 additions & 3 deletions src/pepr/operator/reconcilers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export const uidSeen = new Set<string>();
* @returns true if the CRD is pending or the current generation has been processed
*/
export function shouldSkip(cr: UDSPackage) {
const isRetrying = cr.status?.phase === Phase.Retrying;
const isPending = cr.status?.phase === Phase.Pending;
const isCurrentGeneration = cr.metadata?.generation === cr.status?.observedGeneration;

Expand All @@ -22,6 +23,12 @@ export function shouldSkip(cr: UDSPackage) {
return false;
}

// If the CR is retrying, it should not be skipped
if (isRetrying) {
Log.debug(cr, `Should skip? No, retrying`);
return false;
}

// This is the second time the CR has been seen, so check if it is pending or the current generation
if (isPending || isCurrentGeneration) {
Log.debug(cr, `Should skip? Yes, pending or current generation and not first time seen`);
Expand Down Expand Up @@ -50,6 +57,9 @@ export async function updateStatus(cr: UDSPackage, status: PkgStatus) {
},
status,
});

// Track the UID of the CRD to know if it has been seen before
uidSeen.add(cr.metadata!.uid!);
}

/**
Expand Down Expand Up @@ -95,19 +105,22 @@ export async function handleFailure(err: { status: number; message: string }, cr
const identifier = `${metadata.namespace}/${metadata.name}`;
let status: Status;

// todo: identify exact 404 we are targetting, possibly in `updateStatus`
// todo: identify exact 404 we are targeting, possibly in `updateStatus`
if (err.status === 404) {
Log.warn({ err }, `Package metadata seems to have been deleted`);
return;
}

const retryAttempt = cr.status?.retryAttempt || 0;

if (retryAttempt < 5) {
// retryAttempt starts at 0, we perform 4 retries, 5 total attempts
if (retryAttempt < 4) {
const currRetry = retryAttempt + 1;

Log.error({ err }, `Reconciliation attempt ${currRetry} failed for ${identifier}, retrying...`);

status = {
phase: Phase.Retrying,
retryAttempt: currRetry,
};
} else {
Expand All @@ -116,11 +129,12 @@ export async function handleFailure(err: { status: number; message: string }, cr
status = {
phase: Phase.Failed,
observedGeneration: metadata.generation,
retryAttempt: 0, // todo: make this nullable when kfc generates the type
};
}

// Write an event for the error
void writeEvent(cr, { message: err.message });
await writeEvent(cr, { message: err.message });

// Update the status of the package with the error
updateStatus(cr, status).catch(finalErr => {
Expand Down
5 changes: 1 addition & 4 deletions src/pepr/operator/reconcilers/package-reconciler.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Log } from "pepr";

import { handleFailure, shouldSkip, uidSeen, updateStatus } from ".";
import { handleFailure, shouldSkip, updateStatus } from ".";
import { UDSConfig } from "../../config";
import { enableInjection } from "../controllers/istio/injection";
import { istioResources } from "../controllers/istio/istio-resources";
Expand Down Expand Up @@ -64,9 +64,6 @@ export async function packageReconciler(pkg: UDSPackage) {
observedGeneration: metadata.generation,
retryAttempt: 0, // todo: make this nullable when kfc generates the type
});

// Update to indicate this version of pepr-core has reconciled the package successfully once
uidSeen.add(pkg.metadata!.uid!);
} catch (err) {
void handleFailure(err, pkg);
}
Expand Down

0 comments on commit cae5aab

Please sign in to comment.