diff --git a/CHANGELOG.md b/CHANGELOG.md index fd9d5fa1..9c7143e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#460](https://github.com/spegel-org/spegel/pull/460) Fix environment variable for http-bootstrap-addr flag. - [#471](https://github.com/spegel-org/spegel/pull/471) Fix handler key in request logging. - [#491](https://github.com/spegel-org/spegel/pull/491) Fix so that resolve timeout does not cancel mirroring attempts. +- [#496](https://github.com/spegel-org/spegel/pull/496) Fix p2p bootstrap to run on failed readiness check. ### Security diff --git a/charts/spegel/templates/daemonset.yaml b/charts/spegel/templates/daemonset.yaml index f49147fa..a1b01d7b 100644 --- a/charts/spegel/templates/daemonset.yaml +++ b/charts/spegel/templates/daemonset.yaml @@ -120,7 +120,7 @@ spec: # Startup may take a bit longer on bootsrap as Pods need to find each other. # This is why the startup proben is a bit more forgiving, while hitting the endpoint more often. startupProbe: - periodSeconds: 1 + periodSeconds: 3 failureThreshold: 60 httpGet: path: /healthz diff --git a/pkg/registry/registry.go b/pkg/registry/registry.go index 0114d841..d7c6e7f8 100644 --- a/pkg/registry/registry.go +++ b/pkg/registry/registry.go @@ -153,8 +153,8 @@ func (r *Registry) handle(rw mux.ResponseWriter, req *http.Request) { rw.WriteHeader(http.StatusNotFound) } -func (r *Registry) readyHandler(rw mux.ResponseWriter, _ *http.Request) { - ok, err := r.router.Ready() +func (r *Registry) readyHandler(rw mux.ResponseWriter, req *http.Request) { + ok, err := r.router.Ready(req.Context()) if err != nil { rw.WriteError(http.StatusInternalServerError, fmt.Errorf("could not determine router readiness: %w", err)) return diff --git a/pkg/routing/mock.go b/pkg/routing/mock.go index cf8601b5..e028a637 100644 --- a/pkg/routing/mock.go +++ b/pkg/routing/mock.go @@ -19,7 +19,7 @@ func NewMockRouter(resolver map[string][]netip.AddrPort, self netip.AddrPort) *M } } -func (m *MockRouter) Ready() (bool, error) { +func (m *MockRouter) Ready(ctx context.Context) (bool, error) { m.mx.RLock() defer m.mx.RUnlock() return len(m.resolver) > 0, nil diff --git a/pkg/routing/p2p.go b/pkg/routing/p2p.go index e42ebfb3..2b48cc80 100644 --- a/pkg/routing/p2p.go +++ b/pkg/routing/p2p.go @@ -135,7 +135,7 @@ func (r *P2PRouter) Close() error { return r.host.Close() } -func (r *P2PRouter) Ready() (bool, error) { +func (r *P2PRouter) Ready(ctx context.Context) (bool, error) { addrInfo, err := r.bootstrapper.Get() if err != nil { return false, err @@ -144,6 +144,10 @@ func (r *P2PRouter) Ready() (bool, error) { return true, nil } if r.kdht.RoutingTable().Size() == 0 { + err := r.kdht.Bootstrap(ctx) + if err != nil { + return false, err + } return false, nil } return true, nil diff --git a/pkg/routing/routing.go b/pkg/routing/routing.go index c3739f54..fe18c132 100644 --- a/pkg/routing/routing.go +++ b/pkg/routing/routing.go @@ -6,7 +6,7 @@ import ( ) type Router interface { - Ready() (bool, error) + Ready(ctx context.Context) (bool, error) Resolve(ctx context.Context, key string, allowSelf bool, count int) (<-chan netip.AddrPort, error) Advertise(ctx context.Context, keys []string) error } diff --git a/test/e2e/e2e.sh b/test/e2e/e2e.sh index 3305be21..8bd2d473 100755 --- a/test/e2e/e2e.sh +++ b/test/e2e/e2e.sh @@ -39,8 +39,6 @@ else kubectl --kubeconfig $KIND_KUBECONFIG --namespace nginx delete deployments --all kubectl --kubeconfig $KIND_KUBECONFIG --namespace conformance delete jobs --all helm --kubeconfig $KIND_KUBECONFIG uninstall --ignore-not-found --namespace spegel spegel - # Delete lease due to bug causing forcing us to wait for Spegel to fails once. - kubectl --kubeconfig $KIND_KUBECONFIG --namespace spegel delete lease spegel-leader-election # Delete test images from all expect one node for NODE in control-plane worker2 worker3 worker4