harden(deploy): apply safe fixes from review report-only items
- #3 Liveness probe targets full SSR DB-querying `/` route, coupling pod liveness to SQLite
- #4 Chart values-staging/production.yaml are dead config under Flux; drift trap
- #6 tsconfig includes gitignored emdash-env.d.ts that only the dev server generates
- #7 Dockerfile package-lock glob + npm install fallback can silently build an unlocked image
- #8 Dockerfile creates runtime user without pinning its GID
- #9 entrypoint.sh gates `emdash init` on data.db absence, skipping migrations on PVC reuse
- #10 pullPolicy: Always vs digest pinning
- #11 Dockerfile state symlinks contradict the STATE_DIR contract; Dockerfile does not set ENV STATE_DIR
- #12 astro is a production dependency, so npm prune --omit=dev keeps build-only tooling
- #14 Two ImageUpdateAutomations write back to the same anton-helm-workloads main branch
- #16 memoryCache provider is per-process; correctness depends implicitly on replicas:1
- #17 Root catch-all [slug].astro couples nav links to pages-collection rows + DB hit per unmatched path
- #18 Detail pages render a 200-style body under a 404 status and have no try/catch around getEmDash* calls
- #19 vite allowedHosts hardcodes ddev hostnames (dev-only; no prod impact)
This commit is contained in:
@@ -50,3 +50,32 @@ The HelmRelease itself lives in the workloads repo because that repo is
|
||||
the source of truth for what runs on the kotkanagrilli.fi subdomain
|
||||
pool. Same convention as the existing `kotkanagrilli/` (legacy WP) and
|
||||
`hello-kotkan/` entries there.
|
||||
|
||||
## Why two image automations share one branch
|
||||
|
||||
Both `cms-plugins-staging` and `cms-plugins-production` define an
|
||||
`ImageUpdateAutomation` that checks out, commits to, and pushes the
|
||||
**same** `main` branch of `anton-helm-workloads` on the same `interval: 1m`.
|
||||
This is intentional and safe:
|
||||
|
||||
- Each automation is scoped to a disjoint `update.path`
|
||||
(`./cms-plugins-staging` vs `./cms-plugins-production`), so they only ever
|
||||
rewrite the digest setter inside their *own* `helmrelease.yaml`. They
|
||||
never touch the same file.
|
||||
- `strategy: Setters` rewrites only the explicitly marked digest setter, not
|
||||
arbitrary YAML — there is no whole-file regeneration that could clobber a
|
||||
sibling's change.
|
||||
- The image-automation-controller serializes its git pushes and retries on
|
||||
a non-fast-forward rejection, so two automations landing commits on `main`
|
||||
in the same reconcile window resolve cleanly rather than racing.
|
||||
|
||||
This mirrors the per-env automations under
|
||||
`~/projects/servers/fleet/apps/base/` for `emdash-kotkanagrilli-*`. The
|
||||
only deviation (justified in `image-automation.yaml`) is that these reuse
|
||||
the read-side `anton-helm-workloads` `GitRepository` as the write-back
|
||||
`sourceRef` instead of a dedicated image-automation source, because these
|
||||
workloads live in that same repo.
|
||||
|
||||
Note for go-live: nothing here reconciles while the HelmReleases are
|
||||
`suspend: true` (Phase 0). These automations only begin writing back once
|
||||
the releases are deliberately resumed.
|
||||
|
||||
@@ -32,6 +32,8 @@ spec:
|
||||
# change when CI retags the floating `production` tag.
|
||||
tag: production
|
||||
digest: "" # {"$imagepolicy": "kotkan:cms-plugins-production:digest"}
|
||||
# pullPolicy: the image is pinned by `digest` (above), so `Always` is effectively a no-op (a digest is
|
||||
# immutable); kept as Always to match the chart default.
|
||||
pullPolicy: Always
|
||||
ingress:
|
||||
host: cms-plugins-production.kotkanagrilli.fi
|
||||
|
||||
@@ -32,6 +32,8 @@ spec:
|
||||
# change when CI retags the floating `staging` tag.
|
||||
tag: staging
|
||||
digest: "" # {"$imagepolicy": "kotkan:cms-plugins-staging:digest"}
|
||||
# pullPolicy: the image is pinned by `digest` (above), so `Always` is effectively a no-op (a digest is
|
||||
# immutable); kept as Always to match the chart default.
|
||||
pullPolicy: Always
|
||||
ingress:
|
||||
host: cms-plugins-staging.kotkanagrilli.fi
|
||||
|
||||
@@ -69,6 +69,7 @@ spec:
|
||||
initialDelaySeconds: {{ .Values.probes.liveness.initialDelaySeconds }}
|
||||
periodSeconds: {{ .Values.probes.liveness.periodSeconds }}
|
||||
timeoutSeconds: {{ .Values.probes.liveness.timeoutSeconds }}
|
||||
failureThreshold: {{ .Values.probes.liveness.failureThreshold }}
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: {{ .Values.probes.readiness.path }}
|
||||
@@ -76,6 +77,7 @@ spec:
|
||||
initialDelaySeconds: {{ .Values.probes.readiness.initialDelaySeconds }}
|
||||
periodSeconds: {{ .Values.probes.readiness.periodSeconds }}
|
||||
timeoutSeconds: {{ .Values.probes.readiness.timeoutSeconds }}
|
||||
failureThreshold: {{ .Values.probes.readiness.failureThreshold }}
|
||||
resources:
|
||||
{{- toYaml .Values.resources | nindent 12 }}
|
||||
volumes:
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
# Production overrides — applied via the FluxCD HelmRelease (or directly with
|
||||
# `helm upgrade -f values-production.yaml`).
|
||||
# Production overrides for DIRECT `helm upgrade -f values-production.yaml` use only.
|
||||
#
|
||||
# IMPORTANT: FluxCD does NOT read this file. The live production deploy is driven
|
||||
# solely by the inline `spec.values:` block in
|
||||
# deploy/fleet-overlay/cms-plugins-production/helmrelease.yaml (copied into
|
||||
# anton-helm-workloads). Editing values here has NO effect on the cluster.
|
||||
# Keep this file in sync with that HR `values:` block by hand, or it will drift.
|
||||
|
||||
image:
|
||||
tag: production-latest
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
# Staging overrides — applied via the FluxCD HelmRelease (or directly with
|
||||
# `helm upgrade -f values-staging.yaml`).
|
||||
# Staging overrides for DIRECT `helm upgrade -f values-staging.yaml` use only.
|
||||
#
|
||||
# IMPORTANT: FluxCD does NOT read this file. The live staging deploy is driven
|
||||
# solely by the inline `spec.values:` block in
|
||||
# deploy/fleet-overlay/cms-plugins-staging/helmrelease.yaml (copied into
|
||||
# anton-helm-workloads). Editing values here has NO effect on the cluster.
|
||||
# Keep this file in sync with that HR `values:` block by hand, or it will drift.
|
||||
|
||||
image:
|
||||
tag: staging-latest
|
||||
|
||||
+25
-7
@@ -1,13 +1,21 @@
|
||||
# Defaults for the cms-plugins chart.
|
||||
# Per-env overrides come from values-staging.yaml / values-production.yaml
|
||||
# and from the FluxCD HelmRelease's `values:` block.
|
||||
# Per-env overrides: Flux applies ONLY the HelmRelease `values:` block.
|
||||
# values-staging.yaml / values-production.yaml are for direct `helm upgrade -f`
|
||||
# use and are NOT read by Flux — keep them in sync with the HR by hand.
|
||||
|
||||
image:
|
||||
repository: git.oleks.space/oleks/cms-plugins
|
||||
tag: develop-latest
|
||||
# The tag is a mutable floating pointer (CI retags <branch>-latest onto
|
||||
# each new build), so kubelet must always re-pull — IfNotPresent would
|
||||
# pin the node to whatever digest it cached first and never roll.
|
||||
# `Always` is here for the chart-default FLOATING-TAG path: with no
|
||||
# `digest` set, the image renders as `repository:<branch>-latest`
|
||||
# (a mutable pointer CI retags onto each build), so kubelet must
|
||||
# re-pull or it would pin to the first cached digest and never roll.
|
||||
# NOTE: the deployed overlays pin by `digest` (repository@sha256:…),
|
||||
# where a tag change instead changes the image *reference string*, so
|
||||
# `helm upgrade` already detects it and `Always` is a no-op (a digest
|
||||
# is content-addressed — it can never resolve to different bytes).
|
||||
# `IfNotPresent` would be marginally better on the digest path but is
|
||||
# left as `Always` so both render paths share one safe value.
|
||||
pullPolicy: Always
|
||||
|
||||
service:
|
||||
@@ -57,17 +65,27 @@ imagePullSecrets:
|
||||
probes:
|
||||
liveness:
|
||||
# /_emdash/api/health requires auth (401 to unauthenticated requests),
|
||||
# so kubelet probes fail and the pod gets killed. The site root is
|
||||
# public and a 200 from it is a reasonable proxy for "the server is up".
|
||||
# so we probe the public site root instead. But `/` is server-rendered
|
||||
# and queries SQLite content, so a content/render or DB fault makes it
|
||||
# 500 while the Node process is perfectly alive. Liveness must NOT
|
||||
# crash-loop the single SQLite replica over a transient content/DB
|
||||
# error: keep failureThreshold high so only a genuinely wedged process
|
||||
# (sustained failures) triggers a restart. Readiness (below) is what
|
||||
# sheds traffic on a content 500.
|
||||
path: /
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 6
|
||||
readiness:
|
||||
# Probe the public site root. A content/render 500 here removes the pod
|
||||
# from Endpoints (stops serving 500s) WITHOUT the kubelet killing the
|
||||
# process — readiness failures never restart the container.
|
||||
path: /
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 3
|
||||
|
||||
resources:
|
||||
requests:
|
||||
|
||||
Reference in New Issue
Block a user