Hey there,
I am trying to create a module, which allows me to supervise systemd services and connect them to https://healthchecks.io/.
To do this, I want to achieve the following:
- When a supervised unit is started, I want to trigger the `PATH/start` endpoint.
- When a supervised unit exits (with or without error), I want to trigger the `PATH/fail` endpoint.
- During the runtime of a supervised unit, I want to check its status periodically. If everything is fine, `PATH/success` (or no suffix) should be called, else `PATH/fail`.
The PATH should be confidential, because I might want to make my configuration public, and trolls could use the path to trigger notifications.
The module should be “reusable” like modules normally are, so that I can just add a config to my service, and my “watchover” service and timer are created automatically.
However, they should only exist and run if my service is actually defined and active.
If I deactivate a service, I do not want to get notifications, because I know that it is not running.
This is my current module:
{ config, lib, pkgs, ... }: let
inherit (lib) mkOption types;
# Predicate over systemd-watchover entries: true when a systemd service of the
# same name is present in the evaluated configuration and is not disabled.
# The entry's own settings are not inspected.
# NOTE(review): this reads config.systemd.services. The author reports an
# infinite recursion when it is used to filter the attributes that are
# themselves defined under systemd.services.
isValidWatchover = name: _entry:
  let
    target = config.systemd.services.${name} or null;
  in
    if target == null then false else target.enable or true;
# Pings "<base URL>/start". The base URL is read from the systemd credential
# named "url" ($CREDENTIALS_DIRECTORY/url). A failed ping is ignored, so the
# script always exits successfully.
# The URL is deliberately kept out of the log line: it is meant to stay
# confidential, and anything echoed here is written to the unit's log.
pingHealthchecksStart = pkgs.writeScript "ping-healthchecks-start.sh" ''
  #!${pkgs.runtimeShell}
  url="$(cat "$CREDENTIALS_DIRECTORY/url")"
  echo "STARTING: pinging healthchecks (/start)"
  ${pkgs.curl}/bin/curl "$url/start" || true
'';
# Pings "<base URL>/fail". The base URL is read from the systemd credential
# named "url" ($CREDENTIALS_DIRECTORY/url). A failed ping is ignored, so the
# script always exits successfully.
# The URL is deliberately kept out of the log line: it is meant to stay
# confidential, and anything echoed here is written to the unit's log.
pingHealthchecksFail = pkgs.writeScript "ping-healthchecks-fail.sh" ''
  #!${pkgs.runtimeShell}
  url="$(cat "$CREDENTIALS_DIRECTORY/url")"
  echo "FAILING: pinging healthchecks (/fail)"
  ${pkgs.curl}/bin/curl "$url/fail" || true
'';
# Pings the bare base URL (the "success" endpoint). The base URL is read from
# the systemd credential named "url" ($CREDENTIALS_DIRECTORY/url).
# Unlike the start/fail scripts, this one does not swallow errors: its exit
# status is curl's exit status.
# The URL is deliberately kept out of the log line: it is meant to stay
# confidential, and anything echoed here is written to the unit's log.
pingHealthchecksOk = pkgs.writeScript "ping-healthchecks-ok.sh" ''
  #!${pkgs.runtimeShell}
  url="$(cat "$CREDENTIALS_DIRECTORY/url")"
  echo "OK: pinging healthchecks (success)"
  ${pkgs.curl}/bin/curl "$url"
'';
# Builds the { name, value } pair (for lib.mapAttrs') that defines the oneshot
# unit "watchover-<name>", which performs one health check of service <name>.
#
#   name     - name of the watched systemd service (without ".service")
#   cfgEntry - the matching systemd-watchover entry
#
# The generated script:
#   1. pings /fail and exits 0 if the watched unit is not active;
#   2. otherwise runs cfgEntry.script (through `docker exec` / `podman exec`
#      when runInContainer is set) and pings the success endpoint if it passes;
#   3. pings /fail on a failing check only once the unit has been active for
#      more than start_period seconds; a failing check before that is ignored.
makeWatchoverService = name: cfgEntry:
  let
    serviceCfg = config.systemd.services.${name} or null;

    # User/group resolution order: explicit option, then root for container
    # checks, then whatever the watched service declares, then root.
    effectiveUser =
      if cfgEntry.user != null then cfgEntry.user
      else if cfgEntry.runInContainer then "root"
      else if serviceCfg != null && serviceCfg.serviceConfig ? User then serviceCfg.serviceConfig.User
      else "root";
    effectiveGroup =
      if cfgEntry.group != null then cfgEntry.group
      else if cfgEntry.runInContainer then "root"
      else if serviceCfg != null && serviceCfg.serviceConfig ? Group then serviceCfg.serviceConfig.Group
      else "root";

    # Container name = unit name without its leading backend prefix. Only a
    # leading "docker-"/"podman-" is stripped, so a container whose own name
    # contains one of these words is left intact.
    rawName = lib.removePrefix "podman-" (lib.removePrefix "docker-" name);

    # The command that performs the actual check, built once and shared by
    # both branches of the script below.
    checkCmd =
      if cfgEntry.runInContainer then
        let
          backend = config.virtualisation.oci-containers.backend or "docker";
          runtime =
            if backend == "docker"
            then "${pkgs.docker}/bin/docker"
            else "${pkgs.podman}/bin/podman";
        in "${runtime} exec ${rawName} ${cfgEntry.script}"
      else cfgEntry.script;

    healthcheckScript = pkgs.writeScript "check-${name}.sh" ''
      #!${pkgs.runtimeShell}
      set -e
      if ! systemctl is-active --quiet ${name}; then
        ${pingHealthchecksFail}
        exit 0
      fi
      # Seconds since the unit became active: system uptime minus the unit's
      # activation timestamp (reported by systemd in microseconds).
      uptime=$(systemctl show -p ActiveEnterTimestampMonotonic --value ${name})
      now=$(cat /proc/uptime | cut -d "." -f1)
      delta=$((now - uptime / 1000000))
      if [ $delta -gt $(( ${toString cfgEntry.start_period} )) ]; then
        ${checkCmd} >/dev/null 2>&1 && ${pingHealthchecksOk} || ${pingHealthchecksFail}
      else
        # Still inside the start period: a failing check must neither ping
        # /fail nor make this oneshot unit fail, hence the "|| true".
        ${checkCmd} >/dev/null 2>&1 && ${pingHealthchecksOk} || true
      fi
    '';
  in {
    name = "watchover-${name}";
    value = {
      description = "Watchover for ${name}";
      after = [ "${name}.service" ];
      partOf = [ "${name}.service" ];
      serviceConfig = {
        Type = "oneshot";
        # Hands the file containing the ping URL to the ping scripts as the
        # credential "url".
        LoadCredential = [
          "url:${cfgEntry.url}"
        ];
        ExecStart = "${healthcheckScript}";
        User = effectiveUser;
        Group = effectiveGroup;
        TimeoutSec = cfgEntry.timeout;
      };
    };
  };
# Extra settings merged into the watched service itself: it gets the ping URL
# file as the credential "url", pings /start after starting and pings /fail
# after stopping. The service name argument is not needed here.
# NOTE(review): the author reports that the ExecStopPost command does not see
# the credential - confirm against the systemd documentation.
makeServiceModification = _name: cfgEntry:
  let
    urlCredential = "url:${cfgEntry.url}";
  in {
    serviceConfig.LoadCredential = [ urlCredential ];
    serviceConfig.ExecStartPost = [ "${pingHealthchecksStart}" ];
    serviceConfig.ExecStopPost = [ "${pingHealthchecksFail}" ];
  };
# Builds the { name, value } pair (for lib.mapAttrs') of the timer that
# triggers "watchover-<name>.service": first cfgEntry.interval seconds after
# boot, then again the same number of seconds after each activation.
makeWatchoverTimer = name: cfgEntry:
  let
    unitName = "watchover-${name}";
    period = "${toString cfgEntry.interval}s";
  in {
    name = unitName;
    value = {
      wantedBy = [ "timers.target" ];
      timerConfig = {
        OnBootSec = period;
        OnUnitActiveSec = period;
        Unit = "${unitName}.service";
      };
    };
  };
in {
# One entry per watched systemd service; the attribute name is the service
# name (without ".service").
options.systemd-watchover = mkOption {
  description = "Health checks to run against systemd services, keyed by service name.";
  type = types.attrsOf (types.submodule ({ name, ... }: {
    options = {
      url = mkOption {
        type = types.path;
        description = ''
          Absolute path of a file that contains the base ping URL. The file is
          handed to the units through systemd's LoadCredential, so the URL
          itself never appears in the configuration.
          Pass it as a string (for example "/run/secrets/healthchecks-url"),
          not as a Nix path literal: a path literal would be copied into the
          Nix store.
        '';
      };
      script = mkOption {
        type = types.str;
        description = "Inline script or command string to run.";
      };
      runInContainer = mkOption {
        type = types.bool;
        default = false;
        description = ''
          Run the check inside the container via `docker exec` / `podman exec`
          instead of directly on the host.
        '';
      };
      user = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = ''
          User the check runs as. When null: root for container checks,
          otherwise the watched service's User, falling back to root.
        '';
      };
      group = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = ''
          Group the check runs as. When null: root for container checks,
          otherwise the watched service's Group, falling back to root.
        '';
      };
      interval = mkOption {
        type = types.int;
        description = "Seconds between two checks (also the delay after boot before the first one).";
      };
      timeout = mkOption {
        type = types.int;
        description = "Value of TimeoutSec for the check unit.";
      };
      retries = mkOption {
        type = types.int;
        description = "Number of retries. Not referenced by the checks in this module.";
      };
      start_period = mkOption {
        type = types.int;
        description = ''
          Seconds after the watched service became active during which a
          failing check is not reported.
        '';
      };
    };
  }));
  default = {};
};
# Wires everything up, but only when at least one watchover entry is declared.
#
# The attribute NAMES defined under systemd.services must not be derived from
# config.systemd.services (for example with `filterAttrs isValidWatchover`):
# the module system needs those names to compute config.systemd.services, so
# the evaluation would depend on its own result (infinite recursion).
# Every entry is therefore defined unconditionally, and the watchover unit is
# switched off through its `enable` option instead. That only reads a VALUE of
# the watched service, which can be evaluated once the names are known.
config = lib.mkIf (config.systemd-watchover != {}) {
  systemd.services = lib.mkMerge [
    # Create watchover services
    (lib.mapAttrs' makeWatchoverService config.systemd-watchover)
    # Modify the original services
    (lib.mapAttrs makeServiceModification config.systemd-watchover)
    # Only generate a watchover service while the service it watches is enabled
    (lib.mapAttrs'
      (name: _: {
        name = "watchover-${name}";
        value.enable = config.systemd.services.${name}.enable;
      })
      config.systemd-watchover)
  ];
  # Set up timers, only for entries whose watched service is enabled
  systemd.timers = lib.mapAttrs' makeWatchoverTimer
    (lib.filterAttrs isValidWatchover config.systemd-watchover);
};
}
However, this config currently has two issues:
- The `ExecStopPost` command does not get the credential, because this is not intended.
- I cannot really use my `isValidWatchover`, because this creates an infinite recursion.
I do not have any idea, how I can solve either of these problems.
The second one might be straight up impossible, because I would need to “force” Nix to evaluate `systemd.services`
to calculate the filtered services, while ignoring the added “watchover services”. In an imperative language this would not be a problem, but because of Nix’s lazy evaluation, it might not be possible.
If you have ideas on how to solve this, please let me know!
Help is greatly appreciated…