I think I have a basic setup for this:
{ pkgs, lib, config, ... }:
let
  kubeMasterIP = "172.17.179.190";
  kubeMasterHostname = "api.kube";
  kubeMasterAPIServerPort = 6443;

  mkNode = { ip, hostname, port ? 6443 }: {
    autoStart = true;
    privateNetwork = true;
    hostAddress = kubeMasterIP;
    localAddress = ip;
    config = { config, pkgs, ... }: {
      # resolve master hostname
      networking.extraHosts = "${kubeMasterIP} ${kubeMasterHostname}";

      services.kubernetes = {
        roles = [ "node" ];
        masterAddress = kubeMasterHostname;
        # point kubelet and other services to kube-apiserver
        kubelet.kubeconfig.server = "https://${kubeMasterHostname}:${toString kubeMasterAPIServerPort}";
        apiserverAddress = "https://${kubeMasterHostname}:${toString kubeMasterAPIServerPort}";
        kubelet.extraOpts = "--fail-swap-on=false";
      };

      system.stateVersion = "22.05";

      networking.firewall = {
        enable = true;
        allowedTCPPorts = [
          6443  # kube-apiserver
          2379  # etcd client
          2380  # etcd peer
          10250 # kubelet
          10259 # kube-scheduler
          10257 # kube-controller-manager
        ];
      };

      # Manually configure nameserver. Using resolved inside the container seems to fail
      # currently
      environment.etc."resolv.conf".text = "nameserver 1.1.1.1";
    };
  };
in
{
  networking = {
    nat = {
      enable = true;
      internalInterfaces = [ "ve-+" ];
      # externalInterface = "ens3";
      # Lazy IPv6 connectivity for the container
      enableIPv6 = true;
    };
    extraHosts = ''
      ${kubeMasterIP} ${kubeMasterHostname}
      127.0.1.1 nixos. nixos
      # copied from wsl host file:
      192.168.188.29 host.docker.internal
      192.168.188.29 gateway.docker.internal
      127.0.0.1 kubernetes.docker.internal
      # The following lines are desirable for IPv6 capable hosts
      ::1 ip6-localhost ip6-loopback
      fe00::0 ip6-localnet
      ff00::0 ip6-mcastprefix
      ff02::1 ip6-allnodes
      ff02::2 ip6-allrouters
    '';
    firewall = {
      enable = true;
      allowedTCPPorts = [ 80 ];
    };
  };

  wsl.wslConf.network.generateHosts = false; # for wsl!

  # packages for administration tasks
  environment.systemPackages = with pkgs; [
    kompose
    kubectl
    kubernetes
  ];

  services.kubernetes = {
    roles = [ "master" "node" ];
    masterAddress = kubeMasterHostname;
    apiserverAddress = "https://${kubeMasterHostname}:${toString kubeMasterAPIServerPort}";
    apiserver = {
      securePort = kubeMasterAPIServerPort;
      advertiseAddress = kubeMasterIP;
    };
    kubelet.extraOpts = "--fail-swap-on=false";
  };

  containers.kubenode1 = mkNode { ip = "10.1.1.1"; hostname = "node1.kube"; };
  containers.kubenode2 = mkNode { ip = "10.1.1.2"; hostname = "node2.kube"; };
  containers.kubenode3 = mkNode { ip = "10.1.1.3"; hostname = "node3.kube"; };
}
Some of the /etc/hosts modifications are a little weird because I am currently running this via WSL.
This setup still requires me to manually copy the CA cert to the master node (see etcd not init etcd.pem with services.kubernetes.roles master · Issue #59364 · NixOS/nixpkgs · GitHub) and to every container node as well. Additionally, I need to run echo TOKEN | nixos-kubernetes-node-join
on every container too.
Is there a way I could do these steps declaratively too?
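The closest thing I can come up with myself is an untested sketch: extend mkNode so the master's bootstrap token is bind-mounted into each node container and piped into the join script by a oneshot unit instead of by hand. This assumes the token lives at /var/lib/kubernetes/secrets/apitoken.secret on the host (not verified) and that nixos-kubernetes-node-join is on the container's PATH, as it is when I run it manually:
  # Untested sketch of a "declarative" node join.
  mkNode = { ip, hostname, port ? 6443 }: {
    # ...existing container settings from above...
    bindMounts."/run/apitoken.secret" = {
      hostPath = "/var/lib/kubernetes/secrets/apitoken.secret"; # assumed token location on the host
      isReadOnly = true;
    };
    config = { config, pkgs, ... }: {
      # ...existing node config from above...
      systemd.services.kube-join = {
        description = "Join the cluster using the bind-mounted bootstrap token";
        wantedBy = [ "multi-user.target" ];
        path = [ "/run/current-system/sw" ]; # nixos-kubernetes-node-join sits in the system profile
        serviceConfig.Type = "oneshot";
        script = ''
          cat /run/apitoken.secret | nixos-kubernetes-node-join
        '';
      };
    };
  };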
I am also not so sure about my port forwarding ^^ (see Ports and Protocols | Kubernetes).
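For reference, my reading of that page (untested) is that the worker containers mainly need the kubelet port plus the NodePort range, while the etcd, scheduler and controller-manager ports only belong on the master:
  # Rough split of the documented ports (my reading, untested).
  # Control plane (the host in my setup):
  #   6443        kube-apiserver
  #   2379-2380   etcd client/peer
  #   10250       kubelet API
  #   10257       kube-controller-manager
  #   10259       kube-scheduler
  # Workers (the containers):
  #   10250       kubelet API
  #   10256       kube-proxy health
  #   30000-32767 NodePort services
  networking.firewall = {
    allowedTCPPorts = [ 10250 10256 ];                        # worker example
    allowedTCPPortRanges = [ { from = 30000; to = 32767; } ]; # NodePort range
  };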
I would really appreciate it if someone could take a look at this.
While trying to run actual pods on the nodes, I noticed that this sadly doesn't work as well as I hoped ^^
My current config looks like this:
{ pkgs, lib, config, ... }:
let
  kubeMasterIP = "172.17.179.190";
  kubeMasterHostname = "api.kube";
  kubeMasterAPIServerPort = 6443;

  mkNode = { ip, port ? 6443 }: {
    autoStart = true;
    privateNetwork = true;
    hostAddress = kubeMasterIP;
    localAddress = ip;
    config = { config, pkgs, ... }: {
      # resolve master hostname
      networking.extraHosts = "${kubeMasterIP} ${kubeMasterHostname}";

      services.kubernetes = {
        roles = [ "node" ];
        masterAddress = kubeMasterHostname;
        # point kubelet and other services to kube-apiserver
        kubelet.kubeconfig.server = "https://${kubeMasterHostname}:${toString kubeMasterAPIServerPort}";
        apiserverAddress = "https://${kubeMasterHostname}:${toString kubeMasterAPIServerPort}";
        kubelet.extraOpts = "--fail-swap-on=false";
      };

      system.stateVersion = "22.05";

      networking.firewall = {
        enable = true;
        allowedTCPPorts = [
          6443  # kube-apiserver
          2379  # etcd client
          2380  # etcd peer
          10250 # kubelet
          10259 # kube-scheduler
          10257 # kube-controller-manager
        ];
      };

      # Manually configure nameserver. Using resolved inside the container seems to fail
      # currently
      environment.etc."resolv.conf".text = "nameserver 1.1.1.1";
    };
  };
in
{
  networking = {
    extraHosts = ''
      ${kubeMasterIP} ${kubeMasterHostname}
    '';
  };

  # packages for administration tasks
  environment.systemPackages = with pkgs; [
    kompose
    kubectl
    kubernetes
  ];

  services.kubernetes = {
    roles = [ "master" "node" ];
    masterAddress = kubeMasterHostname;
    apiserverAddress = "https://${kubeMasterHostname}:${toString kubeMasterAPIServerPort}";
    apiserver = {
      securePort = kubeMasterAPIServerPort;
      advertiseAddress = kubeMasterIP;
    };
    kubelet.extraOpts = "--fail-swap-on=false";
  };

  containers.kubenode1 = mkNode { ip = "172.17.176.2"; };
  containers.kubenode2 = mkNode { ip = "172.17.176.3"; };
  containers.kubenode3 = mkNode { ip = "172.17.176.4"; };
}
But that just results in:
Mar 23 13:47:14 kubenode3 kubelet[282]: E0323 13:47:14.635243 282 remote_runtime.go:222] "RunPodSandbox from runtime service failed" err="rpc error: code = Unknown desc = failed to create containerd task: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: error during container init: unable to join session keyring: unable to create session key: operation not permitted: unknown"
Mar 23 13:47:14 kubenode3 kubelet[282]: E0323 13:47:14.635330 282 kuberuntime_sandbox.go:71] "Failed to create sandbox for pod" err="rpc error: code = Unknown desc = failed to create containerd task: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: error during container init: unable to join session keyring: unable to create session key: operation not permitted: unknown" pod="default/my-hello-65985ddd4d-4m6xm"
Mar 23 13:47:14 kubenode3 kubelet[282]: E0323 13:47:14.635359 282 kuberuntime_manager.go:772] "CreatePodSandbox for pod failed" err="rpc error: code = Unknown desc = failed to create containerd task: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: error during container init: unable to join session keyring: unable to create session key: operation not permitted: unknown" pod="default/my-hello-65985ddd4d-4m6xm"
Mar 23 13:47:14 kubenode3 kubelet[282]: E0323 13:47:14.635458 282 pod_workers.go:965] "Error syncing pod, skipping" err="failed to \"CreatePodSandbox\" for \"my-hello-65985ddd4d-4m6xm_default(4ca96dc1-858c-496c-be2e-993353e909c8)\" with CreatePodSandboxError: \"Failed to create sandbox for pod \\\"my-hello-65985ddd4d-4m6xm_default(4ca96dc1-858c-496c-be2e-993353e909c8)\\\": rpc error: code = Unknown desc = failed to create containerd task: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: error during container init: unable to join session keyring: unable to create session key: operation not permitted: unknown\"" pod="default/my-hello-65985ddd4d-4m6xm" podUID=4ca96dc1-858c-496c-be2e-993353e909c8
Mar 23 13:47:17 kubenode3 kubelet[282]: W0323 13:47:17.243071 282 manager.go:1174] Failed to process watch event {EventType:0 Name:/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod4ca96dc1_858c_496c_be2e_993353e909c8.slice/cri-containerd-06300a7e340c43c70d7292eb46de969a3de45dc464db4b05f3b3ca8c29792e99.scope WatchSource:0}: container "06300a7e340c43c70d7292eb46de969a3de45dc464db4b05f3b3ca8c29792e99" in namespace "k8s.io": not found
Mar 23 13:47:27 kubenode3 kubelet[282]: E0323 13:47:27.279659 282 remote_runtime.go:222] "RunPodSandbox from runtime service failed" err="rpc error: code = Unknown desc = failed to create containerd task: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: error during container init: unable to join session keyring: unable to create session key: operation not permitted: unknown"
The only related issue I could find is [Podman] cluster cannot be created due to `create session key: operation not permitted` · Issue #1929 · kubernetes-sigs/kind · GitHub,
but that seems to have been fixed long ago?
I tried to put the cluster on a different machine and just access it via a macvlan, but that results in the same errors:
{ pkgs, lib, config, ... }:
let
  kubeMasterIP = "192.168.188.89";
  kubeMasterGateway = "192.168.188.1";
  kubeMasterHostname = "api.kube";
  kubeMasterAPIServerPort = 6443;

  mkNode = { name, ip, port ? 6443 }: {
    autoStart = true;
    macvlans = [ "eno1" ];
    timeoutStartSec = "10min";
    config = { config, pkgs, ... }: {
      # resolve host
      networking = {
        extraHosts = ''
          ${kubeMasterIP} ${kubeMasterHostname}
        '';
        hostName = name;
        defaultGateway = kubeMasterGateway;
        interfaces = {
          mv-eno1.ipv4.addresses = [ { address = ip; prefixLength = 24; } ];
        };
      };

      services.kubernetes = {
        roles = [ "node" ];
        masterAddress = kubeMasterHostname;
        apiserverAddress = "https://${kubeMasterHostname}:${toString kubeMasterAPIServerPort}";
        kubelet = {
          # point kubelet and other services to kube-apiserver
          kubeconfig.server = "https://${kubeMasterHostname}:${toString kubeMasterAPIServerPort}";
          extraOpts = "--fail-swap-on=false";
        };
      };

      networking.firewall = {
        enable = true;
        allowedTCPPorts = [
          config.services.kubernetes.kubelet.port
          config.services.kubernetes.kubelet.healthz.port
        ];
        allowedTCPPortRanges = [
          { from = 30000; to = 32767; }
        ];
      };

      services.avahi = {
        enable = true;
        publish = {
          enable = true;
          addresses = true;
          workstation = true;
        };
      };

      system.stateVersion = "22.05";

      # Manually configure nameserver. Using resolved inside the container seems to fail
      # currently
      environment.etc."resolv.conf".text = "nameserver 1.1.1.1";
    };
  };
in
{
  networking = {
    defaultGateway = kubeMasterGateway;
    # create macvlan for containers
    macvlans.mv-eno1-host = {
      interface = "eno1";
      mode = "bridge";
    };
    interfaces = {
      eno1.ipv4.addresses = lib.mkForce [];
      mv-eno1-host.ipv4.addresses = [ { address = kubeMasterIP; prefixLength = 24; } ];
    };
    extraHosts = ''
      ${kubeMasterIP} ${kubeMasterHostname}
    '';
    firewall = {
      enable = true;
      allowedTCPPorts = [
        config.services.kubernetes.apiserver.securePort
        config.services.kubernetes.controllerManager.securePort
        config.services.kubernetes.scheduler.port
        config.services.cfssl.port
      ];
      allowedTCPPortRanges = [
        { from = 2379; to = 2380; }
      ];
    };
  };

  services.avahi = {
    enable = true;
    publish = {
      enable = true;
      addresses = true;
      workstation = true;
    };
  };

  # packages for administration tasks
  environment.systemPackages = with pkgs; [
    kompose
    kubectl
    kubernetes
  ];

  services.kubernetes = {
    roles = [ "master" ];
    masterAddress = kubeMasterHostname;
    apiserverAddress = "https://${kubeMasterHostname}:${toString kubeMasterAPIServerPort}";
    apiserver = {
      securePort = kubeMasterAPIServerPort;
      advertiseAddress = kubeMasterIP;
    };
    kubelet.extraOpts = "--fail-swap-on=false";
  };

  containers.kubenode1 = mkNode { name = "node1"; ip = "192.168.188.101"; };
  containers.kubenode2 = mkNode { name = "node2"; ip = "192.168.188.102"; };
  containers.kubenode3 = mkNode { name = "node3"; ip = "192.168.188.103"; };
}
This entire time I had missed that flannel, at least on the host, has a lot of issues:
Mär 23 22:41:45 gestalt systemd[1]: Started Flannel Service.
Mär 23 22:41:45 gestalt flannel[8135]: I0323 22:41:45.780007 8135 main.go:204] CLI flags config: {etcdEndpoints:http://127.0.0.1:4001,http://127.0.0.1:2379 etcdPrefix:/coreos.com/network etcdKeyfile: etcdCertfile: etcdCAFile: etcdUsername: etcdPassword: version:false kubeSubnetMgr:true kubeApiUrl: kubeAnnotationPrefix:flannel.alpha.coreos.com kubeConfigFile:/nix/store/z38zfzsr42a7pbgyg490nmm85gp42v7l-flannel-kubeconfig iface:[] ifaceRegex:[] ipMasq:false ifaceCanReach: subnetFile:/run/flannel/subnet.env publicIP: publicIPv6: subnetLeaseRenewMargin:60 healthzIP:0.0.0.0 healthzPort:0 iptablesResyncSeconds:5 iptablesForwardRules:true netConfPath:/etc/kube-flannel/net-conf.json setNodeNetworkUnavailable:true}
Mär 23 22:41:45 gestalt flannel[8135]: I0323 22:41:45.780771 8135 kube.go:126] Waiting 10m0s for node controller to sync
Mär 23 22:41:45 gestalt flannel[8135]: I0323 22:41:45.780788 8135 kube.go:420] Starting kube subnet manager
Mär 23 22:41:46 gestalt flannel[8135]: I0323 22:41:46.781139 8135 kube.go:133] Node controller sync successful
Mär 23 22:41:46 gestalt flannel[8135]: I0323 22:41:46.781196 8135 main.go:224] Created subnet manager: Kubernetes Subnet Manager - gestalt
Mär 23 22:41:46 gestalt flannel[8135]: I0323 22:41:46.781214 8135 main.go:227] Installing signal handlers
Mär 23 22:41:46 gestalt flannel[8135]: I0323 22:41:46.781461 8135 main.go:467] Found network config - Backend type: vxlan
Mär 23 22:41:46 gestalt flannel[8135]: I0323 22:41:46.781508 8135 match.go:206] Determining IP address of default interface
Mär 23 22:41:46 gestalt flannel[8135]: I0323 22:41:46.782460 8135 match.go:259] Using interface with name mv-eno1-host and address 192.168.188.89
Mär 23 22:41:46 gestalt flannel[8135]: I0323 22:41:46.782523 8135 match.go:281] Defaulting external address to interface address (192.168.188.89)
Mär 23 22:41:46 gestalt flannel[8135]: I0323 22:41:46.782623 8135 vxlan.go:138] VXLAN config: VNI=1 Port=0 GBP=false Learning=false DirectRouting=false
Mär 23 22:41:46 gestalt flannel[8135]: E0323 22:41:46.783233 8135 main.go:327] Error registering network: failed to acquire lease: node "gestalt" not found
Mär 23 22:41:46 gestalt flannel[8135]: I0323 22:41:46.783372 8135 main.go:447] Stopping shutdownHandler...
Mär 23 22:41:46 gestalt flannel[8135]: W0323 22:41:46.783480 8135 reflector.go:347] k8s.io/client-go@v0.25.2/tools/cache/reflector.go:169: watch of *v1.Node ended with: an error on the server ("unable to decode an event from the watch stream: context canceled") has prevented the request from succeeding
Mär 23 22:41:46 gestalt systemd[1]: flannel.service: Main process exited, code=exited, status=1/FAILURE
Mär 23 22:41:46 gestalt systemd[1]: flannel.service: Failed with result 'exit-code'.
Might this be related to CIDR assignment? See k8s 1.19.0 with kube-flannel 0.12 Error registering network: failed to acquire lease: node "nodeName" pod cidr not assigned · Issue #1344 · flannel-io/flannel · GitHub.
It seems like you have a naming issue… flannel tries to find a node named “gestalt”, but I don't see it among your nodes. Also, if the nodes share the same layer 2 network (Ethernet frames), as in your simulation, flannel can be configured with the host-gw backend instead, which is a bit simpler to set up… I would give it a try, something like the sketch below…
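On NixOS that would presumably look roughly like this; I have not tested it against the kubernetes module, which may already pin a backend (hence the mkForce):
  # Sketch: use flannel's host-gw backend when all nodes share one layer 2 segment.
  services.flannel.backend = lib.mkForce {
    Type = "host-gw";
  };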
For two years now I have been using k3s instead of the full-blown distribution. It is a single executable, it is in my opinion a bit simpler to manage, and it comes with a local-path storage provisioner out of the box; see the sketch below.
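A minimal k3s server on NixOS is roughly this (sketch using the module defaults, i.e. the bundled containerd, flannel and local-path provisioner):
  # Sketch: single-node k3s server with NixOS defaults.
  services.k3s = {
    enable = true;
    role = "server";
  };
  environment.systemPackages = [ pkgs.k3s ]; # provides "k3s kubectl ..."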
gestalt is the master node, running on the host itself; it only has the master role, so it presumably never gets registered as a node, which would explain why flannel cannot find it. I might come back to this eventually.
For now I switched to k3s and hit the exact same runc error. The problem is that Docker/containerd won't work inside nixos-containers by default. But I did manage to get it working thanks to this thread: Podman/docker in nixos container (ideally in unprivileged one)? - #6 by ndreas
My whole solution looks like this now:
{ pkgs, lib, config, ... }:
let
  kubeMasterIP = "192.168.188.89";
  kubeMasterGateway = "192.168.188.1";
  kubeMasterHostname = "gestalt.local";
  kubeMasterAPIServerPort = 6443;

  nspawn-config-text = ''
    [Exec]
    SystemCallFilter=add_key keyctl bpf
  '';

  mkNode = { ip, port ? 6443 }: {
    # use macvlan
    autoStart = true;
    macvlans = [ "eno1" ];
    timeoutStartSec = "10min";

    # enable nested containers https://wiki.archlinux.org/title/systemd-nspawn#Run_docker_in_systemd-nspawn
    enableTun = true;
    additionalCapabilities = [ "all" ];
    allowedDevices = [
      { node = "/dev/fuse"; modifier = "rwm"; }
      { node = "/dev/mapper/control"; modifier = "rwm"; }
    ];
    bindMounts = {
      "${config.sops.secrets.k3s-server-token.path}" = {
        hostPath = config.sops.secrets.k3s-server-token.path;
        isReadOnly = true;
      };
      dev-fuse = { hostPath = "/dev/fuse"; mountPoint = "/dev/fuse"; };
      dev-mount = { hostPath = "/dev/mapper"; mountPoint = "/dev/mapper"; };
    };

    config = { config, pkgs, ... }: {
      # resolve host
      networking = {
        extraHosts = ''
          ${kubeMasterIP} ${kubeMasterHostname}
        '';
        defaultGateway = kubeMasterGateway;
        interfaces = {
          mv-eno1.ipv4.addresses = [ { address = ip; prefixLength = 24; } ];
        };
      };

      virtualisation.containerd.enable = true;
      virtualisation.containerd.settings = {
        version = 2;
        plugins."io.containerd.grpc.v1.cri" = {
          cni.conf_dir = "/var/lib/rancher/k3s/agent/etc/cni/net.d/";
          # FIXME: upstream
          cni.bin_dir = "${pkgs.runCommand "cni-bin-dir" {} ''
            mkdir -p $out
            ln -sf ${pkgs.cni-plugins}/bin/* ${pkgs.cni-plugin-flannel}/bin/* $out
          ''}";
        };
      };

      systemd.services.k3s = {
        wants = [ "containerd.service" ];
        after = [ "containerd.service" ];
      };

      services.k3s = {
        enable = true;
        role = "agent";
        tokenFile = /run/secrets/k3s-server-token; # host.config.sops.secrets.k3s-server-token.path; ?
        serverAddr = "https://${kubeMasterHostname}:${toString port}";
        extraFlags = "--node-ip ${toString ip} --container-runtime-endpoint unix:///run/containerd/containerd.sock";
      };

      # packages for administration tasks
      environment.systemPackages = with pkgs; [
        postgresql_15
      ];

      services.avahi = {
        enable = true;
        publish = {
          enable = true;
          addresses = true;
          workstation = true;
        };
      };

      system.stateVersion = "22.05";

      # Manually configure nameserver. Using resolved inside the container seems to fail
      # currently
      environment.etc."resolv.conf".text = "nameserver 1.1.1.1";
    };
  };
in
{
  imports = [ <sops-nix/modules/sops> ];

  networking = {
    defaultGateway = kubeMasterGateway;
    # create macvlan for containers
    macvlans.mv-eno1-host = {
      interface = "eno1";
      mode = "bridge";
    };
    interfaces = {
      eno1.ipv4.addresses = lib.mkForce [];
      mv-eno1-host.ipv4.addresses = [ { address = kubeMasterIP; prefixLength = 24; } ];
    };
    extraHosts = ''
      ${kubeMasterIP} ${kubeMasterHostname}
    '';
    firewall = {
      enable = true;
      allowedTCPPorts = [
        kubeMasterAPIServerPort
        6444 # cacerts
      ];
    };
  };

  services.avahi = {
    enable = true;
    publish = {
      enable = true;
      addresses = true;
      workstation = true;
    };
  };

  sops.secrets.k3s-server-token.sopsFile = ./secrets.yaml;
  sops.age.keyFile = /home/jonaa/.config/sops/age/keys.txt;

  virtualisation.containerd.enable = true;
  virtualisation.containerd.settings = {
    version = 2;
    plugins."io.containerd.grpc.v1.cri" = {
      cni.conf_dir = "/var/lib/rancher/k3s/agent/etc/cni/net.d/";
      # FIXME: upstream
      cni.bin_dir = "${pkgs.runCommand "cni-bin-dir" {} ''
        mkdir -p $out
        ln -sf ${pkgs.cni-plugins}/bin/* ${pkgs.cni-plugin-flannel}/bin/* $out
      ''}";
    };
  };

  systemd.services.k3s = {
    wants = [ "containerd.service" ];
    after = [ "containerd.service" ];
  };

  services.k3s = {
    enable = true;
    role = "server";
    tokenFile = config.sops.secrets.k3s-server-token.path;
    extraFlags = "--disable traefik --flannel-backend=host-gw --container-runtime-endpoint unix:///run/containerd/containerd.sock";
  };

  containers.kube1 = mkNode { ip = "192.168.188.101"; };
  containers.kube2 = mkNode { ip = "192.168.188.102"; };
  containers.kube3 = mkNode { ip = "192.168.188.103"; };

  # enable cgroups v2 in the container
  systemd.services."container@kube1".environment.SYSTEMD_NSPAWN_UNIFIED_HIERARCHY = "1";
  systemd.services."container@kube2".environment.SYSTEMD_NSPAWN_UNIFIED_HIERARCHY = "1";
  systemd.services."container@kube3".environment.SYSTEMD_NSPAWN_UNIFIED_HIERARCHY = "1";

  # allow syscalls via an nspawn config file, because arguments with spaces work badly with containers.example.extraArgs
  environment.etc."systemd/nspawn/kube1.nspawn".text = nspawn-config-text;
  environment.etc."systemd/nspawn/kube2.nspawn".text = nspawn-config-text;
  environment.etc."systemd/nspawn/kube3.nspawn".text = nspawn-config-text;
}
I am not sure if the explicit virtualisation.containerd stuff is actually needed; I will test that later.
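If it turns out not to be needed, I would expect the agent part of mkNode to shrink to roughly this, with k3s running its bundled containerd instead (untested, reusing the same variables as above):
  # Sketch: let k3s use its embedded containerd instead of the NixOS containerd service.
  services.k3s = {
    enable = true;
    role = "agent";
    tokenFile = /run/secrets/k3s-server-token;
    serverAddr = "https://${kubeMasterHostname}:${toString port}";
    extraFlags = "--node-ip ${toString ip}"; # no --container-runtime-endpoint anymore
  };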
Sometimes the pods are not able to resolve gestalt.local for some reason, but otherwise this works flawlessly, and it would probably work with plain k8s as well.
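My guess is that .local names only resolve via mDNS/Avahi, which the pods' resolver never sees; one possible workaround (untested) would be to not rely on the mDNS name inside the cluster at all and point the agents at the master IP directly:
  # Sketch: bypass mDNS (.local) resolution inside the cluster by using the master IP.
  services.k3s.serverAddr = "https://${kubeMasterIP}:${toString kubeMasterAPIServerPort}";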