Simulating a Kubernetes cluster with containers

gestalt is the master node, running directly on the host. I might come back to this eventually.

But for now I switched to k3s and hit the exact same issue. The problem is that Docker won't work inside nixos-containers by default. I did manage to get it working, thanks to this thread: Podman/docker in nixos container (ideally in unprivileged one)? - #6 by ndreas. The key ingredients are allowing a few extra syscalls via an .nspawn file and running the containers with the unified cgroup hierarchy; the configuration below does both.

My whole solution looks like this now:

{ pkgs, lib, config, ... }:
let
  kubeMasterIP = "192.168.188.89";
  kubeMasterGateway = "192.168.188.1";
  kubeMasterHostname = "gestalt.local";
  kubeMasterAPIServerPort = 6443;

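  # systemd-nspawn filters the add_key, keyctl and bpf syscalls by default,
  # but the container runtime inside the nodes needs them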
  nspawn-config-text = ''
    [Exec]
    SystemCallFilter=add_key keyctl bpf
  '';

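  # builds the container definition for one k3s agent node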
  mkNode = { ip, port ? 6443 }: {
    # use macvlan to give each node container its own interface on the LAN
    autoStart = true;
    macvlans = [ "eno1" ];
    timeoutStartSec = "10min";

    # enable nested containers https://wiki.archlinux.org/title/systemd-nspawn#Run_docker_in_systemd-nspawn
    enableTun = true;
    additionalCapabilities = ["all"];

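    # containerd needs fuse (e.g. for fuse-overlayfs) and the device-mapper
    # control node for its snapshotters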
    allowedDevices = [
      { node = "/dev/fuse"; modifier = "rwm"; }
      { node = "/dev/mapper/control"; modifier = "rwm"; }
    ];

    bindMounts = {
      "${config.sops.secrets.k3s-server-token.path}" = {
        hostPath = config.sops.secrets.k3s-server-token.path;
        isReadOnly = true;
      };
      dev-fuse = { hostPath = "/dev/fuse"; mountPoint = "/dev/fuse"; };
      dev-mount = { hostPath = "/dev/mapper"; mountPoint = "/dev/mapper"; };
    };

    config = { config, pkgs, ... }: {
      # resolve the master's hostname and configure this node's macvlan address
      networking = {
        extraHosts = ''
          ${kubeMasterIP} ${kubeMasterHostname}
        '';
        defaultGateway = kubeMasterGateway;
        interfaces = {
          mv-eno1.ipv4.addresses = [ { address = ip; prefixLength = 24; } ];
        };
      };

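      # run a separate containerd instead of the one embedded in k3s and
      # point it at k3s' CNI config directory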
      virtualisation.containerd.enable = true;
      virtualisation.containerd.settings = {
        version = 2;
        plugins."io.containerd.grpc.v1.cri" = {
          cni.conf_dir = "/var/lib/rancher/k3s/agent/etc/cni/net.d/";
          # FIXME: upstream
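          # merge the standard CNI plugins and the flannel plugin into one
          # directory, since bin_dir only accepts a single path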
          cni.bin_dir = "${pkgs.runCommand "cni-bin-dir" {} ''
            mkdir -p $out
            ln -sf ${pkgs.cni-plugins}/bin/* ${pkgs.cni-plugin-flannel}/bin/* $out
          ''}";
        };
      };

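      # make sure our external containerd is up before k3s starts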
      systemd.services.k3s = {
        wants = [ "containerd.service" ];
        after = [ "containerd.service" ];
      };

      services.k3s = {
        enable = true;
        role = "agent";
        # the host's sops secret, bind-mounted at the same path (see bindMounts
        # above); a string rather than a Nix path, so the file is not copied
        # into the world-readable store
        tokenFile = "/run/secrets/k3s-server-token";
        serverAddr = "https://${kubeMasterHostname}:${toString port}";
        extraFlags = "--node-ip ${toString ip} --container-runtime-endpoint unix:///run/containerd/containerd.sock";
      };

      # packages for administration tasks
      environment.systemPackages = with pkgs; [
        postgresql_15
      ];

      services.avahi = {
        enable = true;
        publish = {
          enable = true;
          addresses = true;
          workstation = true;
        };
      };

      system.stateVersion = "22.05";

      # Manually configure nameserver. Using resolved inside the container seems to fail
      # currently
      environment.etc."resolv.conf".text = "nameserver 1.1.1.1";
    };
  };
in
{
  imports = [ <sops-nix/modules/sops> ];

  networking = {
    defaultGateway = kubeMasterGateway;

    # move the host's own address onto a macvlan too, since macvlan
    # children cannot talk to their parent interface directly
    macvlans.mv-eno1-host = {
      interface = "eno1";
      mode = "bridge";
    };
    interfaces = {
      eno1.ipv4.addresses = lib.mkForce [];
      mv-eno1-host.ipv4.addresses = [{ address = kubeMasterIP; prefixLength = 24; }];
    };

    extraHosts = ''
      ${kubeMasterIP} ${kubeMasterHostname}
    '';
    firewall = {
      enable = true;
      allowedTCPPorts = [
        kubeMasterAPIServerPort
        6444 # cacerts
      ];
    };
  };

  services.avahi = {
    enable = true;
    publish = {
      enable = true;
      addresses = true;
      workstation = true;
    };
  };

  sops.secrets.k3s-server-token.sopsFile = ./secrets.yaml;
  sops.age.keyFile = "/home/jonaa/.config/sops/age/keys.txt"; # a string, so the key is not copied into the store

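  # the same external containerd + CNI setup as in the agent nodes, for the server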
  virtualisation.containerd.enable = true;
  virtualisation.containerd.settings = {
    version = 2;
    plugins."io.containerd.grpc.v1.cri" = {
      cni.conf_dir = "/var/lib/rancher/k3s/agent/etc/cni/net.d/";
      # FIXME: upstream
      cni.bin_dir = "${pkgs.runCommand "cni-bin-dir" {} ''
        mkdir -p $out
        ln -sf ${pkgs.cni-plugins}/bin/* ${pkgs.cni-plugin-flannel}/bin/* $out
      ''}";
    };
  };

  systemd.services.k3s = {
    wants = [ "containerd.service" ];
    after = [ "containerd.service" ];
  };

  services.k3s = {
    enable = true;
    role = "server";
    tokenFile = config.sops.secrets.k3s-server-token.path;
    extraFlags = "--disable traefik --flannel-backend=host-gw --container-runtime-endpoint unix:///run/containerd/containerd.sock";
  };

  containers.kube1 = mkNode { ip = "192.168.188.101"; };
  containers.kube2 = mkNode { ip = "192.168.188.102"; };
  containers.kube3 = mkNode { ip = "192.168.188.103"; };

  # enable cgroups v2 in the container
  systemd.services."container@kube1".environment.SYSTEMD_NSPAWN_UNIFIED_HIERARCHY = "1";
  systemd.services."container@kube2".environment.SYSTEMD_NSPAWN_UNIFIED_HIERARCHY = "1";
  systemd.services."container@kube3".environment.SYSTEMD_NSPAWN_UNIFIED_HIERARCHY = "1";

  # allow syscalls via an nspawn config file, because arguments with spaces work bad with containers.example.extraArgs
  environment.etc."systemd/nspawn/kube1.nspawn".text = nspawn-config-text;
  environment.etc."systemd/nspawn/kube2.nspawn".text = nspawn-config-text;
  environment.etc."systemd/nspawn/kube3.nspawn".text = nspawn-config-text;
}

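Since mkNode encapsulates everything node-specific, adding a node is now one line plus the matching cgroup and nspawn lines from above. A hypothetical fourth worker would look like this:

containers.kube4 = mkNode { ip = "192.168.188.104"; };
systemd.services."container@kube4".environment.SYSTEMD_NSPAWN_UNIFIED_HIERARCHY = "1";
environment.etc."systemd/nspawn/kube4.nspawn".text = nspawn-config-text;
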
I am not sure whether the explicit virtualisation.containerd configuration is actually needed; I will test that later.
Sometimes the pods are not able to resolve gestalt.local for some reason (presumably because .local names are resolved via mDNS/Avahi, which the cluster DNS does not forward), but otherwise this works flawlessly and would probably also work with full k8s.
