libnvidia indexes the required libraries through the cache created by ldconfig. I don’t have the time to list everything I did step by step, or to sift through my configuration to find exactly what I changed, but here are some snippets.
# Overlay that provides the `nvidia-k3s` runtime package and rebuilds
# libnvidia-container and nvidia-container-toolkit from the pinned flake
# inputs, with the ld cache path rewritten to /tmp/ld.so.cache.
nixpkgs.overlays =
  let
    # Shell snippet that swaps the hard-coded ld cache location inside `file`.
    useTmpLdCache = file: ''
      sed -i "s@/etc/ld.so.cache@/tmp/ld.so.cache@" ${file}
    '';
  in
  [
    (final: prev: {
      # Container runtime wrapper built from ./config.toml, delegating to runc.
      nvidia-k3s = with final.pkgs; mkNvidiaContainerPkg {
        name = "nvidia-k3s";
        containerRuntimePath = "runc";
        configTemplate = ./config.toml;
      };

      libnvidia-container = prev.libnvidia-container.overrideAttrs (old: {
        inherit (flakes.libnvidia-container) version;
        src = flakes.libnvidia-container.path;
        # This list replaces the package's original patch list entirely.
        patches = [
          ./libnvidia-container.patch
          ./libnvidia-container-ldcache.patch
          (flakes.nixpkgs.path + "/pkgs/applications/virtualization/libnvidia-container/inline-c-struct.patch")
        ];
        # Appended after any postPatch the original package already defines.
        postPatch = (old.postPatch or "") + useTmpLdCache "src/common.h";
      });

      nvidia-container-toolkit = prev.nvidia-container-toolkit.overrideAttrs (old: {
        inherit (flakes.nvidia-container-toolkit) version;
        src = flakes.nvidia-container-toolkit.path;
        # Appended after any postPatch the original package already defines.
        postPatch = (old.postPatch or "") + useTmpLdCache "internal/ldcache/ldcache.go";
      });
    })
  ];
libnvidia-container-ldcache.patch
diff --git a/src/nvc_ldcache.c b/src/nvc_ldcache.c
index db3b2f6..360fd23 100644
--- a/src/nvc_ldcache.c
+++ b/src/nvc_ldcache.c
@@ -367,7 +367,7 @@ nvc_ldcache_update(struct nvc_context *ctx, const struct nvc_container *cnt)
if (validate_args(ctx, cnt != NULL) < 0)
return (-1);
- argv = (char * []){cnt->cfg.ldconfig, "-f", "/etc/ld.so.conf", "-C", "/etc/ld.so.cache", cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL};
+ argv = (char * []){cnt->cfg.ldconfig, "-C", "/tmp/ld.so.cache", cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL};
if (*argv[0] == '@') {
/*
* We treat this path specially to be relative to the host filesystem.
config.toml
# Configuration template for the NVIDIA container runtime used by the
# nvidia-k3s package. The @name@ tokens look like build-time substitution
# placeholders filled in by the packaging helper (TODO confirm).
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
# Placeholder for the nvidia-container-cli path (presumably substituted at build time).
path = "@nvidia-container-cli@"
environment = []
debug = "/var/log/nvidia-container-toolkit.log"
# Same cache file that the k3s preStart script generates with `ldconfig -C`.
ldcache = "/tmp/ld.so.cache"
load-kmods = true
#no-cgroups = false
#user = "root:video"
# The first "@" is literal: libnvidia-container treats an ldconfig path that
# starts with "@" as relative to the host filesystem. "@glibcbin@" is the
# placeholder that follows it.
ldconfig = "@@glibcbin@/bin/ldconfig"
[nvidia-container-runtime]
debug = "/var/log/nvidia-container-runtime.log"
log-level = "debug"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
# Placeholder; the overlay passes containerRuntimePath = "runc" for nvidia-k3s.
"@containerRuntimePath@",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
…
# Force the k3s unit's ordering and "wants" dependencies to be empty,
# overriding whatever other modules set for them.
systemd.services.k3s.after = lib.mkForce [];
systemd.services.k3s.wants = lib.mkForce [];
systemd.services.k3s.serviceConfig.KillMode = lib.mkForce "control-group";

# Packages made available on the k3s service's PATH.
systemd.services.k3s.path = with pkgs; [
  glibc

  # NVIDIA Container Support
  nvidia-k3s

  # Expose NVIDIA binaries to PATH
  (config.hardware.nvidia.package.overrideAttrs (previousAttrs: {
    # Driver package rebuilt with the local builder script.
    builder = ./nvidia-builder.sh;
  }))
];

# The preStart script writes its ld cache and library symlinks under /tmp.
systemd.services.k3s.serviceConfig.PrivateTmp = true;
# Before k3s starts: collect the NVIDIA driver libraries as symlinks in
# /tmp/nvidia-libs and build /tmp/ld.so.cache from that directory.
systemd.services.k3s.preStart =
  let
    # NVIDIA driver package rebuilt with the local builder script.
    nvidiaDriver = config.hardware.nvidia.package.overrideAttrs (previousAttrs: {
      builder = ./nvidia-builder.sh;
    });
  in
  ''
    # ldconfig wants to generate symlinks
    rm -rf /tmp/nvidia-libs
    mkdir -p /tmp/nvidia-libs
    for thing in ${nvidiaDriver}/lib/*;
    do
    ln -s $(readlink -f $thing) /tmp/nvidia-libs/$(basename $thing)
    done
    echo "Initializing cache with directory"
    ldconfig -C /tmp/ld.so.cache /tmp/nvidia-libs
    echo "Printing ld cache contents"
    ldconfig -C /tmp/ld.so.cache --print-cache
  '';
nvidia-builder.sh is just a copy of the driver package's builder script with the patchelf step removed, since patching the libraries would break loading them on non-NixOS distributions.