Howdy y’all! I’m trying to, eventually, package the Deep-Live-Cam Python application for NixOS and am having some skill issues with NVIDIA/CUDA.
To be totally clear: it works, very slowly, without the GPU; the main aim is to make it work with the GPU… and ideally faster too.
Configuration files
Relevant system configuration:
/etc/nixos/boot-stuff.nix
/etc/nixos/boot-stuff.nix{
config,
lib,
modulesPath,
...
}:
{
## From: NixOS install via Virt-Manager
imports = [
(modulesPath + "/profiles/qemu-guest.nix")
];
## Enable non-free drivers to make WiFi mostly work
hardware.enableRedistributableFirmware = true;
## Maybe fix WiFi not showing 5ghz networks
hardware.enableAllFirmware = true;
## Enable OpenGL
hardware.graphics.enable = true;
## Load nvidia driver for Xorg and Wayland
services.xserver.videoDrivers = [ "nvidia" ];
boot.initrd.availableKernelModules = [
"ahci"
"rtsx_pci_sdmmc"
"sd_mod"
"sr_mod"
"usb_storage"
"virtio_pci"
"virtio_scsi"
"xhci_pci"
];
## From: `nixos-generate-config`
hardware.cpu.intel.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
## From: NixOS install via Virt-Manager
## https://wiki.nixos.org/wiki/PCI_passthrough
boot.initrd.kernelModules = [
"vfio"
"vfio_pci"
"vfio_iommu_type1"
];
boot.kernelModules = [ "kvm-intel" ];
# Enables DHCP on each ethernet and wireless interface. In case of scripted networking
# (the default) this is the recommended approach. When using systemd-networkd it's
# still possible to use this option, but it's recommended to use it in conjunction
# with explicit per-interface declarations with `networking.interfaces.<interface>.useDHCP`.
networking.useDHCP = lib.mkDefault true;
nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux";
# https://wiki.nixos.org/wiki/CUDA#Setting_up_CUDA_Binary_Cache
nix.settings = {
substituters = [
"https://cache.nixos-cuda.org"
];
trusted-public-keys = [
"cache.nixos-cuda.org:74DUi4Ye579gUqzH4ziL9IyiJBlDpMRn9MBN8oNan9M="
];
};
}
Partially functional;
~/git/hub/hacksider/Deep-Live-Cam/flake.nix
{
  description = "Nix Flake for installing and running project in reproducible fashion";

  inputs = {
    # nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable";
  };

  outputs =
    { ... }:
    let
      system = "x86_64-linux";

      ## NOTE(review): `import <nixpkgs>` reads the channel from NIX_PATH and
      ## is what forces `nix develop --impure`; pinning the commented-out
      ## `inputs.nixpkgs` above and importing that instead would make the
      ## flake pure and reproducible.
      nixpkgs = import <nixpkgs> {
        inherit system;
        config.allowUnfree = true;
        ## WARN: enabling following without cache causes magma to rebuild opencv and other things :-|
        config.cudaSupport = true;
        ## Admit only the specific NVIDIA EULAs needed for CUDA.
        ## NOTE(review): `allowUnfree = true` above already allows everything,
        ## so this predicate is effectively redundant while both are set; keep
        ## whichever policy is intended and drop the other.
        config.allowUnfreePredicate =
          p:
          builtins.all (
            license:
            license.free
            || builtins.elem license.shortName [
              "CUDA EULA"
              "cuDNN EULA"
              "cuTENSOR EULA"
              "NVidia OptiX EULA"
            ]
          ) (if builtins.isList p.meta.license then p.meta.license else [ p.meta.license ]);
      };

      python3Packages = nixpkgs.pkgs.python3Packages;
      python3Exec = nixpkgs.lib.getExe nixpkgs.pkgs.python3;

      ## True for "x86_64-darwin" / "aarch64-darwin". The previous check
      ## compared `system` against the bare string "darwin", which no system
      ## tuple ever equals, so the darwin branches could never be selected.
      isDarwin = nixpkgs.lib.hasSuffix "darwin" system;

      ## opennsfw2 is not packaged in nixpkgs; build it from PyPI.
      opennsfw2 = python3Packages.buildPythonPackage rec {
        pname = "opennsfw2";
        version = "0.10.2";
        src = python3Packages.fetchPypi {
          inherit pname version;
          hash = "sha256-xs6gcy3A8Y52YWXAg0JXechMpqAfEWm/pdDUqgUxHk8=";
        };
        doCheck = false;
        pyproject = true;
        ## Only the build backend belongs in `build-system`; the packages
        ## opennsfw2 imports at runtime go in `dependencies` below so they
        ## are propagated into environments that consume it.
        build-system = with python3Packages; [
          setuptools
          wheel
        ];
        dependencies = with python3Packages; [
          gdown
          matplotlib
          numpy
          opencv-python
          pillow
          scikit-image
          tensorflow
          tqdm
        ];
        ## ffmpeg is a native tool, not a Python package.
        propagatedBuildInputs = with nixpkgs.pkgs; [ ffmpeg ];
      };

      ## Python packages every devShell needs, regardless of backend.
      buildInputsDefault =
        (with python3Packages; [
          numpy
          typing-extensions
          opencv4Full
          cv2-enumerate-cameras
          onnx
          onnxruntime
          insightface
          psutil
          tkinter
          customtkinter
          pillow
          opennsfw2
          protobuf
          torchvision
        ])
        ## tensorflow everywhere except darwin; torch only on darwin
        ## (the CPU/CUDA shells below add their own torch variant).
        ++ (nixpkgs.lib.optionals (!isDarwin) (with python3Packages; [ tensorflow ]))
        ++ (nixpkgs.lib.optionals isDarwin (with python3Packages; [ torch ]));
    in
    {
      devShells.${system} = {
        /**
          Run with specified execution provider
        */
        run-with = {
          /**
            ## Slow but works
            ```bash
            nix develop --impure .#run-with.cpu
            ```
          */
          cpu = nixpkgs.pkgs.mkShell {
            buildInputs = buildInputsDefault ++ (with python3Packages; [ torch ]);
            shellHook = ''
              ${python3Exec} run.py;
              exit;
            '';
          };

          /**
            ## Nvidia or Cuda is broken here
            ```bash
            nix develop --impure .#run-with.cuda
            ```
          */
          cuda = nixpkgs.pkgs.mkShell {
            ## NOTE(review): onnxruntime's CUDAExecutionProvider dlopens
            ## libcudnn/libcublas at runtime; a version mismatch between the
            ## cudnn it was built against and the driver stack surfaces as
            ## CUDNN_BACKEND_API_FAILED like the trace below -- verify the
            ## cudaPackages set used to build onnxruntime matches the host
            ## driver before digging further.
            buildInputs = buildInputsDefault;
            shellHook = ''
              ${python3Exec} run.py --execution-provider cuda;
              exit;
            '';
          };

          /**
            ## Nvidia or Cuda is broken here too
            ```bash
            nix develop --impure .#run-with.cuda-fhs
            ```
          */
          cuda-fhs =
            let
              pythonWithTkinter = nixpkgs.pkgs.python3.withPackages (
                ps: with ps; [
                  numpy
                  typing-extensions
                  cv2-enumerate-cameras # cv2_enumerate_cameras==1.1.15
                  onnx
                  onnxruntime
                  insightface
                  psutil
                  tkinter # tk==0.1.0
                  customtkinter
                  pillow
                  opennsfw2
                  protobuf
                  torchvision
                  torchWithCuda
                  tensorflow
                ]
              );
            in
            ## PYTHONPATH derives site-packages from the interpreter instead
            ## of hard-coding "python3.13", which silently breaks on every
            ## python version bump.
            (nixpkgs.pkgs.buildFHSEnv {
              name = "cuda-fhs";
              profile = ''
                export LD_LIBRARY_PATH="${nixpkgs.pkgs.linuxPackages.nvidia_x11}/lib";
                export CUDA_PATH="${nixpkgs.pkgs.cudatoolkit}";
                export EXTRA_LDFLAGS="-L/lib -L${nixpkgs.pkgs.linuxPackages.nvidia_x11}/lib";
                export EXTRA_CCFLAGS="-I/usr/include";
                export PYTHONPATH="${pythonWithTkinter}/${nixpkgs.pkgs.python3.sitePackages}:$PYTHONPATH";
              '';
              runScript = ''
                ${pythonWithTkinter}/bin/python3 run.py --execution-provider cuda;
                exit;
              '';
            }).env;
        };
      };
    };
}
Stack trace errors for:
nix develop --impure .#run-with.cuda
nix develop --impure .#run-with.cuda
[ERROR:0@10.429] global obsensor_uvc_stream_channel.cpp:163 getStreamChannelGroup Camera index out of range
[ WARN:0@22.180] global cap_gstreamer.cpp:1173 isPipelinePlaying OpenCV | GStreamer warning: GStreamer: pipeline have not been created
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'sdpa_kernel': '0', 'use_tf32': '1', 'fuse_conv_bias': '0', 'prefer_nhwc': '0', 'tunable_op_max_tuning_duration_ms': '0', 'enable_skip_layer_norm_strict_mode': '0', 'tunable_op_tuning_enable': '0', 'tunable_op_enable': '0', 'use_ep_level_unified_stream': '0', 'device_id': '0', 'has_user_compute_stream': '0', 'gpu_external_empty_cache': '0', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'cudnn_conv1d_pad_to_nc1d': '0', 'gpu_mem_limit': '18446744073709551615', 'gpu_external_alloc': '0', 'gpu_external_free': '0', 'arena_extend_strategy': 'kNextPowerOfTwo', 'do_copy_in_default_stream': '1', 'enable_cuda_graph': '0', 'user_compute_stream': '0', 'cudnn_conv_use_max_workspace': '1'}}
model ignore: /home/s0ands0/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'sdpa_kernel': '0', 'use_tf32': '1', 'fuse_conv_bias': '0', 'prefer_nhwc': '0', 'tunable_op_max_tuning_duration_ms': '0', 'enable_skip_layer_norm_strict_mode': '0', 'tunable_op_tuning_enable': '0', 'tunable_op_enable': '0', 'use_ep_level_unified_stream': '0', 'device_id': '0', 'has_user_compute_stream': '0', 'gpu_external_empty_cache': '0', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'cudnn_conv1d_pad_to_nc1d': '0', 'gpu_mem_limit': '18446744073709551615', 'gpu_external_alloc': '0', 'gpu_external_free': '0', 'arena_extend_strategy': 'kNextPowerOfTwo', 'do_copy_in_default_stream': '1', 'enable_cuda_graph': '0', 'user_compute_stream': '0', 'cudnn_conv_use_max_workspace': '1'}}
model ignore: /home/s0ands0/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'sdpa_kernel': '0', 'use_tf32': '1', 'fuse_conv_bias': '0', 'prefer_nhwc': '0', 'tunable_op_max_tuning_duration_ms': '0', 'enable_skip_layer_norm_strict_mode': '0', 'tunable_op_tuning_enable': '0', 'tunable_op_enable': '0', 'use_ep_level_unified_stream': '0', 'device_id': '0', 'has_user_compute_stream': '0', 'gpu_external_empty_cache': '0', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'cudnn_conv1d_pad_to_nc1d': '0', 'gpu_mem_limit': '18446744073709551615', 'gpu_external_alloc': '0', 'gpu_external_free': '0', 'arena_extend_strategy': 'kNextPowerOfTwo', 'do_copy_in_default_stream': '1', 'enable_cuda_graph': '0', 'user_compute_stream': '0', 'cudnn_conv_use_max_workspace': '1'}}
find model: /home/s0ands0/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'sdpa_kernel': '0', 'use_tf32': '1', 'fuse_conv_bias': '0', 'prefer_nhwc': '0', 'tunable_op_max_tuning_duration_ms': '0', 'enable_skip_layer_norm_strict_mode': '0', 'tunable_op_tuning_enable': '0', 'tunable_op_enable': '0', 'use_ep_level_unified_stream': '0', 'device_id': '0', 'has_user_compute_stream': '0', 'gpu_external_empty_cache': '0', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'cudnn_conv1d_pad_to_nc1d': '0', 'gpu_mem_limit': '18446744073709551615', 'gpu_external_alloc': '0', 'gpu_external_free': '0', 'arena_extend_strategy': 'kNextPowerOfTwo', 'do_copy_in_default_stream': '1', 'enable_cuda_graph': '0', 'user_compute_stream': '0', 'cudnn_conv_use_max_workspace': '1'}}
model ignore: /home/s0ands0/.insightface/models/buffalo_l/genderage.onnx genderage
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'sdpa_kernel': '0', 'use_tf32': '1', 'fuse_conv_bias': '0', 'prefer_nhwc': '0', 'tunable_op_max_tuning_duration_ms': '0', 'enable_skip_layer_norm_strict_mode': '0', 'tunable_op_tuning_enable': '0', 'tunable_op_enable': '0', 'use_ep_level_unified_stream': '0', 'device_id': '0', 'has_user_compute_stream': '0', 'gpu_external_empty_cache': '0', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'cudnn_conv1d_pad_to_nc1d': '0', 'gpu_mem_limit': '18446744073709551615', 'gpu_external_alloc': '0', 'gpu_external_free': '0', 'arena_extend_strategy': 'kNextPowerOfTwo', 'do_copy_in_default_stream': '1', 'enable_cuda_graph': '0', 'user_compute_stream': '0', 'cudnn_conv_use_max_workspace': '1'}}
find model: /home/s0ands0/.insightface/models/buffalo_l/w600k_r50.onnx recognition ['None', 3, 112, 112] 127.5 127.5
set det-size: (320, 320)
2026-02-18 21:25:24.309620807 [E:onnxruntime:Default, cudnn_fe_call.cc:33 CudaErrString<cudnn_frontend::error_object>] execute(handle, plan->get_raw_desc(), variant_pack_descriptor.get_ptr()) failed with message: func(handle, stream, m, n, k, static_cast<const T_IN*>(d_B), ldb, static_cast<T_OUT*>(d_C), static_cast<const T_IN*>(d_A), parms, texB, texB_offset, alphaVal, betaVal, doBiasAct, static_cast<const T_OUT*>(zData), static_cast<const T_OUT*>(bias), doRelu, propNan, dilation_h, dilation_w, outputType, launch_params), and code: CUDNN_STATUS_EXECUTION_FAILED
2026-02-18 21:25:24.309686510 [E:onnxruntime:Default, cudnn_fe_call.cc:93 CudaCall] CUDNN_FE failure 11: CUDNN_BACKEND_API_FAILED ; GPU=0 ; hostname=nixos ; file=/build/source/onnxruntime/core/providers/cuda/nn/conv.cc ; line=483 ; expr=s_.cudnn_fe_graph->execute(cudnn_handle, s_.variant_pack, ws.get());
2026-02-18 21:25:24.309706058 [E:onnxruntime:, sequential_executor.cc:572 ExecuteKernel] Non-zero status code returned while running Conv node. Name:'Conv_0' Status Message: CUDNN_FE failure 11: CUDNN_BACKEND_API_FAILED ; GPU=0 ; hostname=nixos ; file=/build/source/onnxruntime/core/providers/cuda/nn/conv.cc ; line=483 ; expr=s_.cudnn_fe_graph->execute(cudnn_handle, s_.variant_pack, ws.get());
Exception in thread Thread-2 (_processing_thread_func):
Traceback (most recent call last):
File "/nix/store/slhpx9glq7vl99bwi93bgrhn3syv98s1-python3-3.13.11/lib/python3.13/threading.py", line 1044, in _bootstrap_inner
self.run()
~~~~~~~~^^
File "/nix/store/slhpx9glq7vl99bwi93bgrhn3syv98s1-python3-3.13.11/lib/python3.13/threading.py", line 995, in run
self._target(*self._args, **self._kwargs)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/s0ands0/git/hub/hacksider/Deep-Live-Cam/wt/nix-flake/modules/ui.py", line 1015, in _processing_thread_func
source_image = get_one_face(cv2.imread(modules.globals.source_path))
File "/home/s0ands0/git/hub/hacksider/Deep-Live-Cam/wt/nix-flake/modules/face_analyser.py", line 38, in get_one_face
face = get_face_analyser().get(frame)
File "/nix/store/z1vg2l4397y5jj0ljyac9qsqml8q15nv-python3.13-insightface-0.7.3/lib/python3.13/site-packages/insightface/app/face_analysis.py", line 59, in get
bboxes, kpss = self.det_model.detect(img,
~~~~~~~~~~~~~~~~~~~~~^^^^^
max_num=max_num,
^^^^^^^^^^^^^^^^
metric='default')
^^^^^^^^^^^^^^^^^
File "/nix/store/z1vg2l4397y5jj0ljyac9qsqml8q15nv-python3.13-insightface-0.7.3/lib/python3.13/site-packages/insightface/model_zoo/retinaface.py", line 224, in detect
scores_list, bboxes_list, kpss_list = self.forward(det_img, self.det_thresh)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/nix/store/z1vg2l4397y5jj0ljyac9qsqml8q15nv-python3.13-insightface-0.7.3/lib/python3.13/site-packages/insightface/model_zoo/retinaface.py", line 152, in forward
net_outs = self.session.run(self.output_names, {self.input_name : blob})
File "/nix/store/phxz157dmv9rxqv8bcmwq2k6gxnibi35-python3.13-onnxruntime-1.23.2/lib/python3.13/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 287, in run
return self._sess.run(output_names, input_feed, run_options)
~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
onnxruntime.capi.onnxruntime_pybind11_state.Fail: [ONNXRuntimeError] : 1 : FAIL : Non-zero status code returned while running Conv node. Name:'Conv_0' Status Message: CUDNN_FE failure 11: CUDNN_BACKEND_API_FAILED ; GPU=0 ; hostname=nixos ; file=/build/source/onnxruntime/core/providers/cuda/nn/conv.cc ; line=483 ; expr=s_.cudnn_fe_graph->execute(cudnn_handle, s_.variant_pack, ws.get());
Logs of the CPU run working fine via:
nix develop --impure .#run-with.cpu
nix develop --impure .#run-with.cpu
[ERROR:0@10.370] global obsensor_uvc_stream_channel.cpp:163 getStreamChannelGroup Camera index out of range
[ WARN:0@24.603] global cap_gstreamer.cpp:1173 isPipelinePlaying OpenCV | GStreamer warning: GStreamer: pipeline have not been created
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: /home/s0ands0/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: /home/s0ands0/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/s0ands0/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: /home/s0ands0/.insightface/models/buffalo_l/genderage.onnx genderage
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/s0ands0/.insightface/models/buffalo_l/w600k_r50.onnx recognition ['None', 3, 112, 112] 127.5 127.5
set det-size: (320, 320)
[DLC.FACE-SWAPPER] Loading face swapper model from: /home/s0ands0/git/hub/hacksider/Deep-Live-Cam/wt/nix-flake/models/inswapper_128.onnx
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
inswapper-shape: [1, 3, 128, 128]
[DLC.FACE-SWAPPER] Face swapper model loaded successfully.
Kernel Module and DMesg logs and device metadata
The module appears to be loaded:
lsmod | grep nvidia
lsmod | grep nvidia
nvidia_drm 143360 0
nvidia_modeset 1933312 1 nvidia_drm
nvidia_uvm 3858432 0
nvidia 111611904 3 nvidia_uvm,nvidia_drm,nvidia_modeset
drm_ttm_helper 20480 1 nvidia_drm
video 81920 4 dell_wmi,dell_laptop,i915,nvidia_modeset
And `dmesg` doesn’t show many concerning entries regarding the GPU:
sudo dmesg | grep -iE 'gpu|graphics|nvidia'
```
[ 0.000000] Command line: initrd=\EFI\nixos\m3hmc5rvpdjp37m051dhypsmz30clh4a-initrd-linux-6.18.8-initrd.efi init=/nix/store/7x2bsmz2mms74vrfcnqvdgh90k1zc08a-nixos-system-nixos-26.05.20260204.00c21e4/init i915.enable_guc=2 fsck.mode=force fsck.repair=preen intel_iommu=on iommu=pt loglevel=4 lsm=landlock,yama,bpf nvidia-drm.modeset=1 nvidia-drm.fbdev=1
[ 0.059658] Reserving Intel graphics memory at [mem 0x79000000-0x7cffffff]
[ 0.067512] Kernel command line: initrd=\EFI\nixos\m3hmc5rvpdjp37m051dhypsmz30clh4a-initrd-linux-6.18.8-initrd.efi init=/nix/store/7x2bsmz2mms74vrfcnqvdgh90k1zc08a-nixos-system-nixos-26.05.20260204.00c21e4/init i915.enable_guc=2 fsck.mode=force fsck.repair=preen intel_iommu=on iommu=pt loglevel=4 lsm=landlock,yama,bpf nvidia-drm.modeset=1 nvidia-drm.fbdev=1
[ 1.565082] stage-1-init: [Fri Feb 20 16:50:07 UTC 2026] loading module virtio_gpu...
[ 13.287757] RAPL PMU: hw unit of domain pp1-gpu 2^-14 Joules
[ 13.444790] nvidia: module license 'NVIDIA' taints kernel.
[ 13.444803] nvidia: module license taints kernel.
[ 14.075989] nvidia-nvlink: Nvlink Core is being initialized, major device number 511
[ 14.085347] nvidia 0000:02:00.0: enabling device (0006 -> 0007)
[ 14.312659] NVRM: loading NVIDIA UNIX x86_64 Kernel Module 580.126.09 Wed Jan 7 22:59:56 UTC 2026
[ 14.546130] nvidia_uvm: module uses symbols nvUvmInterfaceDisableAccessCntr from proprietary module nvidia, inheriting taint.
[ 14.803094] nvidia-modeset: Loading NVIDIA Kernel Mode Setting Driver for UNIX platforms 580.126.09 Wed Jan 7 22:32:52 UTC 2026
[ 14.821338] [drm] [nvidia-drm] [GPU ID 0x00000200] Loading driver
[ 15.051255] [drm] Initialized nvidia-drm 0.0.0 for 0000:02:00.0 on minor 0
[ 15.051273] nvidia 0000:02:00.0: [drm] No compatible format found
[ 15.051276] nvidia 0000:02:00.0: [drm] Cannot find any crtc or sizes
```
sudo dmesg | grep -iE 'gpu|graphics|nvidia'
Output of;
nix-info -m
nix-info -m
- system: `"x86_64-linux"`
- host os: `Linux 6.18.8, NixOS, 26.05 (Yarara), 26.05.20260204.00c21e4`
- multi-user?: `yes`
- sandbox: `yes`
- version: `nix-env (Nix) 2.31.3`
- nixpkgs: `/nix/store/ih9vmk2a3mrk6vhmibqzji6kjc6parzp-source`
Attributions
- GitHub - hacksider/Deep-Live-Cam: real time face swap and one-click video deepfake with only a single image
- Python - Official NixOS Wiki
- opennsfw2 · PyPI
- Allow unfree in flakes
- Packaging/Python - Official NixOS Wiki
- Pytorch with cuda support
- CUDA Cache for Nix Community
- https://unix.stackexchange.com/questions/529047/is-there-a-way-to-have-hibernate-and-encrypted-swap-on-nixos
- OpenCV - Official NixOS Wiki
- How to give opencv dependency to python package
- opencv-python · PyPI
- OpenCV with Cuda in nix-shell
- Massive rebuild of packages that use CUDA despite cuda-maintainers and nix-community caches
- CUDA - Official NixOS Wiki
- CUDA setup on NixOS
- CUDA working with poetry2nix - #8 by DavidRConnell
- PoC of JAX, PyQt, numpy, tensorboard etc. in nix · GitHub
- Painlessly setting up ML tooling on NixOS - lavafroth
- Python: Cannot import `tkinter` · Issue #238990 · NixOS/nixpkgs · GitHub