Minimal CUDA-capable flake

I’m building a Nix shell environment with CUDA access. Currently I have this:

# Flake providing a development shell with the CUDA toolkit, the NVIDIA
# user-space driver libraries and nvtop.
{
  description = "Reproducible Computing";

  inputs = {
    nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable";
    flake-parts.url = "github:hercules-ci/flake-parts";
  };

  outputs = inputs @ {
    self,
    nixpkgs,
    flake-parts,
    ...
  } : flake-parts.lib.mkFlake { inherit inputs; } {
    # No system-independent outputs.
    flake = {
    };
    # NOTE(review): the shell below pulls in pkgs.linuxPackages.nvidia_x11,
    # which is presumably unavailable on darwin -- confirm whether
    # "x86_64-darwin" should stay listed.
    systems = [
      "x86_64-linux"
      "x86_64-darwin"
    ];
    perSystem = { system, ... }: let
      # Re-import nixpkgs so unfree packages (CUDA, the NVIDIA driver) are
      # allowed and packages are built with CUDA support enabled.
      pkgs = import nixpkgs {
        inherit system;
        config = {
          allowUnfree = true;
          cudaSupport = true;
        };
      };
      inherit (pkgs.cudaPackages) cudatoolkit cuda_cccl;
      # NOTE(review): this is the driver version pinned by nixpkgs, not the
      # one loaded on the host. If the two differ, the user-space libraries
      # may be unable to talk to the kernel module -- verify against the host.
      inherit (pkgs.linuxPackages) nvidia_x11;
    in {
      devShells = {
        default = pkgs.mkShell {
          buildInputs = [ pkgs.nvtopPackages.full nvidia_x11 cudatoolkit cuda_cccl.dev ];
          nativeBuildInputs = [ cudatoolkit ];
          # Exports the toolkit location, puts the toolkit and driver
          # libraries on the loader path, and sets extra linker/compiler
          # flags. The flags use -L (library search directory) and
          # -I (include search directory); the lowercase forms -l / -i
          # mean something else and do not add search paths.
          shellHook = ''
            export CUDA_PATH=${cudatoolkit}
            export LD_LIBRARY_PATH=${cudatoolkit}/lib:${nvidia_x11}/lib:$LD_LIBRARY_PATH
            export EXTRA_LDFLAGS="-L/lib -L${nvidia_x11}/lib"
            export EXTRA_CCFLAGS="-I/usr/include"
          '';
        };
      };
    };
  };
}

Outside the devshell, the nvtop installed on the host operating system detects both the AMD and NVIDIA cards, but inside this devshell, nvtop detects only the AMD cards. What can I do to fix this issue?

Edit: cudatoolkit also doesn’t include nvidia-smi, so I use nvtop to test whether the NVIDIA GPU is detected.