Can I get a working shell.nix for pytorch in 24.11?

I used to use the one below:

# Development shell for a Python 3.11 + PyTorch project.
# nixpkgs is imported with unfree packages allowed and CUDA support enabled,
# and its attributes are brought into scope via `with`.
with import <nixpkgs> {
  config = { 
    allowUnfree = true;
    cudaSupport = true;
  };  
};

let
  # Python package set used for every Python dependency below (Python 3.11).
  pythonPackages = python311Packages;
in pkgs.mkShell rec {
  name = "impurePythonEnv";
  # Location of the virtualenv used by venvShellHook.
  venvDir = "./.venv";
  buildInputs = [ 
    # Interpreter belonging to the chosen package set.
    pythonPackages.python

    # Shell hook that manages the virtualenv at venvDir; it also drives the
    # postVenvCreation / postShellHook attributes defined further down.
    pythonPackages.venvShellHook

    # Python libraries taken from nixpkgs rather than from pip.
    pythonPackages.fastparquet
    pythonPackages.numpy
    pythonPackages.pandas
    pythonPackages.pyarrow
    pythonPackages.requests
    # PyTorch and companions, using the `-bin` package variants.
    # NOTE(review): the build log reported with this file shows triton 3.1.0
    # failing in its fixup phase as a dependency of torch 2.5.1 — confirm
    # whether that is triggered by the nixpkgs config at the top.
    pythonPackages.torch-bin
    pythonPackages.torchvision-bin
    pythonPackages.torchaudio-bin

    # CUDA toolkit (CUDA 12 package set) plus non-Python tools and libraries.
    # presumably these are needed by packages in requirements.txt — verify.
    cudaPackages_12.cudatoolkit
    taglib
    openssl
    git 
    libxml2
    libxslt
    libzip
    zlib
  ];  
  # Hook for virtualenv creation: clears SOURCE_DATE_EPOCH and installs the
  # project's requirements.txt with pip.
  postVenvCreation = ''
    unset SOURCE_DATE_EPOCH
    pip install -r requirements.txt
  '';
  # Hook for shell entry: clears SOURCE_DATE_EPOCH, sets a coloured prompt, and
  # sets LD_LIBRARY_PATH to the lib directory of the stdenv C compiler's `lib`
  # output. The export assigns rather than appends, so any LD_LIBRARY_PATH
  # value already present in the environment is replaced.
  postShellHook = ''
    # allow pip to install wheels
    unset SOURCE_DATE_EPOCH
    export PS1="\[\033[01;36m\]\u@\h\[\033[01;35m\] \w \$\[\033[00m\] "
    export LD_LIBRARY_PATH=${pkgs.stdenv.cc.cc.lib}/lib/
  '';
}

but now it errors out with:

chmod: cannot access '/nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0/lib/python3.11/site-packages/triton/third_party/cuda/bin/ptxas': No such file or directory
error: builder for '/nix/store/h3927cy68vq5jcy1p37zd9kki0phb8da-python3.11-triton-3.1.0.drv' failed with exit code 1;
       last 25 log lines:
       > buildPhase completed in 44 seconds
       > Running phase: installPhase
       > Executing pypaInstallPhase
       > Successfully installed triton-3.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
       > Finished executing pypaInstallPhase
       > Running phase: pythonOutputDistPhase
       > Executing pythonOutputDistPhase
       > Finished executing pythonOutputDistPhase
       > Running phase: fixupPhase
       > shrinking RPATHs of ELF executables and libraries in /nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0
       > shrinking /nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0/lib/python3.11/site-packages/triton/backends/nvidia/bin/ptxas
       > shrinking /nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0/lib/python3.11/site-packages/triton/backends/nvidia/bin/nvdisasm
       > shrinking /nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0/lib/python3.11/site-packages/triton/backends/nvidia/bin/cuobjdump
       > shrinking /nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0/lib/python3.11/site-packages/triton/_C/libproton.so
       > shrinking /nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0/lib/python3.11/site-packages/triton/_C/libtriton.so
       > checking for references to /build/ in /nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0...
       > patching script interpreter paths in /nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0
       > shrinking RPATHs of ELF executables and libraries in /nix/store/ly81aagjs271sm0642ks13kzfsjf3hpw-python3.11-triton-3.1.0-dist
       > checking for references to /build/ in /nix/store/ly81aagjs271sm0642ks13kzfsjf3hpw-python3.11-triton-3.1.0-dist...
       > patching script interpreter paths in /nix/store/ly81aagjs271sm0642ks13kzfsjf3hpw-python3.11-triton-3.1.0-dist
       > Rewriting #!/nix/store/nmqxyr00in2arwrq5qd1qipsanz1yrn5-python3-3.11.10/bin/python3.11 to #!/nix/store/nmqxyr00in2arwrq5qd1qipsanz1yrn5-python3-3.11.10
       > wrapping `/nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0/bin/proton'...
       > Rewriting #!/nix/store/nmqxyr00in2arwrq5qd1qipsanz1yrn5-python3-3.11.10/bin/python3.11 to #!/nix/store/nmqxyr00in2arwrq5qd1qipsanz1yrn5-python3-3.11.10
       > wrapping `/nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0/bin/proton-viewer'...
       > chmod: cannot access '/nix/store/q0wagq5h61wxp3d8slkscbppkzyfpl6w-python3.11-triton-3.1.0/lib/python3.11/site-packages/triton/third_party/cuda/bin/ptxas': No such file or directory
       For full logs, run 'nix log /nix/store/h3927cy68vq5jcy1p37zd9kki0phb8da-python3.11-triton-3.1.0.drv'.
error (ignored): error: cannot unlink '"/tmp/nix-build-nccl-2.21.5-1.drv-2/build"': Directory not empty
error: 1 dependencies of derivation '/nix/store/jsx22sm1dxifbc4d31a4whba5x2c3r43-python3.11-torch-2.5.1.drv' failed to build
error: build of '/nix/store/27v70531jngk30zl1ky8jswk253c5w9a-python3.11-torchaudio-2.5.1.drv', '/nix/store/jsx22sm1dxifbc4d31a4whba5x2c3r43-python3.11-torch-2.5.1.drv', '/nix/store/lhm9dfqx1xy5p1rngfsjj9742iqhr7fk-python3.11-torchvision-0.20.1.drv' failed

In my experience, these things only work in an FHSUserEnv. You can find a similar answer of mine here, with a link to an example: ImportError: libstdc++.so.6: cannot open shared object file: No such file or directory - #8 by turbotimon

Found the issue on GitHub.
It seems torch-bin depends on triton-bin, which fails to build when unfree packages are allowed (`allowUnfree = true`).

1 Like

I'd suggest we change torch-bin to use `if tritonBroken then triton-bin else triton` instead of `triton-bin`. We already do the opposite of that for things that depend on tensorflow.